%
% feedback directed compilation bibliography
% compiled by rich vuduc, february 2000
% last update: april 17, 2000
%
% $Revision$
%
% Layout: one entry per block, one field per line; the annotation for an
% entry follows it as plain text outside the entry, introduced by its key.

@Article{Knuth71:empirical,
  author  = {Knuth, Donald E.},
  title   = {An empirical study of {FORTRAN} programs},
  journal = {Software---Practice \& Experience},
  volume  = {1},
  number  = {2},
  month   = apr # "--" # jun,
  year    = {1971},
  pages   = {105--133}
}

% Knuth71:empirical
% The paper that started it all, by everyone's favorite computer scientist.
% Contains a survey of FORTRAN code, where the author looks in detail at
% programmer styles and possible compiler optimizations that would be
% effective on them. By the end, you are thoroughly convinced that programs
% ought to be profiled.
%
% While I don't recommend anyone actually ``read'' this (lots of
% painful-looking FORTRAN code), it is fascinating to look at! I have a
% hard-copy if you are interested.
%
% Question: Do similar analyses exist for other languages?
%

@InProceedings{Chang88:tracesel,
  author    = {Chang, P. P. and Hwu, W. W.},
  title     = {Trace selection for compiling large {C} application programs to microcode},
  booktitle = {Proceedings of the 21st Annual Workshop on Microprogramming and Microarchitecture},
  month     = nov # "--" # dec,
  year      = {1988},
  address   = {San Diego, CA},
  pages     = {21--29}
}

% Chang88:tracesel
% The authors describe a technique for selecting ``most frequently
% executed paths'' from a weighted control flow graph (where weights
% correspond to execution frequencies).

@Article{Chang91:profile,
  author  = {Chang, P. P. and Mahlke, S. A. and Hwu, W. W.},
  title   = {Using profile information to assist classic code optimizations},
  journal = {Software---Practice \& Experience},
  volume  = {21},
  number  = {12},
  month   = dec,
  pages   = {1301--1321},
  year    = {1991}
}

% Chang91:profile
% Explains how to use basic block frequency counts to apply
% the following classic compiler optimizations: constant
% propagation, copy propagation, constant combining,
% common subexpression elimination, redundant load/store
% elimination, dead code removal, loop invariant code
% removal, loop induction variable elimination, global
% variable migration. The basic idea is to annotate the
% control flow graph with the frequency counts, identify
% the frequent paths, modify the graph to separate the
% frequent and infrequent paths, and then perform the
% classic optimizations across the basic blocks in the
% frequent paths. This work was part of the IMPACT
% compiler project at Illinois.
%

% NOTE(review): W. W. Hwu added as fourth author to match the published
% article; confirm against the journal's table of contents.
@Article{Chang92:inline,
  author  = {Chang, P. P. and Mahlke, S. A. and Chen, W. Y. and Hwu, W. W.},
  title   = {Profile-guided automatic inline expansion for {C} programs},
  journal = {Software---Practice \& Experience},
  volume  = {22},
  number  = {5},
  pages   = {349--369},
  year    = {1992}
}

% Chang92:inline
% The authors consider an extension of the profiling compiler
% described in Chang91:profile that results in a fully
% automatic function inliner for C programs. The profile data
% is used to decide when to inline (e.g., may inline a function
% along one execution path but not necessarily another).
%

@Article{Chen94:profile,
  author  = {Chen, W. Y. and Mahlke, S. A. and Warter, N. J. and Anik, S. and Hwu, W. W.},
  title   = {Profile-assisted instruction scheduling},
  journal = {International Journal of Parallel Programming},
  volume  = {22},
  number  = {2},
  month   = apr,
  year    = {1994},
  pages   = {151--181}
}

% Chen94:profile
% Describes how profiling data can be used to assist code motion
% between basic blocks, and reordering of load/store instructions.
% In the former case, frequency counts are used; moving instructions
% among the basic blocks allows the compiler to find better schedules.
% In the latter case, the frequency of accessing certain addresses
% is used; if memory addresses among certain instructions rarely
% or never overlap, then they are considered to be "safe" to reorder
% provided that the appropriate clean-up/fix-it code is provided for
% the cases when this is not true. The idea is that the clean-up code
% will not be executed frequently. In addition, they describe the
% improvements in instruction cache behavior that are achievable.
% This work was done as a part of the IMPACT compiler project.
%

@InProceedings{Wall91:profile,
  author    = {Wall, David W.},
  title     = {Predicting Program Behavior Using Real or Estimated Profiles},
  booktitle = {Proceedings of the ACM SIGPLAN '91 Conference on Programming Language Design and Implementation},
  address   = {Toronto, Ontario, Canada},
  month     = jun,
  year      = {1991}
}

% Wall91:profile
% This paper is a cautionary note on the use of profile data.
% The concern is that profile data collected on one set of
% inputs may not be representative of
% behavior on all inputs.
% Wall proposes two ways to measure how well a profile estimates
% actual program behavior: key matching vs. weight matching.
% To Do: understand the differences between these two metrics.
%

@InProceedings{Chow99:FFD,
  author    = {Chow, Kingsum and Wu, Youfeng},
  title     = {Feedback-Directed Selection and Characterization of Compiler Optimizations},
  booktitle = {Proceedings of the 32nd Annual International Symposium on Microarchitecture, Second Workshop on Feedback-Directed Optimization},
  address   = {Haifa, Israel},
  month     = nov,
  year      = {1999}
}

% Chow99:FFD
% An interesting paper that tackles the problem of how to determine
% the effects of different compiler optimizations when the optimizations
% interact and there is a combinatorial explosion.
% For example, if you
% have 5 on/off compiler switches, then there are 2^5 combinations of
% those switches---how do you distinguish the effects of the different
% combinations? The main technique proposed is an apparently well-known
% technique in experimental design called "fractional factorial design."
% The idea is to pick a subset of the possible combinations to test,
% which still leaves ambiguous the effect of other combinations of flags,
% and then use feedback to determine which to further investigate.
%
% To Do: Read and find out more about fractional factorial design.
%

@InProceedings{Barnes99:FFD,
  author    = {Barnes, Ron},
  title     = {Feedback-Directed Data Cache Optimizations for the {x86}},
  booktitle = {Proceedings of the 32nd Annual International Symposium on Microarchitecture, Second Workshop on Feedback-Directed Optimization},
  address   = {Haifa, Israel},
  month     = nov,
  year      = {1999}
}

% Barnes99:FFD
% Describes how the author used profiling data to tune the use of
% prefetch instructions.
% To Do: Read
%

@Article{Calder99:valprof,
  author  = {Calder, Brad and Feller, Peter and Eustace, Alan},
  title   = {Value Profiling and Optimization},
  journal = {Journal of Instruction-Level Parallelism},
  volume  = {1},
  number  = {6},
  year    = {1999}
}

% Calder99:valprof
% The authors examine the use of value profiling (i.e., recording
% the value of variables during the program run) to enable the
% use of value optimizations (like constant propagation, code
% specialization, partial evaluation, etc.) that are not known
% during compile-time analyses.
% To Do:
%

@InProceedings{Burtscher98:valuepred,
  author    = {Burtscher, Martin and Zorn, Benjamin G.},
  title     = {Profile-Supported Confidence Estimation for Load-Value-Prediction},
  booktitle = {Proceedings of International Conference on Parallel Architectures and Compilation Techniques},
  address   = {Paris, France},
  month     = oct,
  year      = {1998}
}

% Burtscher98:valuepred
% To Do: Get this paper
%

@InProceedings{Ammons97:hwcounter,
  author    = {Ammons, Glenn and Ball, Thomas and Larus, James R.},
  title     = {Exploiting Hardware Performance Counters with Flow and Context Sensitive Profiling},
  booktitle = {Proceedings of the ACM SIGPLAN '97 Conference on Programming Language Design and Implementation},
  address   = {Las Vegas, NV},
  month     = jun,
  year      = {1997}
}

% Ammons97:hwcounter
% Explores the use of hardware counters during profiling instead
% of just cycles/execution times. Specifically, the authors are
% interested in attaching this extra data to their previously
% proposed use of "path profiles."
%

@InProceedings{Ammons98:dfapath,
  author    = {Ammons, Glenn and Larus, James R.},
  title     = {Improving Data-flow Analysis with Path Profiles},
  booktitle = {Proceedings of the ACM SIGPLAN '98 Conference on Programming Language Design and Implementation},
  address   = {Montreal, Canada},
  month     = jun,
  year      = {1998}
}

% Ammons98:dfapath
% Shows how to extend the Wegman-Zadeck data flow analysis for
% constant propagation to use profiling information. The authors
% use the work of Ball96:pathprof to identify ``hot paths,''
% transform an existing CFG into another in which the hot paths
% have been duplicated, perform the data flow analysis, and patch
% things up. This paper is heavy on details and theory (using the
% lattice framework), but well worth the read and time to
% understand!
%

% NOTE(review): venue changed from PLDI '92 to POPL '92, which is the
% conference matching the Albuquerque / January 1992 data already recorded
% in this entry; confirm against the proceedings.
@InProceedings{Ball92:optpath,
  author    = {Ball, Thomas and Larus, James R.},
  title     = {Optimally profiling and tracing programs},
  booktitle = {Conference Record of the 19th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},
  address   = {Albuquerque, NM},
  month     = jan,
  year      = {1992}
}

% Ball92:optpath
% Discusses in detail an algorithm for determining an optimal (i.e.,
% minimal) number of insertion points at which to insert probes so as
% to completely determine the edge/basic-block frequencies of an
% arbitrary control flow graph.
%

@InProceedings{Ball96:pathprof,
  author    = {Ball, Thomas and Larus, James R.},
  title     = {Efficient Path Profiling},
  booktitle = {Proceedings of the 29th Annual IEEE/ACM International Symposium on Microarchitecture},
  pages     = {46--57},
  address   = {Paris, France},
  month     = dec,
  year      = {1996}
}

% Ball96:pathprof
% The authors point out that traditional basic block and edge profiling
% can only approximate the frequencies of actual path executions and
% that it is generally perceived as being ok because recording all
% possible program paths leads to a combinatorial explosion of
% information to track. The authors propose a scheme for overcoming
% this problem, enabling efficient tracking of paths. They suggest
% that path profiles now enable better feedback-directed optimization
% opportunities because you can also associate other hardware counter
% info with paths. This is a simple and elegant scheme!
%

% The report identifier lives in the number field; type is left unset so
% styles print their default "Technical Report" label in front of it.
@TechReport{Ball99:progpath,
  author      = {Ball, Thomas and Larus, James R.},
  title       = {Programs Follow Paths},
  institution = {Microsoft Research},
  number      = {MSR-TR-99-01},
  address     = {Redmond, WA},
  month       = jan,
  year        = {1999}
}

% Ball99:progpath
% Motivation for analyzing paths and looking at so-called ``path-spectra.''
% You might have called this ``The Case for Program Path Analysis.''
% This is a nice overview of what people have done with paths to improve
% performance, debugging, and testing (e.g., helping to find Y2K bugs via
% a concept of ``path spectra''), and is somewhat visionary in the scope
% it suggests. Good, quick read to get an overview.
%
% (Path spectra: keep track of short paths, execute a program on some
% inputs and see what paths are executed. Then run on a different set
% of inputs and see what different paths are executed. A simple but neat
% application!)
%

@InProceedings{Chen91:prefetch,
  author    = {Chen, William Y. and Mahlke, Scott A. and Chang, Pohua P. and Hwu, Wen-mei W.},
  title     = {Data Access Microarchitectures for Superscalar Processors with Compiler-Assisted Data Prefetching},
  booktitle = {Proceedings of the 24th International Symposium on Microarchitecture},
  month     = nov,
  year      = {1991}
}

% Chen91:prefetch
% To Do: Get this article
%

@InProceedings{Larus99:wpp,
  author    = {Larus, James R.},
  title     = {Whole Program Paths},
  booktitle = {Proceedings of the ACM SIGPLAN '99 Conference on Programming Language Design and Implementation},
  address   = {Atlanta, GA},
  month     = may,
  year      = {1999}
}

% Larus99:wpp
% The author proposes the concept of a "whole program path," which
% he claims is a complete, compact record of a program's control
% flow. He contrasts WPPs with traditional paths which are
% acyclic sequences of basic blocks that are limited to loop and
% procedure boundaries. WPPs promise to allow global optimizations.
% To Do: Read (read efficient path profiling paper first)
%

% The report identifier lives in the number field; type is left unset so
% styles print their default "Technical Report" label in front of it.
@TechReport{Aigner95:cppinline,
  author      = {Aigner, Gerald and H{\"o}lzle, Urs},
  title       = {Eliminating Virtual Function Calls in {C++} Programs},
  institution = {University of California at Santa Barbara, CS Department},
  number      = {TRCS 95-22},
  address     = {Santa Barbara, CA},
  month       = dec,
  year        = {1995}
}

% Aigner95:cppinline
% The authors describe the use of profiling to estimate the frequency
% of virtual function calls, and perform in-lining on frequently
% executed functions. Analysis cannot discover opportunities to do
% this.
% To Do: Read

% The SIGPLAN Notices issue is carried in the note field: the standard
% styles reject an InProceedings entry that sets both volume and number.
@InProceedings{Graham82:gprof,
  author    = {Graham, S. L. and Kessler, P. B. and McKusick, M. K.},
  title     = {{gprof}: A Call Graph Execution Profiler},
  booktitle = {Proceedings of the SIGPLAN '82 Symposium on Compiler Construction},
  pages     = {120--126},
  month     = jun,
  year      = {1982},
  note      = {Published as SIGPLAN Notices 17(6)}
}

% Graham82:gprof
% Discusses efficient profiling of the program call graph. Includes a
% good summary of the issues involved in profiling. Overhead of their
% scheme is relatively low (<=~ 30%). Basic idea is to insert calls to
% a monitor routine which examines the stack to determine the caller;
% using this is an easy way to get histogram frequencies. The authors
% also build a call graph by tracking caller/callee pairs, again in
% the monitoring routine and via a stack trace. The scheme collects
% execution counts, times, and uses a call graph to report these
% hierarchically.
%

@InProceedings{Gupta98:profparred,
  author    = {Gupta, R. and Berson, D. and Fang, J. Z.},
  title     = {Path Profile Guided Partial Redundancy Elimination Using Speculation},
  booktitle = {Proceedings of the IEEE International Conference on Computer Languages},
  pages     = {230--239},
  address   = {Chicago, Illinois},
  month     = may,
  year      = {1998}
}

% Gupta98:profparred
% This, and the other Gupta papers (below) seem important and worth
% reading.
% However, they all assume a fairly detailed knowledge of
% instruction scheduling and compilation for VLIW (which is their
% primary target), and due to time constraints I did not read in
% depth. (They all seem to lack sufficient empirical data to make
% definitive conclusions, anyway.)
%
% To Do: Read!

@InProceedings{Gupta97:profdfa,
  author    = {Gupta, R. and Berson, D. and Fang, J. Z.},
  title     = {Resource-Sensitive Profile-Directed Data Flow Analysis for Code Optimization},
  booktitle = {Proceedings of the 30th Annual IEEE/ACM International Symposium on Microarchitecture},
  address   = {Research Triangle Park, NC},
  month     = dec,
  year      = {1997},
  pages     = {558--568}
}

% Gupta97:profdfa
% To Do: read!

@InProceedings{Gupta97:profdcelim,
  author    = {Gupta, R. and Berson, D. and Fang, J. Z.},
  title     = {Path Profile Guided Partial Dead Code Elimination},
  booktitle = {Proceedings of International Conference on Parallel Architectures and Compilation Techniques},
  pages     = {102--115},
  address   = {San Francisco, CA},
  month     = nov,
  year      = {1997}
}

% Gupta97:profdcelim
% To Do: read!

@InProceedings{Hwu89:branches,
  author    = {Hwu, W. W. and Conte, T. M. and Chang, P. P.},
  title     = {Comparing Software and Hardware Schemes for Reducing the Cost of Branches},
  booktitle = {Proceedings of the 16th Annual International Symposium on Computer Architecture},
  address   = {Jerusalem, Israel},
  month     = may,
  year      = {1989}
}

% Hwu89:branches
% To Do: Read
%
%
% $Log$
%
% eof
%