@Misc{wwn, author = {{American Media, Inc.}}, title = {Weekly World News}, howpublished = {Possibly defunct}, url = {http://weeklyworldnews.com/}, } @Book{IEEE754, author = "{IEEE Standards Committee 754}", title = "{IEEE} Standard for binary floating-point arithmetic, {ANSI}/{IEEE} {S}tandard 754-1985", year = "1985", publisher = "Institute of Electrical and Electronics Engineers, New York", note = "Reprinted in {SIGPLAN} {N}otices, 22(2):9-25, 1987", url = {http://grouper.ieee.org/groups/754/}, } @Book{IEEE754r, author = "{IEEE Standards Committee 754r}", title = "{IEEE} Standard for binary floating-point arithmetic, {ANSI}/{IEEE} {S}tandard 754r-2008", year = "2008", publisher = "Institute of Electrical and Electronics Engineers, New York", url = {http://grouper.ieee.org/groups/754/}, } @ARTICLE{newBLAS, AUTHOR = {Blackford, L. S. and Demmel, James W. and Dongarra, Jack J. and Duff, Iain S. and Hammarling, Sven and Henry, Greg and Heroux, M. and Kaufman, L. and Lumsdaine, A. and Petitet, A. and Pozo, R. and Remington, K. and Whaley, R. Clint}, TITLE = {An Updated Set of {B}asic {L}inear {A}lgebra {S}ubroutines ({BLAS})}, JOURNAL = {ACM Transactions on Mathematical Software}, VOLUME = {28}, NUMBER = {2}, MONTH = {June}, YEAR = {2002}, doi = {10.1145/567806.567807} } @article{xblas:toms, author = {Xiaoye S. Li and James W. Demmel and David H. Bailey and Greg Henry and Yozo Hida and Jimmy Iskandar and William Kahan and Suh Y. Kang and Anil Kapur and Michael C. Martin and Brandon J. Thompson and Teresa Tung and Daniel J. Yoo}, title = {Design, implementation and testing of extended and mixed precision BLAS}, journal = {ACM Trans. Math. Softw.}, volume = {28}, number = {2}, year = {2002}, issn = {0098-3500}, pages = {152--205}, doi = {10.1145/567806.567808}, publisher = {ACM}, address = {New York, NY, USA}, } @TechReport{xblas:lawn, author = {Xiaoye S. Li and James W. Demmel and David H. Bailey and Greg Henry and Yozo Hida and Jimmy Iskandar and William Kahan and Suh Y. Kang and Anil Kapur and Michael C. Martin and Brandon J. Thompson and Teresa Tung and Daniel J. Yoo}, title = {Design, implementation and testing of extended and mixed precision BLAS}, year = 2000, month = oct, type = "LAPACK Working Note", note = {Software available through \url{http://crd.lbl.gov/~xiaoye/XBLAS}}, URL = "http://www.netlib.org/lapack/lawnspdf/lawn149.pdf", institution = {Netlib}, number = 149, } % This file was created with JabRef 2.3.1. % Encoding: UTF8 @Article{Almohamad:1993, author = "H. A. Almohamad and S. O. Duffuaa", title = "A Linear Programming Approach for the Weighted Graph Matching Problem", journal = "IEEE Trans. Pattern Anal. Mach. Intell", year = 1993, volume = 15, pages = "522--525", abstract = "A linear programming (LP) approach is proposed for the weighted graph matching problem. A linear program is obtained by formulating the graph matching problem in $L_1$ norm and then transforming the resulting quadratic optimization problem to a linear one. The linear program is solved using a simplex-based algorithm. Then, approximate 0-1 integer solutions are obtained by applying the Hungarian method on the real solutions of the linear program. The complexity of the proposed algorithm is polynomial time, and it is $O(n^6 L)$ for matching graphs of size n. The developed algorithm is compared to two other algorithms. One is based on an eigendecomposition approach and the other on a symmetric polynomial transform. Experimental results showed that the LP approach is superior in matching graphs than both other methods.", } @Article{Alt:1991, author = "H. Alt and N. Blum and Kurt Mehlhorn and M. Paul", title = "Computing a maximum cardinality matching in a bipartite graph in time ${O}(n^{1.5} m / \log(n))$", journal = "Information Processing Letters", year = 1991, volume = 37, pages = "237--240", } @Article{Amodio:1999, author = "P. Amodio and F. Mazzia", title = "A New Approach to Backward Error Analysis of $LU$ Factorization", journal = "BIT Numerical Mathematics", year = "1999", volume = "39", pages = "385--402", number = "3", month = sep, abstract = "A new backward error analysis of $LU$ factorization is presented. It allows to obtain a sharper upper bound for the forward error and a new definition of the growth factor that we compare with the well known Wilkinson growth factor for some classes of matrices. Numerical experiments show that the new growth factor is often of order approximately $\log_2 n$ whereas Wilkinson's growth factor is of order n or $\sqrt n$.", doi = "10.1023/A:1022358300517", } @Misc{Anderson:1991, author = "Edward Anderson", title = "Robust triangular solves for use in condition estimation", year = 1991, type = "LAPACK Working Note", URL = "http://www.netlib.org/lapack/lawnspdf/lawn36.pdf", } @InProceedings{Anderson:1992, author = "Richard J. Anderson and João C. Setubal", title = "On the parallel implementation of Goldberg's maximum flow algorithm", crossref = "spaa14", pages = "168--177", doi = "10.1145/140901.140919", } @Book{Anderson:1999, title = "{LAPACK} Users' guide", publisher = "Society for Industrial and Applied Mathematics", address = "Philadelphia, PA, USA", year = 1999, author = "Edward Anderson and Zhaojun Bai and Christian H. Bischof and L. S. Blackford and James W. Demmel and Jack J. Dongarra and Jeremy Du Croz and Sven Hammarling and Anne Greenbaum and A. McKenney and Danny C. Sorensen", pages = 430, edition = {third}, ISBN = "0-89871-447-8", URL = "http://www.netlib.org/lapack/lug/", } @Article{Arioli:1989, author = "Mario Arioli and James W. Demmel and Iain S. Duff", title = "Solving Sparse Linear Systems with Sparse Backward Error", journal = "SIAM Journal on Matrix Analysis and Applications", year = "1989", volume = "10", pages = "165--190", month = apr, doi = "10.1137/0610013", } @InProceedings{Arora:1996, author = "Sanjeev Arora and Alan Frieze and Haim Kaplan", title = "A new rounding procedure for the assignment problem with applications to dense graph arrangement problems", crossref = "focs37", pages = 21, abstract = "We present a randomized procedure for rounding fractional perfect matchings to (integral) matchings. If the original fractional matching satisfies any linear inequality, then with high probability, the new matching satisfies that linear inequality in an approximate sense. This extends the well-known LP rounding procedure of Raghavan and Thompson (1987), which is usually used to round fractional solutions of linear programs. It also solves an open problem of Luby and Nisan (1993) (``Design an NC procedure for converting near-optimum fractional matchings to near-optimum matchings.'') We use the rounding procedure to design $n^{\log n / \epsilon^2}$ time algorithms for the following: (i) an additive approximation to the 0-1 Quadratic Assignment problem (QAP); (ii) a $(1+\epsilon)$-approximation for ``dense'' instances of many well-known NP-hard problems, including (an optimization formulation of) GRAPH-ISOMORPHISM, MIN-CUT-LINEAR-ARRANGEMENT, MAX-ACYCLIC-SUBGRAPH, MIN-LINEAR-ARRANGEMENT, and BETWEENNESS. (A ``dense'' graph is one in which the number of edges is $\Omega(n^2)$; denseness for the other problems is defined in an analogous way).", citeseerurl = "http://citeseer.ist.psu.edu/arora01new.html", doi = "10.1109/SFCS.1996.548460", } @Article{Ashcraft:1990, author = "Cleve Ashcraft and Stanley C. Eisenstat and Joseph W. H. Liu", title = "A Fan-In Algorithm for Distributed Sparse Numerical Factorization", journal = "SIAM Journal on Scientific and Statistical Computing", year = 1990, volume = 11, pages = "593--599", month = may, doi = "10.1137/0911033", } @InProceedings{Bagherzadeh:1992, author = "N. Bagherzadeh and K. Hawk", title = "Parallel implementation of the auction algorithm on the Intel hypercube", crossref = "ipp6", pages = "443--447", abstract = "The authors present their experience in executing the auction algorithm on an iPSC/860 hypercube multiprocessor. They show the performance of the algorithm under synchronous and asynchronous computation models. In order to reduce the number of iterations for this algorithm and effectively increase the inherent parallelism in the auction algorithm, they propose and test a new technique called $\gamma$-scaling", doi = "10.1109/IPPS.1992.223005", } @Book{Bai:1995, title = "Lapack Users' Guide", publisher = "Society for Industrial and Applied Mathematic", year = 1995, author = "Z. Bai and C. Bischof and James W. Demmel and Edward Anderson and Jack J. Dongarra", pages = 325, edition = "second", month = feb, ISBN = "0-89871-345-5", } @Article{Bailey:1991, author = "David H. Bailey and King Lee and Horst Simon", title = "Using Strassen's algorithm to accelerate the solution of linear systems", journal = "The Journal of Supercomputing", year = "1991", volume = "4", pages = "357--371", abstract = "Strassen's algorithm for fast matrix-matrix multiplication has been implemented for matrices of arbitrary shapes on the CRAY-2 and CRAY Y-MP supercomputers. Several techniques have been used to reduce the scratch space requirement for this algorithm while simultaneously preserving a high level of performance. When the resulting Strassen-based matrix multiply routine is combined with some routines from the new LAPACK library, LU decomposition can be performed with rates significantly higher than those achieved by conventional means. We succeeded in factoring a 2048 $\times$ 2048 matrix on the CRAY Y-MP at a rate equivalent to 325 MFLOPS.", doi = "10.1007/BF00129836", } @Article{Balas:1991, author = "Egon Balas and Donald Miller and Joseph Pekny and Paolo Toth", title = "A parallel shortest augmenting path algorithm for the assignment problem", journal = "Journal of the ACM", year = 1991, volume = 38, pages = "985--1004", doi = "10.1145/115234.115349", } @Article{Bauer:1963, author = "Fritz L. Bauer", title = "Optimally scaled matrices", journal = "Numerische Mathematik", year = "1963", volume = "5", pages = "73--87", month = dec, doi = "10.1007/BF01385880", } @Article{Bendisch:1994, author = "Jürgen Bendisch and Ulrich Derigs and A. Metz", title = "An efficient matching algorithm applied in statistical physics", journal = "Discrete Appl. Math.", year = 1994, volume = 52, pages = "139--153", number = 2, address = "Amsterdam, The Netherlands, The Netherlands", doi = "10.1016/0166-218X(94)90078-7", ISSN = "0166-218X", publisher = "Elsevier Science Publishers B. V.", } @Article{Bertsekas:1981, author = "Dimitri P. Bertsekas", title = "A new algorithm for the assignment problem", journal = "Mathematical Programming", year = "1981", volume = "21", pages = "152--171", month = dec, abstract = "We propose a new algorithm for the classical assignment problem. The algorithm resembles in some ways the Hungarian method but differs substantially in other respects. The average computational complexity of an efficient implementation of the algorithm seems to be considerably better than the one of the Hungarian method. In a large number of randomly generated problems the algorithm has consistently outperformed an efficiently coded version of the Hungarian method by a broad margin. The factor of improvement increases with the problem dimension $N$ and reaches an order of magnitude for $N$ equal to several hundreds.", doi = "10.1007/BF01584237", } @Article{Bertsekas:1988, author = "Dimitri P. Bertsekas", title = "The auction algorithm: {A} distributed relaxation method for the assignment problem", journal = "Annals of Operations Research", year = 1988, volume = 14, pages = "105--123", month = dec, abstract = "We propose a massively parallelizable algorithm for the classical assignment problem. The algorithm operates like an auction whereby unassigned persons bid simultaneously for objects thereby raising their prices. Once all bids are in, objects are awarded to the highest bidder. The algorithm can also be interpreted as a Jacobi-like relaxation method for solving a dual problem. Its (sequential) worst-case complexity, for a particular implementation that uses scaling, is $O(N A \log(NC))$, where $N$ is the number of persons, $A$ is the number of pairs of persons and objects that can be assigned to each other, and $C$ is the maximum absolute object value. Computational results show that, for large problems, the algorithm is competitive with existing methods even without the benefit of parallelism. When executed on a parallel machine, the algorithm exhibits substantial speedup.", doi = "10.1007/BF02186476", } @Book{Bertsekas:1991, title = "Linear network optimization: algorithms and codes", publisher = "MIT Press", year = 1991, author = "Dimitri P. Bertsekas", pages = 359, ISBN = "0-262-02334-2", googlebookid = {7i3XQVNkJqIC}, } @Article{Bertsekas:1991a, author = "Dimitri P. Bertsekas and David A. Castañon", title = "Parallel synchronous and asynchronous implementations of the auction algorithm", journal = "Parallel Computing", year = "1991", volume = "17", pages = "707--732", month = sep, abstract = "In this paper we discuss the parallel implementation of the auction algorithm for the classical assignment problem. We show that the algorithm admits a totally asynchronous implementation and we consider several implementations on a shared memory machine, with varying degrees of synchronization. We also discuss and explore computationally the tradeoffs involved in using asynchronism to reduce the synchronization penalty.", doi = "10.1016/S0167-8191(05)80062-6", } @Article{Bertsekas:1992, author = "Dimitri P. Bertsekas and David A. Castañon", title = "A forward/reverse auction algorithm for asymmetric assignment problems", journal = "Computational Optimization and Applications", year = 1992, volume = 1, pages = "277--297", month = dec, abstract = "In this paper we consider the asymmetric assignment problem and we propose a new auction algorithm for its solution. The algorithm uses in a novel way the recently proposed idea of reverse auction, where, in addition to persons bidding for objects by raising their prices, we also have objects competing for persons by essentially offering discounts. In practice, the new algorithm apparently deals better with price wars than the currently existing auction algorithms. As a result, it tends to terminate substantially (and often dramatically) faster than its competitors.", doi = "10.1007/BF00249638", } @Misc{Bindel:2002, author = "David Bindel and E. Jason Riedy", title = "Exception Handling Interfaces, Implementations, and Evaluation", year = "2002", note = "Published: IEEE-754r revision meeting", URL = "http://grouper.ieee.org/groups/754/meeting-materials/2002-08-22-pres.pdf", } @Article{Bini:1980, author = "Dario Bini and Grazia Lotti", title = "Stability of fast algorithms for matrix multiplication", journal = "Numerische Mathematik", year = 1980, volume = 36, pages = "63--72", month = mar, abstract = "Summary Non commutative fast algorithms to compute $2\times 2$ matrix product are classified with regard to stability. An analysis of the rounding error propagation is presented for the $n\times n$ matrix multiplication algorithms obtained by recursive partitioning.", doi = "10.1007/BF01395989", } @Book{Bondy:1976, author = {John Adrian Bondy and U. S. R. Murty}, title = {Graph Theory with Applications}, publisher = {North-Holland}, year = {1976}, isbn = {0-444-19451-7}, url = "http://www.ecp6.jussieu.fr/pageperso/bondy/books/gtwa/gtwa.html", } @Article{Bunch:1971, author = "James R. Bunch", title = "Equilibration of Symmetric Matrices in the Max-Norm", journal = "Journal of the ACM", year = 1971, volume = 18, pages = "566--572", doi = "10.1145/321662.321670", } @Article{Bunch:1974, author = "James R. Bunch and John E. Hopcroft", title = "Triangular Factorization and Inversion by Fast Matrix Multiplication", journal = "Mathematics of Computation", year = "1974", volume = "28", pages = "231--236", abstract = "The fast matrix multiplication algorithm by Strassen is used to obtain the triangular factorization of a permutation of any nonsingular matrix of order $n$ in $< C_1 n^{\log_2 7}$ operations, and, hence, the invese of any nonsingular matrix in $< C_2 n^{log_2 7}$ operations.", ISSN = "00255718", URL = "http://links.jstor.org/sici?sici=0025-5718\%28197401\%2928\%3A125\%3C231\%3ATFAIBF\%3E2.0.CO\%3B2-U", } @Article{Bunch:1987, author = "James R. Bunch", title = "The weak and strong stability of algorithms in numerical linear algebra", journal = "Linear Algebra and its Applications", year = 1987, volume = "88-89", pages = "49--66", month = apr, abstract = "The stability of algorithms in numerical linear algebra is discussed. The concept of stability is extended to notions of weak stability and strong stability. Justifications are given for these extensions, and the implications of error analyses in terms of these definitions are discussed. The concept of weak stability helps to clarify some of the controversy which has arisen concerning the stability of algorithms for Toeplitz systems.", doi = "10.1016/0024-3795(87)90102-9", } @Book{Burkard:1980, title = "Assignment and Matching Problems: Solution Methods with {FORTRAN}-Programs", publisher = "Springer-Verlag New York, Inc", year = "1980", author = "Rainer E. Burkard and Ulrich Derigs", pages = "148", ISBN = "0-387-10267-1", } @Article{Burkard:2002, author = "Rainer E. Burkard", title = "Selected topics on assignment problems", journal = "Discrete Applied Mathematics", year = 2002, volume = 123, pages = "257--302", abstract = "We survey recent developments in the fields of bipartite matchings, linear sum assignment and bottleneck assignment problems and applications, multidimensional assignment problems, quadratic assignment problems, in particular lower bounds, special cases and asymptotic results, biquadratic and communication assignment problems.", doi = "10.1016/S0166-218X(01)00343-2", } @Article{Burkard:2003, author = {Rainer E. Burkard and Peter Butkovič}, title = {Max algebra and the linear assignment problem}, journal = {Mathematical Programming}, year = {2003}, volume = {98}, number = {1--3}, pages = {415--429}, month = sep, doi = {10.1007/s10107-003-0411-9}, abstract = {Max-algebra, where the classical arithmetic operations of addition and multiplication are replaced by $a \oplus b := \max(a, b)$ and $a \otimes b := a+b$ offers an attractive way for modelling discrete event systems and optimization problems in production and transportation. Moreover, it shows a strong similarity to classical linear algebra: for instance, it allows a consideration of linear equation systems and the eigenvalue problem. The max-algebraic permanent of a matrix $A$ corresponds to the maximum value of the classical linear assignment problem with cost matrix $A$. The analogue of van der Waerden's conjecture in max-algebra is proved. Moreover the role of the linear assignment problem in max-algebra is elaborated, in particular with respect to the uniqueness of solutions of linear equation systems, regularity of matrices and the minimal-dimensional realisation of discrete event systems. Further, the eigenvalue problem in max-algebra is discussed. It is intimately related to the best principal submatrix problem which is finally investigated: Given an integer $k$, $1 \leq k \leq n$, find a $(k\times k)$ principal submatrix of the given $(n\times n)$ matrix which yields among all principal submatrices of the same size the maximum (minimum) value for an assignment. For $k=1,2,\ldots,n$, the maximum assignment problem values of the principal $(k\times k)$ submatrices are the coefficients of the max-algebraic characteristic polynomial of the matrix for $A$. This problem can be used to model job rotations.}, } @Article{Carpaneto:1980, author = "Giorgio Carpaneto and Paolo Toth", title = "Algorithm 548: Solution of the Assignment Problem {[H]}", journal = "ACM Transactions on Mathematical Software", year = "1980", volume = "6", pages = "104--111", doi = "10.1145/355873.355883", } @Article{Carraresi:1986, author = "Paolo Carraresi and Claudio Sodini", title = "An efficient algorithm for the bipartite matching problem", journal = "European Journal of Operational Research", year = 1986, volume = 23, pages = "86--93", abstract = "The minimum cost bipartite matching problem is considered. An approach based on the solution of a sequence of shortest path sub-problems is proposed. The particular structure of the problem and the use of reduced costs make it possible to devise an efficient ``threshold'' algorithm to solve these sub-problems. The computational behaviour of the proposed procedure is analyzed.", doi = {10.1016/0377-2217(86)90218-3}, } @InProceedings{Chang:1998, author = "Yao-Wen Chang and Jai-Ming Lin and D. F. Wong", title = "Graph matching-based algorithms for {FPGA} segmentation design", pages = "34--39", crossref = "cad1998", } @Article{Chang:2001, author = "Xiao-Wen Chang and Christopher C. Paige", title = "Componentwise perturbation analyses for the {$QR$} factorization", journal = "Numerische Mathematik", year = 2001, volume = 88, pages = "319--345", month = apr, abstract = "Summary. This paper gives componentwise perturbation analyses for $Q$ and $R$ in the $QR$ factorization $A=QR$, $Q^T Q = I$, $R$ upper triangular, for a given real $m\times n$ matrix $A$ of rank $n$. Such specific analyses are important for example when the columns of $A$ are badly scaled. First order perturbation bounds are given for both $Q$ and $R$. The analyses more accurately reflect the sensitivity of the problem than previous such results. The condition number for $R$ is bounded for a fixed $n$ when the standard column pivoting strategy is used. This strategy also tends to improve the condition of $Q$, so usually the computed $Q$ and $R$ will both have higher accuracy when we use the standard column pivoting strategy. Practical condition estimators are derived. The assumptions on the form of the perturbation are explained and extended. Weaker rigorous bounds are also given.", doi = "10.1007/PL00005447", } @Misc{Cheng:2001, author = "Sheung Hun Cheng and Nicholas J. Higham", title = "Implementation for {LAPACK} of a Block Algorithm for Matrix 1-Norm Estimation", year = 2001, address = "Manchester, England", pages = 19, URL = "http://citeseer.ist.psu.edu/cheng01implementation.html", } @Article{Cheriyan:1989, author = "Joseph Cheriyan and Siddharth N. Maheshwari", title = "Analysis of Preflow Push Algorithms for Maximum Network Flow", journal = "SIAM Journal on Computing", year = 1989, volume = 18, pages = "1057--1086", month = dec, doi = "10.1137/0218072", } @Article{Chu:1985, author = "Eleanor Chu and Alan George", title = "A note on estimating the error in Gaussian elimination without pivoting", journal = "SIGNUM Newsletter", year = "1985", volume = "20", pages = "2--7", abstract = "This article deals with the problem of estimating the error in the computed solution to a system of equations when that solution is obtained by using Gaussian elimination without pivoting. The corresponding problem, where either partial or complete pivoting is used, has received considerable attention, and efficient and reliable methods have been developed. However, in the context of solving large sparse systems, it is often very attractive to apply Gaussian elimination without pivoting, even though it cannot be guaranteed a-priori that the computation is numerically stable. When this is done, it is important to be able to determine when serious numerical errors have occurred, and to be able to estimate the error in the computed solution. In this paper a method for achieving this goal is described. Results of a large number of numerical experiments suggest that the method is both inexpensive and reliable.", doi = {10.1145/1057941.1057942}, } @Book{Chung:1997, title = "Spectral graph theory", publisher = "Published for the Conference Board of the mathematical sciences by the American Mathematical Society", year = 1997, author = "Fan Chung", address = "Providence, Rhode Island", ISBN = "978-0-8218-0315-8", googlebookid = {YUc38_MCuhAC}, } @Misc{Cohen:2000, author = "Natalya Cohen", title = "A Parallel Pruning Technique for Highly Asymmetric Assignment Problems", month = jun, year = 2000, note = "We introduce a new two-phase technique to solve highly asymmetric assignment problems. In the first phase, the assignment problem is decomposed into subproblems which are solved in parallel. The first phase is used to exclude certain suboptimal assignments from consideration in the second phase. In the second phase, the optimal assignment is finalized. We show that the two-phase algorithm can reduce the theoretical time bound for solving an $n \times k$ assignment problem ($ n < k$) by a factor of $\sqrt{\frac{k}{n}}$.", doi = "10.1109/71.862206", } @Article{Cortes:2007, author = "V. Cortes and J. M. Peña", title = "Growth factor and expected growth factor of some pivoting strategies", journal = "Journal of Computational and Applied Mathematics", year = 2007, volume = 202, pages = "292--303", number = 2, month = may, abstract = "Several definitions of growth factors for Gaussian elimination are compared. Some new pivoting strategies, intermediate between partial pivoting and rook pivoting, are introduced. For random matrices, an approximation of the average normalized growth factor associated with several pivoting strategies is computed and analyzed. A stationary behavior of the expected growth factors of the new pivoting strategies is observed. Bounds for the growth factors of these pivoting strategies are provided. It is also shown that partial pivoting by columns produces small growth factors for matrices appearing in practical observations and for which the growth factors produced by partial pivoting are very large.", doi = "10.1016/j.cam.2006.02.040", } @Article{Curtis:1972, author = "A. R. Curtis and John K. Reid", title = "On the Automatic Scaling of Matrices for Gaussian Elimination", journal = "IMA Journal of Applied Mathematics", year = "1972", volume = "10", pages = "118--124", month = aug, abstract = "The usual pivotal strategies for Gaussian elimination are based on the assumption that all the matrix elements are of comparable size. We propose an algorithm for scaling based on the assumption that the given matrix can be scaled to this required form. Some numerical experiments are presented to show that it produces much better results than straightforward row and column norm equilibration, particularly in the sparse case, and that the computing cost is moderate.", doi = "10.1093/imamat/10.1.118", } @Article{Cuyt:2002, author = "Annie A. M. Cuyt and Peter Kuterna and Brigitte Verdonk and Dennis Verschaeren", title = "Underflow revisited", journal = "Calcolo", year = 2002, volume = 39, pages = "169--179", month = oct, abstract = "Underflow is a floating-point phenomenon. Although the use of gradual underflow as defended in [2] and [3] is now widespread, most numerical analysts may not be aware of the fact that several implementations of the same principle are in existence, leading to different behavior of code on different platforms, mainly with respect to exception signaling. We intend to thoroughly discuss the slight differences among these implementations. Examples will be taken from current hardware and from our own multiprecision software class library. Throughout the discussion the focus is on the analysis of the phenomenon and not on any implementation issues. Many programmers are also unaware of the fact that the IEEE 754 and 854 standards do not guarantee that a program will deliver identical results on all conforming systems. Of all the differences that can occur cross-platform, the underflow exception is just one.", doi = "10.1007/s100920200003", } @Article{Damberg:1996, author = "Olof Damberg and Sverre Storøy and Tor Sørevik", title = "A data parallel augmenting path algorithm for the dense linear many-to-one assignment problem", journal = "Computational Optimization and Applications", year = 1996, volume = 6, doi = {10.1007/BF00247794}, pages = "251--272", } @Article{Davis:2004, author = "Timothy A. Davis and John R. Gilbert and Stefan I. Larimore and Esmond G. Ng", title = "A column approximate minimum degree ordering algorithm", journal = "ACM Transactions on Mathematical Software.", year = "2004", volume = "30", pages = "353--376", abstract = "Sparse Gaussian elimination with partial pivoting computes the factorization $PAQ = LU$ of a sparse matrix $A$, where the row ordering $P$ is selected during factorization using standard partial pivoting with row interchanges. The goal is to select a column preordering, $Q$, based solely on the nonzero pattern of $A$, that limits the worst-case number of nonzeros in the factorization. The fill-in also depends on $P$, but $Q$ is selected to reduce an upper bound on the fill-in for any subsequent choice of $P$. The choice of $Q$ can have a dramatic impact on the number of nonzeros in $L$ and $U$. One scheme for determining a good column ordering for $A$ is to compute a symmetric ordering that reduces fill-in in the Cholesky factorization of $A^T A$. A conventional minimum degree ordering algorithm would require the sparsity structure of $A^T A$ to be computed, which can be expensive both in terms of space and time since $A^T A$ may be much denser than $A$. An alternative is to compute $Q$ directly from the sparsity structure of $A$; this strategy is used by MATLAB's COLMMD preordering algorithm. A new ordering algorithm, COLAMD, is presented. It is based on the same strategy but uses a better ordering heuristic. COLAMD is faster and computes better orderings, with fewer nonzeros in the factors of the matrix.", doi = "10.1145/1024074.1024079", } @Article{Dax:2003, author = "Achiya Dax", title = "A Modified Iterative Refinement Scheme", journal = "SIAM Journal on Scientific Computing", year = 2003, volume = 25, pages = "1199--1213", doi = {10.1137/S1064827502409055}, } @Misc{Demmel:1983, author = "James W. Demmel", title = "Effects of Underflow on Solving Linear Systems", year = "1983", abstract = "In this paper we examine the effects of underflow on solving systems of linear equations using Gaussian Elimination. Our goal is to decide if reliable software for solving linear in the presence of underflow can be written at reasonable cost. We contrast the utilities of gradual underflow and ``store zero'', and show that only by using gradual underflow can we achieve reliability easily.", publisher = "EECS Department, University of California, Berkeley", URL = "http://www.eecs.berkeley.edu/Pubs/TechRpts/1983/6331.html", } @Article{Demmel:1984, author = "James W. Demmel", title = "Underflow and the Reliability of Numerical Software", journal = "SIAM Journal on Scientific and Statistical Computing", year = 1984, volume = 5, pages = "887--919", month = dec, doi = {10.1137/0905062}, } @Article{Demmel:1992, author = "James W. Demmel", title = "The Componentwise Distance to the Nearest Singular Matrix", journal = "SIAM Journal on Matrix Analysis and Applications", year = "1992", volume = "13", pages = "10--19", doi = {10.1137/0613003}, } @Article{Demmel:1992a, author = "James W. Demmel and Nicholas J. Higham", title = "Stability of block algorithms with fast level-3 {BLAS}", journal = "ACM Transactions on Mathematical Software", year = 1992, volume = 18, doi = {10.1145/131766.131769}, pages = "274--291", abstract = "Block algorithms are becoming increasingly popular in matrix computations. Since their basic unit of data is a submatrix rather than a scalar, they have a higher level of granularity than point algorithms, and this makes them well suited to high-performance computers. The numerical stability of the block algorithms in the new linear algebra program library LAPACK is investigated here. It is shown that these algorithms have backward error analyses in which the backward error bounds are commensurate with the error bounds for the underlying level-3 BLAS (BLAS3). One implication is that the block algorithms are as stable as the corresponding point algorithms when conventional BLAS3 are used. A second implication is that the use of BLAS3 based on fast matrix multiplication techniques affects the stability only insofar as it increases the constant terms in the normwise backward error bounds. For linear equation solvers employing LU factorization, it is shown that fixed precision iterative refinement helps to mitigate the effect of the larger error constants. Despite the positive results presented here, not all plausible block algorithms are stable; we illustrate this with the example of LU factorization with block triangular factors and describe how to check a block algorithm for stability without doing a full error analysis.", } @Article{Demmel:1994, author = "James W. Demmel and Xiaoye S. Li", title = "Faster Numerical Algorithms Via Exception Handling", journal = "IEEE Transactions on Computing", year = 1994, volume = 43, pages = "983--992", abstract = "An attractive paradigm for building fast numerical algorithms is the following: 1) try a fast but occasionally unstable algorithm, 2) test the accuracy of the computed answer, and 3) recompute the answer slowly and accurately in the unlikely event it is necessary. This is especially attractive on parallel machines where the fastest algorithms may be less stable than the best serial algorithms. Since unstable algorithms can overflow or cause other exceptions, exception handling is needed to implement this paradigm safely. To implement it efficiently, exception handling cannot be too slow. We illustrate this paradigm with numerical linear algebra algorithms from the LAPACK library.", doi = {10.1109/12.295860}, } @Book{Demmel:1997, title = "Applied numerical linear algebra", publisher = "Society for Industrial and Applied Mathematics", year = 1997, author = "James W. Demmel", address = "Philadelphia", ISBN = "978-0-89871-389-3", googlebookid = {fjC2d0odoi8C}, } @TechReport{Demmel:2002, author = "James W. Demmel and Yozo Hida", title = "Accurate Floating Point Summation", year = "2002", abstract = "We present and analyze several simple algorithms for accurately summing $n$ floating point numbers, independent of how much cancellation occurs in the sum. Let $f$ be the number of significant bits in the sum ($s_i$). We assume a register is available with $F > f$ significant bits. Then assuming that (1) $n \leq \lfloor 2^{F-f} / (1-2^{-f}) \rfloor + 1$, (2) rounding is to nearest, (3) no overflow occurs, and (4) all underflow is gradual, then simply summing the $s_i$ in decreasing order of magnitude yields $S$ rounded to within just over 1.5 units in its last place. If $S = 0$, then it is computed exactly. If we increase $n$ slightly to $\lfloor 2^{F-f} / (1-2^{-f}) \rfloor + 3$, then all accuracy can be lost. This result extends work of Priest and others who considered double precision only ($F \geq 2f$). We apply this result to the floating point formats in the (proposed revision of the) IEEE floating point standard. For example, a dot product of IEEE single precision vectors computed using double precision and sorting is guaranteed correct to nearly 1.5 ulps as long as $n \leq 33$. If double extended is used $n$ can be as large as 65537. We also show how sorting may be mostly avoided while retaining accuracy.", institution = "University of California at Berkeley", number = {CSD-02-1180}, url = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2002/5662.html}, month = may, } @Article{Demmel:2004, author = "James W. Demmel and Yozo Hida", title = "Fast and Accurate Floating Point Summation with Application to Computational Geometry", journal = "Numerical Algorithms", year = 2004, volume = 37, pages = "101--112", month = dec, abstract = "We present several simple algorithms for accurately computing the sum of n floating point numbers using a wider accumulator. Let f and F be the number of significant bits in the summands and the accumulator, respectively. Then assuming gradual underflow, no overflow, and round-to-nearest arithmetic, up to $2F-f/(1-2-f)+1$ numbers can be accurately added by just summing the terms in decreasing order of exponents, yielding a sum correct to within about 1.5 units in the last place. In particular, if the sum is zero, it is computed exactly. We apply this result to the floating point formats in the IEEE floating point standard, and investigate its performance. Our results show that in the absence of massive cancellation (the most common case) the cost of guaranteed accuracy is about 30-40\% more than the straightforward summation. If massive cancellation does occur, the cost of computing the accurate sum is about a factor of ten. Finally, we apply our algorithm in computing a robust geometric predicate (used in computational geometry), where our accurate summation algorithm improves the existing algorithm by a factor of two on a nearly coplanar set of points.", doi = "10.1023/B:NUMA.0000049458.99541.38", } @TechReport{Demmel:2005, author = "James W. Demmel and Yozo Hida and W. Kahan and Xiaoye S. Li and Sonil Mukherjee and E. Jason Riedy", title = "Error bounds from extra-precise iterative refinement", year = 2005, note = "Also issued as UCB//CSD-05-1414, UT-CS-05-547, and LBNL-56965; expanded from TOMS version", type = "LAPACK Working Note", URL = "http://www.netlib.org/lapack/lawnspdf/lawn165.pdf", institution = {Netlib}, number = 165, } @InProceedings{Demmel:2006, author = "James W. Demmel and Jack J. Dongarra and Beresford N. Parlett and W. Kahan and Ming Gu and David Bindel and Yozo Hida and Xiaoye S. Li and Osni A. Marques and E. Jason Riedy and Christof Vömel and Julien Langou and Piotr Luszczek and Jakub Kurzak and Alfredo Buttari and Julie Langou and Stanimire Tomov", title = "Prospectus for the Next {LAPACK} and Sca{LAPACK} Libraries", abstract = "LAPACK and ScaLAPACK are widely used software libraries for numerical linear algebra. There have been over 68M web hits at www.netlib.org for the associated libraries LAPACK, ScaLAPACK, CLAPACK and LAPACK95. LAPACK and ScaLAPACK are used to solve leading edge science problems and they have been adopted by many vendors and software providers as the basis for their own libraries, including AMD, Apple (under Mac OS X), Cray, Fujitsu, HP, IBM, Intel, NEC, SGI, several Linux distributions (such as Debian), NAG, IMSL, the MathWorks (producers of MATLAB), Interactive Supercomputing, and PGI. Future improvements in these libraries will therefore have a large impact on users.", URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/para06-lapack.pdf", crossref = "para06", } @Article{Demmel:2006a, author = "James W. Demmel and Ioana Dumitriu and Olga Holtz", title = "Fast linear algebra is stable", journal = "Numerische Mathematik", year = 2006, volume = 108, pages = "59--91", number = 1, month = nov, abstract = "In Demmel et al. (Numer. Math. 106(2), 199--224, 2007) we showed that a large class of fast recursive matrix multiplication algorithms is stable in a normwise sense, and that in fact if multiplication of n-by-n matrices can be done by any algorithm in $O(n^{\omega + \eta})$ operations for any $\eta > 0$, then it can be done stably in $O(n^{\omega + \eta})$ operations for any $\eta > 0$. Here we extend this result to show that essentially all standard linear algebra operations, including $LU$ decomposition, $QR$ decomposition, linear equation solving, matrix inversion, solving least squares problems, (generalized) eigenvalue problems and the singular value decomposition can also be done stably (in a normwise sense) in $O(n^{\omega+\eta})$ operations.", doi = "10.1007/s00211-007-0114-x", eprint = "math/0612264", } @Article{Demmel:2006b, author = "James W. Demmel and Yozo Hida and W. Kahan and Xiaoye S. Li and Sonil Mukherjee and E. Jason Riedy", title = "Error bounds from extra-precise iterative refinement", doi = {10.1145/1141885.1141894}, journal = "ACM Transactions on Mathematical Software", year = "2006", volume = "32", pages = "325--351", abstract = "We present the design and testing of an algorithm for iterative refinement of the solution of linear equations where the residual is computed with extra precision. This algorithm was originally proposed in 1948 and analyzed in the 1960s as a means to compute very accurate solutions to all but the most ill-conditioned linear systems. However, two obstacles have until now prevented its adoption in standard subroutine libraries like LAPACK: (1) There was no standard way to access the higher precision arithmetic needed to compute residuals, and (2) it was unclear how to compute a reliable error bound for the computed solution. The completion of the new BLAS Technical Forum Standard has essentially removed the first obstacle. To overcome the second obstacle, we show how the application of iterative refinement can be used to compute an error bound in any norm at small cost and use this to compute both an error bound in the usual infinity norm, and a componentwise relative error bound.", ISSN = "0098-3500", } @TechReport{Demmel:2007, author = "James W. Demmel and Jack J. Dongarra and Beresford N. Parlett and W. Kahan and Ming Gu and David Bindel and Yozo Hida and Xiaoye S. Li and Osni A. Marques and E. Jason Riedy and Christof Vömel and Julien Langou and Piotr Luszczek and Jakub Kurzak and Alfredo Buttari and Julie Langou and Stanimire Tomov", title = "Prospectus for the Next {LAPACK} and Sca{LAPACK} Libraries", year = 2007, note = "Also issued as UT-CS-07-592", type = "LAPACK Working Note", URL = "http://www.netlib.org/lapack/lawnspdf/lawn181.pdf", institution = {Netlib}, number = 181, } @Article{Demmel:2007a, author = "James W. Demmel and Ioana Dumitriu and Olga Holtz and Robert Kleinberg", title = "Fast matrix multiplication is stable", journal = "Numerische Mathematik", year = 2007, volume = 106, pages = "199--224", month = apr, abstract = "Abstract We perform forward error analysis for a large class of recursive matrix multiplication algorithms in the spirit of Bini and Lotti [Numer. Math. 36:63?72, 1980]. As a consequence of our analysis, we show that the exponent of matrix multiplication (the optimal running time) can be achieved by numerically stable algorithms. We also show that new group-theoretic algorithms proposed in Cohn and Umans [Foundations of Computer Science, 44th Annual IEEE Symposium, pp. 438?449, 2003] and Cohn et al. [Foundations of Computer Science, 46th Annual IEEE Symposium, pp. 379?388, 2005] are all included in the class of algorithms to which our analysis applies, and are therefore numerically stable. We perform detailed error analysis for three specific fast group-theoretic algorithms.", doi = "10.1007/s00211-007-0061-6", } @TechReport{Demmel:2007b, author = "James W. Demmel and Yozo Hida and Xiaoye S. Li and E. Jason Riedy", title = "Extra-precise iterative refinement for overdetermined least squares problems", institution = {Netlib}, month = may, year = 2007, note = "Also issued as UCB/EECS-2007-77; sumbitted to TOMS.", abstract = "We present the algorithm, error bounds, and numerical results for extra-precise iterative refinement applied to overdetermined linear least squares (LLS) problems. We apply our linear system refinement algorithm to Björck's augmented linear system formulation of an LLS problem. Our algorithm reduces the forward normwise and componentwise errors to $O(\varepsilon)$ unless the system is too ill conditioned. In contrast to linear systems, we provide two separate error bounds for the solution $x$ and the residual $r$. The refinement algorithm requires only limited use of extra precision and adds only $O(mn)$ work to the $O(mn^2)$ cost of QR factorization for problems of size m-by-n. The extra precision calculation is facilitated by the new extended-precision BLAS standard in a portable way, and the refinement algorithm will be included in a future release of LAPACK and can be extended to the other types of least squares problems.", type = "LAPACK Working Note", number = 188, URL = "http://www.netlib.org/lapack/lawnspdf/lawn188.pdf", } @Misc{Demmel:2007c, author = "James W. Demmel and Yozo Hida and Xiaoye S. Li and E. Jason Riedy and Meghana Vishvanath and David Vu", title = "Precise Solutions for Overdetermined Least Squares Problems", year = 2007, note = "Stanford 50 -- Eighth Bay Area Scientific Computing Day", abstract = "Linear least squares (LLS) fitting is the most widely used data modeling technique and is included in almost every data analysis system (e.g. spreadsheets). These software systems often give no feedback on the conditioning of the LLS problem or the floating-point calculation errors present in the solution. With limited use of extra precision, we can eliminate these concerns for all but the most ill-conditioned LLS problems. Our algorithm provides either a solution and residual with relatively tiny error or a notice that the LLS problem is too ill-conditioned.", address = "Stanford, CA", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/bascd2007-poster.pdf", } @Article{Derigs:1986, author = "Ulrich Derigs and A. Metz", title = "An efficient labeling technique for solving sparse assignment problems", journal = "Computing", year = 1986, volume = 36, pages = "301--311", month = dec, abstract = "We describe a new implementation of the shortest augmenting path approach for solving sparse assignment problems and report computational experience documenting its efficiency.", doi = "10.1007/BF02240205", } @Book{Diestel:2005, author = {Reinhard Diestel}, title = {Graph Theory}, publisher = {Springer-Verlag, Heidelberg}, year = 2005, volume = 173, series = {Graduate Texts in Mathematics}, edition = {3rd}, month = {July}, url = {http://www.math.uni-hamburg.de/home/diestel/books/graph.theory/}, isbn = {3-540-26182-6}, googlebookid = {04YbQF8oscQC}, } @Article{Dongarra:1988, author = "Jack J. Dongarra and Jeremy Du Croz and Sven Hammarling and Richard J. Hanson", title = "An extended set of {FORTRAN} basic linear algebra subprograms", journal = "ACM Transactions on Mathematical Software", year = 1988, volume = 14, pages = "1--17", abstract = "This paper describes an extension to the set of Basic Linear Algebra Subprograms. The extensions are targeted at matrix-vector operations that should provide for efficient and portable implementations of algorithms for high-performance computers.", doi = {10.1145/42288.42291}, } @Article{Dongarra:1990, author = "Jack J. Dongarra and Jeremy Du Croz and Sven Hammarling and Iain S. Duff", title = "A set of level 3 basic linear algebra subprograms", journal = "ACM Transactions on Mathematical Software", year = "1990", volume = "16", pages = "1--17", abstract = "This paper describes an extension to the set of Basic Linear Algebra Subprograms. The extensions are targeted at matrix-vector operations that should provide for efficient and portable implementations of algorithms for high-performance computers", doi = {10.1145/77626.79170}, } @Unpublished{Dongarra:2006, author = "Jack J. Dongarra and Julien Langou and E. Jason Riedy", title = "Sca/{LAPACK} Program Style", note = "Contributor documentation.", year = 2006, abstract = "The purpose of this document is to facilitate contributions to LAPACK and ScaLAPACK by documenting their design and implementation guidelines. The long-term goal is to provide guidelines for both LAPACK and ScaLAPACK. However, the parallel ScaLAPACK code has more open issues, so this document primarily concerns LAPACK.", URL = "http://www.netlib.org/lapack-dev/lapack-coding/program-style.html", } @article{Driscoll:2007, author = {Tobin A. Driscoll and Kara L. Maki}, title = {Searching for Rare Growth Factors Using Multicanonical Monte Carlo Methods}, publisher = {SIAM}, year = 2007, journal = {SIAM Review}, volume = 49, number = 4, pages = {673--692}, doi = {10.1137/050637662}, abstract = {The growth factor of a matrix quantifies the amount of potential error growth possible when a linear system is solved using Gaussian elimination with row pivoting. While it is an easy matter [N. J. Higham and D. J. Higham, SIAM J. Matrix Anal. Appl., 10 (1989), pp. 155–164] to construct examples of $n\times n$ matrices having any growth factor up to the maximum of $2^{n-1}$, the weight of experience and analysis [N. J. Higham, Accuracy and Stability of Numerical Algorithms, SIAM, Philadelphia, 1996], [L. N. Trefethen and R. S. Schreiber, SIAM J. Matrix Anal. Appl., 11 (1990), pp. 335–360], [L. N. Trefethen and I. D. Bau, Numerical Linear Algebra, SIAM, Philadelphia, 1997] suggest that matrices with exponentially large growth factors are exceedingly rare. Here we show how to conduct numerical experiments on random matrices using a multicanonical Monte Carlo method to explore the tails of growth factor probability distributions. Our results suggest, for example, that the occurrence of an $8\times 8$ matrix with a growth factor of 40 is on the order of a once-in-the-age-of-the-universe event.}, } @Article{Duff:1983, author = "Iain S. Duff and John K. Reid", title = "The Multifrontal Solution of Indefinite Sparse Symmetric Linear", journal = "ACM Transactions on Mathematical Software", year = "1983", doi = {10.1145/356044.356047}, volume = "9", number = {3}, pages = "302--325", } @Book{Duff:1986, title = "Direct methods for sparse matrices", publisher = "Oxford University Press, Inc", year = 1986, author = "Iain S. Duff and Albert M. Erisman and John K. Reid", googlebookid = {rlX7tbJstpIC}, ISBN = "0-19-853408-6", } @Article{Duff:1999, author = "Iain S. Duff and Jacko Koster", title = "The Design and Use of Algorithms for Permuting Large Entries to the Diagonal of Sparse Matrices", journal = "SIAM Journal on Matrix Analysis and Applications", year = 1999, volume = 20, pages = "889--901", abstract = "We consider techniques for permuting a sparse matrix so that the diagonal of the permuted matrix has entries of large absolute value. We discuss various criteria for this and consider their implementation as computer codes. We then indicate several cases where such a permutation can be useful. These include the solution of sparse equations by a direct method and by an iterative technique. We also consider its use in generating a preconditioner for an iterative method. We see that the effect of these reorderings can be dramatic although the best a priori strategy is by no means clear.", doi = {10.1137/S0895479897317661}, } @Article{Duff:2000, author = "Iain S. Duff and Jacko Koster", title = "On Algorithms For Permuting Large Entries to the Diagonal of a Sparse Matrix", journal = "SIAM Journal on Matrix Analysis and Applications", year = 2000, volume = 22, pages = "973--996", abstract = "We consider bipartite matching algorithms for computing permutations of a sparse matrix so that the diagonal of the permuted matrix has entries of large absolute value. We discuss various strategies for this and consider their implementation as computer codes. We also consider scaling techniques to further increase the relative values of the diagonal entries. Numerical experiments show the effect of the reorderings and the scaling on the solution of sparse equations by a direct method and by preconditioned iterative techniques.", URL = "http://portal.acm.org/citation.cfm?id=587801", doi = {10.1137/S0895479899358443}, } @Article{Edmonds:1972, author = "Jack Edmonds and Richard M. Karp", doi = {10.1145/321694.321699}, title = "Theoretical Improvements in Algorithmic Efficiency for Network Flow Problems", journal = "Journal of the ACM", year = 1972, volume = 19, number = 2, pages = "248--264", } @Article{Erisman:1974, author = "Albert M. Erisman and John K. Reid", title = "Monitoring the stability of the triangular factorization of a sparse matrix", journal = "Numerische Mathematik", year = 1974, volume = 22, pages = "183--186", month = jun, abstract = "We propose a method for bounding the numerical instability in Gaussian elimination which may result from the use of interchanges calculated from the sparsity pattern alone or calculated for another matrix having the same sparsity pattern. The computational cost of obtaining the bound is small compared with that of the elimination. Also we propose the use of a variant of iterative refinement to estimate the accuracy of the solution obtained.", doi = "10.1007/BF01436966", } @Article{Farnum:1988, author = "Charles Donald Farnum", title = "Compiler support for floating-point computation", journal = "Software: Practice and Experience", year = "1988", volume = "18", pages = "701--709", doi = "10.1002/spe.4380180709", } @Article{Favati:2000, author = {Paola Favati and Mauro Leoncini and Angeles Martinez}, title = {On the Robustness of Gaussian Elimination with Partial Pivoting}, journal = {BIT Numerical Mathematics}, year = {2000}, volume = {40}, number = {1}, pages = {62--73}, month = mar, doi = {10.1023/A:1022314201484}, abstract = {It has been recently shown that large growth factors might occur in Gaussian Elimination with Partial Pivoting (GEPP) also when solving some plausibly \emph{natural} systems. In this note we argue that this potential problem could be easily solved, with much smaller risk of failure, by very small (and low cost) modifications of the basic algorithm, thus confirming its inherent robustness. To this end, we first propose an informal model with the goal of providing further support to the comprehension of the stability properties of GEPP. We then report the results of numerical experiments that confirm the viewpoint embedded in the model. Basing on the previous observations, we finally propose a simple scheme that could be turned into (even more) accurate software for the solution of linear systems.}, } @Article{Fielder:1980, author = {Miroslav Fiedler}, title = {Application of graphs to the Gaussian elimination method}, journal = {Journal of Mathematical Sciences}, year = 1980, volume = 13, number = 2, pages = {244--250}, month = feb, doi = {10.1007/BF01296240}, abstract = {Certain algebraic operations (in the Boolean sense) are developed for directed graphs. Nonsingular and inverse graphs are defined and some of their characteristics are derived. The results are applied for the Gaussian elimination process.}, } @Article{Foster:1994, author = "Leslie V. Foster", title = "Gaussian Elimination with Partial Pivoting Can Fail in Practice", journal = "SIAM Journal on Matrix Analysis and Applications", year = 1994, volume = 15, pages = "1354--1362", doi = {10.1137/S0895479892239755}, month = oct, } @Article{Foster:1997, author = "Leslie V. Foster", title = "The growth factor and efficiency of Gaussian elimination with rook pivoting", journal = "Journal of Computational and Applied Mathematics", year = "1997", volume = "86", pages = "177--194", number = "1", month = nov, abstract = "Gaussian elimination is among the most widely used tools in scientific computing. Gaussian elimination with partial pivoting requires only O(n2) comparisons beyond the work required in Gaussian elimination with no pivoting but can, in principle, have error growth that is exponential in the matrix size n. Gaussian elimination with complete pivoting, on the other hand, cannot have exponential error growth but requires O(n3) comparisons beyond the work required by Gaussian elimination with no pivoting. Numerical experiments suggest that Gaussian elimination with rook pivoting is between partial pivoting and complete pivoting in terms of efficiency and accuracy. In this paper we prove that rook pivoting cannot have exponential error growth. We also introduce a combination of partial pivoting and rock pivoting which we call Gaussian elimination with partial rook pivoting and we prove the partial rook pivoting cannot have exponential error growth. We include numerical experiments showing that on a serial computer the run times for cook pivoting are almost always close to those of partial pivoting and the run times for partial rook pivoting appear to be the same as those of partial pivoting.", doi = "10.1016/S0377-0427(97)00154-4", } @Article{Galil:1986, author = "Zvi Galil", title = "Efficient algorithms for finding maximum matching in graphs", journal = "ACM Computing Surveys", year = 1986, volume = 18, doi = {10.1145/6462.6502}, pages = "23--38", abstract = "This paper surveys the techniques used for designing the most efficient algorithms for finding a maximum cardinality or weighted matching in (general or bipartite) graphs. It also lists some open problems concerning possible improvements in existing algorithms and the existence of fast parallel algorithms for these problems.", } @Article{Gilbert:2001, author = {John R. Gilbert and Xiaoye S. Li and Esmond G. Ng and Berry W. Peyton}, title = {Computing Row and Column Counts for Sparse {$QR$} and {$LU$} Factorization}, journal = {BIT Numerical Mathematics}, year = {2001}, volume = {41}, number = {4}, pages = {693--710}, month = sep, doi = {10.1023/A:1021943902025}, abstract = {We present algorithms to determine the number of nonzeros in each row and column of the factors of a sparse matrix, for both the $QR$ factorization and the $LU$ factorization with partial pivoting. The algorithms use only the nonzero structure of the input matrix, and run in time nearly linear in the number of nonzeros in that matrix. They may be used to set up data structures or schedule parallel operations in advance of the numerical factorization. The row and column counts we compute are upper bounds on the actual counts. If the input matrix is strong Hall and there is no coincidental numerical cancellation, the counts are exact for $QR$ factorization and are the tightest bounds possible for $LU$ factorization. These algorithms are based on our earlier work on computing row and column counts for sparse Cholesky factorization, plus an efficient method to compute the column elimination tree of a sparse matrix without explicitly forming the product of the matrix and its transpose.}, } @Article{Gohberg:1993, author = "Israel Gohberg and Israel Koltracht", title = "Mixed, Componentwise, and Structured Condition Numbers", journal = "SIAM Journal on Matrix Analysis and Applications", year = 1993, volume = 14, pages = "688--704", month = jul, doi = "10.1137/0614049", } @Article{Gold:1996, author = "Steven Gold and Anand Rangarajan", title = "A Graduated Assignment Algorithm for Graph Matching", month = apr, year = "1996", journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, volume = {18}, number = {4}, issn = {0162-8828}, pages = {377--388}, abstract = "A graduated assignment algorithm for graph matching is presented which is fast and accurate even in the presence of high noise. By combining graduated nonconvexity, two-way (assignment) constraints, and sparsity, large improvements in accuracy and speed are achieved. Its low order computational complexity [$O(lm)$, where $l$ and $m$ are the number of links in the two graphs] and robustness in the presence of noise offer advantages over traditional combinatorial approaches. The algorithm, not restricted to any special class of graph, is applied to subgraph isomorphism, weighted graph matching, and attributed relational graph matching. To illustrate the performance of the algorithm, attributed relational graphs derived from objects are matched. Then, results from twenty-five thousand experiments conducted on 100 node random graphs of varying types (graphs with only zero-one links, weighted graphs, and graphs with node attributes and multiple link types) are reported. No comparable results have been reported by any other graph matching algorithm before in the research literature. Twenty-five hundred control experiments are conducted using a relaxation labeling algorithm and large improvements in accuracy are demonstrated.", doi = "10.1109/34.491619", } @InProceedings{Goldberg:1986, author = "Andrew V. Goldberg and Robert Endre Tarjan", title = "A new approach to the maximum flow problem", pages = "136--146", crossref = "stoc18", } @Article{Goldberg:1991, author = "David Goldberg", title = "What every computer scientist should know about floating-point arithmetic", journal = "ACM Computing Surveys", year = 1991, volume = 23, pages = "5--48", abstract = "Floating-point arithmetic is considered as esoteric subject by many people. This is rather surprising, because floating-point is ubiquitous in computer systems: Almost every language has a floating-point datatype; computers from PCs to supercomputers have floating-point accelerators; most compilers will be called upon to compile floating-point algorithms from time to time; and virtually every operating system must respond to floating-point exceptions such as overflow. This paper presents a tutorial on the aspects of floating-point that have a direct impact on designers of computer systems. It begins with background on floating-point representation and rounding error, continues with a discussion of the IEEE floating point standard, and concludes with examples of how computer system builders can better support floating point.", doi = {10.1145/103162.103163}, } @Article{Goldberg:1992, author = "David Goldberg", title = "The design of floating-point data types", journal = "ACM Letters on Programming Languages and Systems", year = 1992, volume = 1, pages = "138--151", abstract = "The issues involved in designing the floating-point part of a programming language are discussed. Looking at the language specifications for most existing languages might suggest that this design involves only trivial issues, such as whether to have one or two types of REALs or how to name the functions that convert from INTEGER to REAL. It is shown that there are more significant semantic issues involved. After discussing the trade-offs for the major design decisions, they are illustrated by presenting the design of the floating-point part of the Modula-3 language.", doi = {10.1145/151333.151373}, } @Book{Golub:1996, title = "Matrix computations", publisher = "Johns Hopkins University Press", year = "1996", author = "Gene H. Golub and Charles F. Van Loan", address = "Baltimore", edition = "3rd ed.", ISBN = "978-0-8018-5413-2", googlebookid = {mlOa7wPX6OYC}, } @Article{Gomes:1996, author = "Carla Pedro Gomes and Julie Hsu", title = "{ABA}: an assignment based algorithm for resource allocation", journal = "SIGART Bulletin", year = 1996, volume = 7, pages = "2--8", abstract = "In this paper we describe an approach to allocating a set of tasks to a set of resources or processors. The novelty of the approach has to do with the way decisions are performed. Rather than making one decision about one resource (or one task) at a time, several decisions concerning multiple resources and multiple tasks are made at a time. The algorithm incorporates the formulation of the assignment problem. Furthermore, knowledge about temporal constraints between activities is exploited to improve the computational efficiency of the algorithm. The algorithm is very flexible, allowing for the incorporation of different types of constraints as well as the consideration of non-equivalent resources.", doi = {10.1145/230062.230063}, } @Misc{Group:2007, author = "ANU Data Mining Group", title = "Parallel Large Scale Techniques for High-Performance Record Linkage", year = 2007, annote = "Potential user of parallel weighted bipartite matching.", URL = "http://datamining.anu.edu.au/projects/linkage.html", } @Article{Hager:1984, author = "William W. Hager", title = "Condition Estimates", journal = "SIAM Journal on Scientific and Statistical Computing", year = "1984", volume = "5", pages = "311--316", month = jun, doi = {10.1137/0905023}, } @Book{Harary:1999, title = "Graph theory", publisher = "Perseus Books", year = 1999, author = "Frank Harary", googlebookid = {9nOljWrLzAAC}, address = "Reading Mass.", ISBN = "978-0-201-41033-4", } @Unpublished{Harvey:2006, author = "Nicholas J. A. Harvey", title = "Algebraic Structures and Algorithms for Matching and Matroid Problems (Preliminary Version)", note = "arXiv.org eprint", month = jan, year = "2006", abstract = "Basic path-matchings, introduced by Cunningham and Geelen (FOCS 1996), are a common generalization of matroid intersection and non-bipartite matching. The main results of this paper are a new algebraic characterization of basic path-matching problems and an algorithm for constructing basic path-matchings in $O(n^w)$ time, where n is the number of vertices and w is the exponent for matrix multiplication. Our algorithms are randomized, and our approach assumes that the given matroids are linear and can be represented over the same field. Our main results have interesting consequences for several special cases of path-matching problems. For matroid intersection, we obtain an algorithm with running time $O(nr^{w-1})=O(nr^{1.38})$, where the matroids have $n$ elements and rank $r$. This improves the long-standing bound of $O(nr^{1.62})$ due to Gabow and Xu (FOCS 1989). Also, we obtain a simple, purely algebraic algorithm for non-bipartite matching with running time $O(n^w)$. This resolves the central open problem of Mucha and Sankowski (FOCS 2004).", eprint = "cs/0601026", } @Article{Hauser:1996, author = "John R. Hauser", title = "Handling floating-point exceptions in numeric programs", journal = "ACM Transactions on Programming Languages and Systems", doi = {10.1145/227699.227701}, year = 1996, volume = 18, pages = "139--174", abstract = "There are a number of schemes for handling arithmetic exceptions that can be used to improve the speed (or alternatively the reliability) of numeric code. Overflow and underflow are the most troublesome exceptions, and depending on the context in which the exception can occur, they may be addressed either: (1) through a `brute force' reevaluation with extended range, (2) by reevaluating using a technique known as scaling, (3) by substituting an infinity or zero, or (4) in the case of underflow, with gradual underflow. In the first two of these cases, the offending computation is simply reevaluated using a safer but slower method. The latter two cases are cheaper, more automated schemes that ideally are built in as options within the computer system. Other arithmetic exceptions can be handled with similar methods. These and some other techniques are examined with an eye toward determining the support programming languages and computer systems ought to provide for floating-point exception handling. It is argued that the cheapest short-term solution would be to give full support to most of the required (as opposed to recommended) special features of the IEC/IEEE Standard for Binary Floating-Point Arithmetic. An essential part of this support would include standardized access from high-level languages to the exception flags defined by the standard. Some possibilities outside the IEEE Standard are also considered, and a few thought on possible better-structured support within programming languages are discussed.", } @InProceedings{He:2000, crossref = "sc14", author = "Yun He and Chris H. Q. Ding", title = "Using accurate arithmetics to improve numerical reproducibility and stability in parallel applications", pages = "225--234", abstract = "Numerical reproducibility and stability of large scale scientific simulations, especially climate modeling, on distributed memory parallel computers are becoming critical issues. In particular, global summation of distributed arrays is most susceptible to rounding errors, and their propagation and accumulation cause uncertainty in final simulation results. We analyzed several accurate summation methods and found that two methods are particularly effective to improve (ensure) reproducibility and stability: Kahan's self-compensated summation and Bailey's double-double precision summation. We provide an MPI operator MPLSUMDD to work with MPI collective operations to ensure a scalable implementation on large number of processors. The final methods are particularly simple to adopt in practical codes.", doi = {10.1023/A:1008153532043}, } @InProceedings{Hida:2001, crossref = "arith15", author = "Yozo Hida and Xiaoye S. Li and David H. Bailey", title = "Algorithms for quad-double precision floating point arithmetic", pages = "155--162", abstract = "A quad-double number is an unevaluated sum of four IEEE double precision numbers, capable of representing at least 212 bits of significand. We present the algorithms for various arithmetic operations (including the four basic operations and various algebraic and transcendental operations) on quad-double numbers. The performance of the algorithms, implemented in C++, is also presented.", doi = "10.1109/ARITH.2001.930115", } @Article{Higham:1988, author = "Nicholas J. Higham", title = "{FORTRAN} codes for estimating the one-norm of a real or complex matrix, with applications to condition estimation", journal = "ACM Transactions on Mathematical Software", year = 1988, volume = 14, pages = "381--396", abstract = "FORTRAN 77 codes SONEST and CONEST are presented for estimating the 1-norm ( or the infinity-norm) of a real or complex matrix, respectively. The codes are of wide applicability in condition estimation since explicit access to the matrix, $A$, is not required; instead, matrix-vector products $A x$ and $A^T x$ are computed by the calling program via a reverse communication interface. The algorithms are based on a convex optimization method for estimating the 1-norm of a real matrix devised by Hager. We derive new results concerning the behavior of Hager's method, extend it to complex matrices, and make several algorithmic modifications in order to improve the reliability and efficiency.", doi = {10.1145/50063.214386}, } @Article{Higham:1989, author = "Nicholas J. Higham and Desmond J. Higham", title = "Large Growth Factors in Gaussian Elimination with Pivoting", journal = "SIAM Journal on Matrix Analysis and Applications", year = "1989", volume = "10", pages = "155--164", month = apr, doi = {10.1137/0610012}, abstract = {The growth factor plays an important role in the error analysis of Gaussian elimination. It is well known that when partial pivoting or complete pivoting is used the growth factor is usually small, but it can be large. The examples of large growth usually quoted involve contrived matrices that are unlikely to occur in practice. We present real and complex $n \times n$ matrices arising from practical applications that, for any pivoting strategy, yield growth factors bounded below by $n / 2$ and $n$, respectively. These matrices enable us to improve the known lower bounds on the largest possible growth factor in the case of complete pivoting. For partial pivoting, we classify the set of real matrices for which the growth factor is $2^{n - 1} $. Finally, we show that large element growth does not necessarily lead to a large backward error in the solution of a particular linear system, and we comment on the practical implications of this result.}, mimsurl = {http://eprints.ma.man.ac.uk/356/}, } @InProceedings{Higham:1990, crossref = "na89", author = "Nicholas J. Higham", title = "How Accurate is Gaussian Elimination?", pages = "137--154", citeseerurl = "http://citeseer.ist.psu.edu/higham90how.html", } @Article{Higham:1991, author = "Nicholas J. Higham", title = "Iterative refinement enhances the stability of $QR$ factorization methods for solving linear equations", journal = "BIT Numerical Mathematics", year = "1991", volume = "31", pages = "447--468", abstract = "Iterative refinement is a well-known technique for improving the quality of an approximate solution to a linear system. In the traditional usage residuals are computed in extended precision, but more recent work has shown that fixed precision is sufficient to yield benefits for stability. We extend existing results to show that fixed precision iterative refinement renders anarbitrary linear equations solver backward stable in a strong, componentwise sense, under suitable assumptions. Two particular applications involving theQR factorization are discussed in detail: solution of square linear systems and solution of least squares problems. In the former case we show that one step of iterative refinement suffices to produce a small componentwise relative backward error. Our results are weaker for the least squares problem, but again we find that iterative refinement improves a componentwise measure of backward stability. In particular, iterative refinement mitigates the effect of poor row scaling of the coefficient matrix, and so provides an alternative to the use of row interchanges in the Householder QR factorization. A further application of the results is described to fast methods for solving Vandermonde-like systems.", doi = "10.1007/BF01933262", } @Article{Higham:1992, author = "Desmond J. Higham and Nicholas J. Higham", title = "Backward Error and Condition of Structured Linear Systems", journal = "SIAM Journal on Matrix Analysis and Applications", year = 1992, volume = 13, pages = "162--175", doi = {10.1137/0613014}, } @InProceedings{Higham:1994, crossref = "mathcomp43", author = "Nicholas J. Higham", title = "A Survey of Componentwise Perturbation Theory in Numerical Linear Algebra", pages = "49--77", citeseerurl = "http://citeseer.ist.psu.edu/295767.html", } @Book{Higham:1996, title = "Accuracy and stability of numerical algorithms", publisher = "Society for Industrial and Applied Mathematics", year = 1996, author = "Nicholas J. Higham", address = "Philadelphia", ISBN = "978-0-89871-355-8", googlebookid = {FJyBjjtHREQC}, } @Article{Higham:1997, author = "Nicholas J. Higham", title = "Iterative refinement for linear systems and {LAPACK}", journal = "IMA Journal of Numerical Analysis", year = "1997", volume = "17", pages = "495--509", month = oct, abstract = "The technique of iterative refinement for improving the computed solution to a linear system was used on desk calculators and computers in the 1940s and has remained popular. In the 1990s iterative refinement is well supported in software libraries, notably in LAPACK. Although the behaviour of iterative refinement in floating point arithmetic is reasonably well understood, the existing theory is not sufficient to justify the use of fixed precision iterative refinement in all the LAPACK routines in which it is implemented. We present analysis that provides the theoretical support needed for LAPACK. The analysis covers both mixed and fixed precision iterative refinement with an arbitrary number of iterations, makes only a general assumption on the underlying solver, and is relatively short. We identify some remaining open problems.", doi = "10.1093/imanum/17.4.495", } @Article{Higham:2000, author = "Nicholas J. Higham and Fran\c{c}oise Tisseur", title = "A Block Algorithm for Matrix 1-Norm Estimation, with an Application to 1-Norm Pseudospectra", journal = "SIAM Journal on Matrix Analysis and Applications", year = 2000, volume = 21, pages = "1185--1201", citeseerurl = "http://citeseer.ist.psu.edu/article/higham00block.html", doi = {10.1137/S0895479899356080}, } @Book{Higham:2002, title = "Accuracy and Stability of Numerical Algorithms", publisher = "SIAM: Society for Industrial and Applied Mathematics", year = "2002", author = "Nicholas J. Higham", pages = "680", edition = "2nd", month = aug, ISBN = "0-89871-521-0", googlebookid = {epilvM5MMxwC}, searchtags = "asna", } @InProceedings{Hough:2005, crossref = "arith17", author = "David Hough and Bill Hay and Jeff Kidder and E. Jason Riedy and Guy L. Steele Jr. and Jim Thomas", title = "Arithmetic Interactions: From Hardware to Applications", pages = "87--87", note = "See \hrefx{http://purl.oclc.org/NET/jason-riedy/resume/material/arith17-slides.pdf}{related presentation}", abstract = "The entire process of creating and executing applications that solve interesting problems with acceptable cost and accuracy involves a complex interaction among hardware, system software, programming environments, mathematical software libraries, and applications software, all mediated by standards for arithmetic, operating systems, and programming environments. This panel will discuss various issues arising among these various contending points of view, sometimes from the point of view of issues raised during the current IEEE 754R standards revision effort.", doi = "10.1109/ARITH.2005.10", } @Article{Hull:1994, author = "T. E. Hull and Thomas F. Fairgrieve and Ping-Tak Peter Tang", title = "Implementing complex elementary functions using exception handling", doi = {10.1145/178365.178404}, journal = "ACM Transactions on Mathematical Software", year = "1994", volume = "20", pages = "215--244", abstract = "Algorithms are developed for reliable and accurate evaluations of the complex elementary functions required in FORTRAN 77 and FORTRAN 90, namely, cabs, csqrt, cexp, clog, csin, and ccos. The algorithms are presented in a pseudocode that has a convenient exception-handling facility. A tight error bound is derived for each algorithm. Corresponding FORTRAN programs for an IEEE environment have also been developed to illustrate the practicality of the algorithms, and these programs have been tested very carefully to help confirm the correctness of the algorithms and their error bounds. The results of these tests are included in the paper, but the FORTRAN programs are not.", } @Article{IV:1992, author = "Nathaniel J. Davis IV and Barry A. Carpenter and Charles W. Glover and Jean-Christophe Culioli", title = "Parallel approaches to the solution of the assignment problem", journal = "Concurrency: Practice and Experience", year = 1992, volume = 4, pages = "163--184", abstract = "This paper discusses efforts to develop parallel algorithms that can be used to solve large-scale assignment problems typical of military battle management systems. Attempts to parallellze the classical Hungarian method are explored. Drawing on experimental data, a regresslon model is derived that relates the Hungarian method's run time to a cubic function of the number of parallel processors used in the solution. Four parallel heuristics for solving the assignment problem are developed and analysed. Two of them perform well in a parallel environment. The first, based on Vagel's approximation, can be used to identify a feasible, near-optimal assignment. The second algorithm partitions the assignment problem into independent subproblems across the parallel array. The resulting solution, although infeasible in the strict definition of the assignment problem, is seen to be reasonably good and can be obtained very rapidly. Either of these two heuristics are of potential use in the stringent computing environment of a real-time resource management system", doi = "10.1002/cpe.4330040205", } @Article{Jankowski:1977, author = "M. Jankowski and Henryk Woźniakowski", title = "Iterative refinement implies numerical stability", journal = "BIT Numerical Mathematics", year = 1977, volume = 17, pages = "303--311", abstract = "Suppose that a method $\phi$ computes an approximation of the exact solution of a linear system $A x = b$ with the relative error $q$, $q < 1$ and single precision, We prove that if all computations are performed in floating point arithmetic and single precision, then $\phi$ with iterative refinement is numerically stable and well-behaved whenever $q \|A\| \|A^{-1}\|$ is at most of order unity.", doi = "10.1007/BF01932150", } @Article{Jonker:1986, author = "Roy Jonker and Ton Volgenant", title = "Improving the Hungarian assignment algorithm", journal = "Operations Research Letters", year = 1986, volume = 5, pages = "171--175", month = oct, abstract = "We describe three easily implementable improvement for the Hungarian linear assignment algorithm. Computation times vary from about two to more than three times lower than previously, where the effectiveness increases with problem size. Furthermore, the algorithm is now less sensitive to the range of the cost coefficients. We also show that the Hungarian algorithm is essentially equivalent to assignment algorithms based on shortest augmenting paths.", doi = "10.1016/0167-6377(86)90073-8", } @Article{Jonker:1987, author = "Roy Jonker and Ton Volgenant", title = "A shortest augmenting path algorithm for dense and sparse linear assignment problems", journal = "Computing", year = "1987", volume = "38", pages = "325--340", month = dec, abstract = "We develop a shortest augmenting path algorithm for the linear assignment problem. It contains new initialization routines and a special implementation of Dijkstra's shortest path method. For both dense and sparse problems computational experiments show this algorithm to be uniformly faster than the best algorithms from the literature. A Pascal implementation is presented.", doi = "10.1007/BF02278710", } @Article{Karp:1994, author = "Richard M. Karp and Alexander H. G. Rinnooy Kan and Rakesh V. Vohra", title = "Average case analysis of a heuristic for the assignment problem", journal = "Mathematics of Operations Research", year = 1994, volume = 19, pages = "513--522", url = {http://links.jstor.org/sici?sici=0364-765X\%28199408\%2919\%3A3\%3C513\%3AACAOAH\%3E2.0.CO\%3B2-9}, abstract = {Our main contribution is an $O(n \log n)$ algorithm that determines with high probability a perfect matching in a random 2-out bipartite graph. We also show that this algorithm runs in $O(n)$ expected time. This algorithm can be used as a subroutine in an $O(n^{2})$ heuristic for the assignment problem. When the weights in the assignment problem are independently and uniformly distributed in the interval $[0, 1]$, we prove that the expected weight of the assignment returned by this heuristic is bounded above by $3+O(n^{-a})$, for some positive constant $a$.}, } @Article{Kim:1991, author = "Whoi-Yul Kim and Avinash C. Kak", title = "3-{D} Object Recognition Using Bipartite Matching Embedded in Discrete Relaxation", journal = "IEEE Transactions Pattern Analysis and Machine Intelligence", doi = {10.1109/34.75511}, year = "1991", volume = "13", pages = "224--251", abstract = "The authors show how large efficiencies can be achieved in model-based 3-D vision by combining the notions of discrete relaxation and bipartite matching. The computational approach presented is capable of pruning large segments of search space-an indispensable step when the number of objects in the model library is large and when recognition of complex objects with a large number of surfaces is called for. Bipartite matching is used for quick wholesale rejection of inapplicable models and for the determination of compatibility of a scene surface with a potential model surface taking into account relational considerations. The time complexity function associated with those aspects of the procedure that are implemented via bipartite matching is provided. The algorithms do not take more than a couple of iterations, even for objects with more than 30 surfaces.", } @TechReport{Korimort:2000, author = "Thomas Korimort and Rainer E. Burkard and Eranda Çela", title = "An interior point approach to weighted bipartite matching", month = sep, year = 2000, institution = "Institute of Mathematics, Technical University Graz", citeseerurl = "http://citeseer.ist.psu.edu/korimort00interior.html", } @Article{Kuhn:1955, author = "H. W. Kuhn", title = "The Hungarian method for the assignment problem", journal = "Naval Research Logistics Quarterly", year = "1955", volume = "2", pages = "83--97", abstract = "Assuming that numerical scores are available for the performance of each of n persons on each of n jobs, the `assignment problem' is the quest for an assignment of persons to jobs so that the sum of the n scores so obtained is as large as possible. It is shown that ideas latent in the work of two Hungarian mathematicians may be exploited to yield a new method of solving this problem", doi = "10.1002/nav.3800020109", } @Article{Langlois:2004, author = "Philippe Langlois", title = "More accuracy at fixed precision", journal = "Journal of Computational and Applied Mathematics", year = 2004, volume = 162, pages = "57--77", abstract = "Several different techniques and software intend to improve the accuracy of results computed in a fixed finite precision. Here we focus on the CENA method that processes an automatic correction of the first-order effect of the rounding errors the computation generates. This method provides a corrected result and a bound of the residual error for a class of algorithms we identify. We present the main features of the CENA method and illustrate its interests and limitations with examples.", doi = {10.1016/j.cam.2003.08.017}, } @Article{Lemeire:1976, author = "Frans Lemeire", title = "Equilibration of matrices to optimize backward numerical stability", journal = "BIT Numerical Mathematics", year = "1976", volume = "16", pages = "143--145", month = jun, abstract = "An equilibration is introduced that minimizes the maximum ratio of the upper bounds for the backward rounding error and the inherent error in the given data. This can be applied to the solution of systems of linear equations and to the linear eigenproblem.", doi = "10.1007/BF01931366", } @Article{Li:2002, author = "Xiaoye S. Li and James W. Demmel and David H. Bailey and Greg Henry and Yozo Hida and Jimmy Iskandar and W. Kahan and Suh Y. Kang and Anil Kapur and Michael C. Martin and Brandon J. Thompson and Teresa Tung and Daniel J. Yoo", title = "Design, implementation and testing of extended and mixed precision {BLAS}", journal = "ACM Transactions on Mathematical Software", year = 2002, volume = 28, pages = "152--205", doi = {10.1145/567806.567808}, abstract = "This article describes the design rationale, a C implementation, and conformance testing of a subset of the new Standard for the BLAS (Basic Linear Algebra Subroutines): Extended and Mixed Precision BLAS. Permitting higher internal precision and mixed input/output types and precisions allows us to implement some algorithms that are simpler, more accurate, and sometimes faster than possible without these features. The new BLAS are challenging to implement and test because there are many more subroutines than in the existing Standard, and because we must be able to assess whether a higher precision is used for internal computations than is used for either input or output variables. We have therefore developed an automated process of generating and systematically testing these routines. Our methodology is applicable to languages besides C. In particular, our algorithms used in the testing code will be valuable to all other BLAS implementors. Our extra precision routines achieve excellent performance---close to half of the machine peak Megaflop rate even for the Level 2 BLAS, when the data access is stride one.", } @Article{Li:2003, author = "Xiaoye S. Li and James W. Demmel", title = "Super{LU}\_{DIST}: {A} scalable distributed-memory sparse direct solver for unsymmetric linear systems", journal = "ACM Transactions on Mathematical Software", year = 2003, volume = 29, pages = "110--140", abstract = "We present the main algorithmic features in the software package SuperLU\_DIST, a distributed-memory sparse direct solver for large sets of linear equations. We give in detail our parallelization strategies, with a focus on scalability issues, and demonstrate the software's parallel performance and scalability on current machines. The solver is based on sparse Gaussian elimination, with an innovative static pivoting strategy proposed earlier by the authors. The main advantage of static pivoting over classical partial pivoting is that it permits a priori determination of data structures and communication patterns, which lets us exploit techniques used in parallel sparse Cholesky algorithms to better parallelize both LU decomposition and triangular solution on large-scale distributed machines.", doi = "10.1145/779359.779361", } @Article{Lim:2002, author = "Andrew Lim and Oon Wee Chong and Chi Chi-Hung", title = "A matching-based algorithm for page access sequencing in join processing", journal = "The Journal of Systems and Software", doi = {10.1016/S0164-1212(01)00076-0}, year = 2002, volume = 60, pages = "11--19", abstract = "One of the fundamental problems in relational database management is the handling of the join operation. Two of the problems are: (1) finding a page access sequence which requires the minimum number of buffer pages if there are no page reaccesses; and (2) finding a page access sequence which requires the minimum number of page reaccesses given a fixed buffer size, In general, a heuristic for the first problem can be converted to a corresponding heuristic to the second problem.In this paper, a new heuristic is proposed. The experimental results show that the new heuristic performs significantly better than existing heuristics for data which is modelled with geometric graphs, and no worse in others, for the first problem. It performs similarly better for the second problem if the fixed buffer size is not much less than the maximum buffer size generated in the first problem.", } @Article{Liu:1989, author = "Joseph W. H. Liu", title = "A graph partitioning algorithm by node separators", journal = "ACM Transactions on Mathematical Software", doi = {10.1145/66888.66890}, year = 1989, volume = 15, pages = "198--219", abstract = "A heuristic graph partitioning scheme is presented to determine effective node separators for undirected graphs. An initial separator is first obtained from the minimum degree ordering, an algorithm designed originally to produce fill-reducing orderings for sparse matrices. The separator is then improved by an iterative strategy based on some known results from bipartite graph matching. This gives an overall practical scheme in partitioning graphs. Experimental results are provided to demonstrate the effectiveness of this heuristic algorithm on graphs arising from sparse matrix applications.", } @TechReport{Livne:2003, author = "Oren E. Livne and Gene H. Golub", title = "Scaling by Binormalization", month = jun, year = 2003, abstract = "We present an iterative algorithm (BIN) for scaling all the rows and columns of a real symmetric matrix to unit 2-norm. We study the theoretical convergence properties and its relation to optimal conditioning.", number = {SCCM-03-12}, institution = "Stanford's Scientific Computing and Computational Mathematics Program", url = {http://www-sccm.stanford.edu/pub/sccm/sccm03-12.pdf}, citeseerurl = "http://citeseer.ist.psu.edu/657012.html", } @Book{Magnus:1999, title = "Matrix differential calculus with applications in statistics and econometrics", publisher = "John Wiley", year = 1999, author = "Jan R. Magnus and Heinz Neudecker", googlebookid = {0CXXdKKiIpQC}, address = "New York", edition = "Revised edition", isbn = {047198633X}, } @Book{Magnus:2007, author = {Jan R. Magnus and Heinz Neudecker}, title = {Matrix Differential Calculus with Applications in Statistics and Econometrics}, publisher = {John Wiley and Sons}, year = 2007, edition = {Third}, url = {http://center.uvt.nl/staff/magnus/mdc/}, } @TechReport{Marques:2005, author = "Osni A. Marques and E. Jason Riedy and Christof Vömel", title = "Benefits of {IEEE}-754 Features in Modern Symmetric Tridiagonal Eigensolvers", year = 2005, note = "Also issued as UCB//CSD-05-1414; expanded from SISC version", type = "LAPACK Working Note", number = 172, URL = "http://www.netlib.org/lapack/lawnspdf/lawn172.pdf", institution = {Netlib}, } @Article{Marques:2006, author = "Osni A. Marques and E. Jason Riedy and Christof Vömel", title = "Benefits of {IEEE}-754 Features in Modern Symmetric Tridiagonal Eigensolvers", journal = "SIAM Journal on Scientific Computing", year = "2006", volume = "28", pages = "1613--1633", doi = {10.1137/050641624}, abstract = "Bisection is one of the most common methods used to compute the eigenvalues of symmetric tridiagonal matrices. Bisection relies on the Sturm count: For a given shift sigma, the number of negative pivots in the factorization $T - \sigma I = LDL^T$ equals the number of eigenvalues of T that are smaller than sigma. In IEEE-754 arithmetic, the value $\infty$ permits the computation to continue past a zero pivot, producing a correct Sturm count when $T$ is unreduced. Demmel and Li showed [IEEE Trans. Comput., 43 (1994), pp. 983{\^a}??992] that using $\infty$ rather than testing for zero pivots within the loop could significantly improve performance on certain architectures. When eigenvalues are to be computed to high relative accuracy, it is often preferable to work with $LDL^T$ factorizations instead of the original tridiagonal $T$. One important example is the MRRR algorithm. When bisection is applied to the factored matrix, the Sturm count is computed from $LDL^T$ which makes differential stationary and progressive qds algorithms the methods of choice. While it seems trivial to replace $T$ by $LDL^T$, in reality these algorithms are more complicated: In IEEE-754 arithmetic, a zero pivot produces an overflow followed by an invalid exception (NaN, or ``Not a Number'') that renders the Sturm count incorrect. We present alternative, safe formulations that are guaranteed to produce the correct result. Benchmarking these algorithms on a variety of platforms shows that the original formulation without tests is always faster provided that no exception occurs. The transforms see speed-ups of up to 2.6x over the careful formulations. Tests on industrial matrices show that encountering exceptions in practice is rare. This leads to the following design: First, compute the Sturm count by the fast but unsafe algorithm. Then, if an exception occurs, recompute the count by a safe, slower alternative. The new Sturm count algorithms improve the speed of bisection by up to 2x on our test matrices. Furthermore, unlike the traditional tiny-pivot substitution, proper use of IEEE-754 features provides a careful formulation that imposes no input range restrictions.", ISSN = "1064-8275", } @Article{Marshall:1968, author = "Albert W. Marshall and Ingram Olkin", title = "Scaling of matrices to achieve specified row and column sums", journal = "Numerische Mathematik", year = 1968, volume = 12, pages = "83--90", abstract = "If $A$ is an $n\times n$ matrix with strictly positive elements, then according to a theorem of Sinkhorn, there exist diagonal matrices $D_1$ and $D_2$ with strictly positive diagonal elements such that $D_1 A D_2$ is doubly stochastic. This note offers an alternative proof of a generalization due to Brualdi, Parter and Scheider, and independently to Sinkhorn and Knopp, who show that $A$ need not be strictly positive, but only fully indecomposable. In addition, we show that the same scaling is possible (with $D_1 = D_2$) when $A$ is strictly copositive, and also discuss related scaling for rectangular matrices. The proofs given show that $D_1$ and $D_2$ can be obtained as the solution of an appropriate extremal problem. The scaled matrix $D_1 A D_2$ is of interest in connection with the problem of estimating the transition matrix of a Markov chain which is known to be doubly stochastic. The scaling may also be of interest as an aid in numerical computations.", doi = "10.1007/BF02170999", } @Article{McCullough:1999, author = "B. D. McCullough and H. D. Vinod", title = "The Numerical Reliability of Econometric Software", journal = "Journal of Economic Literature", year = 1999, abstract = {A recent software review in this journal (McCullough, 1999) raises serious questions about EViews' performance on nonlinear least squares problems. We demonstrate that after correcting errors in the paper and adjusting convergence tolerances, EViews performance was comparable with the other benchmarked software.}, volume = 37, pages = "633--665", month = jun, ISSN = 00220515, URL = "http://links.jstor.org/sici?sici=0883-7252\%28200001\%2F02\%2915\%3A1\%3C107\%3AESRANE\%3E2.0.CO\%3B2-J", } @Article{McCullough:2003, author = "B. D. McCullough and H. D. Vinod", title = "Econometrics and Sofware", journal = "The Journal of Economic Perspectives", year = "2003", volume = "17", pages = "223--224", ISSN = "08953309", URL = "http://links.jstor.org/sici?sici=0895-3309\%28200324\%2917\%3A1\%3C223\%3AEAS\%3E2.0.CO\%3B2-8", } @Article{Menon:1967, author = "M. V. Menon", title = "Reduction of a Matrix with Positive Elements to a Doubly Stochastic Matrix", journal = "Proceedings of the American Mathematical Society", year = 1967, volume = 18, pages = "244--247", month = apr, ISSN = 00029939, URL = "http://links.jstor.org/sici?sici=0002-9939\%28196704\%2918\%3A2\%3C244\%3AROAMWP\%3E2.0.CO\%3B2-X", } @Article{Miller:1975, author = "Webb Miller", title = "Computational Complexity and Numerical Stability", journal = "SIAM Journal on Computing", year = "1975", volume = "4", pages = "97--107", month = jun, doi = {10.1137/0204009}, } @Article{Miller:1981, author = "Webb Miller", title = "A remark on gradual underflow", journal = "Computing", year = 1981, volume = 27, pages = "217--225", abstract = "Sometimes computational errors are easy to understand once all the uses, as an operand, of each computed value are known. Directed graphs provide a notational device for displaying these dependencies. Simple graph arguments show that gradual underflow improves the performance of certain computational procedures but not others. (Some procedures require deeper analysis.) Here we use graph arguments to show that if computer arithmetic is augmented with a `denormal zero' then errors from gradual underflow are always comparable to the uncertainty due to rounding error, though the comparison involves a factor that can grow exponentially with the number of arithmetic operations. Thus for procedures where existence of a denormal zero is unnecessary and the exponential growth is impossible, gradual underflow diminishes the noise from underflow to a level that can safely be ignored.", doi = "10.1007/BF02237979", } @Article{Mulmuley:1987, author = "Ketan Mulmuley and Umesh V. Vazirani and Vijay V. Vazirani", title = "Matching is as easy as matrix inversion", journal = "Combinatorica", year = 1987, volume = 7, pages = "105--113", month = mar, abstract = "Abstract We present a new algorithm for finding a maximum matching in a general graph. The special feature of our algorithm is that its only computationally non-trivial step is the inversion of a single integer matrix. Since this step can be parallelized, we get a simple parallel ($RNC^2$) algorithm. At the heart of our algorithm lies a probabilistic lemma, the isolating lemma. We show other applications of this lemma to parallel computation and randomized reductions.", doi = "10.1007/BF02579206", } @Article{Neal:1991, author = {George Poole and Larry Neal}, title = {A geometric analysis of Gaussian elimination. I}, journal = {Linear Algebra and its Applications}, year = {1991}, volume = {149}, pages = {249--272}, month = apr, doi = {10.1016/0024-3795(91)90337-V}, abstract = {The algorithm known as Gaussian elimination (GE) is fully understood in an exact-arithmetic environment. But in the finite-precision environment of computers, a full understanding of GE has been somewhat elusive. Heretofore, the analysis of this popular and important algorithm has been primarily from a numerical perspective. This paper seeks to analyze GE from a geometric perspective, and by so doing, (1) confirm the classical numerical analysis and (2) demonstrate a new level of understanding through the Euclidean geometry of GE.}, } @Article{Neal:1992, author = {George Poole and Larry Neal}, title = {Gaussian elimination: when is scaling beneficial? }, journal = {Linear Algebra and its Applications}, year = {1992}, volume = {162--164}, pages = {309--324}, month = feb, doi = {10.1016/0024-3795(92)90382-K}, abstract = {For the linear system $Ax = b$, the ordered pair $(D, F)$ of nonsingular diagonal matrices determine a scaling of the system through the two equations $D(AF) Y = Db$, $y = F^{−1} x$. When scaling is implemented along with partial pivoting (PP) to solve $Ax = b$ by Gaussian elimination (GE), it is well known that certain ordered pairs $(D, F)$ produce better computed solutions than those obtained in the absence of scaling, while others produce worse solutions. The two most common explanations of this fact are (1) $(D, F)$ modifies (magnifies or reduces) the classical condition number of $A$, and (2) $(D, F)$ modifies the magnitudes of the elements of $A$. In case (2), if a scaling yields entries of approximately the same magnitude, it is called an equilibration. Here, the underlying hyperplane geometry of both the sweepout phase and the back-substitution phase of GE is used to achieve a new level of understanding. We present what we believe to be a better explanation of how scaling or equilibration influences PP in the selection of pivot equations, a process critical to both phases of GE.}, } @Article{Neal:1991b, author = {George Poole and Larry Neal}, title = {A geometric analysis of Gaussian elimination. II}, journal = {Linear Algebra and its Applications}, year = {1992}, volume = {173}, pages = {239--264}, month = aug, doi = {10.1016/0024-3795(92)90432-A}, abstract = {In Part I of this work, we began a discussion of the numeric consequences of hyperplane orientation in Gaussian elimination. We continue this discussion by introducing the concept of \emph{back-substitution-phase error multipliers}. These error multipliers help to explain many of the previously unproven or poorly understood observations concerning Gaussian elimination in a finite-precision environment. A new pivoting strategy designed to control both sweepout phase roundoff error and back-substitution-phase instability is also presented. This new strategy, called \emph{rook's pivoting}, is only slightly more expensive than partial pivoting yet produces results comparable to those produced by complete pivoting.}, } @Article{Nievergelt:2003, author = "Yves Nievergelt", title = "Scalar fused multiply-add instructions produce floating-point matrix arithmetic provably accurate to the penultimate digit", doi = {10.1145/641876.641878}, journal = "ACM Transactions on Mathematical Software", year = "2003", volume = "29", pages = "27--48", abstract = "Combined with doubly compensated summation, scalar fused multiply-add instructions redefine the concept of floating-point arithmetic, because they allow for the computation of sums of real or complex matrix products accurate to the penultimate digit. Particular cases include complex arithmetic, dot products, cross products, residuals of linear systems, determinants of small matrices, discriminants of quadratic, cubic, or quartic equations, and polynomials.", } @Article{Nievergelt:2004, author = "Yves Nievergelt", title = "Analysis and applications of Priest's distillation", journal = "ACM Transactions on Mathematical Software", year = 2004, volume = 30, pages = "402--433", doi = {10.1145/1039813.1039815}, abstract = "Correcting an infinite loop in Douglas M. Priest's renormalization algorithm, the theory proved here supports streamlined algorithms to resolve the tablemaker's dilemma for the floating-point computation of real and complex sums and dot-products, properly rounded to the ultimate digit. Applications include computations of areas, volumes, and intersections.", } @Article{Oettli:1964, author = "W. Oettli and W. Prager", title = "Compatibility of approximate solution of linear equations with given error bounds for coefficients and right-hand sides", journal = "Numerische Mathematik", year = 1964, volume = 6, pages = "405--409", month = dec, abstract = "In the spirit of Wilkinson's [1, 2] backward error analysis, conditions are established under which a given approximate solution of a system of $n$ linear equations with $n$ unknowns is the exact solution of a modified system whose coefficients and right-hand sides are within a given neighborhood of those of the original system.", doi = "10.1007/BF01386090", } @Article{Ogita:2005, author = "Takeshi Ogita and Siegfried M. Rump and Shin'ichi Oishi", title = "Accurate Sum and Dot Product", journal = "SIAM Journal on Scientific Computing", year = "2005", volume = "26", pages = "1955--1988", doi = {10.1137/030601818}, abstract = {Algorithms for summation and dot product of floating-point numbers are presented which are fast in terms of measured computing time. We show that the computed results are as accurate as if computed in twice or $K$-fold working precision, $K\ge 3$. For twice the working precision our algorithms for summation and dot product are some 40\% faster than the corresponding XBLAS routines while sharing similar error estimates. Our algorithms are widely applicable because they require only addition, subtraction, and multiplication of floating-point numbers in the same working precision as the given data. Higher precision is unnecessary, algorithms are straight loops without branch, and no access to mantissa or exponent is necessary.}, } @Article{Olschowka:1996, author = "Markus Olschowka and Arnold Neumaier", title = "A new pivoting strategy for Gaussian elimination", journal = "Linear Algebra and its Applications", year = 1996, volume = 240, pages = "131--151", month = jun, doi = {10.1016/0024-3795(94)00192-8}, abstract = "This paper discusses a method for determining a good pivoting sequence for Gaussian elimination, based on an algorithm for solving assignment problems. The worst case complexity is $O(n^3)$; in practice $O(n^{2.25})$ operations are sufficient.", } @Book{Papadimitriou:1982, title = "Combinatorial optimization: algorithms and complexity", publisher = "Prentice-Hall, Inc", year = "1982", author = "Christos H. Papadimitriou and Kenneth Steiglitz", pages = "496", ISBN = "0-13-152462-3", } @Article{Parlett:1982, author = "Beresford N. Parlett and T. L. Landis", title = "Methods for scaling to doubly stochastic form", journal = "Linear Algebra and its Applications", year = 1982, volume = 48, pages = "53--79", month = dec, abstract = "New methods for scaling square, nonnegative matrices to doubly stochastic form are described. A generalized version of the convergence theorem of Sinkhorn and Knopp (1967) is proved and applied to show convergence for these new methods. Tests indicate that one of the new methods has significantly better average and worst-case behavior than the Sinkhorn-Knopp method; for one of the $3 \times 3$ examples of Marshall and Olkin (1968), SK requires 130 times as many operations as the new algorithm to achieve row and column sums 1+/-10-5.", doi = {10.1016/0024-3795(82)90099-4}, } @Article{Pena:2001, author = "J. M. Peña", title = "A Note on a Paper by {P}. Amodio and {F}. Mazzia", journal = "BIT Numerical Mathematics", year = "2001", volume = "41", pages = "640--643", number = "3", month = jun, abstract = "In 1999 Amodio and Mazzia presented a new backward error analysis for $LU$ factorization and introduced a new growth factor $\rho_n$. Their very interesting approach allowed them to obtain sharp error bounds. In particular, they derive nice results assuming that partial pivoting is used. However, the forward error bound for the solution of a linear system whose coefficient matrix $A$ is an $M$-matrix given in Theorem 4.1 of that paper is not correct. They first obtain a bound for the condition number $\kappa(U)$ assuming that one has the $LU$ factorization of an $M$-matrix and then they apply the bounds obtained when partial pivoting is used. But if $P$ is the permutation associated with partial pivoting then $PA = LU$ can fail to be an $M$-matrix and the bound for $\kappa(U)$ can be false, as shown in our Example 1.1. We also prove that, for a pivoting strategy presented in the paper, the growth factor of an $M$-matrix $A$ is $\rho_n(A) = 1$ and $\kappa(U) \leq \kappa(A)$, where $U$ is the upper triangular matrix obtained after applying such a pivoting strategy.", doi = "10.1023/A:1021983616138", } @Article{Petit:2003, author = "Jordi Petit", title = "Experiments on the minimum linear arrangement problem", journal = "Journal of Experimental Algorithmics", doi = {10.1145/996546.996554}, year = 2003, volume = 8, pages = 33, number = 6, abstract = "This paper deals with the Minimum Linear Arrangement problem from an experimental point of view. Using a testsuite of sparse graphs, we experimentally compare several algorithms to obtain upper and lower bounds for this problem. The algorithms considered include Successive Augmentation heuristics, Local Search heuristics and Spectral Sequencing. The testsuite is based on two random models and ``real life'' graphs. As a consequence of this study, two main conclusions can be drawn: On one hand, the best approximations are usually obtained using Simulated Annealing, which involves a large amount of computation time. Solutions found with Spectral Sequencing are close to the ones found with Simulated Annealing and can be obtained in significantly less time. On the other hand, we notice that there exists a big gap between the best obtained upper bounds and the best obtained lower bounds. These two facts together show that, in practice, finding lower and upper bounds for the Minimum Linear Arrangement problem is hard.", issn = {1084-6654}, URL = "http://www.jea.acm.org/2003/PetitMLAP/", } @Article{Pferschy:1997, author = "U. Pferschy", title = "Solution methods and computational investigations for the Linear Bottleneck Assignment Problem", journal = "Computing", year = "1997", volume = "59", pages = "237--258", abstract = "Abstract The Linear Bottleneck Assignment Problem (LBAP) is analyzed from a computational point of view. Beside a brief review of known algorithms new methods are developed using only sparse subgraphs for their computation. The practical behaviour of both types of algorithms is investigated. The most promising algorithm consists of computing a maximum cardinality matching with all edge costs smaller than a previously determined bound and augmenting this matching to an assignment. The methods on sparse subgraphs are useful in the case of memory restrictions and are superior if the subgraph selection can be improved by some previously generated structure. Other treated questions are how to select a suitable subgraph for the new methods, how to deal with non regular data and what connections to asymptotic results for the LBAP can be detected.", doi = "10.1007/BF02684443", } @Article{Pothen:1990, author = "Alex Pothen and Chin-Ju Fan", title = "Computing the block triangular form of a sparse matrix", journal = "ACM Transactions on Mathematical Software", year = 1990, volume = 16, pages = "303--324", abstract = "We consider the problem of permuting the rows and columns of a rectangular or square, unsymmetric sparse matrix to compute its block triangular form. This block triangular form is based on a canonical decomposition of bipartite graphs induced by a maximum matching and was discovered by Dulmage and Mendelsohn. We describe implementations of algorithms to compute the block triangular form and provide computational results on sparse matrices from test collections. Several applications of the block triangular form are also included.", doi = {10.1145/98267.98287}, } @PhdThesis{Priest:1992, author = "Douglas M. Priest", title = "On properties of floating point arithmetics: numerical stability and the cost of accurate computations", school = "University of California at Berkeley", year = "1992", } @Unpublished{Riedy:MTA, author = "E. Jason Riedy and Rich Vuduc", title = "Microbenchmarking the Tera {MTA}", note = "Cited, \hrefx{http://purl.oclc.org/NET/jason-riedy/resume/material/Tera-presentation.pdf}{presentation version available}", } @Unpublished{Riedy:2000, author = "E. Jason Riedy and Robert Szewczyk", title = "Power and Control in Networked Sensors", note = "Cited", year = 2000, abstract = "The fundamental constraint on a networked sensor is its energy consumption, since it may be either impossible or not feasible to replace its energy source. We analyze the power dissipation implications of implementing the network sensor with either a central processor switching between I/O devices or a family of processors, each dedicated to a single device. We present the energy measurements of the current generations of networked sensors, and develop an abstract description of tradeoffs between both designs.", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/power-and-control.pdf", } @Unpublished{Riedy:2001, author = "E. Jason Riedy", title = "Type System Support for Floating-Point Computation", note = "(draft)", year = 2001, abstract = "Floating-point arithmetic is often seen as untrustworthy. We show how manipulating precisions according to the following rules of thumb enhances the reliability of and removes surprises from calculations: Store data narrowly, compute intermediates widely, and derive properties widely. Further, we describe a typing system for floating point that both supports and is supported by these rules. A single type is established for all in- termediate computations. The type describes a precision at least as wide as all inputs to and results from the computation. Picking a single type provides benefits to users, compilers, and interpreters. The type system also extends cleanly to encompass intervals and higher precisions.", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/type-support-for-fp.pdf", } @Misc{Riedy:2002, author = "E. Jason Riedy", title = "Parallel Bipartite Matching for Sparse Matrix Computation", year = 2002, note = "Third Bay Area Scientific Computing Day", address = "Livermore, CA", } @Misc{Riedy:2003, author = "E. Jason Riedy", title = "Parallel Bipartite Matching for Sparse Matrix Computations", year = "2003", note = "SIAM Conference on Computational Science and Engineering", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/siam-cse03-poster.pdf", } @Misc{Riedy:2003a, author = "E. Jason Riedy", title = "Practical Alternatives for Parallel Pivoting", year = 2003, note = "SIAM Annual Meeting", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/siam-am03.pdf", } @Misc{Riedy:2004, author = "E. Jason Riedy", title = "Parallel Weighted Bipartite Matching and Applications", month = feb, year = 2004, note = "SIAM Parallel Processing for Scientific Computing", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/pp04.pdf", } @Misc{Riedy:2004a, author = "E. Jason Riedy", title = "Sparse Data Structures for Weighted Bipartite Matching", month = feb, year = "2004", note = "SIAM Workshop on Combinatorial Scientific Computing", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/csc04.pdf", } @Misc{Riedy:2005, author = "E. Jason Riedy", title = "Modern Language Tools and 754{R}", year = 2005, note = "ARITH'05", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/arith17-slides.pdf", } @Misc{Riedy:2005a, author = "E. Jason Riedy", title = "Parallel Combinatorial Computing and Sparse Matrices", month = feb, year = 2005, note = "SIAM Conference on Computational Science and Engineering", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/cse05.pdf", } @Misc{Riedy:2005b, author = "E. Jason Riedy and Yozo Hida and James W. Demmel", title = "The Future of {LAPACK} and Sca{LAPACK}", year = 2005, note = "Robert C. Thompson Matrix Meeting", abstract = "We are planning new releases of the widely used LAPACK and ScaLAPACK numerical linear algebra libraries. Based on an on-going user survey and research by many people, we are proposing the following improvements: Faster algorithms (including better numerical methods, memory hierarchy optimizations, parallelism, and automatic performance tuning to accomodate new architectures), more accurate algorithms (including better numerical methods, and use of extra precision), expanded functionality (including updating and downdating, new eigenproblems, etc. and putting more of LAPACK into ScaLAPACK), and improved ease of use (friendlier interfaces in multiple languages). To accomplish these goals we are also relying on better software engineering techniques and contributions from collaborators at many institutions. This is joint work with Jack Dongarra.", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/future-of-scalapack.pdf", } @Misc{Riedy:2006, author = "E. Jason Riedy", title = "Making Static Pivoting Dependable", year = 2006, note = "Seventh Bay Area Scientific Computing Day", abstract = "For sparse LU factorization, dynamic pivoting tightly couples symbolic and numerical computation. Dynamic structural changes limit parallel scalability. Demmel and Li use static pivoting in distributed SuperLU for performance, but intentionally perturbing the input may lead silently to erroneous results. Are there experimentally stable static pivoting heuristics that lead to a dependable direct solver? The answer is currently a qualified yes. Current heuristics fail on a few systems, but all failures are detectable.", address = "Livermore, CA", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/bascd2006-poster.pdf", } @Article{Rigal:1967, author = "J. L. Rigal and J. Gaches", title = "On the Compatibility of a Given Solution With the Data of a Linear System", doi = {10.1145/321406.321416}, journal = "Journal of the ACM", year = 1967, volume = 14, pages = "543--548", abstract = "Some new theorems generalizing a result of Oettli and Prager are applied to the a posteriori analysis of the compatibility of a computed solution to the uncertain data of a linear system (or of a polynomial equation).", } @TechReport{Ruiz:2001, author = "Daniel Ruiz", title = "A scaling algorithm to equilibrate both row and column norms in matrices", month = sep, year = 2001, abstract = "We present an iterative procedure which asymptotically scales the infinity norm of both rows and columns in a matrix to 1. This scaling strategy exhibits some optimality properties and additionally preserves symmetry. The algorithm also shows fast linear convergence with an asymptotic rate of 1/2. We discuss possible extensions of such an algorithm when considering the one-norm or the two norm of the rows and columns of the given matrix, and give the proof of its convergence when the matrix pattern satis?es some common properties.", institution = "Rutherford Appleton Laboratory", URL = "ftp://ftp.numerical.rl.ac.uk/pub/reports/drRAL2001034.pdf", } @Book{Saad:2003, title = "Iterative methods for sparse linear systems", publisher = "SIAM", year = 2003, author = "Yousef Saad", address = "Philadelphia", edition = "2nd ed.", ISBN = "978-0-89871-534-7", googlebookid = {ZdLeBlqYeF8C}, } @Misc{Sankowski:2006, author = "Piotr Sankowski", title = "Weighted Bipartite Matching in Matrix Multiplication Time", year = "2006", abstract = "In this paper we consider the problem of finding maximum weighted matchings in bipartite graphs with nonnegative integer weights. The presented algorithm for this problem work in $\tilde{O}(W n^\omega)$ time, where $\omega$ is the matrix multiplication exponent, and $W$ is the highest edge weight in the graph. As a consequence of this result we obtain time algorithms for computing: minimum weight bipartite vertex cover, single source shortest paths and minimum weight vertex disjoint $s-t$ paths.", journal = "Automata, Languages and Programming", doi = "10.1007/11786986_25", } @Article{Sinkhorn:1967, author = "Richard Sinkhorn", title = "Diagonal Equivalence to Matrices with Prescribed Row and Column Sums", journal = "The American Mathematical Monthly", year = 1967, volume = 74, pages = "402--405", month = apr, ISSN = 00029890, URL = "http://links.jstor.org/sici?sici=0002-9890\%28196704\%2974\%3A4\%3C402\%3ADETMWP\%3E2.0.CO\%3B2-9", } @Article{Skeel:1979, author = "Robert D. Skeel", title = "Scaling for Numerical Stability in Gaussian Elimination", journal = "Journal of the ACM", year = "1979", volume = "26", pages = "494--526", doi = {10.1145/322139.322148}, } @Article{Skeel:1980, author = "Robert D. Skeel", title = "Iterative Refinement Implies Numerical Stability for Gaussian Elimination", journal = "Mathematics of Computation", year = 1980, volume = 35, pages = "817--832", month = jul, abstract = "Because of scaling problems, Gaussian elimination with pivoting is not always as accurate as one might reasonably expect. It is shown that even a single iteration of iterative refinement in single precision is enough to make Gaussian elimination stable in a very strong sense. Also, it is shown that without iterative refinement row pivoting is inferior to column pivoting in situations where the norm of the residual is important.", ISSN = 00255718, URL = "http://www.jstor.org/stable/2006197", } @Article{Sluis:1970, author = "A. van der Sluis", title = "Condition, equilibration and pivoting in linear algebraic systems", journal = "Numerische Mathematik", year = "1970", volume = "15", pages = "74--86", month = may, doi = "10.1007/BF02165662", } @Article{Storoey:1997, author = "Sverre Storøy and Tor Sørevik", title = "Massively parallel augmenting path algorithms for the assignment problem", journal = "Computing", year = 1997, volume = 59, pages = "1--16", number = 1, month = mar, url = "\url{http://www.ii.uib.no/~tors/papers/assign.ps}", doi = "10.1007/BF02684400", publisher = "Springer Wien", } @Article{Strassen:1969, author = "Volker Strassen", title = "Gaussian elimination is not optimal", journal = "Numerische Mathematik", year = "1969", volume = "13", pages = "354--356", doi = "10.1007/BF02165411", } @Article{Tanimoto:1978, author = "Steven L. Tanimoto and Alon Itai and Michael Rodeh", title = "Some Matching Problems for Bipartite Graphs", journal = "Journal of the ACM", year = 1978, volume = 25, pages = "517--525", doi = {10.1145/322092.322093}, } @Article{Tosovic:1973, author = "Ljubomir B. Tosovic", title = "Some Experiments on Sparse Sets of Linear Equations", journal = "SIAM Journal on Applied Mathematics", year = "1973", volume = "25", pages = "142--148", number = "2", month = sep, abstract = "This paper describes five pivot selection algorithms in inversion of a large sparse matrix when using the method of Gaussian elimination. Some numerical experiments are presented. Since the pivots are chosen partly on the basis of element magnitudes, it is assumed that all the matrix elements are of comparable size and two scaling algorithms are discussed based on the assumption that the given matrix can be scaled to this form.", doi = "10.1137/0125018", ISSN = "00361399", URL = "http://links.jstor.org/sici?sici=0036-1399\%28197309\%2925\%3A2\%3C142\%3ASEOSSO\%3E2.0.CO\%3B2-X", } @article{Trefethen:1985, author = {Lloyd N. Trefethen}, title = {Three mysteries of Gaussian elimination}, journal = {SIGNUM Newsl.}, volume = {20}, number = {4}, year = {1985}, issn = {0163-5778}, pages = {2--5}, doi = {10.1145/1057954.1057955}, publisher = {ACM}, address = {New York, NY, USA}, abstract = {If numerical analysts understand anything, surely it must be Gaussian elimination. This is the oldest and truest of numerical algorithms. To be precise, I am speaking of Gaussian elimination with partial pivoting, the universal method for solving a dense, unstructured $n \times n$ linear system of equations $Ax = b$ on a serial computer. This algorithm has been so successful that to many of us, Gaussian elimination and $Ax = b$ are more or less synonymous. The chapter headings in the book by Golub and Van Loan are typical -- along with "Orthogonalization and Least Squares Methods," "The Symetric Eigenvalue Problem," and the rest, one finds "Gaussian Elimination," not "Linear Systems of Equations."}, } @article{Trefethen:1990, author = {Lloyd N. Trefethen and Robert S. Schreiber}, title = {Average-Case Stability of Gaussian Elimination}, publisher = {SIAM}, year = 1990, journal = {SIAM Journal on Matrix Analysis and Applications}, volume = 11, number = 3, pages = {335--360}, doi = {10.1137/0611023}, abstract = {Gaussian elimination with partial pivoting is unstable in the worst case: the "growth factor" can be as large as $2^{n - 1} $, where $n$ is the matrix dimension, resulting in a loss of $n - 1$ bits of precision. It is proposed that an average-case analysis can help explain why it is nevertheless stable in practice. The results presented begin with the observation that for many distributions of matrices, the matrix elements after the first few steps of elimination are approximately normally distributed. From here, with the aid of estimates from extreme value statistics, reasonably accurate predictions of the average magnitudes of elements, pivots, multipliers, and growth factors are derived. For various distributions of matrices with dimensions $n\leqq 1024$, the average growth factor (normalized by the standard deviation of the initial matrix elements) is within a few percent of $n^{2/3} $ for partial pivoting and approximately $n^{1/2} $ for complete pivoting. The average maximum element of the residual with both kinds of pivoting appears to be of magnitude $O(n)$, as compared with $O ( n^{1/2} )$ for QR factorization.The experiments and analysis presented show that small multipliers alone are not enough to explain the average-case stability of Gaussian elimination; it is also important that the correction introduced in the remaining matrix at each elimination step is of rank 1. Because of this low-rank property, the signs of the elements and multipliers in Gaussian elimination are not independent, but are interrelated in such a way as to retard growth. By contrast, alternative pivoting strategies involving high-rank corrections are sometimes unstable even though the multipliers are small.}, } @Article{Tutte:1947, author = "William Thomas Tutte", title = "The Factorization of Linear Graphs", journal = "Journal of the London Mathematical Society", year = 1947, volume = "s1-22", pages = "107--111", URL = "http://jlms.oxfordjournals.org/cgi/reprint/s1-22/2/107", } @InProceedings{Wein:1990, crossref = "fmpc90", author = "Joel M. Wein and Stavros A. Zenios", title = "Massively parallel auction algorithms for the assignment problem", pages = "90--99", abstract = "Alternative approaches to the massively parallel implementation of D. P. Bertsekas' auction algorithm (see Ann. Oper. Res., vol.14, p.105-23, 1988) on the Connection Machine CM2 are discussed. The most efficient implementation is a hybrid Jacobi/Gauss-Seidel implementation. It exploits two different levels of parallelism and an efficient way of communicating the data between them without the need to perform general router operations across the hypercube network. The implementations are evaluated empirically, solving large, dense problems", doi = {10.1109/FMPC.1990.89444}, } @article{Wein:1991, author = {Joel M. Wein and Stavros A. Zenios}, title = {On the massively parallel solution of the assignment problem}, journal = {Journal of Parallel and Distributed Computing}, volume = 13, number = 2, year = 1991, issn = {0743-7315}, pages = {228--236}, doi = {10.1016/0743-7315(91)90092-N}, publisher = {Academic Press, Inc.}, address = {Orlando, FL, USA}, abstract = {In this paper we discuss the design, implementation, and effectiveness of massively parallel algorithms for the solution of large-scale dense assignment problems. In particular, we study the auction algorithm of Bertsekas, an algorithm based on the method of multipliers of Hestenes and Powell, and an algorithm based on the alternating direction method of multipliers of Eckstein. We discuss alternative approaches to the massively parallel implementation of the auction algorithm, including Jacobi, Gauss-Seidel, and a hybrid scheme. The hybrid scheme, in particular, exploits two different levels of parallelism and an efficient way of communicating the data between them without the need to perform general router operations across the hypercube network. We then study the performance of massively parallel implementations of the two methods of multipliers. Implementations are carried out on the Connection Machine CM-2, and the algorithms are evaluated empirically with the solution of large-scale problems. The hybrid scheme significantly outperforms all of the other methods and gives the best computational results to date for a massively parallel solution to this problem.}, } @InProceedings{Wilson:1997, crossref = "spie3166", author = "Joseph N. Wilson and E. Jason Riedy", title = "Efficient {SIMD} evaluation of image processing programs", pages = "199--210", abstract = "SIMD parallel systems have been employed for image processing and computer vision applications since their inception. This paper describes a system in which parallel programs are implemented using a machine-independent, retargetable object library that provides SIMD execution on the Lockheed Martin PAL-I SIMD parallel processor. Programs' performance on this machine is improved through on-the-fly execution analysis and scheduling. We describe the relevant elements of the system structure, the general scheme for execution analysis, and the current cost model for scheduling.", doi = "10.1117/12.279618", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/ia-cost.pdf", } @InBook{Wilson:1999, crossref = "vircip", pages = "523--542", chapter = 17, title = "An Image Algebra Based {SIMD} Image Processing Environment", author = "Joseph N. Wilson and E. Jason Riedy and Gerhard X. Ritter and Hongchi Shi", abstract = "SIMD parallel computers have been employed for image related applications since their inception. They have been leading the way in improving processing speed for those applications. However, current parallel programming technologies have not kept pace with the performance growth and cost decline of parallel hardware. A highly usable parallel software development environment is needed. This chapter presents a computing environment that integrates a SIMD mesh architecture with image algebra for high-performance image processing applications. The environment describes parallel programs through a machine-independent, retargetable image algebra object library that supports SIMD execution on the Lockheed Martin PAL-I parallel computer. Program performance on this machine is improved through on-the-fly execution analysis and scheduling. We describe the relevant elements of the system structure, outline the scheme for execution analysis, and provide examples of the current cost model and scheduling system.", URL = "http://purl.oclc.org/NET/jason-riedy/resume/material/ia-simd-chap.pdf", } @Book{vircip, ISBN = "0-8247-1928-X", title = "Visual Information Representation, Communication, and Image Processing", booktitle = "Visual Information Representation, Communication, and Image Processing", googlebookid = "MeVdKQ4JYt4C", publisher = "Marcel Dekker", year = 1999, editor = "C. W. Chen and Y. Q. Zhang", address = "New York", } @Book{Wright:1987, title = "Primal-Dual Interior-Point Methods", publisher = "Society for Industrial Mathematics", year = "1987", author = "Stephen J. Wright", pages = "309", ISBN = "0-89871-382-X", googlebookid = {jwV5zlj0laoC}, } @article{Wright:1993, author = {Stephen J. Wright}, title = {A Collection of Problems for Which Gaussian Elimination with Partial Pivoting is Unstable}, publisher = {SIAM}, year = 1993, journal = {SIAM Journal on Scientific Computing}, volume = 14, number = 1, pages = {231--238}, keywords = {two-point boundary value problems; Gaussian elimination; partial pivoting}, doi = {10.1137/0914013}, abstract = {A significant collection of two-point boundary value problems is shown to give rise to linear systems of algebraic equations on which Gaussian elimination with row partial pivoting is unstable when standard solution techniques are used.}, } @Article{Zaki:1995, author = "Hossam A. Zaki", title = "A comparison of two algorithms for the assignment problem", journal = "Computational Optimization and Applications", year = 1995, volume = 4, pages = "23--45", abstract = "State-of-the-art computational results have shown that the shortest augmenting path (SAP) methods are more efficient than other primal-dual and primal-simplex based methods for solving the linear assignment problem on uniprocessor computers. There is, however, some controversy concerning their merits when compared with Bertsekas' auction algorithm on multiprocessor computers. In this study we investigate the performance of these competing methods on the Alliant FX/8. For each method, theoretical motivation, sources of parallelism and computational results are presented.", doi = "10.1007/BF01299157", } @Proceedings{arith15, title = "Proceedings of the 15th {IEEE} Symposium on Computer Arithmetic", booktitle = "Proceedings of the 15th {IEEE} Symposium on Computer Arithmetic", year = 2001, month = jun, ISBN = "0-7695-1150-3", googlebookid = {3Wm8GAAACAAJ}, } @Proceedings{arith17, title = "Proceedings of the 17th {IEEE} Symposium on Computer Arithmetic", booktitle = "Proceedings of the 17th {IEEE} Symposium on Computer Arithmetic", year = "2005", month = jun, ISBN = "0-7695-2366-8", googlebookid = {PspVAAAAMAAJ}, } @Proceedings{cad1998, title = "Proceedings of the 1998 {IEEE}/{ACM} international conference on Computer-aided design", booktitle = "Proceedings of the 1998 {IEEE}/{ACM} international conference on Computer-aided design", year = 1998, address = "San Jose, California, United States", publisher = "ACM Press", ISBN = "1-58113-008-2", googlebookid = {fwpWAAAAMAAJ}, } @Proceedings{fmpc90, title = "Proceedings of the Third Symposium on the Frontiers of Massively Parallel Computation", booktitle = "Proceedings of the Third Symposium on the Frontiers of Massively Parallel Computation", year = "1990", publisher = {IEEE Computer Society}, googlebookid = {2bPCAAAACAAJ}, } @Proceedings{focs37, title = "Proceedings of the 37th Annual Symposium on Foundations of Computer Science", booktitle = "Proceedings of the 37th Annual Symposium on Foundations of Computer Science", year = 1996, publisher = "IEEE Computer Society", googlebookid = {Qj5vKAAACAAJ}, } @Proceedings{ipps6, title = "Proceedings of the Sixth International Parallel Processing Symposium", booktitle = "Proceedings of the Sixth International Parallel Processing Symposium", year = 1992, } @Proceedings{mathcomp43, title = "Mathematics of Computation 1943--1993: {A} Half Century of Computational Mathematics", booktitle = "Mathematics of Computation 1943--1993: {A} Half Century of Computational Mathematics", year = 1994, editor = "Walter Gautschi", volume = 48, publisher = "American Mathematical Society, Providence, RI, USA", googlebookid = {vSdUw1HRuwMC}, isbn = 0821802917, } % url = {http://books.google.com/books?id=vSdUw1HRuwMC}, @Proceedings{na89, title = "Proceedings of the 13th Dundee Conference on Numerical Analysis, 1989", booktitle = "Proceedings of the 13th Dundee Conference on Numerical Analysis, 1989", year = 1990, editor = "D. F. Griffiths and G. A. Watson", volume = 228, address = "Essex, UK", publisher = "Longman Scientific and Technical", isbn = 0582059232, } @Proceedings{para06, title = "Proceedings of the workshop on state-of-the-art in scientific and parallel computing (Para06)", booktitle = "Proceedings of the workshop on state-of-the-art in scientific and parallel computing (Para06)", year = "2006", series = "Springer's Lecture Notes in Computer Science", address = "Umeå, Sweden", month = jun, publisher = "Springer", } @Proceedings{sc14, title = "Proceedings of the 14th international conference on Supercomputing", booktitle = "Proceedings of the 14th international conference on Supercomputing", year = 2000, address = "Santa Fe, New Mexico, United States", publisher = "ACM Press", ISBN = "1-58113-270-0", } @Proceedings{spaa14, title = "Proceedings of the fourth annual {ACM} symposium on Parallel Algorithms and Architectures", booktitle = "Proceedings of the fourth annual {ACM} symposium on Parallel Algorithms and Architectures", year = "1992", address = "San Diego, California, United States", ISBN = "0-89791-483-X", publisher = "ACM Press", } @Proceedings{spie3166, title = "Parallel and Distributed Methods for Image Processing", booktitle = "Parallel and Distributed Methods for Image Processing", year = 1997, month = sep, editor = "Hongchi Shi and Patrick C. Coffield", series = "Proceedings of SPIE", volume = 3166, address = "San Diego, CA", } @Proceedings{stoc18, title = "Proceedings of the eighteenth annual {ACM} symposium on Theory of computing", booktitle = "Proceedings of the eighteenth annual {ACM} symposium on Theory of computing", year = "1986", address = "Berkeley, California, United States", publisher = "ACM Press", ISBN = "0-89791-193-8", } @Book{Forsythe:1967, author = "George E. Forsythe and Cleve B. Moler", title = "Computer Solution of Linear Algebraic Systems", publisher = "Prent{\-}ice-Hall, Inc.", address = "Englewood Cliffs, NJ", pages = "xi + 148", year = "1967", LCCN = "QA297 .F57 1967", MRclass = "65.35", MRnumber = "MR0219223 (36 \#2306)", mrreviewer = "N. Gastinel", zmnumber = "0154.40401", mrnumber-url = "http://www.ams.org/mathscinet-getitem?mr=36\%232306", } @Book{Stewart:1990, author = "G. W. Stewart and Ji-Guang Sun", title = "Matrix perturbation theory", publisher = "Academic Press Inc.", address = "Boston, MA", year = "1990", pages = "xvi+365", ISBN = "0-12-670230-6", MRclass = "65-02 (65Fxx)", MRnumber = "92a:65017", mrrevr = "Dao Sheng Zheng", mrnumber-url = "http://www.ams.org/mathscinet-getitem?mr=92a\%3a65017", } @Misc{oai:osti.gov:465805, title = "Matrix market: a web resource for test matrix collection", author = "R. F. Boisvert", year = "2001", month = may # "~05", abstract = "We describe a repository of data for the testing of numerical algorithms and mathematical software for matrix computations. The repository is designed to accommodate both dense and sparse matrices, as well as software to generate matrices. It has been seeded with the well known Harwell-Boeing sparse matrix collection. The raw data files have been augmented with an integrated World Wide Web interface which describes the matrices in the collection quantitatively and visually, For example, each matrix has a Web page which details its attributes, graphically depicts its sparsity pattern, and provides access to the matrix itself in several formats. In addition, a search mechanism is included which allows retrieval of matrices based on a variety of attributes, such as type and size, as well as through free-text search in abstracts. The URL is http://math.nist.gov/MatrixMarket.", annote = "Boisvert, R.F.; Pozo, R.; Remington, K.[National Inst. of Standards and Technology, Gaithersburg, MD (United States)]; Barrett, R.F.[Los Alamos National Lab., NM (United States)]; Dongarra, J.J.[Tennessee Univ., Knoxville, TN (United States)]/[Oak Ridge National Lab., TN (United States)]", bibsource = "OAI-PMH server at prodweb.osti.gov", oai = "oai:osti.gov:465805", subject = "99 MATHEMATICS, COMPUTERS, INFORMATION SCIENCE, MANAGEMENT, LAW, MISCELLANEOUS ; MATRICES; COMPUTER CALCULATIONS; INTERNET; INFORMATION RETRIEVAL; DATA BASE MANAGEMENT; MATHEMATICS", URL = "http://www.osti.gov/servlets/purl/465805-mMda1h/webviewable/", } @Misc{ufcollection, author = {Timothy A. Davis}, title = {The {U}niversity of {F}lorida Sparse Matrix Collection}, url = {http://www.cise.ufl.edu/research/sparse/matrices}, howpublished = {(submitted to ACM Transactions on Mathematical Software)}, note = {See also NA Digest, vol. 92, no. 42, October 16, 1994, NA Digest, vol. 96, no. 28, July 23, 1996, and NA Digest, vol. 97, no. 23, June 7, 1997}, month = {June}, year = {2008}, } @article{wilkinson66, author = {H.J. Bowdler and R.S. Martin and G. Peters and J.H. Wilkinson}, title = {Handbook Series Linear Algebra: Solution of Real and Complex Systems of Linear equations}, journal ={Numerische Mathematik}, volume = {8}, pages = {217--234}, year = {1966} } @Book{Wilkinson:1963, author = "James Hardy Wilkinson", title = "Rounding Errors in Algebraic Processes", publisher = "Notes on Applied Science No. 32, Her Majesty's Stationery Office", address = "London", year = "1963", pages = "vi+161", note = "Also published by Prentice-Hall, Englewood Cliffs, NJ, USA. Reprinted by Dover, New York, 1994", ISBN = "0-486-67999-3", } @Book{Bellman:1970:IMA, author = "Richard Bellman", title = "Introduction to Matrix Analysis", edition = "Second", publisher = "Mc{\-}Graw-Hill", address = "New York", year = "1970", pages = "xxiii+403", googlebookid = "nGiMHQAACAAJ", librarythingurl = "http://www.librarything.com/work/753068/book/6004278", } @Article{demmel:illcond, author = "James W. Demmel", title = "The geometry of ill-conditioning", journal = "Journal of Complexity", volume = "3", year = "1987", number = "2", pages = "201--229", ISSN = "0885-064X", MRclass = "65F35 (12D10)", MRnumber = "88h:65091", mrreviewer = "Henry Wolkowicz", mrnumber-url = "http://www.ams.org/mathscinet-getitem?mr=88h\%3a65091", doi = "10.1016/0885-064X(87)90027-6", } @Book{boyd:book, author = "Stephen P. Boyd and Lieven Vandenberghe", title = "Convex Optimization", publisher = "Cambridge University Press", address = "Cambridge, UK", pages = "730", year = "2004", URL = "http://www.stanford.edu/~boyd/cvxbook.html", isbn = "0521833787", googlebookid = "mYm0bLd3fcoC", } @PhdThesis{yozothesis, author = {Yozo Hida}, title = {XXX}, school = "University of California at Berkeley", year = {2008}, } @book{eaton:2002, author = "John W. Eaton", title = "GNU Octave Manual", publisher = "Network Theory Limited", year = 2002, isbn = "0-9541617-2-6", url = "http://www.gnu.org/software/octave/doc/interpreter/", } @Manual{matlab, title = {\textsc{Matlab}\textsuperscript{\textregistered} 7 {P}rogramming {F}undamentals}, author = {The {MathWorks}\textsuperscript{\textsc{tm}}}, XXXorganization = {The {MathWorks}\textsuperscript{\textsc{tm}}}, address = {Natick, MA}, year = {2008}, url = {http://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matlab_prog.pdf}, note = {Available on-line in \href{http://www.mathworks.com/access/helpdesk/help/techdoc/index.html?/access/helpdesk/help/techdoc/matlab_prog/bqjgwp9.html}{easily viewable} or \href{http://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matlab_prog.pdf}{printable} form.} } @article{rump:cwise-illcond, author = {Siegfried M. Rump}, collaboration = {}, title = {Ill-Conditioned Matrices Are Componentwise Near to Singularity}, publisher = {SIAM}, year = {1999}, journal = {SIAM Review}, volume = {41}, number = {1}, pages = {102-112}, keywords = {componentwise distances; ill conditioned; ill posed; condition number; sign-real spectral radius}, doi = {10.1137/S0036144598323216} } @Article{npsingular, author = {Svatopluk Poljak and Ji\v{r}\'\i{} Rohn}, title = {Checking robust nonsingularity is NP-hard}, journal = {Mathematics of Control, Signals, and Systems (MCSS)}, year = {1993}, volume = {6}, number = {1}, pages = {1-9}, month = mar, doi = {10.1007/BF01213466}, } @Article{demmel:distillposed, author = {James W. Demmel}, title = {On condition numbers and the distance to the nearest ill-posed problem}, journal = {Numerische Mathematik}, year = 1987, volume = {51}, number = {3}, pages = {251--289}, month = may, doi = {10.1007/BF01400115}, } @Article{demmel:errbndcmplx, journal = {Foundations of Computational Mathematics}, title = {On the Complexity of Computing Error Bounds}, volume = 1, number = 1, pages = {101--125}, year = 2001, month = sep, author = {James W. Demmel and Benjamin Diament and Gregorio Malajovich}, doi = {10.1007/s10208001004}, } @TechReport{demmel:openprob, author = "James W. Demmel", title = "Open problems in numerical linear algebra", year = 1992, type = "LAPACK Working Note", number = 47, URL = "http://www.netlib.org/lapack/lawnspdf/lawn47.pdf", institution = {Netlib}, } @Article{strassen:mm, author = {Volker Strassen}, title = {Gaussian elimination is not optimal}, journal = {Numerische Mathematik}, year = {1969}, volume = {13}, number = {4}, pages = {354--356}, month = sep, doi = {10.1007/BF02165411}, MRclass = "65.35", MRnumber = "0248973", mrreviewer = "G. Fairweather", mrnumber-url = "http://www.ams.org/mathscinet-getitem?mr=0248973", } @article{umanskleinberg:mm, author = "Henry Cohn and Robert D. Kleinberg and Bal{\'a}zs Szegedy and Christopher Umans", title = {Group-theoretic Algorithms for Matrix Multiplication}, journal = "46th Annual IEEE Symposium on the Foundations of Computer Science", year = {2005}, isbn = {0-7695-2468-0}, pages = {379-388}, doi = {10.1109/SFCS.2005.39}, publisher = {IEEE Computer Society}, address = {Los Alamitos, CA, USA}, }