author = {Ballard, G. and Bulu\c{c}, A. and Demmel, J. and Grigori, L. and Lipshitz, B. and Schwartz, O. and Toledo, S.},

title = {Communication optimal parallel multiplication of sparse random matrices},

booktitle = {Proceedings of the 25th ACM Symposium on Parallelism in Algorithms and Architectures},

series = {SPAA '13},

year = {2013},

isbn = {978-1-4503-1572-2},

location = {Montr\'eal, Qu\'ebec, Canada},

pages = {222--231},

numpages = {10},

url = {http://doi.acm.org/10.1145/2486159.2486196},

doi = {10.1145/2486159.2486196},

acmid = {2486196},

publisher = {ACM},

address = {New York, NY, USA},

keywords = {communication-avoiding algorithms, communication-cost lower bounds, random graphs, sparse matrix multiplication}

}

% SPAA '13 conference paper on Gaussian elimination with partial pivoting.
@inproceedings{BDLST13,
  author    = {Ballard, G. and Demmel, J. and Lipshitz, B. and Schwartz, O. and Toledo, S.},
  title     = {Communication efficient {Gaussian} elimination with partial pivoting using a shape morphing data layout},
  booktitle = {Proceedings of the 25th ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '13},
  pages     = {232--240},
  numpages  = {9},
  year      = {2013},
  location  = {Montr\'eal, Qu\'ebec, Canada},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-1572-2},
  doi       = {10.1145/2486159.2486198},
  url       = {http://doi.acm.org/10.1145/2486159.2486198},
  acmid     = {2486198},
  keywords  = {cache oblivious algorithms, communication-avoiding algorithms, matrix data layouts, matrix factorization},
}

% IPDPS '13 conference paper on a blocked Aasen's algorithm.
@inproceedings{BB+13,
  author    = {Ballard, G. and Becker, D. and Demmel, J. and Dongarra, J. and Druinsky, A. and Peled, I. and Schwartz, O. and Toledo, S. and Yamazaki, I.},
  title     = {Implementing a Blocked {Aasen's} Algorithm with a Dynamic Scheduler on Multicore Architectures},
  booktitle = {Proceedings of the 27th IEEE International Parallel \& Distributed Processing Symposium},
  series    = {IPDPS '13},
  year      = {2013},
  month     = may,
  pages     = {895--907},
  doi       = {10.1109/IPDPS.2013.98},
  issn      = {1530-2075},
}

% Journal article (volume 59, number 6, article 32) on communication costs of
% fast matrix multiplication.
@article{BDHS12,
  author     = {G. Ballard and J. Demmel and O. Holtz and O. Schwartz},
  title      = {Graph expansion and communication costs of fast matrix multiplication},
  journal    = {Journal of the ACM},
  volume     = {59},
  number     = {6},
  month      = dec,
  year       = {2012},
  issue_date = {December 2012},
  pages      = {32:1--32:23},
  articleno  = {32},
  numpages   = {23},
  issn       = {0004-5411},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  doi        = {10.1145/2395116.2395121},
  url        = {http://doi.acm.org/10.1145/2395116.2395121},
  acmid      = {2395121},
  keywords   = {Communication-avoiding algorithms, I/O-complexity, fast matrix multiplication},
}

% SC '12 conference paper on a parallel implementation of Strassen's algorithm.
@inproceedings{LBDS12,
  author    = {B. Lipshitz and G. Ballard and J. Demmel and O. Schwartz},
  title     = {Communication-avoiding parallel {Strassen}: Implementation and performance},
  booktitle = {Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis},
  series    = {SC '12},
  year      = {2012},
  location  = {Salt Lake City, Utah},
  pages     = {101:1--101:11},
  articleno = {101},
  numpages  = {11},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
  isbn      = {978-1-4673-0804-5},
  url       = {http://dl.acm.org/citation.cfm?id=2388996.2389133},
  acmid     = {2389133},
}

% Paper in "Design and Analysis of Algorithms" (Lecture Notes in Computer
% Science, volume 7659) on fast rectangular matrix multiplication.
@inproceedings{BDHLS12-RMM,
  author    = {G. Ballard and J. Demmel and O. Holtz and B. Lipshitz and O. Schwartz},
  title     = {Graph Expansion Analysis for Communication Costs of Fast Rectangular Matrix Multiplication},
  booktitle = {Design and Analysis of Algorithms},
  editor    = {G. Even and D. Rawitz},
  series    = {Lecture Notes in Computer Science},
  volume    = {7659},
  pages     = {13--36},
  year      = {2012},
  publisher = {Springer Berlin Heidelberg},
  isbn      = {978-3-642-34861-7},
  doi       = {10.1007/978-3-642-34862-4_2},
  url       = {http://dx.doi.org/10.1007/978-3-642-34862-4_2},
}

% SPAA '12 conference paper on a parallel algorithm for Strassen's matrix
% multiplication.
@inproceedings{BDHLS12-CAPS,
  author    = {G. Ballard and J. Demmel and O. Holtz and B. Lipshitz and O. Schwartz},
  title     = {Communication-optimal parallel algorithm for {Strassen's} matrix multiplication},
  booktitle = {Proceedings of the 24th ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '12},
  year      = {2012},
  location  = {Pittsburgh, Pennsylvania, USA},
  pages     = {193--204},
  numpages  = {12},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-1213-4},
  doi       = {10.1145/2312005.2312044},
  url       = {http://doi.acm.org/10.1145/2312005.2312044},
  acmid     = {2312044},
  keywords  = {communication-avoiding algorithms, fast matrix multiplication, parallel algorithms},
}

% SPAA '12 brief announcement on strong scaling and memory-independent
% communication lower bounds.
@inproceedings{BDHLS12-SS,
  author    = {G. Ballard and J. Demmel and O. Holtz and B. Lipshitz and O. Schwartz},
  title     = {Brief announcement: strong scaling of matrix multiplication algorithms and memory-independent communication lower bounds},
  booktitle = {Proceedings of the 24th ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '12},
  pages     = {77--79},
  numpages  = {3},
  year      = {2012},
  location  = {Pittsburgh, Pennsylvania, USA},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-1213-4},
  doi       = {10.1145/2312005.2312021},
  url       = {http://doi.acm.org/10.1145/2312005.2312021},
  acmid     = {2312021},
  keywords  = {communication-avoiding algorithms, fast matrix multiplication, strong scaling},
}

% PPoPP '12 conference paper on successive band reduction.
@inproceedings{BDK12,
  author    = {G. Ballard and J. Demmel and N. Knight},
  title     = {Communication avoiding successive band reduction},
  booktitle = {Proceedings of the 17th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  series    = {PPoPP '12},
  year      = {2012},
  location  = {New Orleans, Louisiana, USA},
  pages     = {35--44},
  numpages  = {10},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-1160-1},
  doi       = {10.1145/2145816.2145822},
  url       = {http://doi.acm.org/10.1145/2145816.2145822},
  acmid     = {2145822},
  keywords  = {band reduction, communication avoiding algorithms, symmetric eigenvalue problem},
}

% Journal article (volume 32, number 3) on minimizing communication in
% numerical linear algebra.
@article{BDHS11b,
  author    = {G. Ballard and J. Demmel and O. Holtz and O. Schwartz},
  title     = {Minimizing Communication in Numerical Linear Algebra},
  journal   = {SIAM Journal on Matrix Analysis and Applications},
  volume    = {32},
  number    = {3},
  pages     = {866--901},
  year      = {2011},
  publisher = {SIAM},
  doi       = {10.1137/090769156},
  url       = {http://link.aip.org/link/?SML/32/866/1},
  keywords  = {linear algebra algorithms; bandwidth; latency; communication-avoiding; lower bound},
}

% SPAA '11 conference paper. Its title matches the 2012 journal article
% BDHS12; presumably this is the conference version -- confirm.
@inproceedings{BDHS11a,
  author    = {G. Ballard and J. Demmel and O. Holtz and O. Schwartz},
  title     = {Graph expansion and communication costs of fast matrix multiplication: regular submission},
  booktitle = {Proceedings of the 23rd ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '11},
  pages     = {1--12},
  numpages  = {12},
  year      = {2011},
  location  = {San Jose, California, USA},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-0743-7},
  doi       = {10.1145/1989493.1989495},
  url       = {http://doi.acm.org/10.1145/1989493.1989495},
  acmid     = {1989495},
  keywords  = {communication avoiding algorithms, fast matrix multiplication, i/o-complexity},
}

% SPAA '11 brief announcement on communication bounds for heterogeneous
% architectures.
@inproceedings{BDG11,
  author    = {Ballard, G. and Demmel, J. and Gearhart, A.},
  title     = {Brief announcement: communication bounds for heterogeneous architectures},
  booktitle = {Proceedings of the 23rd ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '11},
  pages     = {257--258},
  numpages  = {2},
  year      = {2011},
  location  = {San Jose, California, USA},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-0743-7},
  doi       = {10.1145/1989493.1989531},
  url       = {http://doi.acm.org/10.1145/1989493.1989531},
  acmid     = {1989531},
  keywords  = {communication-avoiding, heterogeneity},
}

% IPDPS '11 conference paper on QR decomposition for GPUs.
@inproceedings{ABDK11,
  author    = {Anderson, M. and Ballard, G. and Demmel, J. and Keutzer, K.},
  title     = {Communication-Avoiding {QR} Decomposition for {GPU}s},
  booktitle = {Proceedings of the 2011 IEEE International Parallel \& Distributed Processing Symposium},
  series    = {IPDPS '11},
  pages     = {48--58},
  numpages  = {11},
  year      = {2011},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {978-0-7695-4385-7},
  doi       = {10.1109/IPDPS.2011.15},
  url       = {http://dx.doi.org/10.1109/IPDPS.2011.15},
  acmid     = {2059520},
}

% Paper on computing tensor eigenvalues on a GPU. The venue named in the
% record is a symposium/workshop proceedings, so it is stored as an
% inproceedings entry with the venue in booktitle.
@inproceedings{BKP11,
  author    = {G. Ballard and T. Kolda and T. Plantenga},
  title     = {Efficiently Computing Tensor Eigenvalues on a {GPU}},
  booktitle = {2011 IEEE International Symposium on Parallel and Distributed Processing Workshops and PhD Forum},
  year      = {2011},
  pages     = {1340--1348},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  issn      = {1530-2075},
  doi       = {10.1109/IPDPS.2011.287},
  url       = {http://www.computer.org/portal/web/csdl/doi/10.1109/IPDPS.2011.287},
}

% Journal article (volume 32, number 6) on communication-optimal Cholesky
% decomposition.
@article{BDHS10,
  author    = {G. Ballard and J. Demmel and O. Holtz and O. Schwartz},
  title     = {Communication-optimal Parallel and Sequential {Cholesky} Decomposition},
  journal   = {SIAM Journal on Scientific Computing},
  volume    = {32},
  number    = {6},
  pages     = {3495--3523},
  year      = {2010},
  publisher = {SIAM},
  doi       = {10.1137/090760969},
  url       = {http://link.aip.org/link/?SCE/32/3495/1},
  keywords  = {Cholesky decomposition; bandwidth; latency; communication avoiding; algorithm; lower bound},
}

% SPAA '09 extended abstract. Its title matches the 2010 journal article
% BDHS10; presumably this is the conference version -- confirm.
@inproceedings{BDHS09,
  author    = {Ballard, G. and Demmel, J. and Holtz, O. and Schwartz, O.},
  title     = {Communication-optimal parallel and sequential {Cholesky} decomposition: extended abstract},
  booktitle = {Proceedings of the 22nd Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '09},
  year      = {2009},
  location  = {Calgary, AB, Canada},
  pages     = {245--252},
  numpages  = {8},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-60558-606-9},
  doi       = {10.1145/1583991.1584054},
  url       = {http://doi.acm.org/10.1145/1583991.1584054},
  acmid     = {1584054},
  keywords  = {Cholesky decomposition, algorithm, bandwidth, communication avoiding, latency, lower bound},
}

Last updated 14 August 2013.