
@misc{yzelman15b,
 author = "Yzelman, A. N.",
 title = "High performance sparse computations applied to a parallel conjugate gradient solver",
 year = "2015",
 keywords = "Sparse matrix--vector multiplication",
 keywords = "shared-memory parallelism",
 keywords = "cache-oblivious",
 keywords = "sparse matrix partitioning",
 keywords = "matrix reordering",
 keywords = "Conjugate Gradient",
 keywords = "high-performance computing",
 keywords = "{NUMA} architectures",
 abstract = "Many advanced sparse matrix storage schemes have appeared in literature, demonstrating large gains in efficiency for basic operations such as the sparse matrix--vector ({SpMV}) multiplication. One class of methods, based on sparse matrix partitioning and reordering, sparse blocking, compression, and cache-oblivious nonzero traversals, gained two to three factors of increased performance on single {SpMV} multiplications, but had yet to be used in practice. This work explores its use within a {Conjugate Gradients (CG)} iterative solver for linear systems, and analyses the resulting algorithm using the {Bulk Synchronous Parallel (BSP)} paradigm. It is implemented using {C++11} and the high-performance {MulticoreBSP for C} programming framework, and tested on real-world problems on a shared-memory dual-socket architecture. Its performance is compared against {Blaze}, a {C++} library for high-performance numerical linear algebra. The {CG} solver developed using the {BSP} paradigm and advanced sparse computation techniques results in a total speedup of almost 10x, close to a 3x performance increase over {Blaze}.",
 url = "\url{http://albert-jan.yzelman.net/PDFs/yzelman15b-pp.pdf}"
}

