
% Journal article on a Morton-ordered blocked tensor storage format (Journal of Computational Science, vol. 33, 2019).
% The doi field holds the bare identifier (no resolver prefix); accented letters use the {\cmd} special-character form.
@article{pawlowski19,
  author   = {Paw{\l}owski, Filip and U{\c{c}}ar, Bora and Yzelman, A.~N.},
  title    = {A multi-dimensional {Morton}-ordered block storage for mode-oblivious tensor computations},
  journal  = {Journal of Computational Science},
  volume   = {33},
  pages    = {34--44},
  year     = {2019},
  issn     = {1877-7503},
  doi      = {10.1016/j.jocs.2019.02.007},
  url      = {http://www.sciencedirect.com/science/article/pii/S187775031831130X},
  keywords = {Tensor computations, Data structure, Morton order, Tensor--vector multiplication},
  abstract = {Computation on tensors, treated as multidimensional arrays, revolve around generalized basic linear algebra subroutines (BLAS). We propose a novel data structure in which tensors are blocked and blocks are stored in an order determined by Morton order. This is not only proposed for efficiency reasons, but also to induce efficient performance regardless of which mode a generalized BLAS call is invoked for; we coin the term mode-oblivious to describe data structures and algorithms that induce such behavior. Experiments on one of the most bandwidth-bound generalized BLAS kernel, the tensor--vector multiplication, not only demonstrate superior performance over two state-of-the-art variants by up to 18\%, but additionally show that the proposed data structure induces a 71\% less sample standard deviation for tensor--vector multiplication across d modes, where d varies from 2 to 10. Finally, we show our data structure naturally expands to other tensor kernels and demonstrate up to 38\% higher performance for the higher-order power method.},
}

