34 #include "SparseMatrix.hpp"
74 template<
typename _t_value,
typename _i_value=LI,
typename _sub_ds = ICRS< _t_value, u
int16_t >,
unsigned char logBeta = 15 >
103 static const ULI
beta_m = 1l << (logBeta-1);
121 for(
size_t i = 0; i < static_cast< size_t >(this->
nnz); i++ ) {
122 if(
dss[ i ] != NULL ) {
135 FBICRS( std::string file, _t_value zero = 0 ) {
140 FBICRS( _i_value* row, _i_value* col, _t_value* val, ULI
m, ULI
n, ULI nz, _t_value zero ) {
141 this->
load( row, col, val, m, n, nz, zero );
146 this->
load( input, m, n, zero );
151 this->
load( input, m, n, zero );
170 dss =
new _sub_ds*[ 1 ];
171 if( static_cast< ULI >( m ) >
beta_m || static_cast< ULI >( n ) >
beta_n ) {
172 std::cerr <<
"Matrix is too large to fit into a flat FBICRS structure!" << std::endl;
175 dss[ 0 ] =
new _sub_ds( input, m, n, zero );
176 r_inc =
new _i_value[ 1 ];
177 c_inc =
new _i_value[ 1 ];
192 void load( _i_value* row, _i_value* col, _t_value* val, ULI
m, ULI
n, ULI nz, _t_value zero ) {
194 std::vector< Triplet< _t_value > > packed;
195 for(
unsigned long int i=0; i<nz; i++ ) {
198 load( packed, m, n, zero );
218 unsigned long int cursum = 0;
219 for(
unsigned long int i=0; i<input.size(); i++ ) cursum += input[ i ].size();
220 const unsigned long int local_nnz = cursum;
224 std::vector< ULI > startpos;
225 std::vector< std::vector< Triplet< _t_value > > > replace;
226 typename std::vector< std::vector< Triplet< _t_value > > >::iterator it = input.begin();
227 for( ; it != input.end(); ++it ) {
228 if( it->size() > 0 ) {
232 ULI lowest_col = (*it)[0].j();
234 for(
size_t i = 1; i<it->size(); i++ )
235 if( (*it)[ i ].j() < lowest_col )
236 lowest_col = (*it)[i].j();
238 const ULI lowest_row = (*it)[0].i();
240 const ULI startpos_row =
beta_m * ( lowest_row /
beta_m );
241 const ULI startpos_col =
beta_n * ( lowest_col /
beta_n );
242 startpos.push_back( startpos_row );
243 startpos.push_back( startpos_col );
246 std::vector< Triplet< _t_value > > toPush;
248 toPush.reserve( it->size() );
250 for(
unsigned long int c = 0; c<it->size(); c++ ) {
251 const ULI new_i = (*it)[ c ].i() - startpos_row;
252 const ULI new_j = (*it)[ c ].j() - startpos_col;
257 replace.push_back( toPush );
263 for(
size_t r = 0, c = 0, q = 0; r < replace.size(); ++r, ++q ) {
264 while( input[ q ].size() == 0 ) ++q;
265 assert( q < input.size() );
266 const ULI cur_block_i = startpos[ c++ ];
267 const ULI cur_block_j = startpos[ c++ ];
268 assert( replace[ r ].size() == input[ q ].size() );
269 for(
size_t k = 0; k < replace[ r ].size(); ++k ) {
270 const ULI cur_i = cur_block_i + replace[ r ][ k ].i();
271 const ULI cur_j = cur_block_j + replace[ r ][ k ].j();
272 assert( cur_i == input[ q ][ k ].i() );
273 assert( cur_j == input[ q ][ k ].j() );
279 if( replace.size() == 0 ) {
287 this->
nnz = replace.size();
291 for(
size_t i=0; i<replace.size(); i++ ) cursum += replace[i].size();
292 assert( local_nnz == cursum );
298 dss =
new _sub_ds*[ this->
nnz ];
299 for(
size_t i = 0; i < static_cast< size_t >( this->
nnz ); ++i ) {
312 const ULI cur_block_i = startpos[ 2*i ];
313 const ULI cur_block_j = startpos[ 2*i+1 ];
314 for(
size_t k = 0; k < replace[ i ].size(); ++k ) {
315 const ULI cur_i = cur_block_i + replace[ i ][ k ].i();
316 const ULI cur_j = cur_block_j + replace[ i ][ k ].j();
319 assert( replace[ i ][ k ].i() < msize );
320 assert( replace[ i ][ k ].j() < nsize );
323 dss[ i ] =
new _sub_ds( replace[ i ], msize, nsize, zero );
330 size_t prevrow, prevcol;
331 size_t currow, curcol;
333 prevrow = startpos[ walk++ ];
334 prevcol = startpos[ walk++ ];
335 for(
size_t i = 1; i < replace.size(); i++ ) {
336 currow = startpos[ walk++ ];
337 curcol = startpos[ walk++ ];
338 if( currow != prevrow )
jumps++;
342 std::cout <<
jumps <<
" upper-level jumps found." << std::endl;
355 c_end = startpos[ 2 * this->
nnz - 1 ];
356 r_end = startpos[ 2 * this->
nnz - 2 ];
364 for(
size_t i = 1; i < static_cast< size_t >(this->
nnz); i++ ) {
365 currow = startpos[ walk++ ];
366 curcol = startpos[ walk++ ];
367 assert( currow < static_cast< size_t >(this->
nor) );
368 assert( curcol < static_cast< size_t >(this->
noc) );
369 this->
c_inc[ i-1 ] = curcol - prevcol;
370 if( currow != prevrow ) {
372 this->
r_inc[ c++ ] = currow - prevrow;
373 assert( this->
r_inc[c-1] + prevrow == currow );
376 assert( this->
c_inc[i-1] + prevcol == curcol || this->
c_inc[i-1] + prevcol -
n_overflow == curcol );
385 ULI rowwalk = prevrow =
r_start;
388 currow = startpos[ walk++ ];
389 curcol = startpos[ walk++ ];
390 assert( currow == rowwalk );
391 assert( curcol == colwalk );
395 for(
size_t i = 1; i < static_cast< size_t >(this->
nnz); ++i ) {
396 colwalk += this->
c_inc[ i - 1 ];
399 rowwalk += this->
r_inc[ c++ ];
401 assert( rowwalk < static_cast< size_t >(this->
nor) );
402 assert( colwalk < static_cast< size_t >(this->
noc) );
403 currow = startpos[ walk++ ];
404 curcol = startpos[ walk++ ];
405 assert( currow == rowwalk );
406 assert( curcol == colwalk );
408 assert( c ==
jumps );
411 rowwalk = prevrow =
r_end;
413 walk = 2 * this->
nnz - 1;
414 curcol = startpos[ walk-- ];
415 currow = startpos[ walk-- ];
416 assert( currow == rowwalk );
417 assert( curcol == colwalk );
419 for(
size_t i = static_cast< size_t >(this->
nnz) - 2; i < static_cast< size_t >(this->
nnz) - 1; --i ) {
420 colwalk -= this->
c_inc[ i ];
423 rowwalk -= this->
r_inc[ c-- ];
425 curcol = startpos[ walk-- ];
426 currow = startpos[ walk-- ];
427 assert( currow == rowwalk );
428 assert( curcol == colwalk );
438 virtual void zxa(
const _t_value*__restrict__ x_p, _t_value*__restrict__ y_p ) {
439 if( this->
nnz == 0 )
return;
442 _sub_ds**__restrict__ ds_f = this->
dss;
443 _i_value*__restrict__ c_f = this->
c_inc;
444 _i_value*__restrict__ r_f = this->
r_inc;
445 const _t_value*__restrict__ x_f = x_p + this->
r_start;
446 _t_value*__restrict__ y_f = y_p + this->
c_start;
448 _t_value*
const y_end = y_p + this->
noc;
449 _sub_ds**__restrict__
const ds_end = ds_f + this->
nnz;
452 while( ds_f < ds_end ) {
454 (*ds_f++)->
zxa( x_f, y_f );
465 void ZXa(
const _t_value *__restrict__
const *__restrict__
const X, _t_value *__restrict__
const *__restrict__
const Z ) {
466 if( this->
nnz == 0 )
return;
469 _sub_ds**__restrict__ ds_f = this->
dss;
470 _i_value*__restrict__ c_f = this->
c_inc;
471 _i_value*__restrict__ r_f = this->
r_inc;
472 _t_value*
const y_end = Z[0] + this->
noc;
473 _sub_ds**__restrict__
const ds_end = ds_f + this->
nnz;
476 const _t_value*__restrict__ X_f[ k ];
477 _t_value*__restrict__ Y_f[ k ];
478 for(
size_t i = 0; i < k; ++i ) {
484 while( ds_f < ds_end ) {
485 (*ds_f++)->ZXa< k >( X_f, Y_f );
486 for(
size_t i = 0; i < k; ++i ) {
490 if( Y_f[0] >= y_end ) {
491 for(
size_t i = 0; i < k; ++i ) {
501 virtual void zxa_fb(
const _t_value*__restrict__ x_p, _t_value*__restrict__ y_p ) {
502 if( this->
nnz == 0 )
return;
505 _sub_ds**__restrict__ ds_f = this->
dss;
506 _sub_ds**__restrict__ ds_b = ds_f + this->
nnz - 1;
507 _i_value*__restrict__ c_f = this->
c_inc;
508 _i_value*__restrict__ c_b = this->
c_inc + this->nnz - 2;
509 _i_value*__restrict__ r_f = this->
r_inc;
510 _i_value*__restrict__ r_b = this->
r_inc + this->
jumps - 1;
511 const _t_value*__restrict__ x_f = x_p + this->
r_start;
512 const _t_value*__restrict__ x_b = x_p + this->
r_end;
513 _t_value*__restrict__ y_f = y_p + this->
c_start;
514 _t_value*__restrict__ y_b = y_p + this->
c_end;
516 const _t_value* y_end = y_p + this->
noc;
519 while( ds_f <= ds_b ) {
521 (*ds_f++)->
zxa( x_f, y_f );
529 if( ds_f > ds_b )
break;
532 (*ds_b--)->
zxa( x_b, y_b );
534 assert( c_b >= this->
c_inc-1 );
538 assert( y_b >= y_p );
539 assert( y_b < y_end );
544 virtual void zax(
const _t_value*__restrict__ x_p, _t_value*__restrict__ y_p ) {
545 if( this->
nnz == 0 )
return;
548 _sub_ds**__restrict__ ds_f = this->
dss;
549 _i_value*__restrict__ c_f = this->
c_inc;
550 _i_value*__restrict__ r_f = this->
r_inc;
551 const _t_value*__restrict__ x_f = x_p + this->
c_start;
552 _t_value*__restrict__ y_f = y_p + this->
r_start;
554 const _t_value*
const x_end = x_p + this->
noc;
555 _sub_ds**__restrict__
const ds_end = ds_f + this->
nnz;
558 while( ds_f < ds_end ) {
560 assert( x_f >= x_p );
561 assert( x_f < x_end );
562 (*ds_f++)->
zax( x_f, y_f );
564 if( x_f < x_p || x_f >= x_end ) {
573 void ZaX(
const _t_value*__restrict__
const *__restrict__
const X, _t_value*__restrict__
const *__restrict__
const Z ) {
574 if( this->
nnz == 0 )
return;
577 _sub_ds**__restrict__ ds_f = this->
dss;
578 _i_value*__restrict__ c_f = this->
c_inc;
579 _i_value*__restrict__ r_f = this->
r_inc;
582 const _t_value *__restrict__ X_f[ k ];
583 _t_value *__restrict__ Y_f[ k ];
586 for(
size_t s = 0; s < k; ++s ) {
592 const _t_value*
const x_end = X[0] + this->
noc;
593 _sub_ds**__restrict__
const ds_end = ds_f + this->
nnz;
596 while( ds_f < ds_end ) {
597 for(
size_t s = 0; s < k; ++s ) {
598 assert( X_f[s] >= X[s] );
599 assert( X_f[s] < X[0] + this->noc );
601 (*ds_f++)->ZaX< k >( X_f, Y_f );
602 for(
size_t s = 0; s < k; ++s ) {
606 if( X_f[0] < X[0] || X_f[0] >= x_end ) {
607 for(
size_t s = 0; s < k; ++s ) {
617 virtual void zax_fb(
const _t_value*__restrict__ x_p, _t_value*__restrict__ y_p ) {
618 if( this->
nnz == 0 )
return;
621 _sub_ds**__restrict__ ds_f = this->
dss;
622 _sub_ds**__restrict__ ds_b = ds_f + this->
nnz - 1;
623 _i_value*__restrict__ c_f = this->
c_inc;
624 _i_value*__restrict__ c_b = this->
c_inc + this->nnz - 2;
625 _i_value*__restrict__ r_f = this->
r_inc;
626 _i_value*__restrict__ r_b = this->
r_inc + this->
jumps - 1;
627 const _t_value*__restrict__ x_f = x_p + this->
c_start;
628 const _t_value*__restrict__ x_b = x_p + this->
c_end;
629 _t_value*__restrict__ y_f = y_p + this->
r_start;
630 _t_value*__restrict__ y_b = y_p + this->
r_end;
632 const _t_value* x_end = x_p + this->
noc;
635 while( ds_f <= ds_b ) {
637 assert( x_f >= x_p );
638 assert( x_f < x_end );
639 (*ds_f++)->
zax( x_f, y_f );
641 if( x_f < x_p || x_f >= x_end ) {
647 if( ds_f > ds_b )
break;
650 assert( x_b >= x_p );
651 assert( x_b < x_end );
652 (*ds_b--)->
zax( x_b, y_b );
654 assert( c_b >= this->
c_inc-1 );
655 if( x_b < x_p || x_b >= x_end ) {
658 assert( x_b >= x_p );
659 assert( x_b < x_end );
666 size_t ret =
sizeof( ULI ) * 4 +
sizeof( _i_value ) * ( this->
nnz +
jumps + 1 );
668 ret +=
sizeof(
void* ) * this->
nnz;
670 for(
size_t i = 0; i < static_cast< size_t >(this->
nnz); ++i ) {
671 ret +=
dss[ i ]->bytesUsed();
Hierarchical BICRS with fixed subblock size and distribution.
Definition: FBICRS.hpp:75
_i_value nnz
Number of non-zeros.
Definition: SparseMatrix.hpp:58
_i_value * c_inc
Column increment array.
Definition: FBICRS.hpp:98
static const ULI beta_n
Maximum matrix size for _sub_ds with the above data type; column size.
Definition: FBICRS.hpp:106
FBICRS(std::vector< std::vector< Triplet< _t_value > > > &input, ULI m, ULI n, _t_value zero=0)
Base hierarchical constructor.
Definition: FBICRS.hpp:150
virtual void zax(const _t_value *__restrict__ x_p, _t_value *__restrict__ y_p)
In-place z=Ax function.
Definition: FBICRS.hpp:544
The incremental compressed row storage sparse matrix data structure.
Definition: ICRS.hpp:53
FBICRS(_i_value *row, _i_value *col, _t_value *val, ULI m, ULI n, ULI nz, _t_value zero)
Base COO-based constructor.
Definition: FBICRS.hpp:140
virtual void zxa_fb(const _t_value *__restrict__ x_p, _t_value *__restrict__ y_p)
Interleaved zxa kernel for use with the BetaHilbert scheme.
Definition: FBICRS.hpp:501
ULI c_end
Column index of the last nonzero.
Definition: FBICRS.hpp:89
_sub_ds ** dss
Stores the lower-level data structures.
Definition: FBICRS.hpp:112
ULI jumps
How many row jumps are stored.
Definition: FBICRS.hpp:92
void load(_i_value *row, _i_value *col, _t_value *val, ULI m, ULI n, ULI nz, _t_value zero)
Builds input matrix from COO input.
Definition: FBICRS.hpp:192
virtual unsigned long int m()
Queries the number of rows this matrix contains.
Definition: SparseMatrix.hpp:107
ULI r_start
Row index of the first nonzero.
Definition: FBICRS.hpp:80
static const ULI beta_m
Maximum matrix size for _sub_ds with the above data type; row size.
Definition: FBICRS.hpp:103
ULI r_end
Row index of the last nonzero.
Definition: FBICRS.hpp:86
ULI c_start
Column index of the first nonzero.
Definition: FBICRS.hpp:83
void load(std::vector< std::vector< Triplet< _t_value > > > &input, ULI m, ULI n, _t_value zero)
Builds input matrix from Triplet input.
Definition: FBICRS.hpp:209
void loadFromFile(const std::string file, const _t_valuezero=0)
Function which loads a matrix from a matrix market file.
Definition: SparseMatrix.hpp:89
FBICRS(std::string file, _t_value zero=0)
Base file-based constructor.
Definition: FBICRS.hpp:135
Interface common to all sparse matrix storage schemes.
Definition: SparseMatrix.hpp:46
virtual ~FBICRS()
Default destructor.
Definition: FBICRS.hpp:118
FBICRS(std::vector< Triplet< _t_value > > &input, ULI m, ULI n, _t_value zero=0)
Base Triplet-based constructor.
Definition: FBICRS.hpp:145
_i_value noc
Number of columns.
Definition: SparseMatrix.hpp:55
virtual void getFirstIndexPair(_i_value &row, _i_value &col)
Returns the first nonzero index, per reference.
Definition: FBICRS.hpp:433
FBICRS()
Base constructor (initialises with invalid data).
Definition: FBICRS.hpp:132
virtual size_t bytesUsed()
Function to query the amount of storage required by this sparse matrix.
Definition: FBICRS.hpp:664
void ZXa(const _t_value *__restrict__ const *__restrict__ const X, _t_value *__restrict__ const *__restrict__ const Z)
Definition: FBICRS.hpp:465
_i_value nor
Number of rows.
Definition: SparseMatrix.hpp:52
void ZaX(const _t_value *__restrict__ const *__restrict__ const X, _t_value *__restrict__ const *__restrict__ const Z)
Definition: FBICRS.hpp:573
_t_value zero_element
The element considered to be zero.
Definition: SparseMatrix.hpp:63
virtual void zxa(const _t_value *__restrict__ x_p, _t_value *__restrict__ y_p)
In-place z=xA function.
Definition: FBICRS.hpp:438
ULI n_overflow
Caches the overflow forcing value.
Definition: FBICRS.hpp:115
_i_value * r_inc
Row increment array.
Definition: FBICRS.hpp:95
virtual void zax_fb(const _t_value *__restrict__ x_p, _t_value *__restrict__ y_p)
Interleaved zax kernel for use with the BetaHilbert scheme.
Definition: FBICRS.hpp:617
virtual void load(std::vector< Triplet< _t_value > > &input, _i_value m, _i_value n, _t_value zero)
Builds input matrix from Triplet input.
Definition: FBICRS.hpp:162
virtual unsigned long int n()
Queries the number of columns this matrix contains.
Definition: SparseMatrix.hpp:115
A single triplet value.
Definition: Triplet.hpp:52
size_t fillIn
Stores the total fillIn.
Definition: FBICRS.hpp:109