35 #include "Triplet.hpp"
36 #include "FileToVT.hpp"
38 #include "CCSWrapper.hpp"
67 template<
typename _t_value,
68 typename _master_i_value=
signed long int,
typename _master_j_value=
signed long int,
69 typename _i_value=LI,
typename _j_value=LI >
114 ULI &row_overflows, ULI &col_overflows, ULI &sim_overflows, ULI &jumps ) {
115 unsigned long int row_max = (((
unsigned long int)1) << (
sizeof(_i_value)*8-1)) - 1;
116 unsigned long int col_max = (((
unsigned long int)1) << (
sizeof(_j_value)*8-1)) - 1;
117 ULI prevrow = row[ 0 ];
118 ULI prevcol = col[ 0 ];
119 for(
unsigned long int i=1; i<
nnz; i++ ) {
120 bool overflow =
false;
121 if( row[ i ] > prevrow ) {
122 if( row[ i ] - prevrow > row_max ) {
126 }
else if( prevrow > row[ i ] ) {
127 if( prevrow - row[ i ] > row_max ) {
132 if( row[ i ] != prevrow ) {
134 if( col[ i ] - prevcol + ntt > col_max ) {
142 if( col[ i ] > prevcol ) {
143 if( col[ i ] - prevcol > col_max ) {
147 if( prevcol - col[ i ] > col_max ) {
160 static unsigned long int memoryUsage(
const ULI
nnz,
const ULI jumps,
const ULI row_o,
const ULI col_o,
const ULI sim_o ) {
161 return nnz/8 + (((nnz % 8) > 0) ? 2 : 1) +
162 jumps/8 + (((jumps % 8) > 0) ? 1 : 0) +
163 sizeof(_master_i_value) * (row_o + sim_o + 1) +
164 sizeof(_master_j_value) * (col_o + sim_o + 2) +
165 sizeof(_i_value) * (jumps - row_o - sim_o) +
166 sizeof(_j_value) * (nnz - col_o - sim_o) +
167 sizeof(_t_value) * nnz;
173 static unsigned long int getMemoryUsage( ULI *row, ULI *col,
const ULI nz,
const ULI
m,
const ULI
n ) {
175 ULI row_overflows = 0;
176 ULI col_overflows = 0;
177 ULI sim_overflows = 0;
179 return memoryUsage( nz, jumps, row_overflows, col_overflows, sim_overflows );
184 ULI nz = input.size();
185 ULI* row =
new ULI[ nz ];
186 ULI* col =
new ULI[ nz ];
187 unsigned long int c = 0;
188 typename std::vector< Triplet< _t_value > >::iterator it = input.begin();
189 for( ; it!=input.end(); it++, c++ ) {
190 row[ c ] = (*it).i();
191 col[ c ] = (*it).j();
222 CBICRS( std::string file, _t_value zero = 0 ) {
237 CBICRS( ULI* row, ULI* col, _t_value* val, ULI
m, ULI
n, ULI nz, _t_value zero ) {
238 load( row, col, val, m, n, nz, zero );
245 load( input, m, n, zero );
255 ULI nz = input.size();
256 ULI* row =
new ULI[ nz ];
257 ULI* col =
new ULI[ nz ];
258 _t_value* val =
new _t_value[ nz ];
259 unsigned long int c = 0;
260 typename std::vector< Triplet< _t_value > >::iterator it = input.begin();
261 for( ; it!=input.end(); it++, c++ ) {
262 row[ c ] = (*it).i();
263 col[ c ] = (*it).j();
264 val[ c ] = (*it).value;
266 load( row, col, val, m, n, nz, zero );
273 void load( ULI* row, ULI* col, _t_value* val, ULI
m, ULI
n, ULI nz, _t_value zero ) {
275 std::cerr <<
"Warning: _DEBUG flag set." << std::endl;
282 unsigned long int row_max = (((
unsigned long int)1) << (
sizeof(_i_value)*8-1)) - 1;
283 unsigned long int col_max = (((
unsigned long int)1) << (
sizeof(_j_value)*8-1)) - 1;
285 ULI row_overflows = 0;
286 ULI col_overflows = 0;
287 ULI sim_overflows = 0;
290 std::cout << jumps <<
" row jumps found." << std::endl;
291 std::cout << row_overflows <<
" exclusive row overflows found." << std::endl;
292 std::cout << col_overflows <<
" exclusive column overflows found." << std::endl;
293 std::cout << sim_overflows <<
" simultaneous row/column overflows found." << std::endl;
294 std::cout <<
"Total array memory usage: "
295 <<
memoryUsage( this->
nnz, jumps, row_overflows, col_overflows, sim_overflows )
296 <<
" bytes." << std::endl;
300 const size_t mask1size = (this->
nnz+1)/8 + (((this->
nnz+1)%8) > 0 ? 1 : 0) + 1;
301 const size_t mask2size = jumps / 8 + ((jumps % 8) > 0 ? 1 : 0 ) + 1;
302 mask1 =
new unsigned char[ mask1size ];
303 mask2 =
new unsigned char[ mask2size ];
304 r_start =
new _master_i_value[ row_overflows + sim_overflows + 1 ];
305 c_start =
new _master_j_value[ col_overflows + sim_overflows + 2 ];
306 r_inc =
new _i_value[ jumps - row_overflows - sim_overflows ];
307 c_inc =
new _j_value[ this->
nnz - col_overflows - sim_overflows - 1 ];
308 vals =
new _t_value[ this->
nnz ];
311 bytes =
sizeof(
unsigned char ) * ( mask1size + mask2size );
312 bytes +=
sizeof( _master_i_value ) * ( row_overflows + col_overflows + 2 * sim_overflows + 3 );
313 bytes +=
sizeof( _i_value ) * ( jumps - row_overflows - sim_overflows );
314 bytes +=
sizeof( _j_value ) * ( this->
nnz - col_overflows - sim_overflows - 1 );
315 bytes +=
sizeof( _t_value ) * this->
nnz;
318 for(
unsigned long int i=0; i<this->
nnz; ++i )
vals[i] = val[i];
321 r_start[ 0 ] = (_master_i_value)row[ 0 ];
322 ULI prevrow = row[ 0 ];
323 c_start[ 0 ] = (_master_j_value)col[ 0 ];
324 ULI prevcol = col[ 0 ];
327 std::cout <<
"r_start: " <<
r_start[0] << std::endl;
328 std::cout <<
"c_start: " <<
c_start[0] << std::endl;
331 unsigned long int cincc = 0;
332 unsigned long int rincc = 0;
333 unsigned long int cstartc = 1;
334 unsigned long int rstartc = 1;
335 unsigned long int mask2c = 0;
336 this->
mask1[ 0 ] = 1;
337 for(
unsigned long int i=1; i<this->
nnz; i++ ) {
338 if( i%8 == 0 ) this->
mask1[ i/8 ] = 0;
339 if( mask2c%8 == 0 ) this->
mask2[ mask2c/8 ] = 0;
342 if( row[ i ] != prevrow ) {
343 if( static_cast< unsigned long int >( col[ i ] +
ntt - prevcol ) > col_max ) {
344 assert( cstartc < col_overflows + sim_overflows + 1 );
345 this->
c_start[ cstartc++ ] = col[ i ] - prevcol +
ntt;
346 this->
mask1[ i/8 ] |= ((
unsigned char)1)<<(i%8);
348 assert( cincc < this->nnz );
349 this->
c_inc[ cincc++ ] = col[ i ] - prevcol +
ntt;
351 if( row[ i ] > prevrow ) {
352 if( row[ i ] - prevrow > row_max ) {
353 assert( rstartc < row_overflows + sim_overflows + 1 );
354 this->
r_start[ rstartc++ ] = row[ i ] - prevrow;
355 this->
mask2[ mask2c/8 ] |= ((
unsigned char)1)<<(mask2c%8);mask2c++;
357 assert( rincc < jumps - row_overflows );
358 this->
r_inc[ rincc++ ] = row[ i ] - prevrow;
362 if( prevrow - row[ i ] > row_max ) {
363 assert( rstartc < row_overflows + sim_overflows + 1 );
364 this->
r_start[ rstartc++ ] = row[ i ] - prevrow;
365 this->
mask2[ mask2c/8 ] |= ((
unsigned char)1)<<(mask2c%8);mask2c++;
367 assert( rincc < jumps - row_overflows );
368 this->
r_inc[ rincc++ ] = row[ i ] - prevrow;
375 std::cout << i <<
", (" << prevrow <<
"," << prevcol <<
")), column increment = " <<
376 ( this->
mask1[ i/8 ] & ((
unsigned char)1)<<(i%8) ? this->
c_start[ cstartc-1 ] : this->
c_inc[ cincc-1 ] ) <<
377 ", row increment " << ( this->
mask2[ (mask2c-1)/8 ] & ((
unsigned char)1)<<((mask2c-1)%8) ?
378 this->
r_start[ rstartc-1 ] : this->
r_inc[ rincc-1 ] ) <<
", mask1 index " <<
379 (i/8) <<
"(" << ( (this->
mask1[ i/8 ] & ((
unsigned char)1)<<(i%8)) > 0 ) <<
")" << std::endl;
385 if( col[ i ] > prevcol ) {
386 if( col[ i ] - prevcol > col_max ) {
387 assert( cstartc < col_overflows + sim_overflows + 1 );
388 this->
c_start[ cstartc++ ] = col[ i ] - prevcol;
389 this->
mask1[ i/8 ] |= ((
unsigned char)1)<<(i%8);
391 assert( cincc < this->nnz );
392 this->
c_inc[ cincc++ ] = col[ i ] - prevcol;
394 }
else if( prevcol > col[ i ] ) {
395 if( prevcol - col[ i ] > col_max ) {
396 assert( cstartc < col_overflows + sim_overflows + 1 );
397 this->
c_start[ cstartc++ ] = col[ i ] - prevcol;
398 this->
mask1[ i/8 ] |= ((
unsigned char)1)<<(i%8);
400 assert( cincc < this->nnz );
401 this->
c_inc[ cincc++ ] = col[ i ] - prevcol;
406 std::cout << i <<
", (" << prevrow <<
"," << prevcol <<
"), column increment = " <<
407 ( this->
mask1[ i/8 ] & ((
unsigned char)1)<<(i%8) ? this->
c_start[ cstartc-1 ] : this->
c_inc[ cincc-1 ] ) <<
408 ", row increment " << ( this->
mask2[ (mask2c-1)/8 ] & ((
unsigned char)1)<<((mask2c-1)%8) ?
409 this->
r_start[ rstartc-1 ] : this->
r_inc[ rincc-1 ] ) <<
", mask1 index = " << (i/8) <<
410 "(" << ( (this->
mask1[ i/8 ] & ((
unsigned char)1)<<(i%8)) > 0 ) <<
")" << std::endl;
413 assert( cincc == this->nnz - col_overflows - sim_overflows - 1 );
414 assert( rincc == jumps - row_overflows - sim_overflows );
415 assert( cstartc == col_overflows + sim_overflows + 1 );
416 assert( rstartc == row_overflows + sim_overflows + 1 );
417 assert( mask2c == jumps );
420 c_start[ col_overflows + sim_overflows + 1 ] =
ntt;
421 this->
mask1[ this->nnz/8 ] |= ((
unsigned char)1)<<(this->nnz%8);
424 assert( this->
mask1[0] & 1 );
425 assert( this->
mask1[this->nnz/8] & ((
unsigned char)1<<(this->nnz%8)) );
426 unsigned long int mask1c = 0;
427 for(
unsigned long int k=0; k<=this->
nnz; k++ )
428 if( this->
mask1[ k/8 ] & ((
unsigned char)1<<(k%8)) )
430 assert( mask1c == col_overflows + sim_overflows + 2 );
431 assert( this->nnz+1-mask1c == this->nnz - col_overflows - sim_overflows - 1 );
433 std::cout <<
"Construction done." << std::endl;
439 row = (ULI)(this->
r_start[ 0 ]);
440 col = (ULI)(this->
c_start[ 0 ]);
448 virtual void zxa(
const _t_value*__restrict__ x_p, _t_value*__restrict__ y_p ) {
449 unsigned char *__restrict__ mask1_p = this->
mask1;
450 unsigned char *__restrict__ mask2_p = this->
mask2;
451 _master_i_value *__restrict__ r_start_p =
r_start;
452 _master_j_value *__restrict__ c_start_p =
c_start;
453 _i_value *__restrict__ r_inc_p =
r_inc;
454 _j_value *__restrict__ c_inc_p =
c_inc;
455 _t_value *__restrict__ v_p =
vals;
460 const _t_value *
const x = x_p;
461 const _t_value *
const x_end = x+this->
nor;
463 const _t_value *
const y = y_p;
464 const _t_value *
const y_end = y+this->
noc;
465 const _t_value *
const v_end =
vals+this->
nnz;
468 while( v_p < v_end ) {
470 assert( y_p < y_end );
471 assert( v_p >=
vals );
472 assert( v_p < v_end );
474 assert( x_p < x_end );
475 assert( c_inc_p >=
c_inc );
476 assert( r_inc_p >=
r_inc );
477 assert( mask1_p < this->
mask1 + (this->nnz/8 + (this->nnz%8==0 ? 0 : 1)) );
478 assert( mask1_p >= this->
mask1 );
479 assert( maskc1 == (v_p-
vals) % 8 );
480 assert( mask1_p == &(this->
mask1[ (v_p-
vals)/8 ]) );
482 if( *mask1_p & ((
unsigned char)1<<maskc1) ) {
484 std::cout <<
"Overflowed column increment is " << *c_start_p << std::endl;
489 std::cout <<
"Compressed column increment is " << *c_inc_p << std::endl;
493 if( ++maskc1 == 8 ) {
499 std::cout << (y_p-y) <<
" > " << this->noc <<
" so performing a row jump." << std::endl;
501 if( *mask2_p & ((
unsigned char)1<<maskc2) ) {
506 if( ++maskc2 == 8 ) {
513 assert( mask1_p == &(this->
mask1[ (v_p -
vals + 1)/8 ]) );
514 std::cout << (v_p-
vals) <<
", position: " << (x_p-x) <<
"(<=" << (this->
noc) <<
") by " << (y_p-y) <<
"(<=" << (this->
nor) <<
"), mask1 index is " << (mask1_p-this->
mask1) <<
", mask1 was " << ((this->
mask1[(v_p-
vals)/8]&(
unsigned char)1<<((v_p-
vals)%8))>0) << std::endl;
516 *y_p += *v_p++ * *x_p;
525 virtual void zax(
const _t_value*__restrict__ x_p, _t_value*__restrict__ y_p ) {
526 const _t_value *
const x = x_p;
527 unsigned char *__restrict__ mask1_p = this->
mask1;
528 unsigned char *__restrict__ mask2_p = this->
mask2;
529 _master_i_value *__restrict__ r_start_p =
r_start;
530 _master_j_value *__restrict__ c_start_p =
c_start;
531 _i_value *__restrict__ r_inc_p =
r_inc;
532 _j_value *__restrict__ c_inc_p =
c_inc;
533 _t_value *__restrict__ v_p =
vals;
537 unsigned char tmask1 = *mask1_p++;
538 unsigned char tmask2 = *mask2_p++;
547 const _t_value *
const y = y_p;
548 const _t_value *
const y_end = y+this->
nor;
550 const _t_value *
const x_end = x+this->
noc;
551 const _t_value *
const v_end =
vals+this->
nnz;
554 while( v_p < v_end ) {
556 assert( y_p < y_end );
557 assert( v_p >=
vals );
558 assert( v_p < v_end );
560 assert( x_p < x_end );
561 assert( c_inc_p >=
c_inc );
562 assert( r_inc_p >=
r_inc );
563 if ( mask1_p > this->
mask1 + ((this->nnz+1)/8 + ((this->nnz+1)%8==0 ? 0 : 1)) ) {
564 std::cout <<
"Mask1 is at start position for index " << (mask1_p-this->
mask1)*8 <<
565 " of " << this->nnz <<
", maskc1=" << (
int)maskc1 << std::endl;
567 assert( mask1_p <= this->
mask1 + ((this->nnz+1)/8 + ((this->nnz+1)%8==0 ? 0 : 1)) );
568 assert( mask1_p >= this->
mask1 );
573 if ( mask1_p >=this->
mask1 + ((this->nnz+1)/8 + ((this->nnz+1)%8==0 ? 0 : 1)) )
574 std::cout <<
"Overflowed column increment is " << (int)(*c_start_p) << std::endl;
579 if ( mask1_p >=this->
mask1 + ((this->nnz+1)/8 + ((this->nnz+1)%8==0 ? 0 : 1)) )
580 std::cout <<
"Compressed column increment is " << (
int)(*c_inc_p) << std::endl;
585 if( ++maskc1 == 8 ) {
591 std::cout << (x_p-x) <<
" > " << this->noc <<
" so performing a row jump." << std::endl;
599 if( ++maskc2 == 8 ) {
606 std::cout << (v_p-
vals) <<
", position: " << (y_p-y) <<
"(<=" << (this->
nor) <<
") by " << (x_p-x) <<
"(<=" << (this->
noc) <<
"), mask1 index is " << (mask1_p-this->
mask1) << std::endl;
608 *y_p += *v_p++ * *x_p;
777 #ifndef _H_CBICRS_FACTORY
778 #define _H_CBICRS_FACTORY
788 template<
typename _t_value >
794 template<
typename _master_i_value,
typename _master_j_value,
typename _i_value,
typename _j_value >
796 unsigned long int m,
unsigned long int n,
unsigned long int &usage,
unsigned long int &zle_usage ) {
797 ULI nz = input.size();
798 ULI* row =
new ULI[ nz ];
799 ULI* col =
new ULI[ nz ];
800 unsigned long int c = 0;
801 typename std::vector< Triplet< _t_value > >::iterator it = input.begin();
802 for( ; it!=input.end(); it++, c++ ) {
803 row[ c ] = (*it).i();
804 col[ c ] = (*it).j();
811 std::cout <<
"Total array (" << tn <<
") memory usage: " << usage <<
" bytes." << std::endl;
813 zle_usage = usage + 1;
815 std::cout <<
"Warning: no implementation for ZLE CBICRS yet, so no estimate is given!" << std::endl;
820 template<
typename _master_i_value,
typename _master_j_value,
typename _i_value,
typename _j_value >
822 ULI m, ULI n,
unsigned long int &usage,
unsigned long int &zle_usage ) {
825 std::cout <<
"Total array (" << tn <<
") memory usage: " << usage <<
" bytes." << std::endl;
827 zle_usage = usage + 1;
829 std::cout <<
"Warning: no implementation for ZLE CBICRS yet, so no estimate is given!" << std::endl;
844 std::vector< Triplet< double > > triplets =
FileToVT::parse( file, m, n );
845 return getCBICRS( triplets, m, n, zero );
857 std::vector< Triplet< double > > triplets =
FileToVT::parse( file, m, n );
858 return getCBICCS( triplets, m, n, zero );
871 unsigned int rowbit = log2( m );
872 if( ((
unsigned long int)1)<<rowbit < m ) rowbit++;
873 unsigned int colbit = log2( n );
874 if( ((
unsigned long int)1)<<colbit < n ) colbit++;
875 std::cout <<
"Finding optimal expected index type." << std::endl;
876 unsigned long int usage, zle_usage;
914 investigate< signed long int, signed long int, signed char, signed char >(
"signed char", triplets, m, n, usage, zle_usage );
917 bool zle = zle_usage < usage;
918 unsigned long int min = zle ? zle_usage : usage;
919 investigate< signed long int, signed long int, signed short int, signed short int >(
"short int", triplets, m, n, usage, zle_usage );
920 unsigned long int curmin = zle_usage < usage ? zle_usage : usage;
923 zle = zle_usage < usage;
926 investigate< signed long int, signed long int, signed int, signed int >(
"int", triplets, m, n, usage, zle_usage );
927 curmin = zle_usage < usage ? zle_usage : usage;
930 zle = zle_usage < usage;
933 investigate< signed long int, signed long int, signed long int, signed long int >(
"long int", triplets, m, n, usage, zle_usage );
934 curmin = zle_usage < usage ? zle_usage : usage;
937 zle = zle_usage < usage;
942 std::cout <<
"Selecting `signed char' datatype";
944 std::cout <<
", with ZLE" << std::endl;
946 std::cout <<
", without ZLE";
947 if( 2*m < (((
unsigned long int)1)<<(
sizeof(
signed char)*8-1))-1 &&
948 n < (((
unsigned long int)1)<<(
sizeof(
signed char)*8-1))-1 ) {
949 std::cout <<
"; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
952 std::cout << std::endl;
957 std::cout <<
"Selecting `signed short int' datatype";
959 std::cout <<
", with ZLE" << std::endl;
961 std::cout <<
", without ZLE";
962 if( 2*m < (((
unsigned long int)1)<<(
sizeof(
signed short int)*8-1))-1 &&
963 n < (((
unsigned long int)1)<<(
sizeof(
signed short int)*8-1))-1 ) {
964 std::cout <<
"; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
967 std::cout << std::endl;
972 std::cout <<
"Selecting `signed int' datatype";
974 std::cout <<
", with ZLE" << std::endl;
976 std::cout <<
", without ZLE";
977 if( 2*m < (((
unsigned long int)1)<<(
sizeof(
signed int)*8-1))-1 &&
978 n < (((
unsigned long int)1)<<(
sizeof(
signed int)*8-1))-1 ) {
979 std::cout <<
"; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
982 std::cout << std::endl;
987 std::cout <<
"Selecting `signed long int' datatype";
989 std::cout <<
", with ZLE" << std::endl;
991 std::cout <<
", without ZLE";
992 if( 2*m < (((
unsigned long int)1)<<(
sizeof(
signed long int)*8-1))-1 &&
993 n < (((
unsigned long int)1)<<(
sizeof(
signed long int)*8-1))-1 ) {
994 std::cout <<
"; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
997 std::cout << std::endl;
1002 std::cerr <<
"Error in tuning, invalid data type selected (" << choice <<
")!" << std::endl;
1003 exit( EXIT_FAILURE );
1005 std::cerr <<
"CBICRS not yet implemented!" << std::endl;
1006 exit( EXIT_FAILURE );
1019 unsigned int rowbit = log2( m );
1020 if( ((
unsigned long int)1)<<rowbit < m ) rowbit++;
1021 unsigned int colbit = log2( n );
1022 if( ((
unsigned long int)1)<<colbit < n ) colbit++;
1023 std::cout <<
"Finding optimal expected index type." << std::endl;
1024 unsigned long int usage, zle_usage;
1062 investigateCCS< signed long int, signed long int, signed char, signed char >(
"signed char", triplets, m, n, usage, zle_usage );
1065 bool zle = zle_usage < usage;
1066 unsigned long int min = zle ? zle_usage : usage;
1067 investigateCCS< signed long int, signed long int, signed short int, signed short int >(
"short int", triplets, m, n, usage, zle_usage );
1068 unsigned long int curmin = zle_usage < usage ? zle_usage : usage;
1069 if( curmin < min ) {
1071 zle = zle_usage < usage;
1074 investigateCCS< signed long int, signed long int, signed int, signed int >(
"int", triplets, m, n, usage, zle_usage );
1075 curmin = zle_usage < usage ? zle_usage : usage;
1076 if( curmin < min ) {
1078 zle = zle_usage < usage;
1081 investigateCCS< signed long int, signed long int, signed long int, signed long int >(
"long int", triplets, m, n, usage, zle_usage );
1082 curmin = zle_usage < usage ? zle_usage : usage;
1083 if( curmin < min ) {
1085 zle = zle_usage < usage;
1090 std::cout <<
"Selecting `signed char' datatype";
1092 std::cout <<
", with ZLE" << std::endl;
1094 std::cout <<
", without ZLE";
1095 if( 2*m < (((
unsigned long int)1)<<(
sizeof(
signed char)*8-1))-1 &&
1096 n < (((
unsigned long int)1)<<(
sizeof(
signed char)*8-1))-1 ) {
1097 std::cout <<
"; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
1100 std::cout << std::endl;
1105 std::cout <<
"Selecting `signed short int' datatype";
1107 std::cout <<
", with ZLE" << std::endl;
1109 std::cout <<
", without ZLE";
1110 if( 2*m < (((
unsigned long int)1)<<(
sizeof(
signed short int)*8-1))-1 &&
1111 n < (((
unsigned long int)1)<<(
sizeof(
signed short int)*8-1))-1 ) {
1112 std::cout <<
"; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
1115 std::cout << std::endl;
1120 std::cout <<
"Selecting `signed int' datatype";
1122 std::cout <<
", with ZLE" << std::endl;
1124 std::cout <<
", without ZLE";
1125 if( 2*m < (((
unsigned long int)1)<<(
sizeof(
signed int)*8-1))-1 &&
1126 n < (((
unsigned long int)1)<<(
sizeof(
signed int)*8-1))-1 ) {
1127 std::cout <<
"; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
1130 std::cout << std::endl;
1135 std::cout <<
"Selecting `signed long int' datatype";
1137 std::cout <<
", with ZLE" << std::endl;
1139 std::cout <<
", without ZLE";
1140 if( 2*m < (((
unsigned long int)1)<<(
sizeof(
signed long int)*8-1))-1 &&
1141 n < (((
unsigned long int)1)<<(
sizeof(
signed long int)*8-1))-1 ) {
1142 std::cout <<
"; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
1145 std::cout << std::endl;
1150 std::cerr <<
"Error in tuning, invalid data type selected (" << choice <<
")!" << std::endl;
1151 exit( EXIT_FAILURE );
1153 std::cerr <<
"CBICRS not yet implemented!" << std::endl;
1154 exit( EXIT_FAILURE );
ULI nnz
Number of non-zeros.
Definition: SparseMatrix.hpp:58
static std::vector< Triplet< double > > parse(std::string filename)
Parses a matrix-market input file.
Definition: FileToVT.cpp:36
CBICRS(std::string file, _t_value zero=0)
Base constructor.
Definition: CBICRS.hpp:222
Automatically transforms a row-major scheme into a column-major scheme.
Definition: CCSWrapper.hpp:49
_master_i_value * c_start
Stores the column chunk start increments; size is the number of nonzeros plus one.
Definition: CBICRS.hpp:78
unsigned char * mask2
Bitmask used for switching between r_start and r_inc.
Definition: CBICRS.hpp:90
_i_value * r_inc
Stores the row jumps; size is the number of nonzeros plus 2.
Definition: CBICRS.hpp:81
Bi-directional Incremental Compressed Row Storage scheme.
Definition: BICRS.hpp:58
static Matrix< _t_value > * getCBICCS(std::string file, _t_value zero=0)
Factory function for column-based compressed BICRS functions, file-based.
Definition: CBICRS.hpp:855
Compressed Bi-directional Incremental Compressed Row Storage (BICRS) scheme.
Definition: CBICRS.hpp:70
virtual unsigned long int m()
Queries the number of rows this matrix contains.
Definition: SparseMatrix.hpp:107
unsigned char * mask1
Bitmask used for switching between c_start and c_inc.
Definition: CBICRS.hpp:87
_i_value * c_inc
Stores the column jumps; size is exactly the number of nonzeros.
Definition: CBICRS.hpp:84
static unsigned long int memoryUsage(const ULI nnz, const ULI jumps, const ULI row_o, const ULI col_o, const ULI sim_o)
Estimates the number of bytes required by this data structure.
Definition: CBICRS.hpp:160
virtual void load(std::vector< Triplet< _t_value > > &input, ULI m, ULI n, _t_value zero)
This function will rewrite the std::vector< Triplet > structure to one suitable for the other load fu...
Definition: CBICRS.hpp:254
static Matrix< _t_value > * getCBICRS(std::string file, _t_value zero=0)
Factory function for row-based compressed BICRS functions, file-based.
Definition: CBICRS.hpp:842
_master_j_value ntt
Caches n times two.
Definition: CBICRS.hpp:99
CBICRS(std::vector< Triplet< _t_value > > &input, ULI m, ULI n, _t_value zero=0)
Base constructor.
Definition: CBICRS.hpp:244
void loadFromFile(const std::string file, const _t_value zero=0)
Function which loads a matrix from a matrix market file.
Definition: SparseMatrix.hpp:89
virtual ~CBICRS()
Base destructor.
Definition: CBICRS.hpp:200
Interface common to all sparse matrix storage schemes.
Definition: SparseMatrix.hpp:46
static unsigned long int getMemoryUsage(ULI *row, ULI *col, const ULI nz, const ULI m, const ULI n)
Calculates and returns the number of bytes used when employing this data structure.
Definition: CBICRS.hpp:173
static Matrix< _t_value > * getCBICCS(std::vector< Triplet< _t_value > > &triplets, unsigned long int m, unsigned long int n, _t_value zero=0)
Factory function for column-based compressed BICRS functions, Triplet-based.
Definition: CBICRS.hpp:1018
ULI noc
Number of columns.
Definition: SparseMatrix.hpp:55
virtual void zxa(const _t_value *__restrict__ x_p, _t_value *__restrict__ y_p)
Calculates y=xA, but does not allocate y itself.
Definition: CBICRS.hpp:448
static void investigateCCS(const std::string tn, std::vector< Triplet< _t_value > > input, unsigned long int m, unsigned long int n, unsigned long int &usage, unsigned long int &zle_usage)
Used for auto-tuning the index type.
Definition: CBICRS.hpp:795
CBICRS(ULI *row, ULI *col, _t_value *val, ULI m, ULI n, ULI nz, _t_value zero)
Base constructor.
Definition: CBICRS.hpp:237
Factory for the Compressed Bi-directional Incremental Compressed Row Storage scheme.
Definition: CBICRS.hpp:789
ULI nor
Number of rows.
Definition: SparseMatrix.hpp:52
size_t bytes
Stores the number of bytes used for storage.
Definition: CBICRS.hpp:96
_t_value zero_element
The element considered to be zero.
Definition: SparseMatrix.hpp:63
virtual void getFirstIndexPair(ULI &row, ULI &col)
Returns the first nonzero index, per reference.
Definition: CBICRS.hpp:438
static Matrix< _t_value > * getCBICRS(std::vector< Triplet< _t_value > > &triplets, unsigned long int m, unsigned long int n, _t_value zero=0)
Factory function for row-based compressed BICRS functions, Triplet-based.
Definition: CBICRS.hpp:870
void load(ULI *row, ULI *col, _t_value *val, ULI m, ULI n, ULI nz, _t_value zero)
Definition: CBICRS.hpp:273
CBICRS()
Base constructor.
Definition: CBICRS.hpp:211
virtual unsigned long int n()
Queries the number of columns this matrix contains.
Definition: SparseMatrix.hpp:115
static void investigate(const std::string tn, std::vector< Triplet< _t_value > > triplets, ULI m, ULI n, unsigned long int &usage, unsigned long int &zle_usage)
Used for auto-tuning of the index type.
Definition: CBICRS.hpp:821
_master_i_value * r_start
Stores the row chunk start increments; size is the number of nonzeros plus one.
Definition: CBICRS.hpp:75
A single triplet value.
Definition: Triplet.hpp:52
_t_value * vals
Stores the values of the individual nonzeros.
Definition: CBICRS.hpp:93
virtual void zax(const _t_value *__restrict__ x_p, _t_value *__restrict__ y_p)
Calculates y=Ax, but does not allocate y itself.
Definition: CBICRS.hpp:525
static void getNumberOfOverflows(const ULI nnz, ULI *const row, ULI *const col, const ULI ntt, ULI &row_overflows, ULI &col_overflows, ULI &sim_overflows, ULI &jumps)
Calculates the number of overflows given a triplet-form input.
Definition: CBICRS.hpp:113
virtual size_t bytesUsed()
Function to query the amount of storage required by this sparse matrix.
Definition: CBICRS.hpp:768
static unsigned long int getMemoryUsage(std::vector< Triplet< _t_value > > &input, const ULI m, const ULI n)
Calculates and returns the number of bytes used when employing this data structure.
Definition: CBICRS.hpp:183