SparseLibrary  Version 1.6.0
CBICRS.hpp
1 /*
2  * Copyright (c) 2007-2014, A. N. Yzelman, Utrecht University 2007-2011;
3  * KU Leuven 2011-2014.
4  * R. H. Bisseling, Utrecht University 2007-2014.
5  *
6  * This file is part of the Sparse Library.
7  *
8  * This library was developed under supervision of Prof. dr. Rob H. Bisseling at
9  * Utrecht University, from 2007 until 2011. From 2011-2014, development continued
10  * at KU Leuven, where Prof. dr. Dirk Roose contributed significantly to the ideas
11  * behind the newer parts of the library code.
12  *
13  * The Sparse Library is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by the
15  * Free Software Foundation, either version 3 of the License, or (at your
16  * option) any later version.
17  *
18  * The Sparse Library is distributed in the hope that it will be useful, but
19  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21  * for more details.
22  *
23  * You should have received a copy of the GNU General Public License along
24  * with the Sparse Library. If not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 /*
29  * File created by:
30  * A. N. Yzelman, Dept. of Mathematics, Utrecht University, 2011 (June).
31  */
32 
33 
34 #include "Matrix.hpp"
35 #include "Triplet.hpp"
36 #include "FileToVT.hpp"
37 #include "BICRS.hpp"
38 #include "CCSWrapper.hpp"
39 #include <assert.h>
40 #include <iostream>
41 #include <vector>
42 
43 // Set when in debug mode
44 //#define _DEBUG
45 
46 //NOTE: this file defines two classes: CBICRS and CBICRS_FACTORY
47 
48 #ifndef _H_CBICRS
49 #define _H_CBICRS
50 
51 //#define TRACE
52 
67 template< typename _t_value,
68  typename _master_i_value=signed long int, typename _master_j_value=signed long int,
69  typename _i_value=LI, typename _j_value=LI >
70 class CBICRS: public SparseMatrix< _t_value, ULI > {
71 
72  protected:
73 
75  _master_i_value* r_start;
76 
78  _master_i_value* c_start;
79 
81  _i_value* r_inc;
82 
84  _i_value* c_inc;
85 
87  unsigned char* mask1;
88 
90  unsigned char* mask2;
91 
93  _t_value* vals;
94 
96  size_t bytes;
97 
99  _master_j_value ntt;
100 
113  static void getNumberOfOverflows( const ULI nnz, ULI * const row, ULI * const col, const ULI ntt,
114  ULI &row_overflows, ULI &col_overflows, ULI &sim_overflows, ULI &jumps ) {
115  unsigned long int row_max = (((unsigned long int)1) << (sizeof(_i_value)*8-1)) - 1; //2^{k-1}-1
116  unsigned long int col_max = (((unsigned long int)1) << (sizeof(_j_value)*8-1)) - 1;
117  ULI prevrow = row[ 0 ];
118  ULI prevcol = col[ 0 ];
119  for( unsigned long int i=1; i<nnz; i++ ) {
120  bool overflow = false;
121  if( row[ i ] > prevrow ) {
122  if( row[ i ] - prevrow > row_max ) {
123  row_overflows++;
124  overflow = true;
125  }
126  } else if( prevrow > row[ i ] ) {
127  if( prevrow - row[ i ] > row_max ) {
128  row_overflows++;
129  overflow = true;
130  }
131  }
132  if( row[ i ] != prevrow ) {
133  jumps++;
134  if( col[ i ] - prevcol + ntt > col_max ) {
135  if( overflow ) {
136  row_overflows--;
137  sim_overflows++;
138  } else
139  col_overflows++;
140  }
141  } else {
142  if( col[ i ] > prevcol ) {
143  if( col[ i ] - prevcol > col_max ) {
144  col_overflows++;
145  }
146  } else {
147  if( prevcol - col[ i ] > col_max ) {
148  col_overflows++;
149  }
150  }
151  }
152 
153  prevrow = row[ i ];
154  prevcol = col[ i ];
155  }
156  }
157 
158 
160  static unsigned long int memoryUsage( const ULI nnz, const ULI jumps, const ULI row_o, const ULI col_o, const ULI sim_o ) {
161  return nnz/8 + (((nnz % 8) > 0) ? 2 : 1) +
162  jumps/8 + (((jumps % 8) > 0) ? 1 : 0) +
163  sizeof(_master_i_value) * (row_o + sim_o + 1) +
164  sizeof(_master_j_value) * (col_o + sim_o + 2) +
165  sizeof(_i_value) * (jumps - row_o - sim_o) +
166  sizeof(_j_value) * (nnz - col_o - sim_o) +
167  sizeof(_t_value) * nnz;
168  }
169 
170  public:
171 
173  static unsigned long int getMemoryUsage( ULI *row, ULI *col, const ULI nz, const ULI m, const ULI n ) {
174  ULI jumps = 0;
175  ULI row_overflows = 0;
176  ULI col_overflows = 0;
177  ULI sim_overflows = 0; //simultaneous overflows
178  getNumberOfOverflows( nz, row, col, 2l*n, row_overflows, col_overflows, sim_overflows, jumps );
179  return memoryUsage( nz, jumps, row_overflows, col_overflows, sim_overflows );
180  }
181 
183  static unsigned long int getMemoryUsage( std::vector< Triplet< _t_value > > &input, const ULI m, const ULI n ) {
184  ULI nz = input.size();
185  ULI* row = new ULI[ nz ];
186  ULI* col = new ULI[ nz ];
187  unsigned long int c = 0;
188  typename std::vector< Triplet< _t_value > >::iterator it = input.begin();
189  for( ; it!=input.end(); it++, c++ ) {
190  row[ c ] = (*it).i();
191  col[ c ] = (*it).j();
192  }
193  unsigned long int ret = getMemoryUsage( row, col, nz, m, n );
194  delete [] row;
195  delete [] col;
196  return ret;
197  }
198 
200  virtual ~CBICRS() {
201  delete [] r_start;
202  delete [] c_start;
203  delete [] r_inc;
204  delete [] c_inc;
205  delete [] mask1;
206  delete [] mask2;
207  delete [] vals;
208  }
209 
211  CBICRS() {
212  r_start = c_start = NULL;
213  r_inc = c_inc = NULL;
214  mask1 = mask2 = NULL;
215  vals = NULL;
216  }
217 
222  CBICRS( std::string file, _t_value zero = 0 ) {
223  loadFromFile( file, zero );
224  }
225 
237  CBICRS( ULI* row, ULI* col, _t_value* val, ULI m, ULI n, ULI nz, _t_value zero ) {
238  load( row, col, val, m, n, nz, zero );
239  }
240 
244  CBICRS( std::vector< Triplet< _t_value > >& input, ULI m, ULI n, _t_value zero = 0 ) {
245  load( input, m, n, zero );
246  }
247 
254  virtual void load( std::vector< Triplet< _t_value > >& input, ULI m, ULI n, _t_value zero ) {
255  ULI nz = input.size();
256  ULI* row = new ULI[ nz ];
257  ULI* col = new ULI[ nz ];
258  _t_value* val = new _t_value[ nz ];
259  unsigned long int c = 0;
260  typename std::vector< Triplet< _t_value > >::iterator it = input.begin();
261  for( ; it!=input.end(); it++, c++ ) {
262  row[ c ] = (*it).i();
263  col[ c ] = (*it).j();
264  val[ c ] = (*it).value;
265  }
266  load( row, col, val, m, n, nz, zero );
267  delete [] row;
268  delete [] col;
269  delete [] val;
270  }
271 
273  void load( ULI* row, ULI* col, _t_value* val, ULI m, ULI n, ULI nz, _t_value zero ) {
274 #ifdef _DEBUG
275  std::cerr << "Warning: _DEBUG flag set." << std::endl;
276 #endif
277  this->zero_element = zero;
278  this->nnz = nz;
279  this->nor = m;
280  this->noc = n;
281  this->ntt = 2l*n;
282  unsigned long int row_max = (((unsigned long int)1) << (sizeof(_i_value)*8-1)) - 1; //2^{k-1}-1
283  unsigned long int col_max = (((unsigned long int)1) << (sizeof(_j_value)*8-1)) - 1;
284  ULI jumps = 0;
285  ULI row_overflows = 0;
286  ULI col_overflows = 0;
287  ULI sim_overflows = 0; //simultaneous overflows
288  getNumberOfOverflows( nz, row, col, this->ntt, row_overflows, col_overflows, sim_overflows, jumps );
289 #ifndef NDEBUG
290  std::cout << jumps << " row jumps found." << std::endl;
291  std::cout << row_overflows << " exclusive row overflows found." << std::endl;
292  std::cout << col_overflows << " exclusive column overflows found." << std::endl;
293  std::cout << sim_overflows << " simultaneous row/column overflows found." << std::endl;
294  std::cout << "Total array memory usage: "
295  << memoryUsage( this->nnz, jumps, row_overflows, col_overflows, sim_overflows )
296  << " bytes." << std::endl;
297 #endif
298 
299  //allocate arrays
300  const size_t mask1size = (this->nnz+1)/8 + (((this->nnz+1)%8) > 0 ? 1 : 0) + 1;
301  const size_t mask2size = jumps / 8 + ((jumps % 8) > 0 ? 1 : 0 ) + 1;
302  mask1 = new unsigned char[ mask1size ];
303  mask2 = new unsigned char[ mask2size ];
304  r_start = new _master_i_value[ row_overflows + sim_overflows + 1 ];
305  c_start = new _master_j_value[ col_overflows + sim_overflows + 2 ];
306  r_inc = new _i_value[ jumps - row_overflows - sim_overflows ];
307  c_inc = new _j_value[ this->nnz - col_overflows - sim_overflows - 1 ];
308  vals = new _t_value[ this->nnz ];
309 
310  //remember memory usage
311  bytes = sizeof( unsigned char ) * ( mask1size + mask2size );
312  bytes += sizeof( _master_i_value ) * ( row_overflows + col_overflows + 2 * sim_overflows + 3 );
313  bytes += sizeof( _i_value ) * ( jumps - row_overflows - sim_overflows );
314  bytes += sizeof( _j_value ) * ( this->nnz - col_overflows - sim_overflows - 1 );
315  bytes += sizeof( _t_value ) * this->nnz;
316 
317  //copy nonzero values
318  for( unsigned long int i=0; i<this->nnz; ++i ) vals[i] = val[i];
319 
320  //fill initial values
321  r_start[ 0 ] = (_master_i_value)row[ 0 ];
322  ULI prevrow = row[ 0 ];
323  c_start[ 0 ] = (_master_j_value)col[ 0 ];
324  ULI prevcol = col[ 0 ];
325 
326 #ifdef _DEBUG
327  std::cout << "r_start: " << r_start[0] << std::endl;
328  std::cout << "c_start: " << c_start[0] << std::endl;
329 #endif
330 
331  unsigned long int cincc = 0;
332  unsigned long int rincc = 0;
333  unsigned long int cstartc = 1;
334  unsigned long int rstartc = 1;
335  unsigned long int mask2c = 0;
336  this->mask1[ 0 ] = 1;
337  for( unsigned long int i=1; i<this->nnz; i++ ) {
338  if( i%8 == 0 ) this->mask1[ i/8 ] = 0;
339  if( mask2c%8 == 0 ) this->mask2[ mask2c/8 ] = 0;
340 
341  //different handling upon row changes
342  if( row[ i ] != prevrow ) {
343  if( static_cast< unsigned long int >( col[ i ] + ntt - prevcol ) > col_max ) {
344  assert( cstartc < col_overflows + sim_overflows + 1 );
345  this->c_start[ cstartc++ ] = col[ i ] - prevcol + ntt;
346  this->mask1[ i/8 ] |= ((unsigned char)1)<<(i%8);
347  } else {
348  assert( cincc < this->nnz );
349  this->c_inc[ cincc++ ] = col[ i ] - prevcol + ntt;
350  }
351  if( row[ i ] > prevrow ) {
352  if( row[ i ] - prevrow > row_max ) {
353  assert( rstartc < row_overflows + sim_overflows + 1 );
354  this->r_start[ rstartc++ ] = row[ i ] - prevrow;
355  this->mask2[ mask2c/8 ] |= ((unsigned char)1)<<(mask2c%8);mask2c++;
356  } else {
357  assert( rincc < jumps - row_overflows );
358  this->r_inc[ rincc++ ] = row[ i ] - prevrow;
359  mask2c++;
360  }
361  } else {
362  if( prevrow - row[ i ] > row_max ) {
363  assert( rstartc < row_overflows + sim_overflows + 1 );
364  this->r_start[ rstartc++ ] = row[ i ] - prevrow;
365  this->mask2[ mask2c/8 ] |= ((unsigned char)1)<<(mask2c%8);mask2c++;
366  } else {
367  assert( rincc < jumps - row_overflows );
368  this->r_inc[ rincc++ ] = row[ i ] - prevrow;
369  mask2c++;
370  }
371  }
372  prevrow = row[ i ];
373  prevcol = col[ i ];
374 #ifdef _DEBUG
375  std::cout << i << ", (" << prevrow << "," << prevcol << ")), column increment = " <<
376  ( this->mask1[ i/8 ] & ((unsigned char)1)<<(i%8) ? this->c_start[ cstartc-1 ] : this->c_inc[ cincc-1 ] ) <<
377  ", row increment " << ( this->mask2[ (mask2c-1)/8 ] & ((unsigned char)1)<<((mask2c-1)%8) ?
378  this->r_start[ rstartc-1 ] : this->r_inc[ rincc-1 ] ) << ", mask1 index " <<
379  (i/8) << "(" << ( (this->mask1[ i/8 ] & ((unsigned char)1)<<(i%8)) > 0 ) << ")" << std::endl;
380 #endif
381  continue;
382  }
383 
384  //detect normal column overflow
385  if( col[ i ] > prevcol ) {
386  if( col[ i ] - prevcol > col_max ) {
387  assert( cstartc < col_overflows + sim_overflows + 1 );
388  this->c_start[ cstartc++ ] = col[ i ] - prevcol;
389  this->mask1[ i/8 ] |= ((unsigned char)1)<<(i%8);
390  } else {
391  assert( cincc < this->nnz );
392  this->c_inc[ cincc++ ] = col[ i ] - prevcol;
393  }
394  } else if( prevcol > col[ i ] ) {
395  if( prevcol - col[ i ] > col_max ) {
396  assert( cstartc < col_overflows + sim_overflows + 1 );
397  this->c_start[ cstartc++ ] = col[ i ] - prevcol;
398  this->mask1[ i/8 ] |= ((unsigned char)1)<<(i%8);
399  } else {
400  assert( cincc < this->nnz );
401  this->c_inc[ cincc++ ] = col[ i ] - prevcol;
402  }
403  }
404  prevcol = col[ i ];
405 #ifdef _DEBUG
406  std::cout << i << ", (" << prevrow << "," << prevcol << "), column increment = " <<
407  ( this->mask1[ i/8 ] & ((unsigned char)1)<<(i%8) ? this->c_start[ cstartc-1 ] : this->c_inc[ cincc-1 ] ) <<
408  ", row increment " << ( this->mask2[ (mask2c-1)/8 ] & ((unsigned char)1)<<((mask2c-1)%8) ?
409  this->r_start[ rstartc-1 ] : this->r_inc[ rincc-1 ] ) << ", mask1 index = " << (i/8) <<
410  "(" << ( (this->mask1[ i/8 ] & ((unsigned char)1)<<(i%8)) > 0 ) << ")" << std::endl;
411 #endif
412  }
413  assert( cincc == this->nnz - col_overflows - sim_overflows - 1 );
414  assert( rincc == jumps - row_overflows - sim_overflows );
415  assert( cstartc == col_overflows + sim_overflows + 1 ); //last one is added below
416  assert( rstartc == row_overflows + sim_overflows + 1 );
417  assert( mask2c == jumps );
418 
419  //force last overflow
420  c_start[ col_overflows + sim_overflows + 1 ] = ntt;
421  this->mask1[ this->nnz/8 ] |= ((unsigned char)1)<<(this->nnz%8);
422 
423  //check number of overflow masks
424  assert( this->mask1[0] & 1 );
425  assert( this->mask1[this->nnz/8] & ((unsigned char)1<<(this->nnz%8)) );
426  unsigned long int mask1c = 0;
427  for( unsigned long int k=0; k<=this->nnz; k++ )
428  if( this->mask1[ k/8 ] & ((unsigned char)1<<(k%8)) )
429  mask1c++;
430  assert( mask1c == col_overflows + sim_overflows + 2 );
431  assert( this->nnz+1-mask1c == this->nnz - col_overflows - sim_overflows - 1 );
432 #ifdef _DEBUG
433  std::cout << "Construction done." << std::endl;
434 #endif
435  }
436 
438  virtual void getFirstIndexPair( ULI &row, ULI &col ) {
439  row = (ULI)(this->r_start[ 0 ]);
440  col = (ULI)(this->c_start[ 0 ]);
441  }
442 
448  virtual void zxa( const _t_value*__restrict__ x_p, _t_value*__restrict__ y_p ) {
449  unsigned char *__restrict__ mask1_p = this->mask1;
450  unsigned char *__restrict__ mask2_p = this->mask2;
451  _master_i_value *__restrict__ r_start_p = r_start;
452  _master_j_value *__restrict__ c_start_p = c_start;
453  _i_value *__restrict__ r_inc_p = r_inc;
454  _j_value *__restrict__ c_inc_p = c_inc;
455  _t_value *__restrict__ v_p = vals;
456  char maskc1 = 0;
457  char maskc2 = 0;
458 
459 #ifndef NDEBUG
460  const _t_value * const x = x_p;
461  const _t_value * const x_end = x+this->nor;
462 #endif
463  const _t_value * const y = y_p;
464  const _t_value * const y_end = y+this->noc;
465  const _t_value * const v_end = vals+this->nnz;
466 
467  x_p += *r_start_p++;
468  while( v_p < v_end ) {
469  assert( y_p >= y );
470  assert( y_p < y_end );
471  assert( v_p >= vals );
472  assert( v_p < v_end );
473  assert( x_p >= x );
474  assert( x_p < x_end );
475  assert( c_inc_p >= c_inc );
476  assert( r_inc_p >= r_inc );
477  assert( mask1_p < this->mask1 + (this->nnz/8 + (this->nnz%8==0 ? 0 : 1)) );
478  assert( mask1_p >= this->mask1 );
479  assert( maskc1 == (v_p-vals) % 8 );
480  assert( mask1_p == &(this->mask1[ (v_p-vals)/8 ]) );
481 
482  if( *mask1_p & ((unsigned char)1<<maskc1) ) {
483 #ifdef TRACE
484  std::cout << "Overflowed column increment is " << *c_start_p << std::endl;
485 #endif
486  y_p += *c_start_p++;
487  } else {
488 #ifdef TRACE
489  std::cout << "Compressed column increment is " << *c_inc_p << std::endl;
490 #endif
491  y_p += *c_inc_p++;
492  }
493  if( ++maskc1 == 8 ) {
494  maskc1 = 0;
495  mask1_p++;
496  }
497  if( y_p >= y_end ) {
498 #ifdef TRACE
499  std::cout << (y_p-y) << " > " << this->noc << " so performing a row jump." << std::endl;
500 #endif
501  if( *mask2_p & ((unsigned char)1<<maskc2) ) {
502  x_p += *r_start_p++;
503  } else {
504  x_p += *r_inc_p++;
505  }
506  if( ++maskc2 == 8 ) {
507  maskc2 = 0;
508  mask2_p++;
509  }
510  y_p -= ntt;
511  }
512 #ifdef TRACE
513  assert( mask1_p == &(this->mask1[ (v_p - vals + 1)/8 ]) );
514  std::cout << (v_p-vals) << ", position: " << (x_p-x) << "(<=" << (this->noc) << ") by " << (y_p-y) << "(<=" << (this->nor) << "), mask1 index is " << (mask1_p-this->mask1) << ", mask1 was " << ((this->mask1[(v_p-vals)/8]&(unsigned char)1<<((v_p-vals)%8))>0) << std::endl;
515 #endif
516  *y_p += *v_p++ * *x_p;
517  }
518  }
519 
525  virtual void zax( const _t_value*__restrict__ x_p, _t_value*__restrict__ y_p ) {
526  const _t_value * const x = x_p;
527  unsigned char *__restrict__ mask1_p = this->mask1;
528  unsigned char *__restrict__ mask2_p = this->mask2;
529  _master_i_value *__restrict__ r_start_p = r_start;
530  _master_j_value *__restrict__ c_start_p = c_start;
531  _i_value *__restrict__ r_inc_p = r_inc;
532  _j_value *__restrict__ c_inc_p = c_inc;
533  _t_value *__restrict__ v_p = vals;
534  /* CODE SWITCH, non-unrolled version: */
535  char maskc1 = 0;
536  char maskc2 = 0;
537  unsigned char tmask1 = *mask1_p++;
538  unsigned char tmask2 = *mask2_p++;
539  //END, non-unrolled version */
540  /* CODE SWITCH, unrolled version:
541  char maskc2 = 1;
542  unsigned char tmask1;
543  unsigned char tmask2 = *mask2_p++;
544  //END, unrolled version */
545 
546 #ifndef NDEBUG
547  const _t_value * const y = y_p;
548  const _t_value * const y_end = y+this->nor;
549 #endif
550  const _t_value * const x_end = x+this->noc;
551  const _t_value * const v_end = vals+this->nnz;
552 
553  y_p += *r_start_p++;
554  while( v_p < v_end ) {
555  assert( y_p >= y );
556  assert( y_p < y_end );
557  assert( v_p >= vals );
558  assert( v_p < v_end );
559  assert( x_p >= x );
560  assert( x_p < x_end );
561  assert( c_inc_p >= c_inc );
562  assert( r_inc_p >= r_inc );
563  if ( mask1_p > this->mask1 + ((this->nnz+1)/8 + ((this->nnz+1)%8==0 ? 0 : 1)) ) {
564  std::cout << "Mask1 is at start position for index " << (mask1_p-this->mask1)*8 <<
565  " of " << this->nnz << ", maskc1=" << (int)maskc1 << std::endl;
566  }
567  assert( mask1_p <= this->mask1 + ((this->nnz+1)/8 + ((this->nnz+1)%8==0 ? 0 : 1)) );
568  assert( mask1_p >= this->mask1 );
569 
570  /* CODE SWITCH, non-unrolled version: */
571  if( tmask1 & 1 ) {
572 #ifdef TRACE
573  if ( mask1_p >=this->mask1 + ((this->nnz+1)/8 + ((this->nnz+1)%8==0 ? 0 : 1)) )
574  std::cout << "Overflowed column increment is " << (int)(*c_start_p) << std::endl;
575 #endif
576  x_p += *c_start_p++;
577  } else {
578 #ifdef TRACE
579  if ( mask1_p >=this->mask1 + ((this->nnz+1)/8 + ((this->nnz+1)%8==0 ? 0 : 1)) )
580  std::cout << "Compressed column increment is " << (int)(*c_inc_p) << std::endl;
581 #endif
582  x_p += *c_inc_p++;
583  }
584  tmask1 >>= 1;
585  if( ++maskc1 == 8 ) {
586  maskc1 = 0;
587  tmask1 = *mask1_p++;
588  }
589  if( x_p >= x_end ) {
590 #ifdef TRACE
591  std::cout << (x_p-x) << " > " << this->noc << " so performing a row jump." << std::endl;
592 #endif
593  if( tmask2 & 1 ) {
594  y_p += *r_start_p++;
595  } else {
596  y_p += *r_inc_p++;
597  }
598  tmask2 >>= 1;
599  if( ++maskc2 == 8 ) {
600  maskc2 = 0;
601  tmask2 = *mask2_p++;
602  }
603  x_p -= ntt;
604  }
605 #ifdef TRACE
606  std::cout << (v_p-vals) << ", position: " << (y_p-y) << "(<=" << (this->nor) << ") by " << (x_p-x) << "(<=" << (this->noc) << "), mask1 index is " << (mask1_p-this->mask1) << std::endl;
607 #endif
608  *y_p += *v_p++ * *x_p;
609  // END, non-unrolled version. */
610  /* CODE SWITCH, unrolled version:
611  tmask1 = *mask1_p++;
612  // 1
613  if( tmask1 & 1 )
614  x_p += *c_start_p++;
615  else
616  x_p += *c_inc_p++;
617  if( x_p >= x_end ) {
618  if( tmask2 & maskc2 )
619  y_p += *r_start_p++;
620  else
621  y_p += *r_inc_p++;
622  if( maskc2 == 128 ) {
623  tmask2 = *mask2_p++;
624  maskc2 = 1;
625  } else
626  maskc2 *= 2;
627  x_p -= ntt;
628  }
629  std::cout << (v_p-vals) << ", position: " << (y_p-y) << "(<=" << (this->nor) << ") by " << (x_p-x) << "(<=" << (this->noc) << "), mask1 index is " << (mask1_p-this->mask1) << std::endl;
630  *y_p += *v_p++ * *x_p;
631  if( v_p >= v_end ) break;
632  // 2
633  if( tmask1 & 2 )
634  x_p += *c_start_p++;
635  else
636  x_p += *c_inc_p++;
637  if( x_p >= x_end ) {
638  if( tmask2 & maskc2 )
639  y_p += *r_start_p++;
640  else
641  y_p += *r_inc_p++;
642  if( maskc2 == 128 ) {
643  tmask2 = *mask2_p++;
644  maskc2 = 1;
645  } else
646  maskc2 *= 2;
647  x_p -= ntt;
648  }
649  *y_p += *v_p++ * *x_p;
650  if( v_p >= v_end ) break;
651  // 4
652  if( tmask1 & 4 )
653  x_p += *c_start_p++;
654  else
655  x_p += *c_inc_p++;
656  if( x_p >= x_end ) {
657  if( tmask2 & maskc2 )
658  y_p += *r_start_p++;
659  else
660  y_p += *r_inc_p++;
661  if( maskc2 == 128 ) {
662  tmask2 = *mask2_p++;
663  maskc2 = 1;
664  } else
665  maskc2 *= 2;
666  x_p -= ntt;
667  }
668  *y_p += *v_p++ * *x_p;
669  if( v_p >= v_end ) break;
670  // 8
671  if( tmask1 & 8 )
672  x_p += *c_start_p++;
673  else
674  x_p += *c_inc_p++;
675  if( x_p >= x_end ) {
676  if( tmask2 & maskc2 )
677  y_p += *r_start_p++;
678  else
679  y_p += *r_inc_p++;
680  if( maskc2 == 128 ) {
681  tmask2 = *mask2_p++;
682  maskc2 = 1;
683  } else
684  maskc2 *= 2;
685  x_p -= ntt;
686  }
687  *y_p += *v_p++ * *x_p;
688  if( v_p >= v_end ) break;
689  // 16
690  if( tmask1 & 16 )
691  x_p += *c_start_p++;
692  else
693  x_p += *c_inc_p++;
694  if( x_p >= x_end ) {
695  if( tmask2 & maskc2 )
696  y_p += *r_start_p++;
697  else
698  y_p += *r_inc_p++;
699  if( maskc2 == 128 ) {
700  tmask2 = *mask2_p++;
701  maskc2 = 1;
702  } else
703  maskc2 *= 2;
704  x_p -= ntt;
705  }
706  *y_p += *v_p++ * *x_p;
707  if( v_p >= v_end ) break;
708  // 32
709  if( tmask1 & 32 )
710  x_p += *c_start_p++;
711  else
712  x_p += *c_inc_p++;
713  if( x_p >= x_end ) {
714  if( tmask2 & maskc2 )
715  y_p += *r_start_p++;
716  else
717  y_p += *r_inc_p++;
718  if( maskc2 == 128 ) {
719  tmask2 = *mask2_p++;
720  maskc2 = 1;
721  } else
722  maskc2 *= 2;
723  x_p -= ntt;
724  }
725  *y_p += *v_p++ * *x_p;
726  if( v_p >= v_end ) break;
727  // 64
728  if( tmask1 & 64 )
729  x_p += *c_start_p++;
730  else
731  x_p += *c_inc_p++;
732  if( x_p >= x_end ) {
733  if( tmask2 & maskc2 )
734  y_p += *r_start_p++;
735  else
736  y_p += *r_inc_p++;
737  if( maskc2 == 128 ) {
738  tmask2 = *mask2_p++;
739  maskc2 = 1;
740  } else
741  maskc2 *= 2;
742  x_p -= ntt;
743  }
744  *y_p += *v_p++ * *x_p;
745  if( v_p >= v_end ) break;
746  // 128
747  if( tmask1 & 128 )
748  x_p += *c_start_p++;
749  else
750  x_p += *c_inc_p++;
751  if( x_p >= x_end ) {
752  if( tmask2 & maskc2 )
753  y_p += *r_start_p++;
754  else
755  y_p += *r_inc_p++;
756  if( maskc2 == 128 ) {
757  tmask2 = *mask2_p++;
758  maskc2 = 1;
759  } else
760  maskc2 *= 2;
761  x_p -= ntt;
762  }
763  *y_p += *v_p++ * *x_p;
764  //END, unrolled version */
765  }
766  }
767 
768  virtual size_t bytesUsed() {
769  return bytes;
770  }
771 };
772 
773 
774 #endif
775 
776 
777 #ifndef _H_CBICRS_FACTORY
778 #define _H_CBICRS_FACTORY
779 
788 template< typename _t_value >
790 
791  protected:
792 
794  template< typename _master_i_value, typename _master_j_value, typename _i_value, typename _j_value >
795  static void investigateCCS( const std::string tn, std::vector< Triplet< _t_value > > input,
796  unsigned long int m, unsigned long int n, unsigned long int &usage, unsigned long int &zle_usage ) {
797  ULI nz = input.size();
798  ULI* row = new ULI[ nz ];
799  ULI* col = new ULI[ nz ];
800  unsigned long int c = 0;
801  typename std::vector< Triplet< _t_value > >::iterator it = input.begin();
802  for( ; it!=input.end(); it++, c++ ) {
803  row[ c ] = (*it).i();
804  col[ c ] = (*it).j();
805  }
807  delete [] row;
808  delete [] col;
809 
810 #ifndef NDEBUG
811  std::cout << "Total array (" << tn << ") memory usage: " << usage << " bytes." << std::endl;
812 #endif
813  zle_usage = usage + 1;
814 #ifndef NDEBUG
815  std::cout << "Warning: no implementation for ZLE CBICRS yet, so no estimate is given!" << std::endl;
816 #endif
817  }
818 
820  template< typename _master_i_value, typename _master_j_value, typename _i_value, typename _j_value >
821  static void investigate( const std::string tn, std::vector< Triplet< _t_value > > triplets,
822  ULI m, ULI n, unsigned long int &usage, unsigned long int &zle_usage ) {
824 #ifndef NDEBUG
825  std::cout << "Total array (" << tn << ") memory usage: " << usage << " bytes." << std::endl;
826 #endif
827  zle_usage = usage + 1;
828 #ifndef NDEBUG
829  std::cout << "Warning: no implementation for ZLE CBICRS yet, so no estimate is given!" << std::endl;
830 #endif
831  }
832 
833  public:
834 
842  static Matrix< _t_value >* getCBICRS( std::string file, _t_value zero = 0 ) {
843  ULI m, n;
844  std::vector< Triplet< double > > triplets = FileToVT::parse( file, m, n );
845  return getCBICRS( triplets, m, n, zero );
846  }
847 
855  static Matrix< _t_value >* getCBICCS( std::string file, _t_value zero = 0 ) {
856  ULI m, n;
857  std::vector< Triplet< double > > triplets = FileToVT::parse( file, m, n );
858  return getCBICCS( triplets, m, n, zero );
859  }
860 
870  static Matrix< _t_value >* getCBICRS( std::vector< Triplet< _t_value > > &triplets, unsigned long int m, unsigned long int n, _t_value zero = 0 ) {
871  unsigned int rowbit = log2( m );
872  if( ((unsigned long int)1)<<rowbit < m ) rowbit++;
873  unsigned int colbit = log2( n );
874  if( ((unsigned long int)1)<<colbit < n ) colbit++;
875  std::cout << "Finding optimal expected index type." << std::endl;
876  unsigned long int usage, zle_usage;
877  /*if( rowbit > 31 ) {
878  if( colbit > 31 )
879  investigate< signed long int, signed long int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
880  else if( colbit > 15 )
881  investigate< signed long int, signed int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
882  else if( colbit > 7 )
883  investigate< signed long int, signed short int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
884  else
885  investigate< signed long int, signed char, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
886  } else if( rowbit > 15 ) {
887  if( colbit > 31 )
888  investigate< signed int, signed long int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
889  else if( colbit > 15 )
890  investigate< signed int, signed int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
891  else if( colbit > 7 )
892  investigate< signed int, signed short int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
893  else
894  investigate< signed int, signed char, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
895  } else if( rowbit > 7 ) {
896  if( colbit > 31 )
897  investigate< signed int, signed long int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
898  else if( colbit > 15 )
899  investigate< signed int, signed int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
900  else if( colbit > 7 )
901  investigate< signed int, signed short int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
902  else
903  investigate< signed int, signed char, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
904  } else {
905  if( colbit > 31 )
906  investigate< signed char, signed long int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
907  else if( colbit > 15 )
908  investigate< signed char, signed int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
909  else if( colbit > 7 )
910  investigate< signed char, signed short int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
911  else
912  investigate< signed char, signed char, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
913  }*/
914  investigate< signed long int, signed long int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
915 
916  char choice = 0;
917  bool zle = zle_usage < usage;
918  unsigned long int min = zle ? zle_usage : usage;
919  investigate< signed long int, signed long int, signed short int, signed short int >( "short int", triplets, m, n, usage, zle_usage );
920  unsigned long int curmin = zle_usage < usage ? zle_usage : usage;
921  if( curmin < min ) {
922  choice = 1;
923  zle = zle_usage < usage;
924  min = curmin;
925  }
926  investigate< signed long int, signed long int, signed int, signed int >( "int", triplets, m, n, usage, zle_usage );
927  curmin = zle_usage < usage ? zle_usage : usage;
928  if( curmin < min ) {
929  choice = 2;
930  zle = zle_usage < usage;
931  min = curmin;
932  }
933  investigate< signed long int, signed long int, signed long int, signed long int >( "long int", triplets, m, n, usage, zle_usage );
934  curmin = zle_usage < usage ? zle_usage : usage;
935  if( curmin < min ) {
936  choice = 3;
937  zle = zle_usage < usage;
938  min = curmin;
939  }
940  switch( choice ) {
941  case 0:
942  std::cout << "Selecting `signed char' datatype";
943  if( zle ) {
944  std::cout << ", with ZLE" << std::endl;
945  } else {
946  std::cout << ", without ZLE";
947  if( 2*m < (((unsigned long int)1)<<(sizeof(signed char)*8-1))-1 &&
948  n < (((unsigned long int)1)<<(sizeof(signed char)*8-1))-1 ) {
949  std::cout << "; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
950  return new BICRS< _t_value, signed char >( triplets, m, n, zero );
951  } // else
952  std::cout << std::endl;
954  }
955  break;
956  case 1:
957  std::cout << "Selecting `signed short int' datatype";
958  if( zle ) {
959  std::cout << ", with ZLE" << std::endl;
960  } else {
961  std::cout << ", without ZLE";
962  if( 2*m < (((unsigned long int)1)<<(sizeof(signed short int)*8-1))-1 &&
963  n < (((unsigned long int)1)<<(sizeof(signed short int)*8-1))-1 ) {
964  std::cout << "; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
965  return new BICRS< _t_value, signed short int >( triplets, m, n, zero );
966  } // else
967  std::cout << std::endl;
969  }
970  break;
971  case 2:
972  std::cout << "Selecting `signed int' datatype";
973  if( zle ) {
974  std::cout << ", with ZLE" << std::endl;
975  } else {
976  std::cout << ", without ZLE";
977  if( 2*m < (((unsigned long int)1)<<(sizeof(signed int)*8-1))-1 &&
978  n < (((unsigned long int)1)<<(sizeof(signed int)*8-1))-1 ) {
979  std::cout << "; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
980  return new BICRS< _t_value, signed int >( triplets, m, n, zero );
981  } // else
982  std::cout << std::endl;
984  }
985  break;
986  case 3:
987  std::cout << "Selecting `signed long int' datatype";
988  if( zle ) {
989  std::cout << ", with ZLE" << std::endl;
990  } else {
991  std::cout << ", without ZLE";
992  if( 2*m < (((unsigned long int)1)<<(sizeof(signed long int)*8-1))-1 &&
993  n < (((unsigned long int)1)<<(sizeof(signed long int)*8-1))-1 ) {
994  std::cout << "; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
995  return new BICRS< _t_value, signed long int >( triplets, m, n, zero );
996  } // else
997  std::cout << std::endl;
999  }
1000  break;
1001  default:
1002  std::cerr << "Error in tuning, invalid data type selected (" << choice << ")!" << std::endl;
1003  exit( EXIT_FAILURE );
1004  }
1005  std::cerr << "CBICRS not yet implemented!" << std::endl;
1006  exit( EXIT_FAILURE );
1007  }
1008 
1018  static Matrix< _t_value >* getCBICCS( std::vector< Triplet< _t_value > > &triplets, unsigned long int m, unsigned long int n, _t_value zero = 0 ) {
1019  unsigned int rowbit = log2( m );
1020  if( ((unsigned long int)1)<<rowbit < m ) rowbit++;
1021  unsigned int colbit = log2( n );
1022  if( ((unsigned long int)1)<<colbit < n ) colbit++;
1023  std::cout << "Finding optimal expected index type." << std::endl;
1024  unsigned long int usage, zle_usage;
1025  /*if( rowbit > 31 ) {
1026  if( colbit > 31 )
1027  investigateCCS< signed long int, signed long int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1028  else if( colbit > 15 )
1029  investigateCCS< signed long int, signed int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1030  else if( colbit > 7 )
1031  investigateCCS< signed long int, signed short int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1032  else
1033  investigateCCS< signed long int, signed char, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1034  } else if( rowbit > 15 ) {
1035  if( colbit > 31 )
1036  investigateCCS< signed int, signed long int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1037  else if( colbit > 15 )
1038  investigateCCS< signed int, signed int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1039  else if( colbit > 7 )
1040  investigateCCS< signed int, signed short int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1041  else
1042  investigateCCS< signed int, signed char, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1043  } else if( rowbit > 7 ) {
1044  if( colbit > 31 )
1045  investigateCCS< signed int, signed long int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1046  else if( colbit > 15 )
1047  investigateCCS< signed int, signed int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1048  else if( colbit > 7 )
1049  investigateCCS< signed int, signed short int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1050  else
1051  investigateCCS< signed int, signed char, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1052  } else {
1053  if( colbit > 31 )
1054  investigateCCS< signed char, signed long int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1055  else if( colbit > 15 )
1056  investigateCCS< signed char, signed int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1057  else if( colbit > 7 )
1058  investigateCCS< signed char, signed short int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1059  else
1060  investigateCCS< signed char, signed char, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1061  }*/
1062  investigateCCS< signed long int, signed long int, signed char, signed char >( "signed char", triplets, m, n, usage, zle_usage );
1063 
1064  char choice = 0;
1065  bool zle = zle_usage < usage;
1066  unsigned long int min = zle ? zle_usage : usage;
1067  investigateCCS< signed long int, signed long int, signed short int, signed short int >( "short int", triplets, m, n, usage, zle_usage );
1068  unsigned long int curmin = zle_usage < usage ? zle_usage : usage;
1069  if( curmin < min ) {
1070  choice = 1;
1071  zle = zle_usage < usage;
1072  min = curmin;
1073  }
1074  investigateCCS< signed long int, signed long int, signed int, signed int >( "int", triplets, m, n, usage, zle_usage );
1075  curmin = zle_usage < usage ? zle_usage : usage;
1076  if( curmin < min ) {
1077  choice = 2;
1078  zle = zle_usage < usage;
1079  min = curmin;
1080  }
1081  investigateCCS< signed long int, signed long int, signed long int, signed long int >( "long int", triplets, m, n, usage, zle_usage );
1082  curmin = zle_usage < usage ? zle_usage : usage;
1083  if( curmin < min ) {
1084  choice = 3;
1085  zle = zle_usage < usage;
1086  min = curmin;
1087  }
1088  switch( choice ) {
1089  case 0:
1090  std::cout << "Selecting `signed char' datatype";
1091  if( zle ) {
1092  std::cout << ", with ZLE" << std::endl;
1093  } else {
1094  std::cout << ", without ZLE";
1095  if( 2*m < (((unsigned long int)1)<<(sizeof(signed char)*8-1))-1 &&
1096  n < (((unsigned long int)1)<<(sizeof(signed char)*8-1))-1 ) {
1097  std::cout << "; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
1098  return new CCSWrapper< _t_value, BICRS< _t_value, signed char >, ULI >( triplets, m, n, zero );
1099  } // else
1100  std::cout << std::endl;
1102  }
1103  break;
1104  case 1:
1105  std::cout << "Selecting `signed short int' datatype";
1106  if( zle ) {
1107  std::cout << ", with ZLE" << std::endl;
1108  } else {
1109  std::cout << ", without ZLE";
1110  if( 2*m < (((unsigned long int)1)<<(sizeof(signed short int)*8-1))-1 &&
1111  n < (((unsigned long int)1)<<(sizeof(signed short int)*8-1))-1 ) {
1112  std::cout << "; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
1113  return new CCSWrapper< _t_value, BICRS< _t_value, signed short int >, ULI >( triplets, m, n, zero );
1114  } // else
1115  std::cout << std::endl;
1117  }
1118  break;
1119  case 2:
1120  std::cout << "Selecting `signed int' datatype";
1121  if( zle ) {
1122  std::cout << ", with ZLE" << std::endl;
1123  } else {
1124  std::cout << ", without ZLE";
1125  if( 2*m < (((unsigned long int)1)<<(sizeof(signed int)*8-1))-1 &&
1126  n < (((unsigned long int)1)<<(sizeof(signed int)*8-1))-1 ) {
1127  std::cout << "; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
1128  return new CCSWrapper< _t_value, BICRS< _t_value, signed int >, ULI >( triplets, m, n, zero );
1129  } // else
1130  std::cout << std::endl;
1132  }
1133  break;
1134  case 3:
1135  std::cout << "Selecting `signed long int' datatype";
1136  if( zle ) {
1137  std::cout << ", with ZLE" << std::endl;
1138  } else {
1139  std::cout << ", without ZLE";
1140  if( 2*m < (((unsigned long int)1)<<(sizeof(signed long int)*8-1))-1 &&
1141  n < (((unsigned long int)1)<<(sizeof(signed long int)*8-1))-1 ) {
1142  std::cout << "; matrix dimensions are small enough; reverting to plain BICRS." << std::endl;
1143  return new CCSWrapper< _t_value, BICRS< _t_value, signed long int > , ULI>( triplets, m, n, zero );
1144  } // else
1145  std::cout << std::endl;
1147  }
1148  break;
1149  default:
1150  std::cerr << "Error in tuning, invalid data type selected (" << choice << ")!" << std::endl;
1151  exit( EXIT_FAILURE );
1152  }
1153  std::cerr << "CBICRS not yet implemented!" << std::endl;
1154  exit( EXIT_FAILURE );
1155  }
1156 
1157 };
1158 
1159 #endif
1160 
ULI nnz
Number of non-zeros.
Definition: SparseMatrix.hpp:58
static std::vector< Triplet< double > > parse(std::string filename)
Parses a matrix-market input file.
Definition: FileToVT.cpp:36
CBICRS(std::string file, _t_value zero=0)
Base constructor.
Definition: CBICRS.hpp:222
File created by: A.
Automatically transforms a row-major scheme into an column-major scheme.
Definition: CCSWrapper.hpp:49
_master_i_value * c_start
Stores the column chunk start increments; size is the number of nonzeros plus one.
Definition: CBICRS.hpp:78
unsigned char * mask2
Bitmask used for switching between r_start and r_ind.
Definition: CBICRS.hpp:90
_i_value * r_inc
Stores the row jumps; size is the number of nonzeros plus 2.
Definition: CBICRS.hpp:81
Bi-directional Incremental Compressed Row Storage scheme.
Definition: BICRS.hpp:58
static Matrix< _t_value > * getCBICCS(std::string file, _t_value zero=0)
Factory function for column-based compressed BICRS functions, file-based.
Definition: CBICRS.hpp:855
Compressed Bi-directional Incremental Compressed Row Storage (BICRS) scheme.
Definition: CBICRS.hpp:70
virtual unsigned long int m()
Queries the number of rows this matrix contains.
Definition: SparseMatrix.hpp:107
unsigned char * mask1
Bitmask used for switching between c_start and c_ind.
Definition: CBICRS.hpp:87
_i_value * c_inc
Stores the column jumps; size is exactly the number of nonzeros.
Definition: CBICRS.hpp:84
static unsigned long int memoryUsage(const ULI nnz, const ULI jumps, const ULI row_o, const ULI col_o, const ULI sim_o)
Estimates the number of bytes required by this data structure.
Definition: CBICRS.hpp:160
virtual void load(std::vector< Triplet< _t_value > > &input, ULI m, ULI n, _t_value zero)
This function will rewrite the std::vector< Triplet > structure to one suitable for the other load fu...
Definition: CBICRS.hpp:254
static Matrix< _t_value > * getCBICRS(std::string file, _t_value zero=0)
Factory function for row-based compressed BICRS functions, file-based.
Definition: CBICRS.hpp:842
_master_j_value ntt
Caches n times two.
Definition: CBICRS.hpp:99
CBICRS(std::vector< Triplet< _t_value > > &input, ULI m, ULI n, _t_value zero=0)
Base constructor.
Definition: CBICRS.hpp:244
void loadFromFile(const std::string file, const _t_valuezero=0)
Function which loads a matrix from a matrix market file.
Definition: SparseMatrix.hpp:89
virtual ~CBICRS()
Base deconstructor.
Definition: CBICRS.hpp:200
Interface common to all sparse matrix storage schemes.
Definition: SparseMatrix.hpp:46
static unsigned long int getMemoryUsage(ULI *row, ULI *col, const ULI nz, const ULI m, const ULI n)
Calculates and returns the number of bytes used when employing this data structure.
Definition: CBICRS.hpp:173
static Matrix< _t_value > * getCBICCS(std::vector< Triplet< _t_value > > &triplets, unsigned long int m, unsigned long int n, _t_value zero=0)
Factory function for row-based compressed BICRS functions, Triplet-based.
Definition: CBICRS.hpp:1018
ULI noc
Number of columns.
Definition: SparseMatrix.hpp:55
virtual void zxa(const _t_value *__restrict__ x_p, _t_value *__restrict__ y_p)
Calculates y=xA, but does not allocate y itself.
Definition: CBICRS.hpp:448
static void investigateCCS(const std::string tn, std::vector< Triplet< _t_value > > input, unsigned long int m, unsigned long int n, unsigned long int &usage, unsigned long int &zle_usage)
Used for auto-tunes the index type.
Definition: CBICRS.hpp:795
CBICRS(ULI *row, ULI *col, _t_value *val, ULI m, ULI n, ULI nz, _t_value zero)
Base constructor.
Definition: CBICRS.hpp:237
Factory for the Compressed Bi-directional Incremental Compressed Row Storage scheme.
Definition: CBICRS.hpp:789
ULI nor
Number of rows.
Definition: SparseMatrix.hpp:52
size_t bytes
Stores the number of bytes used for storage.
Definition: CBICRS.hpp:96
_t_value zero_element
The element considered to be zero.
Definition: SparseMatrix.hpp:63
virtual void getFirstIndexPair(ULI &row, ULI &col)
Returns the first nonzero index, per reference.
Definition: CBICRS.hpp:438
static Matrix< _t_value > * getCBICRS(std::vector< Triplet< _t_value > > &triplets, unsigned long int m, unsigned long int n, _t_value zero=0)
Factory function for row-based compressed BICRS functions, Triplet-based.
Definition: CBICRS.hpp:870
void load(ULI *row, ULI *col, _t_value *val, ULI m, ULI n, ULI nz, _t_value zero)
Definition: CBICRS.hpp:273
CBICRS()
Base constructor.
Definition: CBICRS.hpp:211
virtual unsigned long int n()
Queries the number of columns this matrix contains.
Definition: SparseMatrix.hpp:115
static void investigate(const std::string tn, std::vector< Triplet< _t_value > > triplets, ULI m, ULI n, unsigned long int &usage, unsigned long int &zle_usage)
Used for auto-tuning of the index type.
Definition: CBICRS.hpp:821
_master_i_value * r_start
Stores the row chunk start increments; size is the number of nonzeros plus one.
Definition: CBICRS.hpp:75
A single triplet value.
Definition: Triplet.hpp:52
_t_value * vals
Stores the values of the individual nonzeros.
Definition: CBICRS.hpp:93
virtual void zax(const _t_value *__restrict__ x_p, _t_value *__restrict__ y_p)
Calculates y=Ax, but does not allocate y itself.
Definition: CBICRS.hpp:525
static void getNumberOfOverflows(const ULI nnz, ULI *const row, ULI *const col, const ULI ntt, ULI &row_overflows, ULI &col_overflows, ULI &sim_overflows, ULI &jumps)
Calculates the number of overflows given a triplet-form input.
Definition: CBICRS.hpp:113
virtual size_t bytesUsed()
Function to query the amount of storage required by this sparse matrix.
Definition: CBICRS.hpp:768
static unsigned long int getMemoryUsage(std::vector< Triplet< _t_value > > &input, const ULI m, const ULI n)
Calculates and returns the number of bytes used when employing this data structure.
Definition: CBICRS.hpp:183