SparseLibrary  Version 1.6.0
FBICRS.hpp
1 /*
2  * Copyright (c) 2007-2014, A. N. Yzelman, Utrecht University 2007-2011;
3  * KU Leuven 2011-2014.
4  * R. H. Bisseling, Utrecht University 2007-2014.
5  *
6  * This file is part of the Sparse Library.
7  *
8  * This library was developed under supervision of Prof. dr. Rob H. Bisseling at
9  * Utrecht University, from 2007 until 2011. From 2011-2014, development continued
10  * at KU Leuven, where Prof. dr. Dirk Roose contributed significantly to the ideas
11  * behind the newer parts of the library code.
12  *
13  * The Sparse Library is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by the
15  * Free Software Foundation, either version 3 of the License, or (at your
16  * option) any later version.
17  *
18  * The Sparse Library is distributed in the hope that it will be useful, but
19  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21  * for more details.
22  *
23  * You should have received a copy of the GNU General Public License along
24  * with the Sparse Library. If not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 /*
29  * File created by:
30  * A. N. Yzelman, Dept. of Computer Science, KU Leuven, 2011.
31  */
32 
33 
34 #include "SparseMatrix.hpp"
35 #include "ICRS.hpp"
36 #include "CRS.hpp"
37 #include <assert.h>
38 #include <iostream>
39 #include <stdint.h>
40 
41 #ifndef _H_FBICRS
42 #define _H_FBICRS
43 
74 template< typename _t_value, typename _i_value=LI, typename _sub_ds = ICRS< _t_value, uint16_t >, unsigned char logBeta = 15 >
75 class FBICRS: public SparseMatrix< _t_value, _i_value > {
76 
77  protected:
78 
80  ULI r_start;
81 
83  ULI c_start;
84 
86  ULI r_end;
87 
89  ULI c_end;
90 
92  ULI jumps;
93 
95  _i_value* r_inc;
96 
98  _i_value* c_inc;
99 
100  public:
101 
103  static const ULI beta_m = 1l << (logBeta-1);
104 
106  static const ULI beta_n = beta_m;
107 
109  size_t fillIn;
110 
112  _sub_ds** dss;
113 
116 
118  virtual ~FBICRS() {
119  if( r_inc != NULL ) delete [] r_inc;
120  if( c_inc != NULL ) delete [] c_inc;
121  for( size_t i = 0; i < static_cast< size_t >(this->nnz); i++ ) {
122  if( dss[ i ] != NULL ) {
123  delete dss[ i ];
124  }
125  }
126  if( dss != NULL ) {
127  delete [] dss;
128  }
129  }
130 
132  FBICRS(): r_start( -1 ), c_start( -1 ), r_inc( NULL ), c_inc( NULL ), dss( NULL ), n_overflow( 0 ) {}
133 
		/**
		 * Base file-based constructor.
		 *
		 * @param file The matrix file to load from (handled by SparseMatrix::loadFromFile).
		 * @param zero The element considered to be zero.
		 */
		FBICRS( std::string file, _t_value zero = 0 ) {
			loadFromFile( file, zero );
		}
138 
		/**
		 * Base COO-based constructor; delegates to the COO load function.
		 *
		 * @param row The row coordinates of the nonzeroes.
		 * @param col The column coordinates of the nonzeroes.
		 * @param val The values of the nonzeroes.
		 * @param m The number of matrix rows.
		 * @param n The number of matrix columns.
		 * @param nz The number of nonzeroes.
		 * @param zero The element considered to be zero.
		 */
		FBICRS( _i_value* row, _i_value* col, _t_value* val, ULI m, ULI n, ULI nz, _t_value zero ) {
			this->load( row, col, val, m, n, nz, zero );
		}
143 
		/**
		 * Base Triplet-based constructor; delegates to the flat load function.
		 *
		 * @param input The nonzeroes in Triplet format.
		 * @param m The number of matrix rows.
		 * @param n The number of matrix columns.
		 * @param zero The element considered to be zero.
		 */
		FBICRS( std::vector< Triplet< _t_value > > &input, ULI m, ULI n, _t_value zero = 0 ) {
			this->load( input, m, n, zero );
		}
148 
		/**
		 * Base hierarchical constructor; delegates to the hierarchical load function.
		 *
		 * @param input The nonzeroes, pre-grouped per sub-block.
		 * @param m The number of matrix rows.
		 * @param n The number of matrix columns.
		 * @param zero The element considered to be zero.
		 */
		FBICRS( std::vector< std::vector< Triplet< _t_value > > > &input, ULI m, ULI n, _t_value zero = 0 ) {
			this->load( input, m, n, zero );
		}
153 
162  virtual void load( std::vector< Triplet< _t_value > >& input, _i_value m, _i_value n, _t_value zero ) {
163  //flat loader
164  this->zero_element = zero;
165  this->nnz = 1;
166  this->nor = m;
167  this->noc = n;
168  this->n_overflow = n;
169  r_start = c_start = 0;
170  dss = new _sub_ds*[ 1 ];
171  if( static_cast< ULI >( m ) > beta_m || static_cast< ULI >( n ) > beta_n ) {
172  std::cerr << "Matrix is too large to fit into a flat FBICRS structure!" << std::endl;
173  exit( 1 );
174  }
175  dss[ 0 ] = new _sub_ds( input, m, n, zero );
176  r_inc = new _i_value[ 1 ];
177  c_inc = new _i_value[ 1 ];
178  c_inc[ 0 ] = this->n_overflow;
179  }
180 
192  void load( _i_value* row, _i_value* col, _t_value* val, ULI m, ULI n, ULI nz, _t_value zero ) {
193  //flat loader
194  std::vector< Triplet< _t_value > > packed;
195  for( unsigned long int i=0; i<nz; i++ ) {
196  packed.push_back( Triplet< _t_value >( row[i], col[i], val[i] ) );
197  }
198  load( packed, m, n, zero );
199  }
200 
		/**
		 * Builds the input matrix from Triplet input; full hierarchical loader
		 * using fixed (beta_m x beta_n) blocks.
		 *
		 * Each inner vector of input becomes one lower-level data structure;
		 * empty inner vectors are skipped. The upper level stores, in BICRS
		 * fashion, the column increments between consecutive block start
		 * positions (c_inc) and the row increments at each row jump (r_inc);
		 * a row jump is encoded by adding n_overflow to the column increment.
		 *
		 * @param input The nonzeroes, grouped per sub-block. Each group is
		 *              assumed to fit entirely within one beta_m x beta_n block
		 *              (this is not enforced -- see the note below).
		 * @param m The number of matrix rows.
		 * @param n The number of matrix columns.
		 * @param zero The element considered to be zero.
		 */
		void load( std::vector< std::vector< Triplet< _t_value > > > &input, ULI m, ULI n, _t_value zero ) {
			//full hierarchical loader using fixed blocks
			this->zero_element = zero;
			this->nor = m;
			this->noc = n;
			this->n_overflow = n;
			this->jumps = 0;

#ifndef NDEBUG
			//record the total nonzero count so we can verify no nonzero is lost
			unsigned long int cursum = 0;
			for( unsigned long int i=0; i<input.size(); i++ ) cursum += input[ i ].size();
			const unsigned long int local_nnz = cursum;
#endif

			//reconstruct input while ignoring empty subblocks
			std::vector< ULI > startpos; //stores (row, column) start pairs, flattened
			std::vector< std::vector< Triplet< _t_value > > > replace; //block-local nonzeroes
			typename std::vector< std::vector< Triplet< _t_value > > >::iterator it = input.begin();
			for( ; it != input.end(); ++it ) {
				if( it->size() > 0 ) {
					//put in CRS order
					qsort( &((*it)[0]), it->size(), sizeof( Triplet< _t_value > ), &(ICRS< _t_value >::compareTriplets) );
					//find lowest column index
					ULI lowest_col = (*it)[0].j(); //initial guess
					//full nonzero traversal
					for( size_t i = 1; i<it->size(); i++ )
						if( (*it)[ i ].j() < lowest_col )
							lowest_col = (*it)[i].j();
					//find lowest row index (note already CRS-ordered)
					const ULI lowest_row = (*it)[0].i();
					//get block start position: snap to the enclosing beta-aligned grid cell
					const ULI startpos_row = beta_m * ( lowest_row / beta_m );
					const ULI startpos_col = beta_n * ( lowest_col / beta_n );
					startpos.push_back( startpos_row );
					startpos.push_back( startpos_col );

					//move blocks into a local view (coordinates relative to block start)
					std::vector< Triplet< _t_value > > toPush;
					//reserve room
					toPush.reserve( it->size() );
					//copy
					for( unsigned long int c = 0; c<it->size(); c++ ) {
						const ULI new_i = (*it)[ c ].i() - startpos_row;
						const ULI new_j = (*it)[ c ].j() - startpos_col;
						assert( new_i < m );
						assert( new_j < n );
						toPush.push_back( Triplet< _t_value >( new_i, new_j, (*it)[ c ].value ) );
					}
					replace.push_back( toPush );
				}
			}

#ifndef NDEBUG
			//check for errors: each local view plus its block offset must
			//reproduce the original global coordinates exactly
			for( size_t r = 0, c = 0, q = 0; r < replace.size(); ++r, ++q ) {
				while( input[ q ].size() == 0 ) ++q; //skip the empty groups we dropped
				assert( q < input.size() );
				const ULI cur_block_i = startpos[ c++ ];
				const ULI cur_block_j = startpos[ c++ ];
				assert( replace[ r ].size() == input[ q ].size() );
				for( size_t k = 0; k < replace[ r ].size(); ++k ) {
					const ULI cur_i = cur_block_i + replace[ r ][ k ].i();
					const ULI cur_j = cur_block_j + replace[ r ][ k ].j();
					assert( cur_i == input[ q ][ k ].i() );
					assert( cur_j == input[ q ][ k ].j() );
				}
			}
#endif

			//check for empty matrix
			if( replace.size() == 0 ) {
				r_inc = c_inc = NULL;
				dss = NULL;
				this->nnz = 0;
				return;
			}

			//not empty, continue; note nnz counts sub-blocks here, not nonzeroes
			this->nnz = replace.size();

#ifndef NDEBUG
			cursum = 0;
			for( size_t i=0; i<replace.size(); i++ ) cursum += replace[i].size();
			assert( local_nnz == cursum );
#endif

			//initialise fillIn
			fillIn = 0;
			//fill dss
			dss = new _sub_ds*[ this->nnz ];
			for( size_t i = 0; i < static_cast< size_t >( this->nnz ); ++i ) {
				//note this implicitly assumes the input is properly cut s.t.
				//replace[i] indeed fits into a beta x beta matrix.
				ULI msize = beta_m;
				ULI nsize = beta_n;
				//if this is the last block, my size might be smaller
				//(the modulo check makes sure the last block is smaller than beta_m,
				// and not exactly equal to it)
				if( startpos[ 2*i ] == beta_m * ((m-1)/beta_m) && (m%beta_m) != 0 )
					msize = (m%beta_m);
				if( startpos[ 2*i+1 ] == beta_n * ((n-1)/beta_n) && (n%beta_n) != 0 )
					nsize = (n%beta_n);
#ifndef NDEBUG
				//verify every local coordinate fits the (possibly truncated) block
				const ULI cur_block_i = startpos[ 2*i ];
				const ULI cur_block_j = startpos[ 2*i+1 ];
				for( size_t k = 0; k < replace[ i ].size(); ++k ) {
					const ULI cur_i = cur_block_i + replace[ i ][ k ].i();
					const ULI cur_j = cur_block_j + replace[ i ][ k ].j();
					assert( cur_i < m );
					assert( cur_j < n );
					assert( replace[ i ][ k ].i() < msize );
					assert( replace[ i ][ k ].j() < nsize );
				}
#endif
				dss[ i ] = new _sub_ds( replace[ i ], msize, nsize, zero );

				//update fillIn
				fillIn += dss[ i ]->fillIn;
			}

			//count row jumps (needed to size r_inc before filling it)
			size_t prevrow, prevcol;
			size_t currow, curcol;
			size_t walk = 0;
			prevrow = startpos[ walk++ ];
			prevcol = startpos[ walk++ ];
			for( size_t i = 1; i < replace.size(); i++ ) {
				currow = startpos[ walk++ ];
				curcol = startpos[ walk++ ];
				if( currow != prevrow ) jumps++;
				prevrow = currow;
			}
#ifdef _DEBUG
			std::cout << jumps << " upper-level jumps found." << std::endl;
#endif

			//allocate arrays
			r_inc = new _i_value[ jumps + 1 ];
			c_inc = new _i_value[ this->nnz ];

			//get starting position
			walk = 0;
			r_start = startpos[ walk++ ];
			c_start = startpos[ walk++ ];

			//get ending position
			c_end = startpos[ 2 * this->nnz - 1 ];
			r_end = startpos[ 2 * this->nnz - 2 ];

			//set prev variables
			prevrow = r_start;
			prevcol = c_start;

			//and build the index arrays; a row jump is signalled by letting the
			//column increment overflow past n_overflow
			ULI c = 0;
			for( size_t i = 1; i < static_cast< size_t >(this->nnz); i++ ) {
				currow = startpos[ walk++ ];
				curcol = startpos[ walk++ ];
				assert( currow < static_cast< size_t >(this->nor) );
				assert( curcol < static_cast< size_t >(this->noc) );
				this->c_inc[ i-1 ] = curcol - prevcol;
				if( currow != prevrow ) {
					this->c_inc[ i-1 ] += n_overflow;
					this->r_inc[ c++ ] = currow - prevrow;
					assert( this->r_inc[c-1] + prevrow == currow );
					prevrow = currow;
				}
				assert( this->c_inc[i-1] + prevcol == curcol || this->c_inc[i-1] + prevcol - n_overflow == curcol );
				prevcol = curcol;
			}

			//set last c_inc value: always overflow, so kernels terminate cleanly
			this->c_inc[ this->nnz - 1 ] = n_overflow;

#ifndef NDEBUG
			//check starting positions, first one is given by r_start and c_start
			ULI rowwalk = prevrow = r_start;
			ULI colwalk = c_start;
			walk = 0;
			currow = startpos[ walk++ ];
			curcol = startpos[ walk++ ];
			assert( currow == rowwalk );
			assert( curcol == colwalk );

			//now deduce and check other starting positions by replaying the increments
			c = 0;
			for( size_t i = 1; i < static_cast< size_t >(this->nnz); ++i ) {
				colwalk += this->c_inc[ i - 1 ];
				if( colwalk >= n_overflow ) {
					colwalk -= n_overflow;
					rowwalk += this->r_inc[ c++ ];
				}
				assert( rowwalk < static_cast< size_t >(this->nor) );
				assert( colwalk < static_cast< size_t >(this->noc) );
				currow = startpos[ walk++ ];
				curcol = startpos[ walk++ ];
				assert( currow == rowwalk );
				assert( curcol == colwalk );
			}
			assert( c == jumps );

			//now do check in reverse direction (mirrors the backward kernels;
			//relies on unsigned wrap-around to detect the row jumps)
			rowwalk = prevrow = r_end;
			colwalk = c_end;
			walk = 2 * this->nnz - 1;
			curcol = startpos[ walk-- ];
			currow = startpos[ walk-- ];
			assert( currow == rowwalk );
			assert( curcol == colwalk );
			c = jumps - 1;
			for( size_t i = static_cast< size_t >(this->nnz) - 2; i < static_cast< size_t >(this->nnz) - 1; --i ) {
				colwalk -= this->c_inc[ i ];
				if( colwalk >= n_overflow ) {
					colwalk += n_overflow;
					rowwalk -= this->r_inc[ c-- ];
				}
				curcol = startpos[ walk-- ];
				currow = startpos[ walk-- ];
				assert( currow == rowwalk );
				assert( curcol == colwalk );
			}
#endif
		}
432 
433  virtual void getFirstIndexPair( _i_value &row, _i_value &col ) {
434  row = this->r_start;
435  col = this->c_start;
436  }
437 
		/**
		 * In-place z=xA function.
		 *
		 * Visits all sub-structures in forward order and delegates the actual
		 * multiplication to each sub-structure's zxa kernel; the input and
		 * output cursors are advanced via the BICRS increment arrays, where a
		 * column-cursor overflow past y_end signals a row jump.
		 *
		 * @param x_p The input vector (indexed by row).
		 * @param y_p The output vector (indexed by column, length at least noc).
		 */
		virtual void zxa( const _t_value*__restrict__ x_p, _t_value*__restrict__ y_p ) {
			//nothing to do for an empty matrix
			if( this->nnz == 0 ) return;

			//set parameters
			_sub_ds**__restrict__ ds_f = this->dss;
			_i_value*__restrict__ c_f = this->c_inc;
			_i_value*__restrict__ r_f = this->r_inc;
			const _t_value*__restrict__ x_f = x_p + this->r_start;
			_t_value*__restrict__ y_f = y_p + this->c_start;

			_t_value* const y_end = y_p + this->noc;
			_sub_ds**__restrict__ const ds_end = ds_f + this->nnz;

			//enter loop
			while( ds_f < ds_end ) {
				//forward direction
				(*ds_f++)->zxa( x_f, y_f );
				y_f += *c_f++;
				//overflow past the output range encodes a row jump
				if( y_f >= y_end ) {
					y_f -= this->n_overflow;
					x_f += *r_f++;
				}
			}
		}
462 
		/**
		 * Multiple-vector variant of zxa: applies z=xA for k input/output vector
		 * pairs simultaneously, delegating to the sub-structures' ZXa kernels.
		 *
		 * All k cursors advance in lockstep; only the first output cursor is
		 * compared against y_end to detect row jumps.
		 *
		 * @tparam k The number of vector pairs handled simultaneously.
		 * @param X Array of k input vectors.
		 * @param Z Array of k output vectors.
		 */
		template< size_t k >
		void ZXa( const _t_value *__restrict__ const *__restrict__ const X, _t_value *__restrict__ const *__restrict__ const Z ) {
			//nothing to do for an empty matrix
			if( this->nnz == 0 ) return;

			//set parameters
			_sub_ds**__restrict__ ds_f = this->dss;
			_i_value*__restrict__ c_f = this->c_inc;
			_i_value*__restrict__ r_f = this->r_inc;
			_t_value* const y_end = Z[0] + this->noc;
			_sub_ds**__restrict__ const ds_end = ds_f + this->nnz;

			//set pointer buffers (one shifted cursor per vector)
			const _t_value*__restrict__ X_f[ k ];
			_t_value*__restrict__ Y_f[ k ];
			for( size_t i = 0; i < k; ++i ) {
				X_f[ i ] = X[ i ] + r_start;
				Y_f[ i ] = Z[ i ] + c_start;
			}

			//do kernel
			while( ds_f < ds_end ) {
				(*ds_f++)->ZXa< k >( X_f, Y_f );
				for( size_t i = 0; i < k; ++i ) {
					Y_f[ i ] += *c_f;
				}
				++c_f;
				//first output cursor overflowing signals a row jump for all k
				if( Y_f[0] >= y_end ) {
					for( size_t i = 0; i < k; ++i ) {
						Y_f[ i ] -= n_overflow;
						X_f[ i ] += *r_f;
					}
					++r_f;
				}
			}
		}
499 
		/**
		 * Interleaved zxa kernel for use with the BetaHilbert scheme.
		 *
		 * Alternates between a forward cursor starting at the first sub-block
		 * and a backward cursor starting at the last, until the two meet.
		 *
		 * @param x_p The input vector (indexed by row).
		 * @param y_p The output vector (indexed by column).
		 */
		virtual void zxa_fb( const _t_value*__restrict__ x_p, _t_value*__restrict__ y_p ) {
			//nothing to do for an empty matrix
			if( this->nnz == 0 ) return;

			//set parameters; _f cursors walk forwards, _b cursors walk backwards
			_sub_ds**__restrict__ ds_f = this->dss;
			_sub_ds**__restrict__ ds_b = ds_f + this->nnz - 1;
			_i_value*__restrict__ c_f = this->c_inc;
			_i_value*__restrict__ c_b = this->c_inc + this->nnz - 2;
			_i_value*__restrict__ r_f = this->r_inc;
			_i_value*__restrict__ r_b = this->r_inc + this->jumps - 1;
			const _t_value*__restrict__ x_f = x_p + this->r_start;
			const _t_value*__restrict__ x_b = x_p + this->r_end;
			_t_value*__restrict__ y_f = y_p + this->c_start;
			_t_value*__restrict__ y_b = y_p + this->c_end;

			const _t_value* y_end = y_p + this->noc;

			//enter loop
			while( ds_f <= ds_b ) {
				//forward direction
				(*ds_f++)->zxa( x_f, y_f );
				y_f += *c_f++;
				//overflow past the output range encodes a row jump
				if( y_f >= y_end ) {
					y_f -= this->n_overflow;
					x_f += *r_f++;
				}

				//check whether we are done
				if( ds_f > ds_b ) break;

				//backward direction (increments are applied in reverse)
				(*ds_b--)->zxa( x_b, y_b );
				y_b -= *c_b--;
				assert( c_b >= this->c_inc-1 );
				//dropping below the output base encodes a backward row jump
				if( y_b < y_p ) {
					y_b += this->n_overflow;
					x_b -= *r_b--;
					assert( y_b >= y_p );
					assert( y_b < y_end );
				}
			}
		}
543 
		/**
		 * In-place z=Ax function.
		 *
		 * Visits all sub-structures in forward order and delegates to each
		 * sub-structure's zax kernel; here the input cursor walks the columns
		 * and the output cursor walks the rows, with a wrap of the input
		 * cursor (in either direction) signalling a row jump.
		 *
		 * @param x_p The input vector (indexed by column, length at least noc).
		 * @param y_p The output vector (indexed by row).
		 */
		virtual void zax( const _t_value*__restrict__ x_p, _t_value*__restrict__ y_p ) {
			//nothing to do for an empty matrix
			if( this->nnz == 0 ) return;

			//set parameters
			_sub_ds**__restrict__ ds_f = this->dss;
			_i_value*__restrict__ c_f = this->c_inc;
			_i_value*__restrict__ r_f = this->r_inc;
			const _t_value*__restrict__ x_f = x_p + this->c_start;
			_t_value*__restrict__ y_f = y_p + this->r_start;

			const _t_value* const x_end = x_p + this->noc;
			_sub_ds**__restrict__ const ds_end = ds_f + this->nnz;

			//enter loop
			while( ds_f < ds_end ) {
				//forward direction
				assert( x_f >= x_p );
				assert( x_f < x_end );
				(*ds_f++)->zax( x_f, y_f );
				x_f += *c_f++;
				//leaving the valid input range (either side) encodes a row jump
				if( x_f < x_p || x_f >= x_end ) {
					x_f -= this->n_overflow;
					y_f += *r_f++;
				}
			}
		}
570 
		/**
		 * Multiple-vector variant of zax: applies z=Ax for k input/output vector
		 * pairs simultaneously, delegating to the sub-structures' ZaX kernels.
		 *
		 * All k cursors advance in lockstep; only the first input cursor is
		 * range-checked to detect row jumps.
		 *
		 * @tparam k The number of vector pairs handled simultaneously.
		 * @param X Array of k input vectors.
		 * @param Z Array of k output vectors.
		 */
		template< size_t k >
		void ZaX( const _t_value*__restrict__ const *__restrict__ const X, _t_value*__restrict__ const *__restrict__ const Z ) {
			//nothing to do for an empty matrix
			if( this->nnz == 0 ) return;

			//set parameters
			_sub_ds**__restrict__ ds_f = this->dss;
			_i_value*__restrict__ c_f = this->c_inc;
			_i_value*__restrict__ r_f = this->r_inc;

			//local buffer of pointers
			const _t_value *__restrict__ X_f[ k ];
			_t_value *__restrict__ Y_f[ k ];

			//fill local pointer buffer
			for( size_t s = 0; s < k; ++s ) {
				X_f[s] = X[s] + c_start;
				Y_f[s] = Z[s] + r_start;
			}

			//end parameters
			const _t_value* const x_end = X[0] + this->noc;
			_sub_ds**__restrict__ const ds_end = ds_f + this->nnz;

			//enter loop
			while( ds_f < ds_end ) {
				for( size_t s = 0; s < k; ++s ) {
					assert( X_f[s] >= X[s] );
					assert( X_f[s] < X[0] + this->noc );
				}
				(*ds_f++)->ZaX< k >( X_f, Y_f );
				for( size_t s = 0; s < k; ++s ) {
					X_f[s] += *c_f;
				}
				++c_f;
				//first input cursor leaving its range signals a row jump for all k
				if( X_f[0] < X[0] || X_f[0] >= x_end ) {
					for( size_t s = 0; s < k; ++s ) {
						X_f[s] -= this->n_overflow;
						Y_f[s] += *r_f;
					}
					++r_f;
				}
			}
		}
615 
		/**
		 * Interleaved zax kernel for use with the BetaHilbert scheme.
		 *
		 * Alternates between a forward cursor starting at the first sub-block
		 * and a backward cursor starting at the last, until the two meet.
		 *
		 * @param x_p The input vector (indexed by column).
		 * @param y_p The output vector (indexed by row).
		 */
		virtual void zax_fb( const _t_value*__restrict__ x_p, _t_value*__restrict__ y_p ) {
			//nothing to do for an empty matrix
			if( this->nnz == 0 ) return;

			//set parameters; _f cursors walk forwards, _b cursors walk backwards
			_sub_ds**__restrict__ ds_f = this->dss;
			_sub_ds**__restrict__ ds_b = ds_f + this->nnz - 1;
			_i_value*__restrict__ c_f = this->c_inc;
			_i_value*__restrict__ c_b = this->c_inc + this->nnz - 2;
			_i_value*__restrict__ r_f = this->r_inc;
			_i_value*__restrict__ r_b = this->r_inc + this->jumps - 1;
			const _t_value*__restrict__ x_f = x_p + this->c_start;
			const _t_value*__restrict__ x_b = x_p + this->c_end;
			_t_value*__restrict__ y_f = y_p + this->r_start;
			_t_value*__restrict__ y_b = y_p + this->r_end;

			const _t_value* x_end = x_p + this->noc;

			//enter loop
			while( ds_f <= ds_b ) {
				//forward direction
				assert( x_f >= x_p );
				assert( x_f < x_end );
				(*ds_f++)->zax( x_f, y_f );
				x_f += *c_f++;
				//leaving the valid input range encodes a row jump
				if( x_f < x_p || x_f >= x_end ) {
					x_f -= this->n_overflow;
					y_f += *r_f++;
				}

				//check whether we are done
				if( ds_f > ds_b ) break;

				//backward direction (increments are applied in reverse)
				assert( x_b >= x_p );
				assert( x_b < x_end );
				(*ds_b--)->zax( x_b, y_b );
				x_b -= *c_b--;
				assert( c_b >= this->c_inc-1 );
				//leaving the valid input range encodes a backward row jump
				if( x_b < x_p || x_b >= x_end ) {
					x_b += this->n_overflow;
					y_b -= *r_b--;
					assert( x_b >= x_p );
					assert( x_b < x_end );
				}
			}
		}
663 
664  virtual size_t bytesUsed() {
665  //upper-level bytes
666  size_t ret = sizeof( ULI ) * 4 + sizeof( _i_value ) * ( this->nnz + jumps + 1 );
667  //pointers to sub-level data structures
668  ret += sizeof( void* ) * this->nnz;
669  //recursively add lower-level data use
670  for( size_t i = 0; i < static_cast< size_t >(this->nnz); ++i ) {
671  ret += dss[ i ]->bytesUsed();
672  }
673  //return result
674  return ret;
675  }
676 };
677 #endif
Hierarchical BICRS with fixed subblock size and distribution.
Definition: FBICRS.hpp:75
_i_value nnz
Number of non-zeros.
Definition: SparseMatrix.hpp:58
_i_value * c_inc
Column increment array.
Definition: FBICRS.hpp:98
static const ULI beta_n
Maximum matrix size for _sub_ds with the above data type; column size.
Definition: FBICRS.hpp:106
FBICRS(std::vector< std::vector< Triplet< _t_value > > > &input, ULI m, ULI n, _t_value zero=0)
Base hierarchical constructor.
Definition: FBICRS.hpp:150
virtual void zax(const _t_value *__restrict__ x_p, _t_value *__restrict__ y_p)
In-place z=Ax function.
Definition: FBICRS.hpp:544
The incremental compressed row storage sparse matrix data structure.
Definition: ICRS.hpp:53
FBICRS(_i_value *row, _i_value *col, _t_value *val, ULI m, ULI n, ULI nz, _t_value zero)
Base COO-based constructor.
Definition: FBICRS.hpp:140
virtual void zxa_fb(const _t_value *__restrict__ x_p, _t_value *__restrict__ y_p)
Interleaved zxa kernel for use with the BetaHilbert scheme.
Definition: FBICRS.hpp:501
ULI c_end
Column index of the last nonzero.
Definition: FBICRS.hpp:89
_sub_ds ** dss
Stores the lower-level data structures.
Definition: FBICRS.hpp:112
ULI jumps
How many row jumps are stored.
Definition: FBICRS.hpp:92
void load(_i_value *row, _i_value *col, _t_value *val, ULI m, ULI n, ULI nz, _t_value zero)
Builds input matrix from COO input.
Definition: FBICRS.hpp:192
virtual unsigned long int m()
Queries the number of rows this matrix contains.
Definition: SparseMatrix.hpp:107
ULI r_start
Row index of the first nonzero.
Definition: FBICRS.hpp:80
static const ULI beta_m
Maximum matrix size for _sub_ds with the above data type; row size.
Definition: FBICRS.hpp:103
ULI r_end
Row index of the last nonzero.
Definition: FBICRS.hpp:86
ULI c_start
Column index of the first nonzero.
Definition: FBICRS.hpp:83
void load(std::vector< std::vector< Triplet< _t_value > > > &input, ULI m, ULI n, _t_value zero)
Builds input matrix from Triplet input.
Definition: FBICRS.hpp:209
void loadFromFile(const std::string file, const _t_value zero = 0)
Function which loads a matrix from a matrix market file.
Definition: SparseMatrix.hpp:89
FBICRS(std::string file, _t_value zero=0)
Base file-based constructor.
Definition: FBICRS.hpp:135
Interface common to all sparse matrix storage schemes.
Definition: SparseMatrix.hpp:46
File created by: A. N. Yzelman, Dept. of Computer Science, KU Leuven, 2011.
virtual ~FBICRS()
Default destructor.
Definition: FBICRS.hpp:118
FBICRS(std::vector< Triplet< _t_value > > &input, ULI m, ULI n, _t_value zero=0)
Base Triplet-based constructor.
Definition: FBICRS.hpp:145
_i_value noc
Number of columns.
Definition: SparseMatrix.hpp:55
virtual void getFirstIndexPair(_i_value &row, _i_value &col)
Returns the first nonzero index, per reference.
Definition: FBICRS.hpp:433
FBICRS()
Base constructor (initialises with invalid data).
Definition: FBICRS.hpp:132
virtual size_t bytesUsed()
Function to query the amount of storage required by this sparse matrix.
Definition: FBICRS.hpp:664
void ZXa(const _t_value *__restrict__ const *__restrict__ const X, _t_value *__restrict__ const *__restrict__ const Z)
Definition: FBICRS.hpp:465
_i_value nor
Number of rows.
Definition: SparseMatrix.hpp:52
void ZaX(const _t_value *__restrict__ const *__restrict__ const X, _t_value *__restrict__ const *__restrict__ const Z)
Definition: FBICRS.hpp:573
_t_value zero_element
The element considered to be zero.
Definition: SparseMatrix.hpp:63
virtual void zxa(const _t_value *__restrict__ x_p, _t_value *__restrict__ y_p)
In-place z=xA function.
Definition: FBICRS.hpp:438
ULI n_overflow
Caches the overflow forcing value.
Definition: FBICRS.hpp:115
_i_value * r_inc
Row increment array.
Definition: FBICRS.hpp:95
virtual void zax_fb(const _t_value *__restrict__ x_p, _t_value *__restrict__ y_p)
Interleaved zax kernel for use with the BetaHilbert scheme.
Definition: FBICRS.hpp:617
virtual void load(std::vector< Triplet< _t_value > > &input, _i_value m, _i_value n, _t_value zero)
Builds input matrix from Triplet input.
Definition: FBICRS.hpp:162
virtual unsigned long int n()
Queries the number of columns this matrix contains.
Definition: SparseMatrix.hpp:115
A single triplet value.
Definition: Triplet.hpp:52
size_t fillIn
Stores the total fillIn.
Definition: FBICRS.hpp:109