SparseLibrary  Version 1.6.0
ICRS.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2007-2014, A. N. Yzelman, Utrecht University 2007-2011;
3  * KU Leuven 2011-2014.
4  * R. H. Bisseling, Utrecht University 2007-2014.
5  *
6  * This file is part of the Sparse Library.
7  *
8  * This library was developed under supervision of Prof. dr. Rob H. Bisseling at
9  * Utrecht University, from 2007 until 2011. From 2011-2014, development continued
10  * at KU Leuven, where Prof. dr. Dirk Roose contributed significantly to the ideas
11  * behind the newer parts of the library code.
12  *
13  * The Sparse Library is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by the
15  * Free Software Foundation, either version 3 of the License, or (at your
16  * option) any later version.
17  *
18  * The Sparse Library is distributed in the hope that it will be useful, but
19  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21  * for more details.
22  *
23  * You should have received a copy of the GNU General Public License along
24  * with the Sparse Library. If not, see <http://www.gnu.org/licenses/>.
25  */
26 
33 #include "Triplet.hpp"
34 #include "SparseMatrix.hpp"
35 #include <assert.h>
36 #include <vector>
37 #include <algorithm>
38 #include <cmath>
39 
40 //#define _DEBUG
41 
42 #ifndef _H_ICRS
43 #define _H_ICRS
44 
45 #ifdef _DEBUG
46 #include<iostream>
47 #endif
48 
52 template< typename T, typename _i_value=ULI >
53 class ICRS: public SparseMatrix< T, ULI > {
54 
55  private:
56 
57  protected:
58 
60  T* ds;
61 
63  ULI r_start;
64 
66  ULI c_start;
67 
69  _i_value* c_ind;
70 
72  _i_value* r_ind;
73 
75  size_t bytes;
76 
77  public:
78 
80  static const size_t fillIn = 0;
81 
83  static int compareTriplets( const void * left, const void * right ) {
84  const Triplet< T > one = *( (Triplet< T > *)left );
85  const Triplet< T > two = *( (Triplet< T > *)right );
86  if( one.i() < two.i() )
87  return -1;
88  if ( one.i() > two.i() )
89  return 1;
90  if ( one.j() < two.j() )
91  return -1;
92  if ( one.j() > two.j() )
93  return 1;
94  return 0;
95  }
96 
98  ICRS() {}
99 
104  ICRS( std::string file, T zero = 0 ) {
105  this->loadFromFile( file, zero );
106  }
107 
118  ICRS( const ULI number_of_nonzeros, const ULI number_of_rows, const ULI number_of_cols, T zero ):
119  SparseMatrix< T, ULI >( number_of_nonzeros, number_of_cols, number_of_rows, zero ) {
120  ds = new T[ this->nnz ];
121  c_ind = new _i_value[ this->nnz ];
122  r_ind = new _i_value[ this->nnz ];
123  bytes = sizeof( ULI ) * 2 + sizeof( _i_value ) * 2 * this->nnz + sizeof( T ) * this->nnz;
124  }
125 
130  ICRS( ICRS< T >& toCopy ) {
131  this->zero_element = toCopy.zero_element;
132  this->nnz = toCopy.nnz;
133  this->noc = toCopy.noc;
134  this->nor = toCopy.nor;
135  this->r_start = toCopy.r_start;
136  this->c_start = toCopy.c_start;
137  ds = new T[ this->nnz ];
138  c_ind = new _i_value[ this->nnz - 1 ];
139  r_ind = new _i_value[ this->nnz - 1 ];
140  for( ULI i=0; i<this->nnz; i = i + 1 ) {
141  ds[ i ] = toCopy.ds[ i ];
142  c_ind[ i ]= toCopy.c_ind[ i ];
143  r_ind[ i ] = toCopy.r_ind[ i ];
144  }
145  bytes = sizeof( ULI ) * 2 + sizeof( _i_value ) * 2 * this->nnz + sizeof( T ) * this->nnz;
146  }
147 
159  ICRS( std::vector< Triplet< T > >& input, const ULI m, const ULI n, const T zero = 0 ) {
160  load( input, m, n, zero );
161  }
162 
164  virtual void load( std::vector< Triplet< T > >& input, const ULI m, const ULI n, const T zero ) {
165  this->zero_element = zero;
166 
167  this->nor = m;
168  this->noc = n;
169 
170  if( log2(this->noc) > sizeof( _i_value ) * 8 )
171  std::cerr << "Warning: the matrix with " << this->noc << " columns cannot be represented within " << (sizeof( _i_value )*8) << "-bit index values this ICRS instance uses!" << std::endl;
172 
173  if( log2(this->nor) > sizeof( _i_value ) * 8 )
174  std::cerr << "Warning: the matrix with " << this->nor << " rows cannot be represented within " << (sizeof( _i_value )*8) << "-bit index values this ICRS instance uses!" << std::endl;
175 
176  if( m==0 || n==0 || input.size() == 0 ) { //empty matrix
177  this->nor = this->noc = this->nnz = 0;
178  ds = NULL;
179  r_ind = NULL;
180  c_ind = NULL;
181  return;
182  }
183 
184  typename std::vector< Triplet< T > >::iterator in_it;
185 
186  //WARNING: noc*nor typically overflows on 32 bits!
187  // immediately start recording differences
188  // instead of trying to directly calculate
189  // the index as i*noc+j.
190 
191  //Complexity compiler-package dependent. Probably O(nnz log(nnz)) average, O(nnz^2) worst-case.
192  //for standard C++ sort:
193  //sort( input.begin(), input.end(), compareTriplets );
194  //for C quicksort:
195  qsort( &input[ 0 ], input.size(), sizeof( Triplet< T > ), &compareTriplets );
196 
197  //filtering out zeros is skipped for now.
198 
199  //Count the number of row jumps
200  std::vector< ULI > r_ind_temp;
201  typename std::vector< Triplet< T > >::iterator it = input.begin();
202  ULI prev_row = (*it).i();
203  r_ind_temp.push_back( prev_row );
204  it++;
205 
206  //O(nnz log(nnz)); push_back on a vector uses amortised log(n) array growing algorithm on inserts
207  for( ; it!=input.end(); it++ ) {
208  assert( (*it).i() < this->nor );
209  assert( (*it).j() < this->noc );
210  assert( (*it).i() >= prev_row );
211  if( (*it).i() > prev_row ) {
212  assert( (*it).i() - prev_row < m );
213  r_ind_temp.push_back( (*it).i() - prev_row );
214  prev_row = (*it).i();
215  }
216  }
217  this->nnz = input.size();
218 
219  //allocate arrays
220  const unsigned long int allocsize = this->nnz;
221  ds = new T[ allocsize ];
222  c_ind = new _i_value[ allocsize ];
223  r_ind = new _i_value[ r_ind_temp.size() ];
224 
225  //record #bytes used
226  bytes = sizeof( ULI ) * 2 + sizeof( _i_value ) * ( allocsize + r_ind_temp.size() ) + sizeof( T ) * allocsize;
227 
228  //set last entry
229  c_ind[ allocsize - 1 ] = this->noc;
230  //r_ind does not have to be set; altough the last element is read, it is actually never used.
231 
232  //copy row-jump vector
233  //O(m) worst case
234  r_start = r_ind_temp[ 0 ];
235  for( ULI i=1; i<r_ind_temp.size(); i++ ) {
236  r_ind[i-1] = r_ind_temp[ i ];
237  if( static_cast< ULI >( r_ind[i-1] ) != r_ind_temp[ i ] ) {
238  std::cerr << "Row increment too large to store in this ICRS instance!" << std::endl;
239  exit( 1 );
240  }
241  }
242 
243  //make ICRS
244  prev_row = r_start;
245  ULI prev_col = c_start = input[ 0 ].j(); //now r_- and c_-start have been set
246  //O(nnz)
247  unsigned long int check_jumps = 0;
248  unsigned long int check_row = r_ind_temp[0];
249  assert( r_ind_temp[ 0 ] == r_start );
250  ds[ 0 ] = input[ 0 ].value;
251  for( ULI i=1; i<this->nnz; ++i ) {
252  const Triplet< T > cur = input[ i ];
253  const ULI currow = cur.i();
254  const ULI curcol = cur.j();
255  if( currow == prev_row ) {
256  c_ind[i-1] = curcol - prev_col;
257  if( static_cast< ULI >( c_ind[i-1] ) != curcol - prev_col ) {
258  std::cerr << "Column increment too large to store in this ICRS instance!" << std::endl;
259  exit( 1 );
260  }
261  assert( currow == check_row );
262  } else {
263  assert( currow > prev_row );
264  c_ind[i-1] = this->noc + ( curcol - prev_col );
265  if( static_cast< ULI >( c_ind[i-1] ) - this->noc + prev_col != curcol ) {
266  std::cerr << "Overflowed column increment too large to store in this ICRS instance!" << std::endl;
267  exit( 1 );
268  }
269  check_row += r_ind[ check_jumps++ ];
270  assert( currow == check_row );
271  prev_row = currow;
272  }
273  ds[ i ] = cur.value;
274  prev_col = curcol;
275 
276 #ifdef _DEBUG
277  std::cout << currow << "," << curcol << "(" << cur.value << ") maps to " << c_ind[ i ] << std::endl;
278 #endif
279  }
280 
281  //append with zeroes
282  for( unsigned long int i=this->nnz; i<allocsize; ++i ) {
283  c_ind[ i - 1 ] = 0;
284  ds[ i ] = 0;
285  }
286 
287  //assert row jumps is equal to r_ind_temp's size
288  assert( check_jumps == r_ind_temp.size()-1 );
289 
290  //clear temporary r_ind vector
291  r_ind_temp.clear();
292  }
293 
295  virtual void getFirstIndexPair( ULI &row, ULI &col ) {
296  row = this->r_start;
297  col = this->c_start;
298  }
299 
301  void getStartingPos( ULI &row_start, ULI &column_start ) {
302  row_start = this->r_start;
303  column_start = this->c_start;
304  }
305 
314  void setStartingPos( const ULI row_start, const ULI column_start ) {
315  assert( row_start <= this->r_start );
316  assert( column_start <= this->c_start );
317  this->r_start = row_start;
318  this->c_start = column_start;
319  }
320 
327  virtual void zxa( const T*__restrict__ pDataX, T*__restrict__ pDataZ ) {
328  if( this->nor == 0 || this->noc == 0 || this->nnz == 0 ) return;
329  T *__restrict__ pDataA = ds;
330  const T *__restrict__ pDataAend = ds + this->nnz;
331  //const T *__restrict__ const pDataXend = pDataX + this->nor;
332  const T * const pDataZend = pDataZ + this->noc;
333  //const T *pDataZend = z + nor; //unused
334 
335  _i_value *__restrict__ pIncRow = r_ind;
336  _i_value *__restrict__ pIncCol = c_ind;
337 
338  //go to first position
339  pDataZ += this->c_start;
340  pDataX += this->r_start;
341  while( pDataA < pDataAend ) {
342  while( pDataZ < pDataZend ) {
343  *pDataZ += *pDataA * *pDataX;
344  pDataA++;
345  pDataZ += *pIncCol;
346  pIncCol++;
347  }
348  pDataZ -= this->noc;
349  pDataX += *pIncRow++;
350  }
351  }
352 
359  virtual void zax( const T*__restrict__ pDataX, T*__restrict__ pDataZ ) {
360  if( this->nor == 0 || this->noc == 0 || this->nnz == 0 ) return;
361  //go to first position
362  T *__restrict__ pDataA = ds;
363  const T * const pDataAend = ds + this->nnz;
364  const T * const pDataXend = pDataX + this->noc;
365 #ifndef NDEBUG
366  const T * const pDataXst = pDataX;
367  const T * const pDataZst = pDataZ;
368  const T * const pDataZend = pDataZ + this->nor;
369 #endif
370 
371  _i_value *__restrict__ pIncRow = r_ind;
372  _i_value *__restrict__ pIncCol = c_ind;
373 
374  //go to first column
375  assert( r_start < this->nor );
376  assert( c_start < this->noc );
377  assert( this->nnz > 0 );
378  pDataX += c_start;
379  pDataZ += r_start;
380  while( pDataA < pDataAend ) {
381  while( pDataX < pDataXend ) {
382  assert( pDataA < pDataAend );
383  assert( pDataX >= pDataXst );
384  assert( pDataX < pDataXend );
385  assert( pDataZ >= pDataZst );
386  assert( pDataZ < pDataZend );
387  assert( pDataX + this->noc >= pDataXend ); //otherwise pDataX is before the start of x!
388  assert( pDataA + this->nnz >= pDataAend );
389  assert( pDataZ + this->nor >= pDataZend );
390 
391  *pDataZ += *pDataA++ * *pDataX;
392  pDataX += *pIncCol++;
393  }
394  pDataX -= this->noc;
395  //jump to correct row
396  pDataZ += *pIncRow++;
397  }
398  }
399 
401  template< size_t k >
402  void ZaX( const T*__restrict__ const *__restrict__ const X, T *__restrict__ const *__restrict__ const Z ) {
403 
404  //catch boundary cases
406 
407  //go to first position
408  T *__restrict__ pDataA = ds;
409  const T *__restrict__ const pDataAend = ds + SparseMatrix< T, ULI >::nnz;
410  const T *__restrict__ const pDataXend = X[0] + SparseMatrix< T, ULI >::noc;
411 
412  //get data structure handles
413  _i_value *__restrict__ pIncRow = r_ind;
414  _i_value *__restrict__ pIncCol = c_ind;
415 
416  //local buffer of pointers
417  const T *__restrict__ pDataX[ k ];
418  T *__restrict__ pDataZ[ k ];
419 
420  //fill local pointer buffer
421  for( size_t s = 0; s < k; ++s ) {
422  pDataX[s] = X[s] + c_start;
423  pDataZ[s] = Z[s] + r_start;
424  }
425 
426  //for each nonzero
427  while( pDataA < pDataAend ) {
428  //while on the same row
429  while( pDataX[0] < pDataXend ) {
430  //do ICRS inner kernel on each of the k vectors
431  for( size_t s = 0; s < k; ++s ) {
432  *(pDataZ[s]) += *pDataA * *(pDataX[s]);
433  pDataX[s] += *pIncCol;
434  }
435  //go to next nonzero
436  ++pDataA;
437  ++pIncCol;
438  }
439  //for each of the k vectors
440  for( size_t s = 0; s < k; ++s ) {
441  //jump back in range
442  pDataX[s] -= SparseMatrix< T, ULI >::noc;
443  //jump to correct row
444  pDataZ[s] += *pIncRow;
445  }
446  //go to next row
447  ++pIncRow;
448  }
449  }
450 
452  template< size_t k >
453  void ZXa( const T *__restrict__ const *__restrict__ const X, T *__restrict__ const *__restrict__ const Z ) {
454  if( this->nor == 0 || this->noc == 0 || this->nnz == 0 ) return;
455  T *__restrict__ pDataA = ds;
456  const T *__restrict__ pDataAend = ds + this->nnz;
457  const T *__restrict__ const pDataZend = Z[0] + this->noc;
458 
459  //for code comments, see the ZaX function
460 
461  _i_value *__restrict__ pIncRow = r_ind;
462  _i_value *__restrict__ pIncCol = c_ind;
463 
464  const T *__restrict__ pDataX[ k ];
465  T *__restrict__ pDataZ[ k ];
466 
467  for( size_t s = 0; s < k; ++s ) {
468  pDataZ[s] = Z[s] + c_start;
469  pDataX[s] = X[s] + r_start;
470  }
471 
472  while( pDataA < pDataAend ) {
473  while( pDataZ[0] < pDataZend ) {
474  for( size_t s = 0; s < k; ++s ) {
475  *(pDataZ[s]) += *pDataA * *(pDataX[s]);
476  pDataZ[s] += *pIncCol;
477  }
478  ++pDataA;
479  ++pIncCol;
480  }
481  for( size_t s = 0; s < k; ++s ) {
482  pDataZ[s] -= this->noc;
483  pDataX[s] += *pIncRow;
484  }
485  ++pIncRow;
486  }
487  }
488 
490  ~ICRS() {
491  if( ds != NULL ) delete [] ds;
492  if( c_ind != NULL ) delete [] c_ind;
493  if( r_ind != NULL ) delete [] r_ind;
494  }
495 
496  virtual size_t bytesUsed() {
497  return bytes;
498  }
499 
500 };
501 
502 #endif
503 
ULI nnz
Number of non-zeros.
Definition: SparseMatrix.hpp:58
ULI r_start
Start position, row.
Definition: ICRS.hpp:63
ICRS(std::string file, T zero=0)
Base constructor.
Definition: ICRS.hpp:104
The incremental compressed row storage sparse matrix data structure.
Definition: ICRS.hpp:53
void ZaX(const T *__restrict__ const *__restrict__ const X, T *__restrict__ const *__restrict__ const Z)
Definition: ICRS.hpp:402
virtual size_t bytesUsed()
Function to query the amount of storage required by this sparse matrix.
Definition: ICRS.hpp:496
virtual void getFirstIndexPair(ULI &row, ULI &col)
Returns the first nonzero index, per reference.
Definition: ICRS.hpp:295
ULI i() const
Definition: Triplet.hpp:70
virtual void zxa(const T *__restrict__ pDataX, T *__restrict__ pDataZ)
In-place z=xA function.
Definition: ICRS.hpp:327
ICRS(ICRS< T > &toCopy)
Copy constructor.
Definition: ICRS.hpp:130
_i_value * r_ind
Array containing the row jumps.
Definition: ICRS.hpp:72
virtual unsigned long int m()
Queries the number of rows this matrix contains.
Definition: SparseMatrix.hpp:107
static int compareTriplets(const void *left, const void *right)
Comparison function used for sorting input data.
Definition: ICRS.hpp:83
void ZXa(const T *__restrict__ const *__restrict__ const X, T *__restrict__ const *__restrict__ const Z)
Definition: ICRS.hpp:453
void loadFromFile(const std::string file, const T zero=0)
Function which loads a matrix from a matrix market file.
Definition: SparseMatrix.hpp:89
Interface common to all sparse matrix storage schemes.
Definition: SparseMatrix.hpp:46
_i_value * c_ind
Array containing the column jumps.
Definition: ICRS.hpp:69
ULI noc
Number of columns.
Definition: SparseMatrix.hpp:55
ICRS(std::vector< Triplet< T > > &input, const ULI m, const ULI n, const T zero=0)
Constructor which transforms a collection of input triplets to CRS format.
Definition: ICRS.hpp:159
size_t bytes
Remembers the number of bytes allocated.
Definition: ICRS.hpp:75
ICRS(const ULI number_of_nonzeros, const ULI number_of_rows, const ULI number_of_cols, T zero)
Base constructor which only initialises the internal arrays.
Definition: ICRS.hpp:118
T value
Value stored at this triplet.
Definition: Triplet.hpp:95
~ICRS()
Base deconstructor.
Definition: ICRS.hpp:490
static const size_t fillIn
Fill-in field for interoperability with vecBICRS.
Definition: ICRS.hpp:80
ULI nor
Number of rows.
Definition: SparseMatrix.hpp:52
virtual void zax(const T *__restrict__ pDataX, T *__restrict__ pDataZ)
In-place z=Ax function.
Definition: ICRS.hpp:359
T zero_element
The element considered to be zero.
Definition: SparseMatrix.hpp:63
ULI j() const
Definition: Triplet.hpp:73
ICRS()
Base constructor.
Definition: ICRS.hpp:98
virtual unsigned long int n()
Queries the number of columns this matrix contains.
Definition: SparseMatrix.hpp:115
ULI c_start
Start position, column.
Definition: ICRS.hpp:66
T * ds
Array containing the actual nnz non-zeros.
Definition: ICRS.hpp:60
A single triplet value.
Definition: Triplet.hpp:52
void setStartingPos(const ULI row_start, const ULI column_start)
Sets starting position of matrix multiplication.
Definition: ICRS.hpp:314
void getStartingPos(ULI &row_start, ULI &column_start)
Gets starting position (first nonzero location)
Definition: ICRS.hpp:301
virtual void load(std::vector< Triplet< T > > &input, const ULI m, const ULI n, const T zero)
Definition: ICRS.hpp:164