SparseLibrary  Version 1.6.0
McCRS.hpp
1 /*
2  * Copyright (c) 2007-2014, A. N. Yzelman, Utrecht University 2007-2011;
3  * KU Leuven 2011-2014.
4  * R. H. Bisseling, Utrecht University 2007-2014.
5  *
6  * This file is part of the Sparse Library.
7  *
8  * This library was developed under supervision of Prof. dr. Rob H. Bisseling at
9  * Utrecht University, from 2007 until 2011. From 2011-2014, development continued
10  * at KU Leuven, where Prof. dr. Dirk Roose contributed significantly to the ideas
11  * behind the newer parts of the library code.
12  *
13  * The Sparse Library is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by the
15  * Free Software Foundation, either version 3 of the License, or (at your
16  * option) any later version.
17  *
18  * The Sparse Library is distributed in the hope that it will be useful, but
19  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21  * for more details.
22  *
23  * You should have received a copy of the GNU General Public License along
24  * with the Sparse Library. If not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 
28 /*
29  * File created by:
30  * A. N. Yzelman, Dept. of Mathematics, Utrecht University, 2008.
31  */
32 
33 
34 #include "Triplet.hpp"
35 #include "SparseMatrix.hpp"
36 #include <vector>
37 #include <assert.h>
38 
39 //#define _DEBUG
40 
41 #ifndef _H_McCRS
42 #define _H_McCRS
43 
44 #ifdef _DEBUG
45 #include<iostream>
46 #endif
47 
48 //Uncomment the below to enable interleaved allocation of the matrix data structure
49 //(this can lead to performance gains,
50 // but on our testset average performance dropped with about 30 percent!)
51 #define INTERLEAVE_A
52 
53 //no libnuma, no interleave
54 #ifdef _NO_LIBNUMA
55  #undef INTERLEAVE_A
56 #endif
57 
61 template< typename T >
62 class McCRS: public SparseMatrix< T, ULI > {
63 
64  private:
65 
66  protected:
67 
69  ULI* row_start;
70 
72  T* ds;
73 
75  ULI* col_ind;
76 
78  static int compareTriplets( const void * left, const void * right ) {
79  const Triplet< T > one = **( (Triplet< T > **)left );
80  const Triplet< T > two = **( (Triplet< T > **)right );
81  if ( one.j() < two.j() )
82  return -1;
83  if ( one.j() > two.j() )
84  return 1;
85  return 0;
86  }
87 
96  bool find( const ULI col_index, const ULI search_start, const ULI search_end, ULI &ret ) {
97  for( ULI i=search_start; i<search_end; i++ )
98  if( col_ind[ i ] == col_index ) {
99  ret = i;
100  return true;
101  }
102  return false;
103  }
104 
105  public:
106 
108  McCRS() {}
109 
114  McCRS( std::string file, T zero = 0 ) {
115  this->loadFromFile( file, zero );
116  }
117 
127  McCRS( const ULI number_of_nonzeros, const ULI number_of_rows, const ULI number_of_cols, T zero ) {
128  this->nnz = number_of_nonzeros;
129  this->nor = number_of_rows;
130  this->noc = number_of_cols;
131  this->zero_element = zero;
132 #ifdef INTERLEAVE_A
133  row_start = (ULI*) numa_alloc_interleaved( (this->nor + 1) * sizeof( ULI ) );
134  ds = (T*) numa_alloc_interleaved( (this->nnz) * sizeof( T ) );
135  col_ind = (ULI*) numa_alloc_interleaved( this->nnz * sizeof( ULI ) );
136 #else
137  row_start = new ULI[ (this->nor + 1) ];
138  ds = new T[ this->nnz ];
139  col_ind = new ULI[ this->nnz ];
140 #endif
141  }
142 
146  McCRS( McCRS< T >& toCopy ) {
147  this->zero_element = toCopy.zero_element;
148  this->nnz = toCopy.nnz;
149  this->nor = toCopy.nor;
150 #ifdef INTERLEAVE_A
151  row_start = (ULI*) numa_alloc_interleaved( (this->nor + 1) * sizeof( ULI ) );
152  ds = (T*) numa_alloc_interleaved( (this->nnz) * sizeof( T ) );
153  col_ind = (ULI*) numa_alloc_interleaved( (this->nnz) * sizeof( ULI ) );
154 #else
155  row_start = new ULI[ (this->nor + 1) ];
156  ds = new T[ this->nnz ];
157  col_ind = new ULI[ this->nnz ];
158 #endif
159  for( ULI i=0; i<this->nnz; i++ ) {
160  ds[ i ] = toCopy.ds[ i ];
161  col_ind[ i ] = toCopy.col_ind[ i ];
162  }
163  for( ULI i=0; i<this->nor; i++ )
164  row_start[ i ] = toCopy.row_start[ i ];
165  }
166 
177  McCRS( std::vector< Triplet< T > > input, ULI m, ULI n, T zero ) {
178  load( input, m, n, zero );
179  }
180 
182  virtual void load( std::vector< Triplet< T > >& input, ULI m, ULI n, T zero ) {
183  std::cout << "\tLoading in a vector of " << input.size() << " triplets into McCRS..." << std::endl;
184 
185  this->zero_element = zero;
186  //find nnz
187  this->nnz = input.size();
188 
189  this->nor = m;
190  this->noc = n;
191 
192  //build better datastructure
193  std::vector< std::vector< Triplet< T >* > > ds( this->nor );
194 
195  //move input there
196  typename std::vector< Triplet< T > >::iterator in_it = input.begin();
197  for( ; in_it != input.end(); ++in_it ) {
198  //Triplet< T >* cur = &(*in_it);
199  const ULI currow = in_it->i();
200  const T value = in_it->value;
201  if( value == this->zero_element ) {
202  this->nnz--;
203  continue;
204  }
205  ds.at( currow ).push_back( &(*in_it) );
206  }
207 
208  //allocate arrays
209 #ifdef INTERLEAVE_A
210  row_start = (ULI*) numa_alloc_interleaved( (this->nor + 1) * sizeof( ULI ) );
211  this->ds = (T*) numa_alloc_interleaved( (this->nnz) * sizeof( T ) );
212  col_ind = (ULI*) numa_alloc_interleaved( (this->nnz) * sizeof( ULI ) );
213 #else
214  row_start = new ULI[ (this->nor + 1) ];
215  this->ds = new T[ this->nnz ];
216  col_ind = new ULI[ this->nnz ];
217 #endif
218 
219  //make McCRS
220  ULI index = 0;
221  for( ULI currow = 0; currow < this->nor; currow++ ) {
222  row_start[ currow ] = index;
223  if( ds.at( currow ).size() == 0 ) continue;
224  qsort( &( ds.at( currow )[ 0 ] ), ds.at( currow ).size(), sizeof( Triplet< T >* ), &compareTriplets );
225  typename std::vector< Triplet< T >* >::iterator row_it = ds.at( currow ).begin();
226  for( ; row_it!=ds.at( currow ).end(); row_it++ ) {
227  const Triplet< T > cur = *(*row_it);
228  this->ds[ index ] = cur.value;
229  col_ind[ index ] = cur.j();
230  index++;
231  }
232  }
233  row_start[ this->nor ] = this->nnz;
234  std::cout << "\t" << index << " nonzeroes loaded into McCRS structure." << std::endl;
235  assert( index == this->nnz );
236  }
237 
244  T& random_access( ULI i, ULI j ) {
245  ULI found_index;
246  if ( find( j, row_start[ i ], row_start[ i+1 ], found_index ) ) {
247 #ifdef _DEBUG
248  std::cout << "Searched col_ind between " << row_start[ i ] << " and " << row_start[ i + 1 ] << ", found: " << std::endl;
249  std::cout << "Element (" << i << "," << j << ") found on index " << found_index << ", returning " << ds[ found_index ] << std::endl;
250 #endif
251  return ds[ found_index ];
252  } else
253  return this->zero_element;
254  }
255 
257  virtual void getFirstIndexPair( ULI &row, ULI &col ) {
258  row = this->row_start[ 0 ];
259  col = this->col_ind[ 0 ];
260  }
261 
262 #ifndef _NO_LIBNUMA
263 
264  virtual T* mv( const T* x ) {
265  T* ret = (T*) numa_alloc_interleaved( this->nor * sizeof( T ) );
266  for( ULI i=0; i<this->nor; i++ ) ret[ i ] = this->zero_element;
267  zax( x, ret );
268  return ret;
269  }
270 #endif
271 
275  virtual void zxa( const T*__restrict__ x, T*__restrict__ z ) {
276  std::cerr << "CRS z=xA (left-sided SpMV multiplication) is not possible to do in parallel using CRS and OpenMP parallel fors" << std::endl;
277  std::cerr << "Exiting..." << std::endl;
278  exit( 1 );
279  }
280 
287  virtual void zax( const T*__restrict__ x, T*__restrict__ z ) {
288  const unsigned long int nor = this->nor;
289  ULI * const col_ind = this->col_ind;
290  ULI * const row_start = this->row_start;
291  T * const ds = this->ds;
292  #pragma omp parallel for shared( x, z ) schedule( dynamic, 8 )
293  for( ULI row = 0; row < nor; row++ ) {
294  ULI index, *j_p;
295  T sum, *v_p, x_e;
296  sum = 0.0;
297  v_p = ds + row_start[ row ];
298  j_p = col_ind + row_start[ row ];
299  for( index = row_start[ row ]; index < row_start[ row + 1 ]; index++ ) {
300  x_e = *(x + (*j_p++));
301  sum += (*v_p++) * x_e;
302  }
303  z[ row ] = sum;
304  }
305  }
306 
307  virtual size_t bytesUsed() {
308  return sizeof( ULI ) * ( this->nnz + this->nor ) + sizeof( T ) * this->nnz;
309  }
310 
312  ULI* rowJump() { return row_start; }
313 
315  ULI* columnIndices() { return col_ind; }
316 
318  T* values() { return ds; }
319 
321  virtual ~McCRS() {
322 #ifdef INTERLEAVE_A
323  numa_free( row_start, (this->nor + 1) * sizeof( ULI ) );
324  numa_free( ds, (this->nnz) * sizeof( T ) );
325  numa_free( col_ind, (this->nnz) * sizeof( ULI ) );
326 #else
327  delete [] row_start;
328  delete [] ds;
329  delete [] col_ind;
330 #endif
331  }
332 
333 };
334 
335 #undef _DEBUG
336 #endif
337 
ULI nnz
Number of non-zeros.
Definition: SparseMatrix.hpp:58
virtual void getFirstIndexPair(ULI &row, ULI &col)
Returns the first nonzero index, per reference.
Definition: McCRS.hpp:257
The compressed row storage sparse matrix data structure.
Definition: McCRS.hpp:62
T & random_access(ULI i, ULI j)
Method which provides random matrix access to the stored sparse matrix.
Definition: McCRS.hpp:244
McCRS(std::string file, T zero=0)
Base constructor.
Definition: McCRS.hpp:114
T * values()
Returns pointer to the matrix nonzeros vector.
Definition: McCRS.hpp:318
virtual void load(std::vector< Triplet< T > > &input, ULI m, ULI n, T zero)
Definition: McCRS.hpp:182
virtual void zax(const T *__restrict__ x, T *__restrict__ z)
In-place z=Ax function.
Definition: McCRS.hpp:287
McCRS()
Base constructor.
Definition: McCRS.hpp:108
virtual unsigned long int m()
Queries the number of rows this matrix contains.
Definition: SparseMatrix.hpp:107
McCRS(std::vector< Triplet< T > > input, ULI m, ULI n, T zero)
Constructor which transforms a collection of input triplets to McCRS format.
Definition: McCRS.hpp:177
ULI * col_ind
Array containing the column indeces corresponding to the elements in ds.
Definition: McCRS.hpp:75
virtual void zxa(const T *__restrict__ x, T *__restrict__ z)
In-place z=xA function.
Definition: McCRS.hpp:275
void loadFromFile(const std::string file, const T zero=0)
Function which loads a matrix from a matrix market file.
Definition: SparseMatrix.hpp:89
Interface common to all sparse matrix storage schemes.
Definition: SparseMatrix.hpp:46
static int compareTriplets(const void *left, const void *right)
Sorts 1D columnwise.
Definition: McCRS.hpp:78
ULI * row_start
Array keeping track of individual row starting indices.
Definition: McCRS.hpp:69
ULI * rowJump()
Returns pointer to the row_start vector.
Definition: McCRS.hpp:312
ULI noc
Number of columns.
Definition: SparseMatrix.hpp:55
virtual size_t bytesUsed()
Function to query the amount of storage required by this sparse matrix.
Definition: McCRS.hpp:307
T value
Value stored at this triplet.
Definition: Triplet.hpp:95
McCRS(McCRS< T > &toCopy)
Copy constructor.
Definition: McCRS.hpp:146
ULI nor
Number of rows.
Definition: SparseMatrix.hpp:52
ULI * columnIndices()
Returns pointer to the column index vector.
Definition: McCRS.hpp:315
bool find(const ULI col_index, const ULI search_start, const ULI search_end, ULI &ret)
Helper function which finds a value with a given column index on a given subrange of indices...
Definition: McCRS.hpp:96
T zero_element
The element considered to be zero.
Definition: SparseMatrix.hpp:63
ULI j() const
Definition: Triplet.hpp:73
virtual ~McCRS()
Base deconstructor.
Definition: McCRS.hpp:321
virtual T * mv(const T *x)
Overloaded mv call; allocates output vector using numa_interleaved.
Definition: McCRS.hpp:264
T * ds
Array containing the actual nnz non-zeros.
Definition: McCRS.hpp:72
virtual unsigned long int n()
Queries the number of columns this matrix contains.
Definition: SparseMatrix.hpp:115
A single triplet value.
Definition: Triplet.hpp:52
McCRS(const ULI number_of_nonzeros, const ULI number_of_rows, const ULI number_of_cols, T zero)
Base constructor which only initialises the internal arrays.
Definition: McCRS.hpp:127