d7/d0a/ICRS_8hpp_source.html

 /*

  * Copyright (c) 2007-2014, A. N. Yzelman,   Utrecht University 2007-2011;

  *                                                    KU Leuven 2011-2014.

  *                          R. H. Bisseling, Utrecht University 2007-2014.

  *

  * This file is part of the Sparse Library.

  *

  * This library was developed under supervision of Prof. dr. Rob H. Bisseling at

  * Utrecht University, from 2007 until 2011. From 2011-2014, development continued

  * at KU Leuven, where Prof. dr. Dirk Roose contributed significantly to the ideas

  * behind the newer parts of the library code.

  *

  *     The Sparse Library is free software: you can redistribute it and/or modify

  *     it under the terms of the GNU General Public License as published by the

  *     Free Software Foundation, either version 3 of the License, or (at your

  *     option) any later version.

  *

  *     The Sparse Library is distributed in the hope that it will be useful, but

  *     WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY

  *     or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License

  *     for more details.

  *

  *     You should have received a copy of the GNU General Public License along

  *     with the Sparse Library. If not, see <http://www.gnu.org/licenses/>.

  */


 #include "Triplet.hpp"
 #include "SparseMatrix.hpp"
 #include <assert.h>
 #include <algorithm>
 #include <cmath>
 #include <cstdlib>
 #include <iostream>
 #include <string>
 #include <utility>
 #include <vector>


 //#define _DEBUG


 #ifndef _H_ICRS

 #define _H_ICRS


 #ifdef _DEBUG

 #include<iostream>

 #endif


 /**
  *  The incremental compressed row storage (ICRS) sparse matrix data structure.
  *
  *  Nonzeros are stored row by row in ds. Instead of absolute indices, c_ind
  *  holds the column increment leading to the next nonzero. A move to a later
  *  row is encoded by adding the number of columns to that increment; the
  *  resulting overflow of the column position tells the multiplication kernels
  *  to consume the next row increment from r_ind.
  *
  *  @tparam T        Type of the nonzero values.
  *  @tparam _i_value Integer type used to store the increments.
  */
 template< typename T, typename _i_value=ULI >
 class ICRS: public SparseMatrix< T, ULI > {

   private:

         /** Frees the three arrays and resets the pointers; deleting NULL is a no-op. */
         void freeArrays() {
                 delete [] ds;
                 delete [] c_ind;
                 delete [] r_ind;
                 ds = NULL;
                 c_ind = NULL;
                 r_ind = NULL;
                 r_ind_len = 0;
         }

   protected:

         /** Array containing the actual nnz non-zeros. */
         T* ds;

         /** Start position, row. */
         ULI r_start;

         /** Start position, column. */
         ULI c_start;

         /** Array containing the column jumps. */
         _i_value* c_ind;

         /** Array containing the row jumps. */
         _i_value* r_ind;

         /** Remembers the number of bytes allocated. */
         size_t bytes;

         /** Number of entries allocated in r_ind (needed to copy it safely). */
         ULI r_ind_len;

   public:

         /** Fill-in field for interoperability with vecBICRS. */
         static const size_t fillIn = 0;

         /**
          *  Comparison function used for sorting input data with qsort.
          *  Orders triplets by row index first and by column index second.
          *
          *  @param left  Pointer to the first Triplet< T >.
          *  @param right Pointer to the second Triplet< T >.
          *  @return -1, 0 or 1 when left orders before, equal to, or after right.
          */
         static int compareTriplets( const void * left, const void * right ) {
                 const Triplet< T > &one = *static_cast< const Triplet< T > * >( left );
                 const Triplet< T > &two = *static_cast< const Triplet< T > * >( right );
                 if( one.i() < two.i() )
                         return -1;
                 if( one.i() > two.i() )
                         return 1;
                 if( one.j() < two.j() )
                         return -1;
                 if( one.j() > two.j() )
                         return 1;
                 return 0;
         }

         /** Base constructor. Leaves an instance that owns no arrays. */
         ICRS():
                 ds( NULL ), r_start( 0 ), c_start( 0 ), c_ind( NULL ), r_ind( NULL ),
                 bytes( 0 ), r_ind_len( 0 ) {}

         /**
          *  Constructor which loads the matrix through SparseMatrix::loadFromFile.
          *
          *  @param file Path handed to loadFromFile.
          *  @param zero The element considered to be zero.
          */
         ICRS( std::string file, T zero = 0 ):
                 ds( NULL ), r_start( 0 ), c_start( 0 ), c_ind( NULL ), r_ind( NULL ),
                 bytes( 0 ), r_ind_len( 0 ) {
                 //the pointers are NULL-initialised first so that the destructor is
                 //safe even if loading never reaches load()
                 this->loadFromFile( file, zero );
         }

         /**
          *  Base constructor which only initialises the internal arrays.
          *  The array contents are left uninitialised.
          *
          *  NOTE(review): the base constructor receives (nnz, cols, rows, zero);
          *  its parameter order is not visible from this file -- confirm that
          *  rows and columns are not swapped.
          *
          *  @param number_of_nonzeros Number of entries to allocate per array.
          *  @param number_of_rows     Number of matrix rows.
          *  @param number_of_cols     Number of matrix columns.
          *  @param zero               The element considered to be zero.
          */
         ICRS( const ULI number_of_nonzeros, const ULI number_of_rows, const ULI number_of_cols, T zero ):
                 SparseMatrix< T, ULI >( number_of_nonzeros, number_of_cols, number_of_rows, zero ),
                 ds( NULL ), r_start( 0 ), c_start( 0 ), c_ind( NULL ), r_ind( NULL ),
                 bytes( 0 ), r_ind_len( 0 ) {
                 ds = new T[ this->nnz ];
                 c_ind = new _i_value[ this->nnz ];
                 r_ind = new _i_value[ this->nnz ];
                 r_ind_len = this->nnz;
                 bytes = sizeof( ULI ) * 2 + sizeof( _i_value ) * 2 * this->nnz + sizeof( T ) * this->nnz;
         }

         /**
          *  Copy constructor; makes a deep copy of all three arrays.
          *
          *  @param toCopy Instance to copy. It is not modified.
          */
         ICRS( const ICRS< T, _i_value >& toCopy ):
                 SparseMatrix< T, ULI >(),
                 ds( NULL ), r_start( toCopy.r_start ), c_start( toCopy.c_start ),
                 c_ind( NULL ), r_ind( NULL ), bytes( 0 ), r_ind_len( toCopy.r_ind_len ) {
                 this->zero_element = toCopy.zero_element;
                 this->nnz = toCopy.nnz;
                 this->noc = toCopy.noc;
                 this->nor = toCopy.nor;
                 //every array is copied with the length it was allocated with;
                 //an empty source (NULL arrays) yields an empty copy
                 if( toCopy.ds != NULL ) {
                         ds = new T[ this->nnz ];
                         std::copy( toCopy.ds, toCopy.ds + this->nnz, ds );
                 }
                 if( toCopy.c_ind != NULL ) {
                         c_ind = new _i_value[ this->nnz ];
                         std::copy( toCopy.c_ind, toCopy.c_ind + this->nnz, c_ind );
                 }
                 if( toCopy.r_ind != NULL ) {
                         r_ind = new _i_value[ r_ind_len ];
                         std::copy( toCopy.r_ind, toCopy.r_ind + r_ind_len, r_ind );
                 } else {
                         r_ind_len = 0;
                 }
                 bytes = sizeof( ULI ) * 2 + sizeof( _i_value ) * ( this->nnz + r_ind_len ) + sizeof( T ) * this->nnz;
         }

         /**
          *  Copy assignment (copy-and-swap). Without it the implicitly generated
          *  operator would share the arrays between both instances, and both
          *  destructors would free them.
          *
          *  @param other Instance to copy.
          *  @return Reference to this instance.
          */
         ICRS< T, _i_value >& operator=( const ICRS< T, _i_value >& other ) {
                 if( this != &other ) {
                         ICRS< T, _i_value > copy( other );
                         std::swap( this->zero_element, copy.zero_element );
                         std::swap( this->nnz, copy.nnz );
                         std::swap( this->noc, copy.noc );
                         std::swap( this->nor, copy.nor );
                         std::swap( ds, copy.ds );
                         std::swap( r_start, copy.r_start );
                         std::swap( c_start, copy.c_start );
                         std::swap( c_ind, copy.c_ind );
                         std::swap( r_ind, copy.r_ind );
                         std::swap( bytes, copy.bytes );
                         std::swap( r_ind_len, copy.r_ind_len );
                         //copy now holds the old arrays and releases them on scope exit
                 }
                 return *this;
         }

         /**
          *  Constructor which transforms a collection of input triplets to ICRS format.
          *
          *  @param input Triplets to store; the vector is sorted in place.
          *  @param m     Number of matrix rows.
          *  @param n     Number of matrix columns.
          *  @param zero  The element considered to be zero.
          */
         ICRS( std::vector< Triplet< T > >& input, const ULI m, const ULI n, const T zero = 0 ):
                 ds( NULL ), r_start( 0 ), c_start( 0 ), c_ind( NULL ), r_ind( NULL ),
                 bytes( 0 ), r_ind_len( 0 ) {
                 load( input, m, n, zero );
         }

         /**
          *  Builds the ICRS arrays from a collection of triplets. Arrays held
          *  from an earlier load or constructor are released first.
          *
          *  The program is terminated through exit( 1 ) when an increment does
          *  not fit in _i_value.
          *
          *  @param input Triplets to store; the vector is sorted in place by
          *               row, then column. Explicit zeros are not filtered out.
          *  @param m     Number of matrix rows.
          *  @param n     Number of matrix columns.
          *  @param zero  The element considered to be zero.
          */
         virtual void load( std::vector< Triplet< T > >& input, const ULI m, const ULI n, const T zero ) {
                 //drop previously held storage so that re-loading does not leak
                 freeArrays();
                 r_start = c_start = 0;
                 bytes = sizeof( ULI ) * 2;

                 this->zero_element = zero;
                 this->nor = m;
                 this->noc = n;

                 if( log2( static_cast< double >( this->noc ) ) > sizeof( _i_value ) * 8 )
                         std::cerr << "Warning: the matrix with " << this->noc << " columns cannot be represented within " << (sizeof( _i_value )*8) << "-bit index values this ICRS instance uses!" << std::endl;

                 if( log2( static_cast< double >( this->nor ) ) > sizeof( _i_value ) * 8 )
                         std::cerr << "Warning: the matrix with " << this->nor << " rows cannot be represented within " << (sizeof( _i_value )*8) << "-bit index values this ICRS instance uses!" << std::endl;

                 if( m == 0 || n == 0 || input.size() == 0 ) { //empty matrix
                         this->nor = this->noc = this->nnz = 0;
                         return;
                 }

                 //WARNING: noc*nor typically overflows on 32 bits!
                 //         immediately start recording differences
                 //         instead of trying to directly calculate
                 //         the index as i*noc+j.

                 //Complexity depends on the C library; typically O(nnz log(nnz)).
                 qsort( &input[ 0 ], input.size(), sizeof( Triplet< T > ), &compareTriplets );

                 //filtering out zeros is skipped for now.

                 //Record the row jumps; entry 0 is the absolute index of the first row
                 std::vector< ULI > r_ind_temp;
                 typename std::vector< Triplet< T > >::const_iterator it = input.begin();
                 assert( it->i() < this->nor );
                 assert( it->j() < this->noc );
                 ULI prev_row = it->i();
                 r_ind_temp.push_back( prev_row );
                 for( ++it; it != input.end(); ++it ) {
                         assert( it->i() < this->nor );
                         assert( it->j() < this->noc );
                         assert( it->i() >= prev_row );
                         if( it->i() > prev_row ) {
                                 assert( it->i() - prev_row < m );
                                 r_ind_temp.push_back( it->i() - prev_row );
                                 prev_row = it->i();
                         }
                 }
                 this->nnz = input.size();

                 //allocate arrays
                 const unsigned long int allocsize = this->nnz;
                 r_ind_len = r_ind_temp.size();
                 ds    = new T[ allocsize ];
                 c_ind = new _i_value[ allocsize ];
                 r_ind = new _i_value[ r_ind_len ];

                 //record #bytes used
                 bytes = sizeof( ULI ) * 2 + sizeof( _i_value ) * ( allocsize + r_ind_len ) + sizeof( T ) * allocsize;

                 //the last column increment must overflow the column position so
                 //that the kernels leave their inner loop after the final nonzero
                 c_ind[ allocsize - 1 ] = this->noc;
                 if( static_cast< ULI >( c_ind[ allocsize - 1 ] ) != this->noc ) {
                         std::cerr << "Final column increment too large to store in this ICRS instance!" << std::endl;
                         exit( 1 );
                 }

                 //copy row-jump vector, shifted by one: r_ind_temp[ 0 ] becomes r_start
                 //O(m) worst case
                 r_start = r_ind_temp[ 0 ];
                 for( ULI i = 1; i < r_ind_len; ++i ) {
                         r_ind[ i - 1 ] = r_ind_temp[ i ];
                         if( static_cast< ULI >( r_ind[ i - 1 ] ) != r_ind_temp[ i ] ) {
                                 std::cerr << "Row increment too large to store in this ICRS instance!" << std::endl;
                                 exit( 1 );
                         }
                 }
                 //the kernels read one row increment after the last row and add it
                 //to a pointer; give that entry a defined, harmless value
                 r_ind[ r_ind_len - 1 ] = 0;

                 //make ICRS
                 prev_row = r_start;
                 ULI prev_col = c_start = input[ 0 ].j(); //now r_- and c_-start have been set
                 //O(nnz)
                 unsigned long int check_jumps = 0;
                 unsigned long int check_row   = r_ind_temp[ 0 ];
                 assert( r_ind_temp[ 0 ] == r_start );
                 ds[ 0 ] = input[ 0 ].value;
                 for( ULI i = 1; i < this->nnz; ++i ) {
                         const Triplet< T > &cur = input[ i ];
                         const ULI currow = cur.i();
                         const ULI curcol = cur.j();
                         if( currow == prev_row ) {
                                 //same row: plain column difference
                                 c_ind[ i - 1 ] = curcol - prev_col;
                                 if( static_cast< ULI >( c_ind[ i - 1 ] ) != curcol - prev_col ) {
                                         std::cerr << "Column increment too large to store in this ICRS instance!" << std::endl;
                                         exit( 1 );
                                 }
                                 assert( currow == check_row );
                         } else {
                                 //row change: add noc so the column position overflows;
                                 //the unsigned difference may wrap, the sum is still correct
                                 assert( currow > prev_row );
                                 c_ind[ i - 1 ] = this->noc + ( curcol - prev_col );
                                 if( static_cast< ULI >( c_ind[ i - 1 ] ) - this->noc + prev_col != curcol ) {
                                         std::cerr << "Overflowed column increment too large to store in this ICRS instance!" << std::endl;
                                         exit( 1 );
                                 }
                                 check_row += r_ind[ check_jumps++ ];
                                 assert( currow == check_row );
                                 prev_row = currow;
                         }
                         ds[ i ] = cur.value;
                         prev_col = curcol;

 #ifdef _DEBUG
                         std::cout << currow << "," << curcol << "(" << cur.value << ") maps to " << c_ind[ i - 1 ] << std::endl;
 #endif
                 }

                 //assert row jumps is equal to r_ind_temp's size
                 assert( check_jumps == r_ind_temp.size()-1 );
         }

         /**
          *  Returns the first nonzero index, per reference.
          *
          *  @param row Receives r_start.
          *  @param col Receives c_start.
          */
         virtual void getFirstIndexPair( ULI &row, ULI &col ) {
                 row = this->r_start;
                 col = this->c_start;
         }

         /**
          *  Gets starting position (first nonzero location).
          *
          *  @param row_start    Receives r_start.
          *  @param column_start Receives c_start.
          */
         void getStartingPos( ULI &row_start, ULI &column_start ) {
                 row_start = this->r_start;
                 column_start = this->c_start;
         }

         /**
          *  Sets starting position of matrix multiplication. The new position
          *  may not lie beyond the current one (checked by assertion only).
          *
          *  @param row_start    New r_start.
          *  @param column_start New c_start.
          */
         void setStartingPos( const ULI row_start, const ULI column_start ) {
                 assert( row_start <= this->r_start );
                 assert( column_start <= this->c_start );
                 this->r_start = row_start;
                 this->c_start = column_start;
         }

         /**
          *  In-place z=xA function. Results are accumulated into z (+=).
          *
          *  @param pDataX Input vector x, indexed by row.
          *  @param pDataZ Output vector z, indexed by column.
          */
         virtual void zxa( const T*__restrict__ pDataX, T*__restrict__ pDataZ ) {
                 if( this->nor == 0 || this->noc == 0 || this->nnz == 0 ) return;
                          T *__restrict__ pDataA    = ds;
                 const    T *__restrict__ pDataAend = ds + this->nnz;
                 const T * const pDataZend = pDataZ + this->noc;

                 _i_value *__restrict__ pIncRow   = r_ind;
                 _i_value *__restrict__ pIncCol   = c_ind;

                 //go to first position
                 pDataZ += this->c_start;
                 pDataX += this->r_start;
                 while( pDataA < pDataAend ) {
                         //stay on this row until the column position overflows
                         while( pDataZ < pDataZend ) {
                                 *pDataZ += *pDataA * *pDataX;
                                 pDataA++;
                                 pDataZ  += *pIncCol;
                                 pIncCol++;
                         }
                         //undo the overflow and move to the next row
                         pDataZ -= this->noc;
                         pDataX += *pIncRow++;
                 }
         }

         /**
          *  In-place z=Ax function. Results are accumulated into z (+=).
          *
          *  @param pDataX Input vector x, indexed by column.
          *  @param pDataZ Output vector z, indexed by row.
          */
         virtual void zax( const T*__restrict__ pDataX, T*__restrict__ pDataZ ) {
                 if( this->nor == 0 || this->noc == 0 || this->nnz == 0 ) return;
                       T *__restrict__ pDataA    = ds;
                 const T * const       pDataAend = ds     + this->nnz;
                 const T * const       pDataXend = pDataX + this->noc;
 #ifndef NDEBUG
                 //bounds used by the assertions below only
                 const T * const pDataXst  = pDataX;
                 const T * const pDataZst  = pDataZ;
                 const T * const pDataZend = pDataZ + this->nor;
 #endif

                 _i_value *__restrict__ pIncRow   = r_ind;
                 _i_value *__restrict__ pIncCol   = c_ind;

                 //go to first position
                 assert( r_start < this->nor );
                 assert( c_start < this->noc );
                 assert( this->nnz > 0 );
                 pDataX += c_start;
                 pDataZ += r_start;
                 while( pDataA < pDataAend ) {
                         //stay on this row until the column position overflows
                         while( pDataX < pDataXend ) {
                                 assert( pDataA < pDataAend );
                                 assert( pDataX >= pDataXst );
                                 assert( pDataX < pDataXend );
                                 assert( pDataZ >= pDataZst );
                                 assert( pDataZ < pDataZend );
                                 assert( pDataX + this->noc >= pDataXend ); //otherwise pDataX is before the start of x!
                                 assert( pDataA + this->nnz >= pDataAend );
                                 assert( pDataZ + this->nor >= pDataZend );

                                 *pDataZ += *pDataA++ * *pDataX;
                                  pDataX += *pIncCol++;
                         }
                         //undo the overflow
                         pDataX -= this->noc;
                         //jump to correct row
                         pDataZ += *pIncRow++;
                 }
         }

         /**
          *  Applies the z=Ax kernel to k vector pairs at once; Z[s] accumulates
          *  A times X[s] for s = 0, ..., k-1.
          *
          *  @tparam k Number of vector pairs.
          *  @param X  Array of k input vectors.
          *  @param Z  Array of k output vectors.
          */
         template< size_t k >
         void ZaX( const T*__restrict__ const *__restrict__ const X, T *__restrict__ const *__restrict__ const Z ) {

                 //catch boundary cases
                 if( SparseMatrix< T, ULI >::nor == 0 || SparseMatrix< T, ULI >::noc == 0 || SparseMatrix< T, ULI >::nnz == 0 ) return;

                 //nonzero range; the end of the first input vector detects row changes
                 T *__restrict__ pDataA = ds;
                 const T *__restrict__ const pDataAend = ds   + SparseMatrix< T, ULI >::nnz;
                 const T *__restrict__ const pDataXend = X[0] + SparseMatrix< T, ULI >::noc;

                 //get data structure handles
                 _i_value *__restrict__ pIncRow   = r_ind;
                 _i_value *__restrict__ pIncCol   = c_ind;

                 //local buffer of pointers
                 const T *__restrict__ pDataX[ k ];
                       T *__restrict__ pDataZ[ k ];

                 //fill local pointer buffer, starting at the first nonzero position
                 for( size_t s = 0; s < k; ++s ) {
                         pDataX[s] = X[s] + c_start;
                         pDataZ[s] = Z[s] + r_start;
                 }

                 //for each nonzero
                 while( pDataA < pDataAend ) {
                         //while on the same row
                         while( pDataX[0] < pDataXend ) {
                                 //do ICRS inner kernel on each of the k vectors
                                 for( size_t s = 0; s < k; ++s ) {
                                         *(pDataZ[s]) += *pDataA * *(pDataX[s]);
                                           pDataX[s]  += *pIncCol;
                                 }
                                 //go to next nonzero
                                 ++pDataA;
                                 ++pIncCol;
                         }
                         //for each of the k vectors
                         for( size_t s = 0; s < k; ++s ) {
                                 //jump back in range
                                 pDataX[s] -= SparseMatrix< T, ULI >::noc;
                                 //jump to correct row
                                 pDataZ[s] += *pIncRow;
                         }
                         //go to next row
                         ++pIncRow;
                 }
         }

         /**
          *  Applies the z=xA kernel to k vector pairs at once; Z[s] accumulates
          *  X[s] times A for s = 0, ..., k-1.
          *
          *  @tparam k Number of vector pairs.
          *  @param X  Array of k input vectors, indexed by row.
          *  @param Z  Array of k output vectors, indexed by column.
          */
         template< size_t k >
         void ZXa( const T *__restrict__ const *__restrict__ const X, T *__restrict__ const *__restrict__ const Z ) {
                 if( this->nor == 0 || this->noc == 0 || this->nnz == 0 ) return;
                          T *__restrict__ pDataA    = ds;
                 const    T *__restrict__ pDataAend = ds + this->nnz;
                 const T *__restrict__ const pDataZend = Z[0] + this->noc;

                 _i_value *__restrict__ pIncRow = r_ind;
                 _i_value *__restrict__ pIncCol = c_ind;

                 //local buffer of pointers
                 const T *__restrict__ pDataX[ k ];
                       T *__restrict__ pDataZ[ k ];

                 //start at the first nonzero position
                 for( size_t s = 0; s < k; ++s ) {
                         pDataZ[s] = Z[s] + c_start;
                         pDataX[s] = X[s] + r_start;
                 }

                 //for each nonzero
                 while( pDataA < pDataAend ) {
                         //while on the same row
                         while( pDataZ[0] < pDataZend ) {
                                 for( size_t s = 0; s < k; ++s ) {
                                         *(pDataZ[s]) += *pDataA * *(pDataX[s]);
                                           pDataZ[s]  += *pIncCol;
                                 }
                                 ++pDataA;
                                 ++pIncCol;
                         }
                         //undo the overflow and move every vector pair to the next row
                         for( size_t s = 0; s < k; ++s ) {
                                 pDataZ[s] -= this->noc;
                                 pDataX[s] += *pIncRow;
                         }
                         ++pIncRow;
                 }
         }

         /** Base destructor; releases the three arrays. */
         virtual ~ICRS() {
                 freeArrays();
         }

         /**
          *  Function to query the amount of storage required by this sparse matrix.
          *
          *  @return The number of bytes recorded when the arrays were allocated.
          */
         virtual size_t bytesUsed() {
                 return bytes;
         }

 };


 #endif


SparseMatrix< T, ULI >::nnz
ULI nnz
Number of non-zeros.
Definition: SparseMatrix.hpp:58

ICRS::r_start
ULI r_start
Start position, row.
Definition: ICRS.hpp:63

ICRS::ICRS
ICRS(std::string file, T zero=0)
Base constructor.
Definition: ICRS.hpp:104

ICRS
The incremental compressed row storage sparse matrix data structure.
Definition: ICRS.hpp:53

ICRS::ZaX
void ZaX(const T *__restrict__ const *__restrict__ const X, T *__restrict__ const *__restrict__ const Z)
Definition: ICRS.hpp:402

ICRS::bytesUsed
virtual size_t bytesUsed()
Function to query the amount of storage required by this sparse matrix.
Definition: ICRS.hpp:496

ICRS::getFirstIndexPair
virtual void getFirstIndexPair(ULI &row, ULI &col)
Returns the first nonzero index, per reference.
Definition: ICRS.hpp:295

Triplet::i
ULI i() const
Definition: Triplet.hpp:70

ICRS::zxa
virtual void zxa(const T *__restrict__ pDataX, T *__restrict__ pDataZ)
In-place z=xA function.
Definition: ICRS.hpp:327

ICRS::ICRS
ICRS(ICRS< T > &toCopy)
Copy constructor.
Definition: ICRS.hpp:130

ICRS::r_ind
_i_value * r_ind
Array containing the row jumps.
Definition: ICRS.hpp:72

SparseMatrix< T, ULI >::m
virtual unsigned long int m()
Queries the number of rows this matrix contains.
Definition: SparseMatrix.hpp:107

ICRS::compareTriplets
static int compareTriplets(const void *left, const void *right)
Comparison function used for sorting input data.
Definition: ICRS.hpp:83

ICRS::ZXa
void ZXa(const T *__restrict__ const *__restrict__ const X, T *__restrict__ const *__restrict__ const Z)
Definition: ICRS.hpp:453

SparseMatrix< T, ULI >::loadFromFile
void loadFromFile(const std::string file, const T zero=0)
Function which loads a matrix from a matrix market file.
Definition: SparseMatrix.hpp:89

SparseMatrix
Interface common to all sparse matrix storage schemes.
Definition: SparseMatrix.hpp:46

ICRS::c_ind
_i_value * c_ind
Array containing the column jumps.
Definition: ICRS.hpp:69

SparseMatrix< T, ULI >::noc
ULI noc
Number of columns.
Definition: SparseMatrix.hpp:55

ICRS::ICRS
ICRS(std::vector< Triplet< T > > &input, const ULI m, const ULI n, const T zero=0)
Constructor which transforms a collection of input triplets to ICRS format.
Definition: ICRS.hpp:159

ICRS::bytes
size_t bytes
Remembers the number of bytes allocated.
Definition: ICRS.hpp:75

ICRS::ICRS
ICRS(const ULI number_of_nonzeros, const ULI number_of_rows, const ULI number_of_cols, T zero)
Base constructor which only initialises the internal arrays.
Definition: ICRS.hpp:118

Triplet::value
T value
Value stored at this triplet.
Definition: Triplet.hpp:95

ICRS::~ICRS
~ICRS()
Base destructor.
Definition: ICRS.hpp:490

ICRS::fillIn
static const size_t fillIn
Fill-in field for interoperability with vecBICRS.
Definition: ICRS.hpp:80

SparseMatrix< T, ULI >::nor
ULI nor
Number of rows.
Definition: SparseMatrix.hpp:52

ICRS::zax
virtual void zax(const T *__restrict__ pDataX, T *__restrict__ pDataZ)
In-place z=Ax function.
Definition: ICRS.hpp:359

SparseMatrix< T, ULI >::zero_element
T zero_element
The element considered to be zero.
Definition: SparseMatrix.hpp:63

Triplet::j
ULI j() const
Definition: Triplet.hpp:73

ICRS::ICRS
ICRS()
Base constructor.
Definition: ICRS.hpp:98

SparseMatrix< T, ULI >::n
virtual unsigned long int n()
Queries the number of columns this matrix contains.
Definition: SparseMatrix.hpp:115

ICRS::c_start
ULI c_start
Start position, column.
Definition: ICRS.hpp:66

ICRS::ds
T * ds
Array containing the actual nnz non-zeros.
Definition: ICRS.hpp:60

Triplet
A single triplet value.
Definition: Triplet.hpp:52

ICRS::setStartingPos
void setStartingPos(const ULI row_start, const ULI column_start)
Sets starting position of matrix multiplication.
Definition: ICRS.hpp:314

ICRS::getStartingPos
void getStartingPos(ULI &row_start, ULI &column_start)
Gets starting position (first nonzero location)
Definition: ICRS.hpp:301

ICRS::load
virtual void load(std::vector< Triplet< T > > &input, const ULI m, const ULI n, const T zero)
Definition: ICRS.hpp:164