ALP User Documentation  0.8.preview
Algebraic Programming User Documentation
simple_pagerank.hpp
Go to the documentation of this file.
1 
2 /*
3  * Copyright 2021 Huawei Technologies Co., Ltd.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
27 #ifndef _H_GRB_PAGERANK
28 #define _H_GRB_PAGERANK
29 
30 #include <graphblas.hpp>
31 
32 #ifndef _GRB_NO_STDIO
33 #include <iostream>
34 #endif
35 
36 
37 namespace grb {
38 
39  namespace algorithms {
40 
130  template<
132  typename IOType, typename NonzeroT
133  >
135  Vector< IOType > &pr,
136  const Matrix< NonzeroT > &L,
137  Vector< IOType > &pr_next,
138  Vector< IOType > &pr_nextnext,
139  Vector< IOType > &row_sum,
140  const IOType alpha = 0.85,
141  const IOType conv = 0.0000001,
142  const size_t max = 1000,
143  size_t * const iterations = nullptr,
144  double * const quality = nullptr
145  ) {
146  grb::Monoid<
149  > addM;
153  > realRing;
154 #ifdef _DEBUG
155  const auto s = spmd<>::pid();
156 #endif
157  const size_t n = nrows( L );
158  const IOType zero = realRing.template getZero< IOType >();
159 
160  // runtime sanity checks
161  {
162  if( n != ncols( L ) ) {
163  return ILLEGAL;
164  }
165  if( size( pr ) != n ) {
166  return MISMATCH;
167  }
168  if( size( pr ) != n ) {
169  return MISMATCH;
170  }
171  if( size( pr_next ) != n ||
172  size( pr_nextnext ) != n ||
173  size( row_sum ) != n
174  ) {
175  return MISMATCH;
176  }
177  if( capacity( pr ) != n ) {
178  return ILLEGAL;
179  }
180  if( capacity( pr_next ) != n ||
181  capacity( pr_nextnext ) != n ||
182  capacity( row_sum ) != n
183  ) {
184  return ILLEGAL;
185  }
186  // alpha must be within 0 and 1 (both exclusive)
187  if( alpha <= 0 || alpha >= 1 ) {
188  return ILLEGAL;
189  }
190  // max must be larger than 0
191  if( max <= 0 ) {
192  return ILLEGAL;
193  }
194  }
195 
196  // running error code
197  RC ret = SUCCESS;
198 
199  // make initial guess if the user did not make one
200  if( nnz( pr ) != n ) {
201  ret = set( pr, static_cast< IOType >( 1 ) / static_cast< IOType >( n ) );
202  assert( ret == SUCCESS );
203  }
204 
205  // initialise all temporary vectors to default dense values
206  ret = ret ? ret : set( pr_nextnext, zero );
207  assert( ret == SUCCESS );
208 
209  // calculate row sums
210  Semiring<
215  > pattern_ring;
216 
217  ret = ret ? ret : set( pr_next, 1 ); // abuses pr_next as temporary vector
218  ret = ret ? ret : set( row_sum, 0 );
219  ret = ret ? ret :
220  vxm< descr | descriptors::dense | descriptors::transpose_matrix >(
221  row_sum, pr_next, L, pattern_ring
222  );
223  // pr_next is now free for further use
224  assert( ret == SUCCESS );
225 
226 #ifdef _DEBUG
227  std::cout << "Prelude to iteration 0:\n";
228  (void) eWiseLambda(
229  [ &row_sum, &pr_next, &pr ]( const size_t i ) {
230  #pragma omp critical
231  {
232  std::cout << i << ": " << row_sum[ i ] << "\t" << pr[ i ] << "\t"
233  << pr_next[ i ] << "\n";
234  }
235  },
236  pr, pr_next, row_sum );
237 #endif
238 
239  // calculate in-place the inverse of row sums, and keep zero if dangling
240  // this eWiseLambda is always supported since alpha is read-only
241  ret = ret ? ret : eWiseLambda(
242  [ &row_sum, &alpha, &zero ]( const size_t i ) {
243  assert( row_sum[ i ] >= zero );
244  if( row_sum[ i ] > zero ) {
245  row_sum[ i ] = alpha / row_sum[ i ];
246  }
247  },
248  row_sum
249  );
250  assert( ret == SUCCESS );
251 
252 #ifdef _DEBUG
253  std::cout << "Row sum array:\n";
254  (void) eWiseLambda(
255  [ &row_sum ]( const size_t i ) {
256  #pragma omp critical
257  std::cout << i << ": " << row_sum[ i ] << "\n";
258  }, row_sum
259  );
260 #endif
261 
262  // some control variables
263  size_t iter = 0; //#iterations, initalise to zero
264  IOType dangling = zero; // used for caching the contribution of random jumps
265  // from dangling nodes
266  IOType residual = zero; // declare residual (computed inside the do-while
267  // loop)
268 
269  // main loop
270  do {
271  // reset iteration-local values
272  residual = dangling = 0;
273 
274 #ifdef _DEBUG
275  std::cout << "Current PR array:\n";
276  (void) eWiseLambda(
277  [ &pr ]( const size_t i ) {
278  #pragma omp critical
279  if( i < 8 ) {
280  std::cout << i << ": " << pr[ i ] << "\n";
281  }
282  }, pr
283  );
284 #endif
285 
286  // calculate dangling factor and do scaling
287  if( ret == SUCCESS ) {
288  // can we reduce via lambdas?
290  // yes we can, so save one unnecessary stream on pr
291  ret = eWiseLambda(
292  [ &pr_next, &row_sum, &dangling, &pr ]( const size_t i ) {
293  // calculate dangling contribution
294  if( row_sum[ i ] == 0 ) {
295  dangling += pr[ i ];
296  pr_next[ i ] = 0;
297  } else {
298  // pre-scale input
299  pr_next[ i ] = pr[ i ] * row_sum[ i ];
300  }
301  }, row_sum, pr, pr_next
302  );
303  // allreduce dangling factor
304  assert( ret == SUCCESS );
305  ret = ret ? ret : grb::collectives<>::allreduce(
306  dangling,
308  );
309  assert( ret == SUCCESS );
310  } else {
311  // otherwise we have to handle the reduction separately
312  ret = foldl< grb::descriptors::invert_mask >(
313  dangling, pr, row_sum, addM
314  );
315  assert( ret == SUCCESS );
316 
317  // separately from the element-wise multiplication here
318  ret = ret ? ret : set( pr_next, 0 );
319  ret = ret ? ret : eWiseApply(
320  pr_next, pr, row_sum,
322  );
323  assert( ret == SUCCESS );
324  }
325  }
326 
327 #if defined _DEBUG && defined _GRB_WITH_LPF
328  for( size_t dbg = 0; dbg < spmd<>::nprocs(); ++dbg ) {
329  const auto s = spmd<>::pid();
330  if( dbg == s ) {
331  std::cout << "Next PR array (under construction):\n";
332  eWiseLambda(
333  [ &pr_next, s ]( const size_t i ) {
334  #pragma omp critical
335  if( i < 10 ) {
336  std::cout << i << ", " << s << ": " << pr_next[ i ] << "\n";
337  }
338  },
339  pr_next );
340  }
341  spmd<>::sync();
342  }
343 #endif
344 
345  if( ret == SUCCESS ) {
346 #ifdef _DEBUG
347  std::cout << s << ": dangling (1) = " << dangling << "\n";
348 #endif
349 
350  // complete dangling factor
351  dangling = ( alpha * dangling + 1 - alpha ) / static_cast< IOType >( n );
352 
353 #ifdef _DEBUG
354  std::cout << s << ": dangling (2) = " << dangling << "\n";
355 #endif
356  }
357 
358  // multiply with row-normalised link matrix (no change to dangling rows)
359  // note that the later eWiseLambda requires the output be dense
360  ret = ret ? ret : set( pr_nextnext, 0 ); assert( ret == SUCCESS );
361  ret = ret ? ret : vxm< descr >( pr_nextnext, pr_next, L, realRing );
362  assert( ret == SUCCESS );
363  assert( n == grb::nnz( pr_nextnext ) );
364 
365 #if defined _DEBUG && defined _GRB_WITH_LPF
366  for( size_t dbg = 0; dbg < spmd<>::nprocs(); ++dbg ) {
367  if( dbg == s ) {
368  std::cout << s << ": nextnext PR array (after vxm):\n";
369  (void) eWiseLambda(
370  [ &pr_nextnext, s ]( const size_t i ) {
371  #pragma omp critical
372  if( i < 10 )
373  std::cout << i << ", " << s << ": " << pr_nextnext[ i ] << "\n";
374  }, pr_nextnext
375  );
376  }
377  (void) spmd<>::sync();
378  }
379  for( size_t k = 0; k < spmd<>::nprocs(); ++k ) {
380  if( spmd<>::pid() == k ) {
381  std::cout << "old pr \t scaled input \t alpha * pr * H at PID "
382  << k << "\n";
383  (void) eWiseLambda(
384  [ &pr, &pr_next, &pr_nextnext ]( const size_t i ) {
385  #pragma omp critical
386  {
387  std::cout << pr[ i ] << "\t" << pr_next[ i ] << "\t"
388  << pr_nextnext[ i ] << "\n";
389  }
390  }, pr, pr_next, pr_nextnext
391  );
392  }
393  (void) spmd<>::sync();
394  }
395 #endif
396 
397  // calculate & normalise new pr and calculate residual
398  if( ret == SUCCESS ) {
399  // can we reduce via lambdas?
401  // yes, we can. So update pr[ i ] and calculate residual simultaneously
402  ret = eWiseLambda(
403  [ &pr, &pr_nextnext, &dangling, &residual, &zero ]( const size_t i ) {
404  // cache old pagerank vector
405  const IOType oldval = pr[ i ];
406  // set new pagerank vector
407  pr[ i ] = pr_nextnext[ i ] + dangling;
408  // update residual
409  if( oldval > pr[ i ] ) {
410  residual += oldval - pr[ i ];
411  } else {
412  residual += pr[ i ] - oldval;
413  }
414  pr_nextnext[ i ] = zero;
415  }, pr, pr_nextnext
416  );
417  // reduce process-local residual
418  assert( ret == SUCCESS );
419  if( ret == SUCCESS ) {
421  residual,
423  );
424  }
425  assert( ret == SUCCESS );
426  } else {
427  // we cannot reduce via lambdas, so calculate new pr vector
428  ret = foldl< descriptors::dense >( pr_nextnext, dangling, addM );
429  assert( ret == SUCCESS );
430  // do a dot product under the one-norm ``ring''
431  if( ret == SUCCESS ) {
432  residual = zero;
433  ret = dot< descriptors::dense >(
434  residual,
435  pr, pr_nextnext,
437  );
438  assert( ret == SUCCESS );
439  }
440  if( ret == SUCCESS ) {
441  // next pr vector becomes current pr vector
442  std::swap( pr, pr_nextnext );
443  }
444  }
445  }
446 
447  // update iteration count
448  ++iter;
449 
450  // check convergence
451  if( conv != zero && residual <= conv ) { break; }
452 
453 #ifdef _DEBUG
454  if( grb::spmd<>::pid() == 0 ) {
455  std::cout << "Iteration " << iter << ", "
456  << "residual = " << residual << std::endl;
457  }
458 #endif
459 
460  } while( ret == SUCCESS && iter < max );
461 
462  // check if the user requested any stats, and output if yes
463  if( iterations != nullptr ) {
464  *iterations = iter;
465  }
466  if( quality != nullptr ) {
467  *quality = residual;
468  }
469 
470  // return the appropriate exit code
471  if( ret != SUCCESS ) {
472  if( spmd<>::pid() == 0 ) {
473  std::cerr << "Error while running simple pagerank algorithm: "
474  << toString( ret ) << "\n";
475  }
476  return ret;
477  } else if( residual <= conv ) {
478 #ifdef _DEBUG
479  if( spmd<>::pid() == 0 ) {
480  std::cerr << "Info: simple pagerank converged after " << iter
481  << " iterations.\n";
482  }
483 #endif
484  return SUCCESS; // converged!
485  } else {
486 #ifdef _DEBUG
487  if( spmd<>::pid() == 0 ) {
488  std::cout << "Info: simple pagerank did not converge after "
489  << iter << " iterations.\n";
490  }
491 #endif
492  return FAILED; // not converged
493  }
494  }
495 
496  } // namespace algorithms
497 
498 } // namespace grb
499 
500 #endif // end _H_GRB_PAGERANK
501 
RC simple_pagerank(Vector< IOType > &pr, const Matrix< NonzeroT > &L, Vector< IOType > &pr_next, Vector< IOType > &pr_nextnext, Vector< IOType > &row_sum, const IOType alpha=0.85, const IOType conv=0.0000001, const size_t max=1000, size_t *const iterations=nullptr, double *const quality=nullptr)
The canonical PageRank algorithm.
Definition: simple_pagerank.hpp:134
RC eWiseApply(Vector< OutputType, backend, Coords > &z, const InputType1 alpha, const InputType2 beta, const OP &op=OP(), const Phase &phase=EXECUTE, const typename std::enable_if< !grb::is_object< OutputType >::value &&!grb::is_object< InputType1 >::value &&!grb::is_object< InputType2 >::value &&grb::is_operator< OP >::value, void >::type *const =nullptr)
Computes , out of place, operator version.
Definition: blas1.hpp:208
RC set(Vector< DataType, backend, Coords > &x, const T val, const Phase &phase=EXECUTE, const typename std::enable_if< !grb::is_object< DataType >::value &&!grb::is_object< T >::value, void >::type *const =nullptr) noexcept
Sets all elements of a vector to the given value.
Definition: io.hpp:858
Standard identity for numerical addition.
Definition: identities.hpp:57
A call to a primitive has determined that one of its arguments was illegal as per the specification o...
Definition: rc.hpp:143
An ALP/GraphBLAS matrix.
Definition: matrix.hpp:72
RC
Return codes of ALP primitives.
Definition: rc.hpp:47
A GraphBLAS vector.
Definition: vector.hpp:64
Standard identity for numerical multiplication.
Definition: identities.hpp:79
size_t nnz(const Vector< DataType, backend, Coords > &x) noexcept
Request the number of nonzeroes in a given vector.
Definition: io.hpp:479
static constexpr Descriptor no_operation
Indicates no additional pre- or post-processing on any of the GraphBLAS function arguments.
Definition: descriptors.hpp:63
This operator multiplies the two input parameters and writes the result to the output variable.
Definition: ops.hpp:208
unsigned int Descriptor
Descriptors indicate pre- or post-processing for some or all of the arguments to an ALP/GraphBLAS cal...
Definition: descriptors.hpp:54
size_t nrows(const Matrix< InputType, backend, RIT, CIT, NIT > &A) noexcept
Requests the row size of a given matrix.
Definition: io.hpp:286
static RC allreduce(IOType &inout, const Operator op=Operator())
Schedules an allreduce operation of a single object of type IOType per process.
Definition: collectives.hpp:121
This operator assigns the left-hand input if the right-hand input evaluates true.
Definition: ops.hpp:85
size_t ncols(const Matrix< InputType, backend, RIT, CIT, NIT > &A) noexcept
Requests the column size of a given matrix.
Definition: io.hpp:339
static size_t pid() noexcept
Definition: spmd.hpp:61
Indicates when one of the grb::algorithms has failed to achieve its intended result,...
Definition: rc.hpp:154
Standard identity for the logical AND operator.
Definition: identities.hpp:178
Collection of various properties on the given ALP/GraphBLAS backend.
Definition: properties.hpp:52
This operator takes the sum of the two input parameters and writes it to the output variable.
Definition: ops.hpp:175
The ALP/GraphBLAS namespace.
Definition: graphblas.hpp:477
The main header to include in order to use the ALP/GraphBLAS API.
For backends that support multiple user processes this class defines some basic primitives to support...
Definition: spmd.hpp:51
RC eWiseLambda(const Func f, const Vector< DataType, backend, Coords > &x, Args...)
Executes an arbitrary element-wise user-defined function f on any number of vectors of equal length.
Definition: blas1.hpp:3746
size_t size(const Vector< DataType, backend, Coords > &x) noexcept
Request the size of a given vector.
Definition: io.hpp:235
Indicates the primitive has executed successfully.
Definition: rc.hpp:54
size_t capacity(const Vector< InputType, backend, Coords > &x) noexcept
Queries the capacity of the given ALP/GraphBLAS container.
Definition: io.hpp:388
A generalised semiring.
Definition: semiring.hpp:190
One or more of the ALP/GraphBLAS objects passed to the primitive that returned this error have mismat...
Definition: rc.hpp:90
This operator returns the absolute difference between two numbers.
Definition: ops.hpp:541
std::string toString(const RC code)
A generalised monoid.
Definition: monoid.hpp:54