RTGS.h

Go to the documentation of this file.
00001 /*
00002  *  Copyright (C) 2007  A.N. Yzelman
00003  *  Released under LGPL, see license.txt
00004  *
00005  *  Last modified at 12th of June, 2007, by A.N. Yzelman
00006  *
00007  *  RTGS.h: Definition of a Random TGS R-tree.
00008  *
00009  */
00010 
00011 #ifndef _H_RTGS
00012 #define _H_RTGS
00013 
00014 #include "basic-r-tree.h"
00015 #include <limits>
00016 
00018 #define _TIMINGS
00019 
00021 #define _DEF_SR 0.5
00022 
00025 #define  _EVALUATE_ALL
00026 
00027 /*If defined, shows progress on screen
00028 #define _PROGRESS*/
00029 
00030 #ifdef _TIMINGS
00031         #include <sys/times.h>
00032         struct tms RTIMING;
00033         clock_t RTIMER;
00034         clock_t Rintertimer;
00035         clock_t Rintertimersum = 0;
00036 #endif
00037 
00038 //#define _DEBUG
00039 
00045 template< typename base_tree >
00046 class RTGS_tree : public base_tree {
00047 
00048    private:
00049    
00055         virtual bool checkReady() const { return loaded; }
00056         
00064         //vector< base_tree* > bulkLoad( vector< BB_container* > *input );
00065         
00066    protected:
00067    
00069         bool loaded;
00070 
00072         unsigned int SKIP_PROBABILITY;
00073         
00077         vector< BB_container > cache;
00078    
00085         virtual vector< Ordering< BB_container > * > useOrderings() const;
00086 
00094         virtual double cost_function( const BB_type *x, const BB_type *y ) const;
00095         
00097         base_tree* BulkLoadChunk( vector< BB_container* >* D, unsigned int h );
00098 
00100         vector< vector< BB_container* >* >* BestBinarySplit( vector< BB_container* >* D, unsigned int m );
00101 
00103         vector< vector< BB_container* >* >* Partition( vector< BB_container* >* D, unsigned int m );
00104 
00105         
00106    public:
00107    
00109         RTGS_tree( double skip ) : base_tree() { loaded = false; this->deleteLeaves = false; SKIP_PROBABILITY = static_cast< unsigned int >( skip * RAND_MAX ); }
00110 
00112         RTGS_tree( R_tree_props *props, double skip ) : base_tree( props ) { loaded = false; this->deleteLeaves = false; SKIP_PROBABILITY = static_cast< unsigned int >( skip * RAND_MAX ); }
00113    
00115         RTGS_tree() : base_tree() { loaded = false; this->deleteLeaves = false; SKIP_PROBABILITY = static_cast< unsigned int >( _DEF_SR * RAND_MAX ); }
00116 
00118         RTGS_tree( R_tree_props *props ) : base_tree( props ) { loaded = false; this->deleteLeaves = false; SKIP_PROBABILITY = static_cast< unsigned int >( _DEF_SR * RAND_MAX ); }
00119         
00121         virtual ~RTGS_tree() {}
00122    
00128         virtual void ensureReady();
00129    
00131         void bulkLoad();
00132         
00142          virtual base_tree *insert( BB_container *element );
00143 
00153         virtual base_tree *remove( BB_container *element );
00154 
00164         virtual base_tree *search( BB_container *element );
00165 
00167         virtual vector< int > containedIn( BB_type box ) const;
00168 
00170         virtual vector< int > intersects( const vector< double > begin, const vector< double > end ) const;
00171 
00173         virtual vector< int > intersects( Point point ) const;
00174 
00176         virtual vector< int > intersects( vector<double> a, double b ) const;
00177 
00179         virtual vector< int > neighboursOf( BB_container *item ) const;
00180 
00181 };
00182 
00183 
00184 /***************************************************
00185  *                       PRIVATE                   *
00186  ***************************************************/
00187 
00188 template< typename base_tree >
00189 double RTGS_tree< base_tree >::cost_function( const BB_type *a, const BB_type *b ) const {
00190         return Current_Metric::Volume( *a ) + Current_Metric::Volume ( *b );
00191 }
00192 
00193 template< typename base_tree >
00194 void RTGS_tree< base_tree >::ensureReady() {
00195         if ( !loaded ) {
00196                 bulkLoad();
00197         }
00198 }
00199 
00200 template< typename base_tree >
00201 vector< Ordering< BB_container > * > RTGS_tree< base_tree >::useOrderings() const {
00202 
00203 #ifdef _EVALUATE_ALL
00204         return BB_container::getOrderings();
00205 #else
00206         vector< Ordering< BB_container > * > ret;
00207         ret.push_back( BB_container::getDefaultOrdering() );
00208         return ret;
00209 #endif
00210 }
00211 
00212 template< typename base_tree >
00213 base_tree* RTGS_tree< base_tree >::BulkLoadChunk( vector< BB_container* >* D, unsigned int h ) {
00214         
00215         if ( h==0 ) {
00216                 //calculate mbr
00217                 BB_type mbr;
00218                 typename vector< BB_container* >::iterator it = D->begin();
00219                 for( ; it!=D->end(); it++ )
00220                         mbr.unite( (*it) );
00221                         //mbr = (*it)->unite( mbr );
00222                 
00223                 //build leaf node
00224                 base_tree* ret = this->buildNode( *D, &mbr );
00225 
00226                 //remember leaf flag
00227                 ret->deleteLeaves = false;
00228                 
00229                 //delete used vector
00230                 //delete D;
00231                 
00232                 //return
00233                 return ret;
00234         } else {
00235                 vector< base_tree* > children;
00236                 
00237                 //calculate m = max^h
00238                 unsigned int m = this->properties->maximum;
00239                 for( unsigned int i=1; i<h; i++ )
00240                         m *= this->properties->maximum;
00241                 
00242                 //partition input into partitions containing <= m elements
00243                 vector< vector< BB_container* >* >* partitions = Partition( D, m );
00244 
00245                 //remember this mbr
00246                 BB_type master_mbr;
00247 
00248                 //recurse, and store internal node; loop over all partitions
00249                 typename vector< vector< BB_container* >* >::iterator partIt = partitions->begin();
00250                 for( ; partIt!=partitions->end(); partIt++ ) {
00251                 
00252                         //calculate the mbr of each partition
00253                         BB_type mbr;
00254                         typename vector< BB_container* >::iterator it = (*partIt)->begin();
00255                         for( ; it!=(*partIt)->end(); it++ ) {
00256                                 mbr.unite( (*it) );
00257                                 //mbr = (*it)->unite( mbr );
00258                         }
00259                         master_mbr.unite( &mbr );
00260                         
00261                         //store the R-tree partition in a node, by recursing on the partition itself
00262                         children.push_back( BulkLoadChunk( *partIt, h - 1 ) );
00263                 }
00264 
00265                 while( partitions->size() > 0 ) {
00266                         delete partitions->back();
00267                         partitions->pop_back();
00268                 }
00269 
00270                 //we are done with the partitions vector
00271                 delete partitions;
00272 
00273                 //check
00274                 if( children.size() > this->properties->maximum ) {
00275                         cerr << "Too many partitions returned! (" << children.size() << ")" << endl;
00276                         cerr << "k = |D|/m = " << ( (double) D->size() ) / ( (double) m ) << " \\neq " << children.size() << endl;
00277                         exit( 1 );
00278                 }
00279         
00280                 //nodes
00281                 base_tree* ret = buildNode( children, &master_mbr );
00282 
00283                 //leaves flag
00284                 ret->deleteLeaves = false;
00285 
00286                 //and return
00287                 return ret;
00288         }
00289 }
00290 
00291 template< typename base_tree >
00292 vector< vector< BB_container* >* >* RTGS_tree< base_tree >::Partition( vector< BB_container* >* D, unsigned int m ) {
00293 
00294 #ifdef _DEBUG
00295         cout << "Trying to split a vector containing " << D->size() << " elements into partitions containing " << m << " elements." << endl;
00296 #endif
00297 
00298         //allocate return vector
00299         vector< vector< BB_container* >* >* ret = new vector< vector< BB_container* >* >();
00300         
00301         //return a single partition if input size is smaller than partition size
00302         if ( D->size() <= m ) {
00303                 //copy for safety
00304                 vector< BB_container* >* toAdd = new vector< BB_container* >( D->begin(), D->end() );
00305                 ret->push_back( toAdd );
00306                 //ret->push_back( D );
00307         } else {
00308                 
00309                 //getting binary split
00310                 vector< vector< BB_container* >* >* binarySplit = BestBinarySplit( D, m );
00311                 
00312                 //concatenate results
00313                 vector< BB_container* >* b_left = binarySplit->at( 0 );
00314                 vector< BB_container* >* b_rght = binarySplit->at( 1 );
00315                 vector< vector< BB_container* >* >* left = Partition( b_left, m );
00316                 vector< vector< BB_container* >* >* rght = Partition( b_rght, m );
00317                 ret->assign( left->begin(), left->end() );
00318                 ret->insert( ret->end(), rght->begin(), rght->end() );
00319                 delete left;
00320                 delete rght;
00321                 delete b_left;
00322                 delete b_rght;
00323                 //delete binarySplit->at( 0 );
00324                 //delete binarySplit->at( 1 );
00325                 
00326                 //we are done with the binarySplit vector
00327                 delete binarySplit;
00328         }
00329 
00330 #ifdef _DEBUG
00331         cout << "Number of returned partitions: " << ret->size() << endl;
00332 #endif
00333 
00334         return ret;
00335 }
00336 
00337 template< typename base_tree >
00338 vector< vector< BB_container* >* >* RTGS_tree< base_tree >::BestBinarySplit( vector< BB_container* >* D, unsigned int m ) {
00339 
00340 #ifdef _DEBUG
00341         cout << "Trying to get a binary split of a vector containing " << D->size() << " elements." << endl;
00342 #endif
00343 
00344         vector< BB_container* >* left = new vector< BB_container* >();
00345         vector< BB_container* >* right = new vector< BB_container* >();
00346         BB_type left_mbr;
00347         BB_type right_mbr;
00348 
00349         //M = ceil( |D|/m )
00350         unsigned int M = D->size() / m;
00351         if ( ( (double) D->size() / (double) m ) - ( (double) M ) > 0 )
00352                 M++;
00353         
00354         double c = numeric_limits<double>::infinity();
00355         
00356         vector< Ordering< BB_container > * > orderings = useOrderings();
00357         vector< Ordering< BB_container > * >::iterator it = orderings.begin();
00358 
00359 #ifdef _TIMINGS
00360 #endif
00361                 
00362         for( ; it!=orderings.end(); it++ ) {
00363                 
00364                 if ( orderings.size() > 1 ) {
00365 #ifdef _TIMINGS
00366                         Rintertimer = times( &RTIMING );
00367 #endif
00368                         (*it)->sort( D );
00369 #ifdef _TIMINGS
00370                         Rintertimer = times( &RTIMING ) - Rintertimer;
00371                         Rintertimersum += Rintertimer;
00372 #endif
00373                 }
00374 
00375                 for ( unsigned int i = 0; i < M-1; i++ ) {
00376 
00377                         //decide if we skip this matrix entry
00378                         //cout << static_cast< unsigned int >( rand() ) << "<" << this->SKIP_PROBABILITY << endl;
00379                         if ( ( i > 0 ) && ( static_cast< unsigned int >( rand() ) <= this->SKIP_PROBABILITY ) ) {
00380                                 //cout << "SKIPping " << i << endl;
00381                                 continue;
00382                         }
00383 
00384                         //cout << "Going for " << i << endl;
00385 
00386 /*#ifdef _PROGRESS
00387                         cout << "\r" << i << "/" << ( M - 2 );
00388 #endif*/
00389 
00390                         //find split
00391                         vector< BB_container* >* set_one = new vector< BB_container* >();
00392                         vector< BB_container* >* set_two = new vector< BB_container* >();
00393 
00394                         //the following can be done since input is sorted at this point.
00395                         const unsigned int loc = ( i + 1 ) * m;
00396 
00397                         set_one->assign( D->begin(), D->begin() + loc );
00398                         set_two->assign( D->begin() + loc, D->end() );
00399                                 
00400                         //build mbrs
00401                         BB_type left_split;
00402                         BB_type right_split;
00403 
00404                         vector< BB_container* >::iterator itbb = set_one->begin();
00405                         for( ; itbb!=set_one->end(); itbb++ )
00406                                 left_split.unite( (*itbb) );
00407                                 //left_split = (*itbb)->unite( left_split );
00408                         itbb = set_two->begin();
00409                         for( ; itbb!=set_two->end(); itbb++ )
00410                                 right_split.unite( (*itbb) );
00411                                 //right_split = (*itbb)->unite( right_split );
00412 
00413                         //calculate cost
00414                         double t = cost_function( &left_split, &right_split );
00415                         //determine if minimum
00416                         if ( t < c ) {
00417                                 //set as minimum
00418                                 c = t;
00419                                 left->assign( set_one->begin(), set_one->end() );
00420                                 right->assign( set_two->begin(), set_two->end() );
00421 
00422 #ifdef _DEBUG
00423                                 cout << "Set two contains " << right->size() << "elements" << endl << "Setting set one MBR" << endl;
00424                                 cout << "Input set contained " << input->size() << " elements" << endl; 
00425                                 fflush( stdout );
00426 #endif
00427 
00428                                 left_mbr = left_split;
00429                                 right_mbr = right_split;
00430                         }
00431                         delete set_one;
00432                         delete set_two;
00433                 }
00434         }
00435 
00436 #ifdef _DEBUG   
00437         cout << "First vector has " << left->size() << " elements." << endl;
00438         cout << "Second vector has " << right->size() << " elements." << endl;
00439 #endif
00440 
00441         vector< vector< BB_container* >* >* ret = new vector< vector< BB_container* >* >();
00442         ret->resize( 2 );
00443         (*ret)[ 0 ] = left;
00444         (*ret)[ 1 ] = right;
00445         return ret;
00446 }
00447 
00448 template< typename base_tree >
00449 void RTGS_tree< base_tree >::bulkLoad() {
00450 
00451 #ifdef _TIMINGS
00452         RTIMER = times( &RTIMING ); 
00453 #endif
00454 
00455         if ( loaded ) {
00456                 cerr << "RTGS_tree::bulkLoad() called while already bulkloaded!" << endl;
00457                 exit( 1 );
00458         }
00459 
00460         if ( cache.size() == 0 ) {
00461                 cerr << "Error; tried to bulk load an empty set. Aborting..." << endl;
00462                 return;
00463         }
00464         
00465         vector< BB_container* > p_cache( cache.size() );
00466         for( unsigned int i = 0; i < cache.size(); i++ ){
00467                 p_cache[i] = &cache[i];
00468         }
00469         
00470         if ( useOrderings().size() == 1 ) {
00471 
00472 #ifdef _DEBUG
00473                 cout << "Entering sort... " << endl; fflush( stdout );
00474 #endif
00475                 useOrderings().at( 0 )->sort( &p_cache );
00476 
00477 #ifdef _TIMINGS
00478                 cout << "*Initial sort took: ";
00479                 RTIMER = times( &RTIMING ) - RTIMER;
00480                 cout << TIMER << " ticks" << endl; fflush( stdout );
00481 #endif
00482 #ifdef _DEBUG
00483                 cout << "Exitting sort... " << endl; fflush( stdout );
00484 #endif
00485 
00486         }
00487         
00488         
00489         double frac = p_cache.size() / this->properties->maximum;
00490         unsigned int h;
00491         if ( frac < 0 )
00492                 h = 0;
00493         else {
00494                 frac = log10( frac ) / log10( static_cast< double >( this->properties->maximum ) );
00495                 h = (unsigned int) frac;
00496                 if ( frac - h > 0 )
00497                         h++;
00498         }
00499 
00500 #ifdef _TIMINGS
00501         RTIMER = times( &RTIMING );
00502 #endif  
00503         base_tree* new_root = BulkLoadChunk( &p_cache, h );
00504 #ifdef _TIMINGS
00505         RTIMER = times( &RTIMING) - RTIMER;
00506         cout << "*Intermediate searches took a total of " << Rintertimersum << " ticks" << endl; fflush( stdout );
00507         cout << "*bulkLoading took : " << RTIMER << " ticks" << endl; fflush( stdout );
00508 #endif
00509         vector< base_tree* > new_children;
00510         new_children.push_back( new_root ); //quick hack, also, degenerate root handling should be handled in setChildren
00511         
00512         /*BB_type children_mbr;
00513         
00514         vector< base_tree* >::iterator it = new_children.begin();
00515         for( ; it!=new_children.end(); it++ )
00516                 children_mbr = (*it)->mbr.unite( children_mbr );
00517         
00518         //check for degenerate root
00519         if ( new_children.size() == 1 ) {
00520                 base_tree* walk = new_children[0];
00521                 elements = walk->elements;
00522                 children = walk->children;
00523                 properties = walk->properties; //actually kind of useless
00524                 //mbr = children_mbr;
00525                 updateMBRLocal();
00526         } else {*/
00527                 //simply set new children
00528                 //this->children = new_children;
00529                 this->setChildren( new_children, 0 );
00530                 //mbr = children_mbr;
00531                 this->updateMBRLocal();
00532         //}
00533 
00534         loaded = true;
00535         
00536         this->postBulkLoad();
00537 }
00538 
00539 template< typename base_tree >
00540 base_tree* RTGS_tree< base_tree >::insert( BB_container *element ) {
00541         if ( loaded ) {
00542                 return base_tree::insert( element );
00543         } else {
00544                 cache.push_back( BB_container( element ) );
00545                 delete element;
00546                 return this;
00547         }
00548 }
00549 
00550 template< typename base_tree >
00551 base_tree* RTGS_tree< base_tree >::remove( BB_container *element ) {
00552         if ( loaded ) {
00553                 return base_tree::remove( element );
00554         } else {
00555                 vector< BB_container >:: iterator it = cache.begin();
00556                 for( ; it!=cache.end(); it++ )
00557                         if ( &(*it) == element || (*it).getID() == element->getID() )
00558                                 cache.erase( it );
00559                 return this;
00560         }
00561 }
00562 
00563 template< typename base_tree >
00564 base_tree* RTGS_tree< base_tree >::search( BB_container *element ) {
00565         if ( loaded ) {
00566                 return base_tree::search( element );
00567         } else {
00568                 vector< BB_container >:: iterator it = cache.begin();
00569                 for( ; it!=cache.end(); it++ )
00570                         if ( &(*it) == element || (*it).getID() == element->getID() )
00571                                 return this;
00572                 return 0;
00573         }
00574 }
00575 
00576 template< typename base_tree >
00577 vector< int > RTGS_tree< base_tree >::containedIn( BB_type box ) const {
00578         if ( !checkReady() ) {
00579                 cerr << "Trying to query while datastructure was not ready for access!" << endl;
00580                 exit( 1 );
00581         }
00582         return base_tree::containedIn( box );
00583 }
00584 
00585 template< typename base_tree >
00586 vector< int > RTGS_tree< base_tree >::intersects( const vector< double > begin, const vector< double > end ) const {
00587         if ( !checkReady() ) {
00588                 cerr << "Trying to query while datastructure was not ready for access!" << endl;
00589                 exit( 1 );
00590         }
00591         return base_tree::intersects( begin, end );
00592 }
00593 
00594 template< typename base_tree >
00595 vector< int > RTGS_tree< base_tree >::intersects( Point point ) const {
00596         if ( !checkReady() ) {
00597                 cerr << "Trying to query while datastructure was not ready for access!" << endl;
00598                 exit( 1 );
00599         }
00600         return base_tree::intersects( point );
00601 }
00602 
00603 template< typename base_tree >
00604 vector< int > RTGS_tree< base_tree >::intersects( vector<double> a, double b ) const {
00605         if ( !checkReady() ) {
00606                 cerr << "Trying to query while datastructure was not ready for access!" << endl;
00607                 exit( 1 );
00608         }
00609         return base_tree::intersects( a, b );
00610 }
00611 
00612 template< typename base_tree >
00613 vector< int > RTGS_tree< base_tree >::neighboursOf( BB_container *item ) const {
00614         if ( !checkReady() ) {
00615                 cerr << "Trying to query while datastructure was not ready for access!" << endl;
00616                 exit( 1 );
00617         }
00618         return base_tree::neighboursOf( item );
00619 }
00620 
00621 #endif

Generated on Sat Oct 13 17:34:42 2007 for R-Tree by  doxygen 1.5.2