TGS.h

Go to the documentation of this file.
00001 /*
00002  *  Copyright (C) 2007  A.N. Yzelman
00003  *  Released under LGPL, see license.txt
00004  *
00005  *  Last modified at 12th of June, 2007, by A.N. Yzelman
00006  *
00007  *  TGS.h: Definition of a bulk-loaded R-tree.
00008  *
00009  */
00010 
00011 #ifndef _H_TGS
00012 #define _H_TGS
00013 
00014 #include "basic-r-tree.h"
00015 #include <limits>
00016 
00018 #define _TIMINGS
00019 
00022 #define  _EVALUATE_ALL
00023 
00024 /*If defined, shows progress on screen
00025 #define _PROGRESS*/
00026 
00027 #ifdef _TIMINGS
00028         #include <sys/times.h>
00029         struct tms TIMING;
00030         clock_t TIMER;
00031         clock_t intertimer;
00032         clock_t intertimersum = 0;
00033 #endif
00034 
00035 //#define _DEBUG
00036 
00042 template< typename base_tree >
00043 class TGS_tree : public base_tree {
00044 
00045    private:
00046    
00052         virtual bool checkReady() const { return loaded; }
00053         
00061         //vector< base_tree* > bulkLoad( vector< BB_container* > *input );
00062         
00063    protected:
00064    
00066         bool loaded;
00067         
00071         vector< BB_container > cache;
00072    
00079         virtual vector< Ordering< BB_container > * > useOrderings() const;
00080 
00088         virtual double cost_function( const BB_type *x, const BB_type *y ) const;
00089         
00091         base_tree* BulkLoadChunk( vector< BB_container* >* D, unsigned int h );
00092 
00094         vector< vector< BB_container* >* >* BestBinarySplit( vector< BB_container* >* D, unsigned int m );
00095 
00097         vector< vector< BB_container* >* >* Partition( vector< BB_container* >* D, unsigned int m );
00098 
00099         
00100    public:
00101    
00103    TGS_tree() : base_tree() { loaded = false; this->deleteLeaves = false; }
00104 
00106    TGS_tree( R_tree_props *props ) : base_tree( props ) { loaded = false; this->deleteLeaves = false; }
00107         
00109    virtual ~TGS_tree() {}
00110    
00116         virtual void ensureReady();
00117    
00119         void bulkLoad();
00120         
00130          virtual base_tree *insert( BB_container *element );
00131 
00141         virtual base_tree *remove( BB_container *element );
00142 
00152         virtual base_tree *search( BB_container *element );
00153 
00155         virtual vector< int > containedIn( BB_type box ) const;
00156 
00158         virtual vector< int > intersects( const vector< double > begin, const vector< double > end ) const;
00159 
00161         virtual vector< int > intersects( Point point ) const;
00162 
00164         virtual vector< int > intersects( vector<double> a, double b ) const;
00165 
00167         virtual vector< int > neighboursOf( BB_container *item ) const;
00168 
00169 };
00170 
00171 
00172 /***************************************************
00173  *                       PRIVATE                   *
00174  ***************************************************/
00175 
00176 template< typename base_tree >
00177 double TGS_tree< base_tree >::cost_function( const BB_type *a, const BB_type *b ) const {
00178         return Current_Metric::Volume( *a ) + Current_Metric::Volume ( *b );
00179 }
00180 
00181 template< typename base_tree >
00182 void TGS_tree< base_tree >::ensureReady() {
00183         if ( !loaded ) {
00184                 bulkLoad();
00185         }
00186 }
00187 
00188 template< typename base_tree >
00189 vector< Ordering< BB_container > * > TGS_tree< base_tree >::useOrderings() const {
00190 
00191 #ifdef _EVALUATE_ALL
00192         return BB_container::getOrderings();
00193 #else
00194         vector< Ordering< BB_container > * > ret;
00195         ret.push_back( BB_container::getDefaultOrdering() );
00196         return ret;
00197 #endif
00198 }
00199 
00200 template< typename base_tree >
00201 base_tree* TGS_tree< base_tree >::BulkLoadChunk( vector< BB_container* >* D, unsigned int h ) {
00202         
00203         if ( h==0 ) {
00204                 //calculate mbr
00205                 BB_type mbr;
00206                 typename vector< BB_container* >::iterator it = D->begin();
00207                 for( ; it!=D->end(); it++ )
00208                         mbr.unite( (*it) );
00209                         //mbr = (*it)->unite( mbr );
00210                 
00211                 //build leaf node
00212                 base_tree* ret = this->buildNode( *D, &mbr );
00213 
00214                 //remember leaf flag
00215                 ret->deleteLeaves = false;
00216                 
00217                 //delete used vector
00218                 //delete D;
00219                 
00220                 //return
00221                 return ret;
00222         } else {
00223                 vector< base_tree* > children;
00224                 
00225                 //calculate m = max^h
00226                 unsigned int m = this->properties->maximum;
00227                 for( unsigned int i=1; i<h; i++ )
00228                         m *= this->properties->maximum;
00229                 
00230                 //partition input into partitions containing <= m elements
00231                 vector< vector< BB_container* >* >* partitions = Partition( D, m );
00232 
00233                 //remember this mbr
00234                 BB_type master_mbr;
00235 
00236                 //recurse, and store internal node; loop over all partitions
00237                 typename vector< vector< BB_container* >* >::iterator partIt = partitions->begin();
00238                 for( ; partIt!=partitions->end(); partIt++ ) {
00239                 
00240                         //calculate the mbr of each partition
00241                         BB_type mbr;
00242                         typename vector< BB_container* >::iterator it = (*partIt)->begin();
00243                         for( ; it!=(*partIt)->end(); it++ ) {
00244                                 mbr.unite( (*it) );
00245                                 //mbr = (*it)->unite( mbr );
00246                         }
00247                         master_mbr.unite( &mbr );
00248                         
00249                         //store the R-tree partition in a node, by recursing on the partition itself
00250                         children.push_back( BulkLoadChunk( *partIt, h - 1 ) );
00251                 }
00252 
00253                 while( partitions->size() > 0 ) {
00254                         delete partitions->back();
00255                         partitions->pop_back();
00256                 }
00257 
00258                 //we are done with the partitions vector
00259                 delete partitions;
00260 
00261                 //check
00262                 if( children.size() > this->properties->maximum ) {
00263                         cerr << "Too many partitions returned! (" << children.size() << ")" << endl;
00264                         cerr << "k = |D|/m = " << ( (double) D->size() ) / ( (double) m ) << " \\neq " << children.size() << endl;
00265                         exit( 1 );
00266                 }
00267         
00268                 //nodes
00269                 base_tree* ret = buildNode( children, &master_mbr );
00270 
00271                 //leaves flag
00272                 ret->deleteLeaves = false;
00273 
00274                 //and return
00275                 return ret;
00276         }
00277 }
00278 
00279 template< typename base_tree >
00280 vector< vector< BB_container* >* >* TGS_tree< base_tree >::Partition( vector< BB_container* >* D, unsigned int m ) {
00281 
00282 #ifdef _DEBUG
00283         cout << "Trying to split a vector containing " << D->size() << " elements into partitions containing " << m << " elements." << endl;
00284 #endif
00285 
00286         //allocate return vector
00287         vector< vector< BB_container* >* >* ret = new vector< vector< BB_container* >* >();
00288         
00289         //return a single partition if input size is smaller than partition size
00290         if ( D->size() <= m ) {
00291                 //copy for safety
00292                 vector< BB_container* >* toAdd = new vector< BB_container* >( D->begin(), D->end() );
00293                 ret->push_back( toAdd );
00294                 //ret->push_back( D );
00295         } else {
00296                 
00297                 //getting binary split
00298                 vector< vector< BB_container* >* >* binarySplit = BestBinarySplit( D, m );
00299                 
00300                 //concatenate results
00301                 vector< BB_container* >* b_left = binarySplit->at( 0 );
00302                 vector< BB_container* >* b_rght = binarySplit->at( 1 );
00303                 vector< vector< BB_container* >* >* left = Partition( b_left, m );
00304                 vector< vector< BB_container* >* >* rght = Partition( b_rght, m );
00305                 ret->assign( left->begin(), left->end() );
00306                 ret->insert( ret->end(), rght->begin(), rght->end() );
00307                 delete left;
00308                 delete rght;
00309                 delete b_left;
00310                 delete b_rght;
00311                 //delete binarySplit->at( 0 );
00312                 //delete binarySplit->at( 1 );
00313                 
00314                 //we are done with the binarySplit vector
00315                 delete binarySplit;
00316         }
00317 
00318 #ifdef _DEBUG
00319         cout << "Number of returned partitions: " << ret->size() << endl;
00320 #endif
00321 
00322         return ret;
00323 }
00324 
00325 template< typename base_tree >
00326 vector< vector< BB_container* >* >* TGS_tree< base_tree >::BestBinarySplit( vector< BB_container* >* D, unsigned int m ) {
00327 
00328 #ifdef _DEBUG
00329         cout << "Trying to get a binary split of a vector containing " << D->size() << " elements." << endl;
00330 #endif
00331 
00332         vector< BB_container* >* left = new vector< BB_container* >();
00333         vector< BB_container* >* right = new vector< BB_container* >();
00334         BB_type left_mbr;
00335         BB_type right_mbr;
00336 
00337         //M = ceil( |D|/m )
00338         unsigned int M = D->size() / m;
00339         if ( ( (double) D->size() / (double) m ) - ( (double) M ) > 0 )
00340                 M++;
00341         
00342         double c = numeric_limits<double>::infinity();
00343         
00344         vector< Ordering< BB_container > * > orderings = useOrderings();
00345         vector< Ordering< BB_container > * >::iterator it = orderings.begin();
00346 
00347 #ifdef _TIMINGS
00348 #endif
00349                 
00350         for( ; it!=orderings.end(); it++ ) {
00351                 
00352                 if ( orderings.size() > 1 ) {
00353 #ifdef _TIMINGS
00354                         intertimer = times( &TIMING );
00355 #endif
00356                         (*it)->sort( D );
00357 #ifdef _TIMINGS
00358                         intertimer = times( &TIMING ) - intertimer;
00359                         intertimersum += intertimer;
00360 #endif
00361                 }
00362                         
00363                 for ( unsigned int i = 0; i < M-1; i++ ) {
00364 
00365 /*#ifdef _PROGRESS
00366                         cout << "\r" << i << "/" << ( M - 2 );
00367 #endif*/
00368 
00369                         //find split
00370                         vector< BB_container* >* set_one = new vector< BB_container* >();
00371                         vector< BB_container* >* set_two = new vector< BB_container* >();
00372 
00373                         //the following can be done since input is sorted at this point.
00374                         const unsigned int loc = ( i + 1 ) * m;
00375 
00376                         set_one->assign( D->begin(), D->begin() + loc );
00377                         set_two->assign( D->begin() + loc, D->end() );
00378                                 
00379                         //build mbrs
00380                         BB_type left_split;
00381                         BB_type right_split;
00382 
00383                         vector< BB_container* >::iterator itbb = set_one->begin();
00384                         for( ; itbb!=set_one->end(); itbb++ )
00385                                 left_split.unite( (*itbb) );
00386                                 //left_split = (*itbb)->unite( left_split );
00387                         itbb = set_two->begin();
00388                         for( ; itbb!=set_two->end(); itbb++ )
00389                                 right_split.unite( (*itbb) );
00390                                 //right_split = (*itbb)->unite( right_split );
00391 
00392                         //calculate cost
00393                         double t = cost_function( &left_split, &right_split );
00394                         //determine if minimum
00395                         if ( t < c ) {
00396                                 //set as minimum
00397                                 c = t;
00398                                 left->assign( set_one->begin(), set_one->end() );
00399                                 right->assign( set_two->begin(), set_two->end() );
00400 
00401 #ifdef _DEBUG
00402                                 cout << "Set two contains " << right->size() << "elements" << endl << "Setting set one MBR" << endl;
00403                                 cout << "Input set contained " << input->size() << " elements" << endl; 
00404                                 fflush( stdout );
00405 #endif
00406 
00407                                 left_mbr = left_split;
00408                                 right_mbr = right_split;
00409                         }
00410                         delete set_one;
00411                         delete set_two;
00412                 }
00413         }
00414 
00415 #ifdef _DEBUG   
00416         cout << "First vector has " << left->size() << " elements." << endl;
00417         cout << "Second vector has " << right->size() << " elements." << endl;
00418 #endif
00419 
00420         vector< vector< BB_container* >* >* ret = new vector< vector< BB_container* >* >();
00421         ret->resize( 2 );
00422         (*ret)[ 0 ] = left;
00423         (*ret)[ 1 ] = right;
00424         return ret;
00425 }
00426 
00427 template< typename base_tree >
00428 void TGS_tree< base_tree >::bulkLoad() {
00429 
00430 #ifdef _TIMINGS
00431         TIMER = times( &TIMING ); 
00432 #endif
00433 
00434         if ( loaded ) {
00435                 cerr << "TGS_tree::bulkLoad() called while already bulkloaded!" << endl;
00436                 exit( 1 );
00437         }
00438 
00439         if ( cache.size() == 0 ) {
00440                 cerr << "Error; tried to bulk load an empty set. Aborting..." << endl;
00441                 return;
00442         }
00443         
00444         vector< BB_container* > p_cache( cache.size() );
00445         for( unsigned int i = 0; i < cache.size(); i++ ){
00446                 p_cache[i] = &cache[i];
00447         }
00448         
00449         if ( useOrderings().size() == 1 ) {
00450 
00451 #ifdef _DEBUG
00452                 cout << "Entering sort... " << endl; fflush( stdout );
00453 #endif
00454                 useOrderings().at( 0 )->sort( &p_cache );
00455 
00456 #ifdef _TIMINGS
00457                 cout << "*Initial sort took: ";
00458                 TIMER = times( &TIMING ) - TIMER;
00459                 cout << TIMER << " ticks" << endl; fflush( stdout );
00460 #endif
00461 #ifdef _DEBUG
00462                 cout << "Exitting sort... " << endl; fflush( stdout );
00463 #endif
00464 
00465         }
00466         
00467         
00468         double frac = p_cache.size() / this->properties->maximum;
00469         unsigned int h;
00470         if ( frac < 0 )
00471                 h = 0;
00472         else {
00473                 frac = log10( frac ) / log10( static_cast< double >( this->properties->maximum ) );
00474                 h = (unsigned int) frac;
00475                 if ( frac - h > 0 )
00476                         h++;
00477         }
00478 
00479 #ifdef _TIMINGS
00480         TIMER = times( &TIMING );
00481 #endif  
00482         base_tree* new_root = BulkLoadChunk( &p_cache, h );
00483 #ifdef _TIMINGS
00484         TIMER = times( &TIMING) - TIMER;
00485         cout << "*Intermediate searches took a total of " << intertimersum << " ticks" << endl; fflush( stdout );
00486         cout << "*bulkLoading took : " << TIMER << " ticks" << endl; fflush( stdout );
00487 #endif
00488         vector< base_tree* > new_children;
00489         new_children.push_back( new_root ); //quick hack, also, degenerate root handling should be handled in setChildren
00490         
00491         /*BB_type children_mbr;
00492         
00493         vector< base_tree* >::iterator it = new_children.begin();
00494         for( ; it!=new_children.end(); it++ )
00495                 children_mbr = (*it)->mbr.unite( children_mbr );
00496         
00497         //check for degenerate root
00498         if ( new_children.size() == 1 ) {
00499                 base_tree* walk = new_children[0];
00500                 elements = walk->elements;
00501                 children = walk->children;
00502                 properties = walk->properties; //actually kind of useless
00503                 //mbr = children_mbr;
00504                 updateMBRLocal();
00505         } else {*/
00506                 //simply set new children
00507                 //this->children = new_children;
00508                 this->setChildren( new_children, 0 );
00509                 //mbr = children_mbr;
00510                 this->updateMBRLocal();
00511         //}
00512 
00513         loaded = true;
00514         
00515         this->postBulkLoad();
00516 }
00517 
00518 template< typename base_tree >
00519 base_tree* TGS_tree< base_tree >::insert( BB_container *element ) {
00520         if ( loaded ) {
00521                 return base_tree::insert( element );
00522         } else {
00523                 cache.push_back( BB_container( element ) );
00524                 delete element;
00525                 return this;
00526         }
00527 }
00528 
00529 template< typename base_tree >
00530 base_tree* TGS_tree< base_tree >::remove( BB_container *element ) {
00531         if ( loaded ) {
00532                 return base_tree::remove( element );
00533         } else {
00534                 vector< BB_container >:: iterator it = cache.begin();
00535                 for( ; it!=cache.end(); it++ )
00536                         if ( &(*it) == element || (*it).getID() == element->getID() )
00537                                 cache.erase( it );
00538                 return this;
00539         }
00540 }
00541 
00542 template< typename base_tree >
00543 base_tree* TGS_tree< base_tree >::search( BB_container *element ) {
00544         if ( loaded ) {
00545                 return base_tree::search( element );
00546         } else {
00547                 vector< BB_container >:: iterator it = cache.begin();
00548                 for( ; it!=cache.end(); it++ )
00549                         if ( &(*it) == element || (*it).getID() == element->getID() )
00550                                 return this;
00551                 return 0;
00552         }
00553 }
00554 
00555 template< typename base_tree >
00556 vector< int > TGS_tree< base_tree >::containedIn( BB_type box ) const {
00557         if ( !checkReady() ) {
00558                 cerr << "Trying to query while datastructure was not ready for access!" << endl;
00559                 exit( 1 );
00560         }
00561         return base_tree::containedIn( box );
00562 }
00563 
00564 template< typename base_tree >
00565 vector< int > TGS_tree< base_tree >::intersects( const vector< double > begin, const vector< double > end ) const {
00566         if ( !checkReady() ) {
00567                 cerr << "Trying to query while datastructure was not ready for access!" << endl;
00568                 exit( 1 );
00569         }
00570         return base_tree::intersects( begin, end );
00571 }
00572 
00573 template< typename base_tree >
00574 vector< int > TGS_tree< base_tree >::intersects( Point point ) const {
00575         if ( !checkReady() ) {
00576                 cerr << "Trying to query while datastructure was not ready for access!" << endl;
00577                 exit( 1 );
00578         }
00579         return base_tree::intersects( point );
00580 }
00581 
00582 template< typename base_tree >
00583 vector< int > TGS_tree< base_tree >::intersects( vector<double> a, double b ) const {
00584         if ( !checkReady() ) {
00585                 cerr << "Trying to query while datastructure was not ready for access!" << endl;
00586                 exit( 1 );
00587         }
00588         return base_tree::intersects( a, b );
00589 }
00590 
00591 template< typename base_tree >
00592 vector< int > TGS_tree< base_tree >::neighboursOf( BB_container *item ) const {
00593         if ( !checkReady() ) {
00594                 cerr << "Trying to query while datastructure was not ready for access!" << endl;
00595                 exit( 1 );
00596         }
00597         return base_tree::neighboursOf( item );
00598 }
00599 
00600 #endif

Generated on Sat Oct 13 17:34:42 2007 for R-Tree by  doxygen 1.5.2