containers/berkeley_db_benchmark.cpp

This is a benchmark mentioned in the paper R. Dementiev, L. Kettner, P. Sanders "STXXL: standard template library for XXL data sets" Software: Practice and Experience Volume 38, Issue 6, Pages 589-637, May 2008 DOI: 10.1002/spe.844

00001 /***************************************************************************
00002  *  containers/berkeley_db_benchmark.cpp
00003  *
00004  *  Part of the STXXL. See http://stxxl.sourceforge.net
00005  *
00006  *  Copyright (C) 2006 Roman Dementiev <dementiev@ira.uka.de>
00007  *
00008  *  Distributed under the Boost Software License, Version 1.0.
00009  *  (See accompanying file LICENSE_1_0.txt or copy at
00010  *  http://www.boost.org/LICENSE_1_0.txt)
00011  **************************************************************************/
00012 
00019 
00020 
00021 #include <stxxl/vector>
00022 #include <stxxl/map>
00023 #include <stxxl/timer>
00024 #include <stxxl/stream>
00025 
00026 
00028 #include <db_cxx.h>
00029 
00031 #include "app_config.h"
00032 #include <portability.h>
00033 #include <ami_btree.h>
00035 
00036 #define BDB_BULK_SCAN
00037 
00038 #define KEY_SIZE                8
00039 #define DATA_SIZE               32
00040 
00041 #define NODE_BLOCK_SIZE         (32 * 1024)
00042 #define LEAF_BLOCK_SIZE         (32 * 1024)
00043 
00044 
00045 #define LEAF_BLOCK_SIZE         (32 * 1024)
00046 
00047 #define TOTAL_CACHE_SIZE        (750 * 1024 * 1024)
00048 //#define TOTAL_CACHE_SIZE      (150 * 1024 * 1024)
00049 
00050 //#define NODE_CACHE_SIZE       (1 * (TOTAL_CACHE_SIZE / 40))
00051 //#define LEAF_CACHE_SIZE       (39 * (TOTAL_CACHE_SIZE / 40))
00052 
00053 #define NODE_CACHE_SIZE         (64 * 1024 * 1024)
00054 #define LEAF_CACHE_SIZE         (TOTAL_CACHE_SIZE - NODE_CACHE_SIZE)
00055 
00056 #define SORTER_MEM              (TOTAL_CACHE_SIZE - 1024 * 1024 * 2 * 4)
00057 
00058 
00059 #define SCAN_LIMIT(x)   (x)
00060 
00061 //#define BDB_FILE "/data3/bdb_file"
00062 #define BDB_FILE "/var/tmp/bdb_file"
00063 
00064 
00065 // BDB settings
00066 u_int32_t pagesize = LEAF_BLOCK_SIZE;
00067 u_int bulkbufsize = 4 * 1024 * 1024;
00068 u_int logbufsize = 8 * 1024 * 1024;
00069 u_int cachesize = (TOTAL_CACHE_SIZE < 500 * 1024 * 1024) ? (4 * (TOTAL_CACHE_SIZE / 5)) : (TOTAL_CACHE_SIZE - 100 * 1024 * 1024);
00070 u_int datasize = DATA_SIZE;
00071 u_int keysize = KEY_SIZE;
00072 u_int numitems = 0;
00073 
00074 const char * letters = "abcdefghijklmnopqrstuvwxuz";
00075 
00076 struct my_key
00077 {
00078     char keybuf[KEY_SIZE];
00079 };
00080 
00081 
00082 std::ostream & operator << (std::ostream & o, const my_key & obj)
00083 {
00084     for (int i = 0; i < KEY_SIZE; ++i)
00085         o << obj.keybuf[i];
00086 
00087     return o;
00088 }
00089 
00090 bool operator == (const my_key & a, const my_key & b)
00091 {
00092     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) == 0;
00093 }
00094 
00095 bool operator != (const my_key & a, const my_key & b)
00096 {
00097     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) != 0;
00098 }
00099 
00100 bool operator < (const my_key & a, const my_key & b)
00101 {
00102     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0;
00103 }
00104 
00105 bool operator > (const my_key & a, const my_key & b)
00106 {
00107     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) > 0;
00108 }
00109 
00110 bool operator <= (const my_key & a, const my_key & b)
00111 {
00112     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) <= 0;
00113 }
00114 
00115 bool operator >= (const my_key & a, const my_key & b)
00116 {
00117     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) >= 0;
00118 }
00119 
00120 
00121 struct my_data
00122 {
00123     char databuf[DATA_SIZE];
00124 };
00125 
00126 std::ostream & operator << (std::ostream & o, const my_data & obj)
00127 {
00128     o << "DATA(size=" << sizeof(obj) << ") ";
00129 
00130     return o;
00131 }
00132 
00133 my_key min_key, max_key;
00134 
00135 struct comp_type : std::binary_function<my_key, my_key, bool>
00136 {
00137     bool operator () (const my_key & a, const my_key & b) const
00138     {
00139         return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0;
00140     }
00141     static my_key max_value()
00142     {
00143         return max_key;
00144     }
00145     static my_key min_value()
00146     {
00147         return min_key;
00148     }
00149 };
00150 
00151 
00153 // Key type.
00154 typedef my_key bkey_t;
00155 
00156 // Element type for the btree.
00157 struct el_t {
00158     bkey_t key_;
00159     my_data data_;
00160     el_t(bkey_t k) : key_(k) { }
00161     el_t() { }
00162 };
00163 struct key_from_el {
00164     bkey_t operator () (const el_t & v) const
00165     {
00166         return v.key_;
00167     }
00168 };
00169 
00170 
00171 // Temporary distinction btw UN*X and WIN, since there are some
00172 // problems with the MMAP collection implementation.
00173 #ifdef _WIN32
00174 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el, BTE_COLLECTION_UFS> u_btree_t;
00175 #else
00176 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el> u_btree_t;
00177 #endif
00178 
00179 
00180 void init()
00181 {
00182     memset(max_key.keybuf, (std::numeric_limits<char>::max)(), KEY_SIZE);
00183     memset(min_key.keybuf, (std::numeric_limits<char>::min)(), KEY_SIZE);
00184 }
00185 
00186 typedef stxxl::map<my_key, my_data, comp_type, NODE_BLOCK_SIZE, LEAF_BLOCK_SIZE> map_type;
00187 
00188 #define REAL_NODE_BLOCK_SIZE map_type::node_block_type::raw_size
00189 #define REAL_LEAF_BLOCK_SIZE map_type::leaf_block_type::raw_size
00190 #define REAL_NODE_MELEMENTS map_type::node_block_type::size
00191 #define REAL_LEAF_MELEMENTS map_type::leaf_block_type::size
00192 
00193 typedef stxxl::VECTOR_GENERATOR<std::pair<my_key, my_data>, 1, 1>::result vector_type;
00194 //typedef stxxl::vector<std::pair<my_key,my_data>,1,stxxl::lru_pager<1>,512*1024>  vector_type;
00195 
00196 
00197 //#define KEYPOS        (i % KEY_SIZE)
00198 //#define VALUE         (myrand() % 26)
00199 
00200 
00201 #if 0
00202 unsigned ran32State = 0xdeadbeef;
00203 inline unsigned myrand()
00204 {
00205     return (ran32State = 1664525 * ran32State + 1013904223);
00206 }
00207 inline void rand_key(stxxl::int64 pos, my_key & Key)
00208 {
00209     for (int i = 0; i < KEY_SIZE; ++i)
00210         Key.keybuf[i] = letters[myrand() % 26];
00211 }
00212 #else // a longer pseudo random sequence
00213 long long unsigned ran32State = 0xdeadbeef;
00214 inline long long unsigned myrand()
00215 {
00216     return (ran32State = (ran32State * 0x5DEECE66DULL + 0xBULL) & 0xFFFFFFFFFFFFULL);
00217 }
00218 inline void rand_key(stxxl::int64 /*pos*/, my_key & Key)
00219 {
00220     long long unsigned r = myrand();
00221     for (int i = 0; i < KEY_SIZE; ++i)
00222     {
00223         Key.keybuf[i] = letters[r % 26];
00224         r >>= 5;
00225     }
00226 }
00227 #endif
00228 
00229 void run_bdb_btree(stxxl::int64 ops)
00230 {
00231     const char * filename = BDB_FILE;
00232 
00233     my_key key1_storage;
00234     my_data data1_storage;
00235 
00236     unlink(filename);
00237 
00238     memset(key1_storage.keybuf, 'a', KEY_SIZE);
00239     memset(data1_storage.databuf, 'b', DATA_SIZE);
00240 
00241 
00242     Db db(NULL, 0);             // Instantiate the Db object
00243 
00244     try {
00245         db.set_errfile(stderr);
00246         db.set_pagesize(pagesize);
00247         db.set_cachesize(0, cachesize, 1);
00248 
00249         // Open the database
00250         db.open(NULL,           // Transaction pointer
00251                 filename,       // Database file name
00252                 NULL,           // Optional logical database name
00253                 DB_BTREE,       // Database access method
00254                 DB_CREATE,      // Open flags
00255                 0);             // File mode (using defaults)
00256 
00257 
00258         // here we start with the tests
00259         Dbt key1(key1_storage.keybuf, KEY_SIZE);
00260         Dbt data1(data1_storage.databuf, DATA_SIZE);
00261 
00262         stxxl::timer Timer;
00263         stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
00264         stxxl::int64 i;
00265         //comp_type cmp_;
00266 
00267         ran32State = 0xdeadbeef;
00268 
00269 
00270         DB_BTREE_STAT * dbstat;
00271 
00272         db.stat(NULL, &dbstat, 0);
00273         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
00274 
00275         db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR);
00276 
00277         Timer.start();
00278 
00279         for (i = 0; i < n_inserts; ++i)
00280         {
00281             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
00282             rand_key(i, key1_storage);
00283             db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
00284         }
00285 
00286         Timer.stop();
00287         db.stat(NULL, &dbstat, 0);
00288         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
00289         STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
00290                   " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00291 
00292         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
00293 
00295         Timer.reset();
00296         Timer.start();
00297 
00298 
00299         Dbc * cursorp;
00300         db.cursor(NULL, &cursorp, 0);
00301 
00302         for (i = 0; i < n_locates; ++i)
00303         {
00304             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
00305             rand_key(i, key1_storage);
00306             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
00307             Dbt datax(data1_storage.databuf, DATA_SIZE);
00308 
00309             cursorp->get(&keyx, &datax, DB_SET_RANGE);
00310         }
00311 
00312         Timer.stop();
00313         STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
00314                   " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00315 
00316         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
00317 
00319         Timer.reset();
00320 
00321         Timer.start();
00322 
00323         stxxl::int64 n_scanned = 0;
00324 
00325         for (i = 0; i < n_range_queries; ++i)
00326         {
00327             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
00328             rand_key(i, key1_storage);
00329             my_key last_key = key1_storage;
00330             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
00331             rand_key(i, key1_storage);
00332             if (last_key < key1_storage)
00333                 std::swap(last_key, key1_storage);
00334 
00335 
00336             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
00337             Dbt datax(data1_storage.databuf, DATA_SIZE);
00338 
00339             if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
00340                 continue;
00341 
00342 
00343             while (*((my_key *)keyx.get_data()) <= last_key)
00344             {
00345                 ++n_scanned;
00346                 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND)
00347                     break;
00348             }
00349 
00350             if (n_scanned >= 10 * n_range_queries)
00351             {
00352                 ++i;
00353                 break;
00354             }
00355         }
00356 
00357         n_range_queries = i;
00358 
00359         Timer.stop();
00360         if (cursorp != NULL)
00361             cursorp->close();
00362 
00363 
00364         STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
00365                   " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
00366                   " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
00367 
00368         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
00369 
00371 
00372         ran32State = 0xdeadbeef;
00373         memset(key1_storage.keybuf, 'a', KEY_SIZE);
00374 
00375         Timer.reset();
00376         Timer.start();
00377 
00378         for (i = 0; i < n_deletes; ++i)
00379         {
00380             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
00381             rand_key(i, key1_storage);
00382             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
00383             db.del(NULL, &keyx, 0);
00384         }
00385 
00386         Timer.stop();
00387         db.stat(NULL, &dbstat, 0);
00388         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
00389         STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
00390                   " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00391 
00392         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
00393 
00394         db.close(0);
00395     }
00396     catch (DbException & e)
00397     {
00398         STXXL_ERRMSG("DbException happened");
00399     }
00400     catch (std::exception & e)
00401     {
00402         STXXL_ERRMSG("std::exception happened");
00403     }
00404 
00405     unlink(filename);
00406 }
00407 
00408 void run_stxxl_map(stxxl::int64 ops)
00409 {
00410     map_type Map(NODE_CACHE_SIZE, LEAF_CACHE_SIZE);
00411     Map.enable_prefetching();
00412     stxxl::stats * Stats = stxxl::stats::get_instance();
00413 
00414     std::pair<my_key, my_data> element;
00415 
00416     memset(element.first.keybuf, 'a', KEY_SIZE);
00417     memset(element.second.databuf, 'b', DATA_SIZE);
00418 
00419 
00420     stxxl::timer Timer;
00421     stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
00422     stxxl::int64 i;
00423     //comp_type cmp_;
00424 
00425     ran32State = 0xdeadbeef;
00426 
00427     //stxxl::random_number32 myrand;
00428 
00429     STXXL_MSG("Records in map: " << Map.size());
00430 
00431     Stats->reset();
00432 
00433     Timer.start();
00434 
00435     for (i = 0; i < n_inserts; ++i)
00436     {
00437         //element.first.keybuf[KEYPOS] = letters[VALUE];
00438         rand_key(i, element.first);
00439         Map.insert(element);
00440     }
00441 
00442     Timer.stop();
00443 
00444     STXXL_MSG("Records in map: " << Map.size());
00445     STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
00446               " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00447 
00448     std::cout << *Stats;
00449     Stats->reset();
00450 
00452     Timer.reset();
00453 
00454     const map_type & CMap(Map);     // const map reference
00455 
00456     Timer.start();
00457 
00458     for (i = 0; i < n_locates; ++i)
00459     {
00460         //element.first.keybuf[KEYPOS] = letters[VALUE];
00461         rand_key(i, element.first);
00462         CMap.lower_bound(element.first);
00463     }
00464 
00465     Timer.stop();
00466     STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
00467               " seconds : " << (double(n_locates) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00468 
00469     std::cout << *Stats;
00470     Stats->reset();
00471 
00473     Timer.reset();
00474 
00475     Timer.start();
00476 
00477     stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
00478 
00479     for (i = 0; i < n_range_queries; ++i)
00480     {
00481         //element.first.keybuf[KEYPOS] = letters[VALUE];
00482         rand_key(i, element.first);
00483         my_key begin_key = element.first;
00484         map_type::const_iterator begin = CMap.lower_bound(element.first);
00485         //element.first.keybuf[KEYPOS] = letters[VALUE];
00486         rand_key(i, element.first);
00487         map_type::const_iterator beyond = CMap.lower_bound(element.first);
00488         if (element.first < begin_key)
00489             std::swap(begin, beyond);
00490 
00491         while (begin != beyond)
00492         {
00493             my_data tmp = begin->second;
00494             ++n_scanned;
00495             ++begin;
00496         }
00497         if (n_scanned >= 10 * n_range_queries)
00498         {
00499             ++i;
00500             break;
00501         }
00502     }
00503 
00504     n_range_queries = i;
00505 
00506     Timer.stop();
00507     STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
00508               " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
00509               " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
00510 
00511     std::cout << *Stats;
00512     Stats->reset();
00513 
00515     ran32State = 0xdeadbeef;
00516     memset(element.first.keybuf, 'a', KEY_SIZE);
00517     memset(element.second.databuf, 'b', DATA_SIZE);
00518 
00519     Timer.reset();
00520     Timer.start();
00521 
00522     for (i = 0; i < n_deletes; ++i)
00523     {
00524         //element.first.keybuf[KEYPOS] = letters[VALUE];
00525         rand_key(i, element.first);
00526         Map.erase(element.first);
00527     }
00528 
00529     Timer.stop();
00530     STXXL_MSG("Records in map: " << Map.size());
00531     STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
00532               " seconds : " << (double(n_deletes) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00533 
00534     std::cout << *Stats;
00535     Stats->reset();
00536 }
00537 
00538 class rand_key_gen
00539 {
00540     stxxl::int64 counter;
00541     my_key & current;
00542     stxxl::random_number32 myrand;
00543     rand_key_gen();
00544 
00545 public:
00546     typedef my_key value_type;
00547 
00548     rand_key_gen(stxxl::int64 el, my_key & cur) :
00549         counter(el), current(cur)
00550     {
00551         //const stxxl::int64  & i = counter;
00552         //current.keybuf[KEYPOS] = letters[VALUE];
00553         rand_key(counter, current);
00554     }
00555     const my_key & operator * () { return current; }
00556     const my_key * operator -> () { return &current; }
00557 
00558     rand_key_gen & operator ++ ()
00559     {
00560         --counter;
00561         //const stxxl::int64  & i = counter;
00562         //current.keybuf[KEYPOS] = letters[VALUE];
00563         rand_key(counter, current);
00564         return *this;
00565     }
00566     bool empty() const { return counter == 0; }
00567 };
00568 
00569 template <class InputType>
00570 class key2pair
00571 {
00572     InputType & in;
00573     std::pair<my_key, my_data> current;
00574     key2pair();
00575 
00576 public:
00577     typedef std::pair<my_key, my_data> value_type;
00578 
00579     key2pair(InputType & in_) : in(in_)
00580     {
00581         if (!in.empty())
00582             current.first = *in;
00583     }
00584     const value_type & operator * () { return current; }
00585     const value_type * operator -> () { return &current; }
00586 
00587     key2pair & operator ++ ()
00588     {
00589         ++in;
00590         if (!empty())
00591             current.first = *in;
00592 
00593 
00594         return *this;
00595     }
00596     bool empty() const { return in.empty(); }
00597 };
00598 
00599 void run_stxxl_map_big(stxxl::int64 n, unsigned ops)
00600 {
00601     stxxl::stats * Stats = stxxl::stats::get_instance();
00602 
00603     std::pair<my_key, my_data> element;
00604 
00605     memset(element.first.keybuf, 'a', KEY_SIZE);
00606     memset(element.second.databuf, 'b', DATA_SIZE);
00607 
00608     stxxl::timer Timer;
00609     stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
00610     stxxl::int64 i;
00611     //comp_type cmp_;
00612 
00613     ran32State = 0xdeadbeef;
00614 
00615     //stxxl::random_number32 myrand;
00616 
00617     Timer.start();
00618 
00619     vector_type SortedSeq(n);
00620     const vector_type & CSortedSeq(SortedSeq);
00621     {
00622         rand_key_gen Gen(n, element.first);
00623         typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
00624         sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
00625         typedef key2pair<sorter_type> key2pair_type;
00626         key2pair_type Key2Pair(Sorter);
00627         stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
00628     }
00629 
00630 
00631     Timer.stop();
00632     Stats->reset();
00633 
00634     STXXL_MSG("Finished sorting input. Elapsed time: " <<
00635               (Timer.mseconds() / 1000.) << " seconds.");
00636 
00637     Timer.reset();
00638     Timer.start();
00639 
00640     // bulk construction
00641     map_type Map(CSortedSeq.begin(),
00642                  CSortedSeq.end(),
00643                  NODE_CACHE_SIZE, LEAF_CACHE_SIZE, true);
00644 
00645     Timer.stop();
00646 
00647 
00648     STXXL_MSG("Records in map: " << Map.size());
00649     STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
00650               " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00651 
00652     using std::cout;
00653     Map.print_statistics(cout);
00654     Map.reset_statistics();
00655     std::cout << *Stats;
00656     Stats->reset();
00658     Timer.reset();
00659 
00660 
00661     Map.disable_prefetching();
00662 
00663     Timer.start();
00664 
00665     for (i = 0; i < n_inserts; ++i)
00666     {
00667         //element.first.keybuf[KEYPOS] = letters[VALUE];
00668         rand_key(i, element.first);
00669         Map.insert(element);
00670     }
00671 
00672     Timer.stop();
00673 
00674     STXXL_MSG("Records in map: " << Map.size());
00675     STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
00676               " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00677 
00678     Map.print_statistics(cout);
00679     Map.reset_statistics();
00680 
00681     std::cout << *Stats;
00682     Stats->reset();
00683 
00685 
00686 
00688     Timer.reset();
00689 
00690 
00691     const map_type & CMap(Map);     // const map reference
00692 
00693     Timer.start();
00694 
00695     for (i = 0; i < n_locates; ++i)
00696     {
00697         //element.first.keybuf[KEYPOS] = letters[VALUE];
00698         rand_key(i, element.first);
00699         CMap.lower_bound(element.first);
00700     }
00701 
00702     Timer.stop();
00703     STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
00704               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00705 
00706     Map.print_statistics(cout);
00707     Map.reset_statistics();
00708     std::cout << *Stats;
00709     Stats->reset();
00710 
00712     Timer.reset();
00713 
00714     Map.enable_prefetching();
00715 
00716     Timer.start();
00717 
00718     stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
00719 
00720     for (i = 0; i < n_range_queries; ++i)
00721     {
00722         //element.first.keybuf[KEYPOS] = letters[VALUE];
00723         rand_key(i, element.first);
00724         my_key begin_key = element.first;
00725         map_type::const_iterator begin = CMap.lower_bound(element.first);
00726         //element.first.keybuf[KEYPOS] = letters[VALUE];
00727         rand_key(i, element.first);
00728         map_type::const_iterator beyond = CMap.lower_bound(element.first);
00729         if (element.first < begin_key)
00730             std::swap(begin, beyond);
00731 
00732         /*
00733            STXXL_MSG("Looking     "<<element.first<<" scanned: "<<n_scanned);
00734 
00735            if(beyond==CMap.end())
00736            {
00737                 STXXL_MSG("Upper bound "<<"END");
00738            }
00739            else
00740            {
00741                 STXXL_MSG("Upper bound "<<((element.first>begin_key)?element.first:begin_key));
00742            }*/
00743 
00744         while (begin != beyond)
00745         {
00746             ++n_scanned;
00747             ++begin;
00748         }
00749 
00750         if (n_scanned >= SCAN_LIMIT(n))
00751         {
00752             ++i;
00753             break;
00754         }
00755     }
00756 
00757     n_range_queries = i;
00758 
00759     Timer.stop();
00760     STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
00761               " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
00762               " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
00763 
00764     Map.print_statistics(cout);
00765     Map.reset_statistics();
00766 
00767     std::cout << *Stats;
00768     Stats->reset();
00769 
00771     ran32State = 0xdeadbeef;
00772     memset(element.first.keybuf, 'a', KEY_SIZE);
00773     memset(element.second.databuf, 'b', DATA_SIZE);
00774 
00775     Timer.reset();
00776     Map.disable_prefetching();
00777 
00778     Timer.start();
00779 
00780     for (i = n_deletes; i > 0; --i)
00781     {
00782         //element.first.keybuf[KEYPOS] = letters[VALUE];
00783         rand_key(i, element.first);
00784         Map.erase(element.first);
00785     }
00786 
00787     Timer.stop();
00788     STXXL_MSG("Records in map: " << Map.size());
00789     STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
00790               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00791 
00792     Map.print_statistics(cout);
00793     Map.reset_statistics();
00794     std::cout << *Stats;
00795     Stats->reset();
00796 }
00797 
00799 
00800 typedef AMI_STREAM<el_t> stream_t;
00801 
00802 char dummy;
00803 
00804 class MyFilter {
00805 public:
00806     bool operator () (const el_t & v) const
00807     {
00808         dummy += v.key_.keybuf[0];         // touch element
00809         return true;
00810     }
00811 };
00812 
00813 
00814 void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
00815 {
00816     el_t element;
00817 
00818     memset(element.key_.keybuf, 'a', KEY_SIZE);
00819     memset(element.data_.databuf, 'b', DATA_SIZE);
00820 
00821     // Log debugging info from the application, but not from the library.
00822     tpie_log_init(TPIE_LOG_APP_DEBUG);
00823     MM_manager.set_memory_limit(TOTAL_CACHE_SIZE);
00824     MM_manager.enforce_memory_limit();
00825 
00826     stream_t * is = new stream_t;
00827 
00828     stxxl::timer Timer;
00829     stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
00830     stxxl::int64 i;
00831     //comp_type cmp_;
00832 
00833     ran32State = 0xdeadbeef;
00834 
00835     //stxxl::random_number32 myrand;
00836 
00837     Timer.start();
00838 
00839 
00840     {
00841         rand_key_gen Gen(n, element.key_);
00842         typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
00843         sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
00844         //typedef key2pair<sorter_type> key2pair_type;
00845         //key2pair_type Key2Pair(Sorter);
00846         while (!Sorter.empty())
00847         {
00848             is->write_item(*Sorter);
00849             ++Sorter;
00850         }
00851     }
00852 
00853     Timer.stop();
00854 
00855 
00856     STXXL_MSG("Finished sorting input. Elapsed time: " <<
00857               (Timer.mseconds() / 1000.) << " seconds.");
00858 
00859 
00860     Timer.reset();
00861     Timer.start();
00862 
00863     // bulk construction
00864     u_btree_t * u_btree;
00865     AMI_btree_params params;
00866     params.node_block_factor = NODE_BLOCK_SIZE / 4096;
00867     params.leaf_block_factor = LEAF_BLOCK_SIZE / 4096;
00868     params.leaf_cache_size = LEAF_CACHE_SIZE / LEAF_BLOCK_SIZE;
00869     params.node_cache_size = NODE_CACHE_SIZE / NODE_BLOCK_SIZE;
00870 
00871     u_btree = new u_btree_t(params);
00872 
00873     using std::cout;
00874     using std::cerr;
00875 
00876     if (!u_btree->is_valid()) {
00877         cerr << "Error initializing btree. Aborting.\n";
00878         delete u_btree;
00879         exit(1);
00880     }
00881 
00882     if (u_btree->load_sorted(is, 1.0, 1.0) != AMI_ERROR_NO_ERROR)
00883         cerr << "Error during bulk loading.\n";
00884 
00885 
00886     Timer.stop();
00887 
00888     STXXL_MSG("Records in map: " << u_btree->size());
00889     STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
00890               " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00891 
00892 
00894     Timer.reset();
00895 
00896 
00897     Timer.start();
00898 
00899     for (i = 0; i < n_inserts; ++i)
00900     {
00901         //element.first.keybuf[KEYPOS] = letters[VALUE];
00902         rand_key(i, element.key_);
00903         u_btree->insert(element);
00904     }
00905 
00906     Timer.stop();
00907 
00908     STXXL_MSG("Records in map: " << u_btree->size());
00909     STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
00910               " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00911 
00912 
00914     Timer.reset();
00915 
00916 
00917     Timer.start();
00918 
00919 
00920     el_t result;
00921     for (i = 0; i < n_locates; ++i)
00922     {
00923         //element.first.keybuf[KEYPOS] = letters[VALUE];
00924         rand_key(i, element.key_);
00925         u_btree->succ(element.key_, result);
00926     }
00927 
00928     Timer.stop();
00929     STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
00930               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00931 
00932 
00934     Timer.reset();
00935 
00936 
00937     Timer.start();
00938 
00939     stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
00940     MyFilter filter;
00941 
00942     for (i = 0; i < n_range_queries; ++i)
00943     {
00944         rand_key(i, element.key_);
00945         my_key begin_key = element.key_;
00946         rand_key(i, element.key_);
00947         if (element.key_ < begin_key)
00948             n_scanned += u_btree->range_query(element.key_, begin_key, NULL, filter);
00949 
00950         else
00951             n_scanned += u_btree->range_query(begin_key, element.key_, NULL, filter);
00952 
00953 
00954         if (n_scanned >= SCAN_LIMIT(n))
00955         {
00956             ++i;
00957             break;
00958         }
00959     }
00960 
00961     n_range_queries = i;
00962 
00963     Timer.stop();
00964     STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
00965               " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
00966               " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
00967 
00968 
00970     ran32State = 0xdeadbeef;
00971     memset(element.key_.keybuf, 'a', KEY_SIZE);
00972     memset(element.data_.databuf, 'b', DATA_SIZE);
00973 
00974     Timer.reset();
00975 
00976     Timer.start();
00977 
00978     for (i = n_deletes; i > 0; --i)
00979     {
00980         //element.first.keybuf[KEYPOS] = letters[VALUE];
00981         rand_key(i, element.key_);
00982         u_btree->erase(element.key_);
00983     }
00984 
00985     Timer.stop();
00986     STXXL_MSG("Records in map: " << u_btree->size());
00987     STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
00988               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
00989 }
00990 
00991 void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
00992 {
00993     const char * filename = BDB_FILE;
00994 
00995     my_key key1_storage;
00996     my_data data1_storage;
00997 
00998 #ifdef BDB_BULK_SCAN
00999     int * bulk_buffer = new int[logbufsize / sizeof(int)];
01000 #endif
01001 
01002     unlink(filename);
01003 
01004     memset(key1_storage.keybuf, 'a', KEY_SIZE);
01005     memset(data1_storage.databuf, 'b', DATA_SIZE);
01006 
01007 
01008     Db db(NULL, 0);                   // Instantiate the Db object
01009 
01010     try {
01011         // here we start with the tests
01012         Dbt key1(key1_storage.keybuf, KEY_SIZE);
01013         Dbt data1(data1_storage.databuf, DATA_SIZE);
01014 
01015         stxxl::timer Timer;
01016         stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
01017         stxxl::int64 i;
01018         //comp_type cmp_;
01019 
01020         ran32State = 0xdeadbeef;
01021 
01022         Timer.start();
01023 
01024         vector_type SortedSeq(n);
01025         //const vector_type & CSortedSeq(SortedSeq);
01026         {
01027             rand_key_gen Gen(n, key1_storage);
01028             typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
01029             sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
01030             typedef key2pair<sorter_type> key2pair_type;
01031             key2pair_type Key2Pair(Sorter);
01032             stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
01033         }
01034 
01035         Timer.stop();
01036 
01037         STXXL_MSG("Finished sorting input. Elapsed time: " << (Timer.mseconds() / 1000.) << " seconds.");
01038 
01039         db.set_errfile(stderr);
01040         db.set_pagesize(pagesize);
01041         db.set_cachesize(0, cachesize, 10);
01042 
01043         STXXL_MSG("BDB cache size set.");
01044 
01045         // Open the database
01046         db.open(NULL,           // Transaction pointer
01047                 filename,       // Database file name
01048                 NULL,           // Optional logical database name
01049                 DB_BTREE,       // Database access method
01050                 DB_CREATE,      // Open flags
01051                 0);             // File mode (using defaults)
01052 
01053         db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR);
01054 
01055         Timer.reset();
01056         Timer.start();
01057 
01058         // DBD does not have bulk construction
01059         // however inserting in sorted order might help
01060         // to improve performance
01061         vector_type::const_iterator cit = SortedSeq.begin();
01062         for (i = 0; i < n; ++i, ++cit)
01063         {
01064             key1_storage = cit->first;
01065             db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
01066         }
01067 
01068         Timer.stop();
01069 
01070         DB_BTREE_STAT * dbstat;
01071         db.stat(NULL, &dbstat, 0);
01072         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
01073         STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
01074                   " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
01075 
01076         db.stat_print(0);
01077         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01079 
01080 
01081         Timer.reset();
01082         Timer.start();
01083 
01084         for (i = 0; i < n_inserts; ++i)
01085         {
01086             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
01087             rand_key(i, key1_storage);
01088             db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
01089         }
01090 
01091         Timer.stop();
01092         db.stat(NULL, &dbstat, 0);
01093         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
01094         STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
01095                   " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
01096 
01097         db.stat_print(0);
01098         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01099 
01101         Timer.reset();
01102         Timer.start();
01103 
01104 
01105         Dbc * cursorp;
01106         db.cursor(NULL, &cursorp, 0);
01107 
01108         for (i = 0; i < n_locates; ++i)
01109         {
01110             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
01111             rand_key(i, key1_storage);
01112             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
01113             Dbt datax(data1_storage.databuf, DATA_SIZE);
01114 
01115             cursorp->get(&keyx, &datax, DB_SET_RANGE);
01116         }
01117 
01118         Timer.stop();
01119         STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
01120                   " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
01121 
01122         db.stat_print(0);
01123         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01124 
01126         Timer.reset();
01127 
01128         Timer.start();
01129 
01130         stxxl::int64 n_scanned = 0;
01131 
01132         for (i = 0; i < n_range_queries; ++i)
01133         {
01134             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
01135             rand_key(i, key1_storage);
01136             my_key last_key = key1_storage;
01137             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
01138             rand_key(i, key1_storage);
01139             if (last_key < key1_storage)
01140                 std::swap(last_key, key1_storage);
01141 
01142 
01143             //STXXL_MSG("Looking     "<<key1_storage<<" scanned: "<<n_scanned);
01144             //STXXL_MSG("Upper bound "<<last_key);
01145 
01146             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
01147 #ifdef BDB_BULK_SCAN
01148             Dbt datax(bulk_buffer, DATA_SIZE);
01149             datax.set_ulen(logbufsize);
01150             datax.set_flags(DB_DBT_USERMEM);
01151 #else
01152             Dbt datax(data1_storage.databuf, DATA_SIZE);
01153 #endif
01154 
01155 
01156 #ifdef BDB_BULK_SCAN
01157             if (cursorp->get(&keyx, &datax, DB_SET_RANGE | DB_MULTIPLE_KEY) == DB_NOTFOUND)
01158                 continue;
01159 
01160 
01161             do
01162             {
01163                 DbMultipleKeyDataIterator BulkIterator(datax);
01164                 Dbt key1, data1;
01165                 while (BulkIterator.next(key1, data1) &&
01166                        *((my_key *)key1.get_data()) <= last_key)
01167                 {
01168                     ++n_scanned;
01169                     //STXXL_MSG("Result      "<<*((my_key *)key1.get_data()));
01170                 }
01171                 if (cursorp->get(&keyx, &datax, DB_NEXT | DB_MULTIPLE_KEY) == DB_NOTFOUND)
01172                     break;
01173 
01174 
01175                 if (*((my_key *)keyx.get_data()) > last_key)
01176                 {
01177                     break;
01178                 }
01179             } while (1);
01180 
01181 #else
01182             if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
01183                 continue;
01184 
01185             while (*((my_key *)keyx.get_data()) <= last_key)
01186             {
01187                 ++n_scanned;
01188                 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND)
01189                     break;
01190             }
01191 #endif
01192 
01193 
01194             if (n_scanned >= SCAN_LIMIT(n))
01195             {
01196                 ++i;
01197                 break;
01198             }
01199         }
01200 
01201         n_range_queries = i;
01202 
01203         Timer.stop();
01204         if (cursorp != NULL)
01205             cursorp->close();
01206 
01207 
01208         STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
01209                   " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
01210                   " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
01211 
01212         db.stat_print(0);
01213         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01214 
01216 
01217         ran32State = 0xdeadbeef;
01218         memset(key1_storage.keybuf, 'a', KEY_SIZE);
01219 
01220         Timer.reset();
01221         Timer.start();
01222 
01223         for (i = 0; i < n_deletes; ++i)
01224         {
01225             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
01226             rand_key(i, key1_storage);
01227             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
01228             db.del(NULL, &keyx, 0);
01229         }
01230 
01231         Timer.stop();
01232         db.stat(NULL, &dbstat, 0);
01233         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
01234         STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
01235                   " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
01236 
01237         db.stat_print(0);
01238         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
01239 
01240         db.close(0);
01241     }
01242     catch (DbException & e)
01243     {
01244         STXXL_ERRMSG("DbException happened");
01245     }
01246     catch (std::exception & e)
01247     {
01248         STXXL_ERRMSG("std::exception happened");
01249     }
01250 
01251     unlink(filename);
01252 
01253 #ifdef BDB_BULK_SCAN
01254     delete[]  bulk_buffer;
01255 #endif
01256 }
01257 
01258 
01259 int main(int argc, char * argv[])
01260 {
01261     STXXL_MSG("stxxl::map Real Node block size: " << REAL_NODE_BLOCK_SIZE << " bytes");
01262     STXXL_MSG("stxxl::map Real Leaf block size: " << REAL_LEAF_BLOCK_SIZE << " bytes");
01263     STXXL_MSG("stxxl::map Node max elements   : " << REAL_NODE_MELEMENTS);
01264     STXXL_MSG("stxxl::map Leaf max elements   : " << REAL_LEAF_MELEMENTS);
01265 #ifdef STXXL_DIRECT_IO_OFF
01266     STXXL_MSG("STXXL_DIRECT_IO_OFF is defined");
01267 #else
01268     STXXL_MSG("STXXL_DIRECT_IO_OFF is NOT defined");
01269 #endif
01270 
01271     if (argc < 3)
01272     {
01273         STXXL_MSG("Usage: " << argv[0] << " version #ops");
01274         STXXL_MSG("\t version = 1: test stxxl map");
01275         STXXL_MSG("\t version = 2: test Berkeley DB btree");
01276         STXXL_MSG("\t version = 3: big test stxxl map");
01277         STXXL_MSG("\t version = 4: big test Berkeley DB btree");
01278         STXXL_MSG("\t version = 5: big test TPIE btree");
01279         return -1;
01280     }
01281 
01282     init();
01283 
01284     int version = atoi(argv[1]);
01285     stxxl::int64 ops = stxxl::atoint64(argv[2]);
01286 
01287     STXXL_MSG("Running version      : " << version);
01288     STXXL_MSG("Operations to perform: " << ops);
01289     STXXL_MSG("Btree cache size     : " << TOTAL_CACHE_SIZE << " bytes");
01290     STXXL_MSG("Leaf block size      : " << LEAF_BLOCK_SIZE << " bytes");
01291 
01292 
01293     switch (version)
01294     {
01295     case 1:
01296         run_stxxl_map(ops);
01297         break;
01298     case 2:
01299         run_bdb_btree(ops);
01300         break;
01301     case 3:
01302         run_stxxl_map_big(ops, 100000);
01303         break;
01304     case 4:
01305         run_bdb_btree_big(ops, 100000);
01306         break;
01307     case 5:
01308         run_tpie_btree_big(ops, 100000);
01309         break;
01310     default:
01311         STXXL_MSG("Unsupported version " << version);
01312     }
01313 }

Generated on Thu Jun 4 10:30:00 2009 for Stxxl by  doxygen 1.4.7