00001 /*************************************************************************** 00002 * containers/berkeley_db_benchmark.cpp 00003 * 00004 * Part of the STXXL. See http://stxxl.sourceforge.net 00005 * 00006 * Copyright (C) 2006 Roman Dementiev <dementiev@ira.uka.de> 00007 * 00008 * Distributed under the Boost Software License, Version 1.0. 00009 * (See accompanying file LICENSE_1_0.txt or copy at 00010 * http://www.boost.org/LICENSE_1_0.txt) 00011 **************************************************************************/ 00012 00019 00020 00021 #include <stxxl/vector> 00022 #include <stxxl/map> 00023 #include <stxxl/timer> 00024 #include <stxxl/stream> 00025 00026 00028 #include <db_cxx.h> 00029 00031 #include "app_config.h" 00032 #include <portability.h> 00033 #include <ami_btree.h> 00035 00036 #define BDB_BULK_SCAN 00037 00038 #define KEY_SIZE 8 00039 #define DATA_SIZE 32 00040 00041 #define NODE_BLOCK_SIZE (32 * 1024) 00042 #define LEAF_BLOCK_SIZE (32 * 1024) 00043 00044 00045 #define LEAF_BLOCK_SIZE (32 * 1024) 00046 00047 #define TOTAL_CACHE_SIZE (750 * 1024 * 1024) 00048 //#define TOTAL_CACHE_SIZE (150 * 1024 * 1024) 00049 00050 //#define NODE_CACHE_SIZE (1 * (TOTAL_CACHE_SIZE / 40)) 00051 //#define LEAF_CACHE_SIZE (39 * (TOTAL_CACHE_SIZE / 40)) 00052 00053 #define NODE_CACHE_SIZE (64 * 1024 * 1024) 00054 #define LEAF_CACHE_SIZE (TOTAL_CACHE_SIZE - NODE_CACHE_SIZE) 00055 00056 #define SORTER_MEM (TOTAL_CACHE_SIZE - 1024 * 1024 * 2 * 4) 00057 00058 00059 #define SCAN_LIMIT(x) (x) 00060 00061 //#define BDB_FILE "/data3/bdb_file" 00062 #define BDB_FILE "/var/tmp/bdb_file" 00063 00064 00065 // BDB settings 00066 u_int32_t pagesize = LEAF_BLOCK_SIZE; 00067 u_int bulkbufsize = 4 * 1024 * 1024; 00068 u_int logbufsize = 8 * 1024 * 1024; 00069 u_int cachesize = (TOTAL_CACHE_SIZE < 500 * 1024 * 1024) ? (4 * (TOTAL_CACHE_SIZE / 5)) : (TOTAL_CACHE_SIZE - 100 * 1024 * 1024); 00070 u_int datasize = DATA_SIZE; 00071 u_int keysize = KEY_SIZE; 00072 u_int numitems = 0; 00073 00074 const char * letters = "abcdefghijklmnopqrstuvwxuz"; 00075 00076 struct my_key 00077 { 00078 char keybuf[KEY_SIZE]; 00079 }; 00080 00081 00082 std::ostream & operator << (std::ostream & o, const my_key & obj) 00083 { 00084 for (int i = 0; i < KEY_SIZE; ++i) 00085 o << obj.keybuf[i]; 00086 00087 return o; 00088 } 00089 00090 bool operator == (const my_key & a, const my_key & b) 00091 { 00092 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) == 0; 00093 } 00094 00095 bool operator != (const my_key & a, const my_key & b) 00096 { 00097 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) != 0; 00098 } 00099 00100 bool operator < (const my_key & a, const my_key & b) 00101 { 00102 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0; 00103 } 00104 00105 bool operator > (const my_key & a, const my_key & b) 00106 { 00107 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) > 0; 00108 } 00109 00110 bool operator <= (const my_key & a, const my_key & b) 00111 { 00112 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) <= 0; 00113 } 00114 00115 bool operator >= (const my_key & a, const my_key & b) 00116 { 00117 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) >= 0; 00118 } 00119 00120 00121 struct my_data 00122 { 00123 char databuf[DATA_SIZE]; 00124 }; 00125 00126 std::ostream & operator << (std::ostream & o, const my_data & obj) 00127 { 00128 o << "DATA(size=" << sizeof(obj) << ") "; 00129 00130 return o; 00131 } 00132 00133 my_key min_key, max_key; 00134 00135 struct comp_type : std::binary_function<my_key, my_key, bool> 00136 { 00137 bool operator () (const my_key & a, const my_key & b) const 00138 { 00139 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0; 00140 } 00141 static my_key max_value() 00142 { 00143 return max_key; 00144 } 00145 static my_key min_value() 00146 { 00147 return min_key; 00148 } 00149 }; 00150 00151 00153 // Key type. 00154 typedef my_key bkey_t; 00155 00156 // Element type for the btree. 00157 struct el_t { 00158 bkey_t key_; 00159 my_data data_; 00160 el_t(bkey_t k) : key_(k) { } 00161 el_t() { } 00162 }; 00163 struct key_from_el { 00164 bkey_t operator () (const el_t & v) const 00165 { 00166 return v.key_; 00167 } 00168 }; 00169 00170 00171 // Temporary distinction btw UN*X and WIN, since there are some 00172 // problems with the MMAP collection implementation. 00173 #ifdef _WIN32 00174 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el, BTE_COLLECTION_UFS> u_btree_t; 00175 #else 00176 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el> u_btree_t; 00177 #endif 00178 00179 00180 void init() 00181 { 00182 memset(max_key.keybuf, (std::numeric_limits<char>::max)(), KEY_SIZE); 00183 memset(min_key.keybuf, (std::numeric_limits<char>::min)(), KEY_SIZE); 00184 } 00185 00186 typedef stxxl::map<my_key, my_data, comp_type, NODE_BLOCK_SIZE, LEAF_BLOCK_SIZE> map_type; 00187 00188 #define REAL_NODE_BLOCK_SIZE map_type::node_block_type::raw_size 00189 #define REAL_LEAF_BLOCK_SIZE map_type::leaf_block_type::raw_size 00190 #define REAL_NODE_MELEMENTS map_type::node_block_type::size 00191 #define REAL_LEAF_MELEMENTS map_type::leaf_block_type::size 00192 00193 typedef stxxl::VECTOR_GENERATOR<std::pair<my_key, my_data>, 1, 1>::result vector_type; 00194 //typedef stxxl::vector<std::pair<my_key,my_data>,1,stxxl::lru_pager<1>,512*1024> vector_type; 00195 00196 00197 //#define KEYPOS (i % KEY_SIZE) 00198 //#define VALUE (myrand() % 26) 00199 00200 00201 #if 0 00202 unsigned ran32State = 0xdeadbeef; 00203 inline unsigned myrand() 00204 { 00205 return (ran32State = 1664525 * ran32State + 1013904223); 00206 } 00207 inline void rand_key(stxxl::int64 pos, my_key & Key) 00208 { 00209 for (int i = 0; i < KEY_SIZE; ++i) 00210 Key.keybuf[i] = letters[myrand() % 26]; 00211 } 00212 #else // a longer pseudo random sequence 00213 long long unsigned ran32State = 0xdeadbeef; 00214 inline long long unsigned myrand() 00215 { 00216 return (ran32State = (ran32State * 0x5DEECE66DULL + 0xBULL) & 0xFFFFFFFFFFFFULL); 00217 } 00218 inline void rand_key(stxxl::int64 /*pos*/, my_key & Key) 00219 { 00220 long long unsigned r = myrand(); 00221 for (int i = 0; i < KEY_SIZE; ++i) 00222 { 00223 Key.keybuf[i] = letters[r % 26]; 00224 r >>= 5; 00225 } 00226 } 00227 #endif 00228 00229 void run_bdb_btree(stxxl::int64 ops) 00230 { 00231 const char * filename = BDB_FILE; 00232 00233 my_key key1_storage; 00234 my_data data1_storage; 00235 00236 unlink(filename); 00237 00238 memset(key1_storage.keybuf, 'a', KEY_SIZE); 00239 memset(data1_storage.databuf, 'b', DATA_SIZE); 00240 00241 00242 Db db(NULL, 0); // Instantiate the Db object 00243 00244 try { 00245 db.set_errfile(stderr); 00246 db.set_pagesize(pagesize); 00247 db.set_cachesize(0, cachesize, 1); 00248 00249 // Open the database 00250 db.open(NULL, // Transaction pointer 00251 filename, // Database file name 00252 NULL, // Optional logical database name 00253 DB_BTREE, // Database access method 00254 DB_CREATE, // Open flags 00255 0); // File mode (using defaults) 00256 00257 00258 // here we start with the tests 00259 Dbt key1(key1_storage.keybuf, KEY_SIZE); 00260 Dbt data1(data1_storage.databuf, DATA_SIZE); 00261 00262 stxxl::timer Timer; 00263 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops; 00264 stxxl::int64 i; 00265 //comp_type cmp_; 00266 00267 ran32State = 0xdeadbeef; 00268 00269 00270 DB_BTREE_STAT * dbstat; 00271 00272 db.stat(NULL, &dbstat, 0); 00273 STXXL_MSG("Records in map: " << dbstat->bt_ndata); 00274 00275 db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR); 00276 00277 Timer.start(); 00278 00279 for (i = 0; i < n_inserts; ++i) 00280 { 00281 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 00282 rand_key(i, key1_storage); 00283 db.put(NULL, &key1, &data1, DB_NOOVERWRITE); 00284 } 00285 00286 Timer.stop(); 00287 db.stat(NULL, &dbstat, 0); 00288 STXXL_MSG("Records in map: " << dbstat->bt_ndata); 00289 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) << 00290 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00291 00292 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 00293 00295 Timer.reset(); 00296 Timer.start(); 00297 00298 00299 Dbc * cursorp; 00300 db.cursor(NULL, &cursorp, 0); 00301 00302 for (i = 0; i < n_locates; ++i) 00303 { 00304 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 00305 rand_key(i, key1_storage); 00306 Dbt keyx(key1_storage.keybuf, KEY_SIZE); 00307 Dbt datax(data1_storage.databuf, DATA_SIZE); 00308 00309 cursorp->get(&keyx, &datax, DB_SET_RANGE); 00310 } 00311 00312 Timer.stop(); 00313 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) << 00314 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00315 00316 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 00317 00319 Timer.reset(); 00320 00321 Timer.start(); 00322 00323 stxxl::int64 n_scanned = 0; 00324 00325 for (i = 0; i < n_range_queries; ++i) 00326 { 00327 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 00328 rand_key(i, key1_storage); 00329 my_key last_key = key1_storage; 00330 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 00331 rand_key(i, key1_storage); 00332 if (last_key < key1_storage) 00333 std::swap(last_key, key1_storage); 00334 00335 00336 Dbt keyx(key1_storage.keybuf, KEY_SIZE); 00337 Dbt datax(data1_storage.databuf, DATA_SIZE); 00338 00339 if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND) 00340 continue; 00341 00342 00343 while (*((my_key *)keyx.get_data()) <= last_key) 00344 { 00345 ++n_scanned; 00346 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND) 00347 break; 00348 } 00349 00350 if (n_scanned >= 10 * n_range_queries) 00351 { 00352 ++i; 00353 break; 00354 } 00355 } 00356 00357 n_range_queries = i; 00358 00359 Timer.stop(); 00360 if (cursorp != NULL) 00361 cursorp->close(); 00362 00363 00364 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) << 00365 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) << 00366 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned); 00367 00368 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 00369 00371 00372 ran32State = 0xdeadbeef; 00373 memset(key1_storage.keybuf, 'a', KEY_SIZE); 00374 00375 Timer.reset(); 00376 Timer.start(); 00377 00378 for (i = 0; i < n_deletes; ++i) 00379 { 00380 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 00381 rand_key(i, key1_storage); 00382 Dbt keyx(key1_storage.keybuf, KEY_SIZE); 00383 db.del(NULL, &keyx, 0); 00384 } 00385 00386 Timer.stop(); 00387 db.stat(NULL, &dbstat, 0); 00388 STXXL_MSG("Records in map: " << dbstat->bt_ndata); 00389 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) << 00390 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00391 00392 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 00393 00394 db.close(0); 00395 } 00396 catch (DbException & e) 00397 { 00398 STXXL_ERRMSG("DbException happened"); 00399 } 00400 catch (std::exception & e) 00401 { 00402 STXXL_ERRMSG("std::exception happened"); 00403 } 00404 00405 unlink(filename); 00406 } 00407 00408 void run_stxxl_map(stxxl::int64 ops) 00409 { 00410 map_type Map(NODE_CACHE_SIZE, LEAF_CACHE_SIZE); 00411 Map.enable_prefetching(); 00412 stxxl::stats * Stats = stxxl::stats::get_instance(); 00413 00414 std::pair<my_key, my_data> element; 00415 00416 memset(element.first.keybuf, 'a', KEY_SIZE); 00417 memset(element.second.databuf, 'b', DATA_SIZE); 00418 00419 00420 stxxl::timer Timer; 00421 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops; 00422 stxxl::int64 i; 00423 //comp_type cmp_; 00424 00425 ran32State = 0xdeadbeef; 00426 00427 //stxxl::random_number32 myrand; 00428 00429 STXXL_MSG("Records in map: " << Map.size()); 00430 00431 Stats->reset(); 00432 00433 Timer.start(); 00434 00435 for (i = 0; i < n_inserts; ++i) 00436 { 00437 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00438 rand_key(i, element.first); 00439 Map.insert(element); 00440 } 00441 00442 Timer.stop(); 00443 00444 STXXL_MSG("Records in map: " << Map.size()); 00445 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) << 00446 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00447 00448 std::cout << *Stats; 00449 Stats->reset(); 00450 00452 Timer.reset(); 00453 00454 const map_type & CMap(Map); // const map reference 00455 00456 Timer.start(); 00457 00458 for (i = 0; i < n_locates; ++i) 00459 { 00460 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00461 rand_key(i, element.first); 00462 CMap.lower_bound(element.first); 00463 } 00464 00465 Timer.stop(); 00466 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) << 00467 " seconds : " << (double(n_locates) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00468 00469 std::cout << *Stats; 00470 Stats->reset(); 00471 00473 Timer.reset(); 00474 00475 Timer.start(); 00476 00477 stxxl::int64 n_scanned = 0; //, skipped_qieries = 0; 00478 00479 for (i = 0; i < n_range_queries; ++i) 00480 { 00481 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00482 rand_key(i, element.first); 00483 my_key begin_key = element.first; 00484 map_type::const_iterator begin = CMap.lower_bound(element.first); 00485 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00486 rand_key(i, element.first); 00487 map_type::const_iterator beyond = CMap.lower_bound(element.first); 00488 if (element.first < begin_key) 00489 std::swap(begin, beyond); 00490 00491 while (begin != beyond) 00492 { 00493 my_data tmp = begin->second; 00494 ++n_scanned; 00495 ++begin; 00496 } 00497 if (n_scanned >= 10 * n_range_queries) 00498 { 00499 ++i; 00500 break; 00501 } 00502 } 00503 00504 n_range_queries = i; 00505 00506 Timer.stop(); 00507 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) << 00508 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) << 00509 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned); 00510 00511 std::cout << *Stats; 00512 Stats->reset(); 00513 00515 ran32State = 0xdeadbeef; 00516 memset(element.first.keybuf, 'a', KEY_SIZE); 00517 memset(element.second.databuf, 'b', DATA_SIZE); 00518 00519 Timer.reset(); 00520 Timer.start(); 00521 00522 for (i = 0; i < n_deletes; ++i) 00523 { 00524 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00525 rand_key(i, element.first); 00526 Map.erase(element.first); 00527 } 00528 00529 Timer.stop(); 00530 STXXL_MSG("Records in map: " << Map.size()); 00531 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) << 00532 " seconds : " << (double(n_deletes) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00533 00534 std::cout << *Stats; 00535 Stats->reset(); 00536 } 00537 00538 class rand_key_gen 00539 { 00540 stxxl::int64 counter; 00541 my_key & current; 00542 stxxl::random_number32 myrand; 00543 rand_key_gen(); 00544 00545 public: 00546 typedef my_key value_type; 00547 00548 rand_key_gen(stxxl::int64 el, my_key & cur) : 00549 counter(el), current(cur) 00550 { 00551 //const stxxl::int64 & i = counter; 00552 //current.keybuf[KEYPOS] = letters[VALUE]; 00553 rand_key(counter, current); 00554 } 00555 const my_key & operator * () { return current; } 00556 const my_key * operator -> () { return ¤t; } 00557 00558 rand_key_gen & operator ++ () 00559 { 00560 --counter; 00561 //const stxxl::int64 & i = counter; 00562 //current.keybuf[KEYPOS] = letters[VALUE]; 00563 rand_key(counter, current); 00564 return *this; 00565 } 00566 bool empty() const { return counter == 0; } 00567 }; 00568 00569 template <class InputType> 00570 class key2pair 00571 { 00572 InputType & in; 00573 std::pair<my_key, my_data> current; 00574 key2pair(); 00575 00576 public: 00577 typedef std::pair<my_key, my_data> value_type; 00578 00579 key2pair(InputType & in_) : in(in_) 00580 { 00581 if (!in.empty()) 00582 current.first = *in; 00583 } 00584 const value_type & operator * () { return current; } 00585 const value_type * operator -> () { return ¤t; } 00586 00587 key2pair & operator ++ () 00588 { 00589 ++in; 00590 if (!empty()) 00591 current.first = *in; 00592 00593 00594 return *this; 00595 } 00596 bool empty() const { return in.empty(); } 00597 }; 00598 00599 void run_stxxl_map_big(stxxl::int64 n, unsigned ops) 00600 { 00601 stxxl::stats * Stats = stxxl::stats::get_instance(); 00602 00603 std::pair<my_key, my_data> element; 00604 00605 memset(element.first.keybuf, 'a', KEY_SIZE); 00606 memset(element.second.databuf, 'b', DATA_SIZE); 00607 00608 stxxl::timer Timer; 00609 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops; 00610 stxxl::int64 i; 00611 //comp_type cmp_; 00612 00613 ran32State = 0xdeadbeef; 00614 00615 //stxxl::random_number32 myrand; 00616 00617 Timer.start(); 00618 00619 vector_type SortedSeq(n); 00620 const vector_type & CSortedSeq(SortedSeq); 00621 { 00622 rand_key_gen Gen(n, element.first); 00623 typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type; 00624 sorter_type Sorter(Gen, comp_type(), SORTER_MEM); 00625 typedef key2pair<sorter_type> key2pair_type; 00626 key2pair_type Key2Pair(Sorter); 00627 stxxl::stream::materialize(Key2Pair, SortedSeq.begin()); 00628 } 00629 00630 00631 Timer.stop(); 00632 Stats->reset(); 00633 00634 STXXL_MSG("Finished sorting input. Elapsed time: " << 00635 (Timer.mseconds() / 1000.) << " seconds."); 00636 00637 Timer.reset(); 00638 Timer.start(); 00639 00640 // bulk construction 00641 map_type Map(CSortedSeq.begin(), 00642 CSortedSeq.end(), 00643 NODE_CACHE_SIZE, LEAF_CACHE_SIZE, true); 00644 00645 Timer.stop(); 00646 00647 00648 STXXL_MSG("Records in map: " << Map.size()); 00649 STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) << 00650 " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00651 00652 using std::cout; 00653 Map.print_statistics(cout); 00654 Map.reset_statistics(); 00655 std::cout << *Stats; 00656 Stats->reset(); 00658 Timer.reset(); 00659 00660 00661 Map.disable_prefetching(); 00662 00663 Timer.start(); 00664 00665 for (i = 0; i < n_inserts; ++i) 00666 { 00667 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00668 rand_key(i, element.first); 00669 Map.insert(element); 00670 } 00671 00672 Timer.stop(); 00673 00674 STXXL_MSG("Records in map: " << Map.size()); 00675 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) << 00676 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00677 00678 Map.print_statistics(cout); 00679 Map.reset_statistics(); 00680 00681 std::cout << *Stats; 00682 Stats->reset(); 00683 00685 00686 00688 Timer.reset(); 00689 00690 00691 const map_type & CMap(Map); // const map reference 00692 00693 Timer.start(); 00694 00695 for (i = 0; i < n_locates; ++i) 00696 { 00697 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00698 rand_key(i, element.first); 00699 CMap.lower_bound(element.first); 00700 } 00701 00702 Timer.stop(); 00703 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) << 00704 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00705 00706 Map.print_statistics(cout); 00707 Map.reset_statistics(); 00708 std::cout << *Stats; 00709 Stats->reset(); 00710 00712 Timer.reset(); 00713 00714 Map.enable_prefetching(); 00715 00716 Timer.start(); 00717 00718 stxxl::int64 n_scanned = 0; //, skipped_qieries = 0; 00719 00720 for (i = 0; i < n_range_queries; ++i) 00721 { 00722 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00723 rand_key(i, element.first); 00724 my_key begin_key = element.first; 00725 map_type::const_iterator begin = CMap.lower_bound(element.first); 00726 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00727 rand_key(i, element.first); 00728 map_type::const_iterator beyond = CMap.lower_bound(element.first); 00729 if (element.first < begin_key) 00730 std::swap(begin, beyond); 00731 00732 /* 00733 STXXL_MSG("Looking "<<element.first<<" scanned: "<<n_scanned); 00734 00735 if(beyond==CMap.end()) 00736 { 00737 STXXL_MSG("Upper bound "<<"END"); 00738 } 00739 else 00740 { 00741 STXXL_MSG("Upper bound "<<((element.first>begin_key)?element.first:begin_key)); 00742 }*/ 00743 00744 while (begin != beyond) 00745 { 00746 ++n_scanned; 00747 ++begin; 00748 } 00749 00750 if (n_scanned >= SCAN_LIMIT(n)) 00751 { 00752 ++i; 00753 break; 00754 } 00755 } 00756 00757 n_range_queries = i; 00758 00759 Timer.stop(); 00760 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) << 00761 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) << 00762 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned); 00763 00764 Map.print_statistics(cout); 00765 Map.reset_statistics(); 00766 00767 std::cout << *Stats; 00768 Stats->reset(); 00769 00771 ran32State = 0xdeadbeef; 00772 memset(element.first.keybuf, 'a', KEY_SIZE); 00773 memset(element.second.databuf, 'b', DATA_SIZE); 00774 00775 Timer.reset(); 00776 Map.disable_prefetching(); 00777 00778 Timer.start(); 00779 00780 for (i = n_deletes; i > 0; --i) 00781 { 00782 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00783 rand_key(i, element.first); 00784 Map.erase(element.first); 00785 } 00786 00787 Timer.stop(); 00788 STXXL_MSG("Records in map: " << Map.size()); 00789 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) << 00790 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00791 00792 Map.print_statistics(cout); 00793 Map.reset_statistics(); 00794 std::cout << *Stats; 00795 Stats->reset(); 00796 } 00797 00799 00800 typedef AMI_STREAM<el_t> stream_t; 00801 00802 char dummy; 00803 00804 class MyFilter { 00805 public: 00806 bool operator () (const el_t & v) const 00807 { 00808 dummy += v.key_.keybuf[0]; // touch element 00809 return true; 00810 } 00811 }; 00812 00813 00814 void run_tpie_btree_big(stxxl::int64 n, unsigned ops) 00815 { 00816 el_t element; 00817 00818 memset(element.key_.keybuf, 'a', KEY_SIZE); 00819 memset(element.data_.databuf, 'b', DATA_SIZE); 00820 00821 // Log debugging info from the application, but not from the library. 00822 tpie_log_init(TPIE_LOG_APP_DEBUG); 00823 MM_manager.set_memory_limit(TOTAL_CACHE_SIZE); 00824 MM_manager.enforce_memory_limit(); 00825 00826 stream_t * is = new stream_t; 00827 00828 stxxl::timer Timer; 00829 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops; 00830 stxxl::int64 i; 00831 //comp_type cmp_; 00832 00833 ran32State = 0xdeadbeef; 00834 00835 //stxxl::random_number32 myrand; 00836 00837 Timer.start(); 00838 00839 00840 { 00841 rand_key_gen Gen(n, element.key_); 00842 typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type; 00843 sorter_type Sorter(Gen, comp_type(), SORTER_MEM); 00844 //typedef key2pair<sorter_type> key2pair_type; 00845 //key2pair_type Key2Pair(Sorter); 00846 while (!Sorter.empty()) 00847 { 00848 is->write_item(*Sorter); 00849 ++Sorter; 00850 } 00851 } 00852 00853 Timer.stop(); 00854 00855 00856 STXXL_MSG("Finished sorting input. Elapsed time: " << 00857 (Timer.mseconds() / 1000.) << " seconds."); 00858 00859 00860 Timer.reset(); 00861 Timer.start(); 00862 00863 // bulk construction 00864 u_btree_t * u_btree; 00865 AMI_btree_params params; 00866 params.node_block_factor = NODE_BLOCK_SIZE / 4096; 00867 params.leaf_block_factor = LEAF_BLOCK_SIZE / 4096; 00868 params.leaf_cache_size = LEAF_CACHE_SIZE / LEAF_BLOCK_SIZE; 00869 params.node_cache_size = NODE_CACHE_SIZE / NODE_BLOCK_SIZE; 00870 00871 u_btree = new u_btree_t(params); 00872 00873 using std::cout; 00874 using std::cerr; 00875 00876 if (!u_btree->is_valid()) { 00877 cerr << "Error initializing btree. Aborting.\n"; 00878 delete u_btree; 00879 exit(1); 00880 } 00881 00882 if (u_btree->load_sorted(is, 1.0, 1.0) != AMI_ERROR_NO_ERROR) 00883 cerr << "Error during bulk loading.\n"; 00884 00885 00886 Timer.stop(); 00887 00888 STXXL_MSG("Records in map: " << u_btree->size()); 00889 STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) << 00890 " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00891 00892 00894 Timer.reset(); 00895 00896 00897 Timer.start(); 00898 00899 for (i = 0; i < n_inserts; ++i) 00900 { 00901 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00902 rand_key(i, element.key_); 00903 u_btree->insert(element); 00904 } 00905 00906 Timer.stop(); 00907 00908 STXXL_MSG("Records in map: " << u_btree->size()); 00909 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) << 00910 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00911 00912 00914 Timer.reset(); 00915 00916 00917 Timer.start(); 00918 00919 00920 el_t result; 00921 for (i = 0; i < n_locates; ++i) 00922 { 00923 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00924 rand_key(i, element.key_); 00925 u_btree->succ(element.key_, result); 00926 } 00927 00928 Timer.stop(); 00929 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) << 00930 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00931 00932 00934 Timer.reset(); 00935 00936 00937 Timer.start(); 00938 00939 stxxl::int64 n_scanned = 0; //, skipped_qieries = 0; 00940 MyFilter filter; 00941 00942 for (i = 0; i < n_range_queries; ++i) 00943 { 00944 rand_key(i, element.key_); 00945 my_key begin_key = element.key_; 00946 rand_key(i, element.key_); 00947 if (element.key_ < begin_key) 00948 n_scanned += u_btree->range_query(element.key_, begin_key, NULL, filter); 00949 00950 else 00951 n_scanned += u_btree->range_query(begin_key, element.key_, NULL, filter); 00952 00953 00954 if (n_scanned >= SCAN_LIMIT(n)) 00955 { 00956 ++i; 00957 break; 00958 } 00959 } 00960 00961 n_range_queries = i; 00962 00963 Timer.stop(); 00964 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) << 00965 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) << 00966 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned); 00967 00968 00970 ran32State = 0xdeadbeef; 00971 memset(element.key_.keybuf, 'a', KEY_SIZE); 00972 memset(element.data_.databuf, 'b', DATA_SIZE); 00973 00974 Timer.reset(); 00975 00976 Timer.start(); 00977 00978 for (i = n_deletes; i > 0; --i) 00979 { 00980 //element.first.keybuf[KEYPOS] = letters[VALUE]; 00981 rand_key(i, element.key_); 00982 u_btree->erase(element.key_); 00983 } 00984 00985 Timer.stop(); 00986 STXXL_MSG("Records in map: " << u_btree->size()); 00987 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) << 00988 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 00989 } 00990 00991 void run_bdb_btree_big(stxxl::int64 n, unsigned ops) 00992 { 00993 const char * filename = BDB_FILE; 00994 00995 my_key key1_storage; 00996 my_data data1_storage; 00997 00998 #ifdef BDB_BULK_SCAN 00999 int * bulk_buffer = new int[logbufsize / sizeof(int)]; 01000 #endif 01001 01002 unlink(filename); 01003 01004 memset(key1_storage.keybuf, 'a', KEY_SIZE); 01005 memset(data1_storage.databuf, 'b', DATA_SIZE); 01006 01007 01008 Db db(NULL, 0); // Instantiate the Db object 01009 01010 try { 01011 // here we start with the tests 01012 Dbt key1(key1_storage.keybuf, KEY_SIZE); 01013 Dbt data1(data1_storage.databuf, DATA_SIZE); 01014 01015 stxxl::timer Timer; 01016 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops; 01017 stxxl::int64 i; 01018 //comp_type cmp_; 01019 01020 ran32State = 0xdeadbeef; 01021 01022 Timer.start(); 01023 01024 vector_type SortedSeq(n); 01025 //const vector_type & CSortedSeq(SortedSeq); 01026 { 01027 rand_key_gen Gen(n, key1_storage); 01028 typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type; 01029 sorter_type Sorter(Gen, comp_type(), SORTER_MEM); 01030 typedef key2pair<sorter_type> key2pair_type; 01031 key2pair_type Key2Pair(Sorter); 01032 stxxl::stream::materialize(Key2Pair, SortedSeq.begin()); 01033 } 01034 01035 Timer.stop(); 01036 01037 STXXL_MSG("Finished sorting input. Elapsed time: " << (Timer.mseconds() / 1000.) << " seconds."); 01038 01039 db.set_errfile(stderr); 01040 db.set_pagesize(pagesize); 01041 db.set_cachesize(0, cachesize, 10); 01042 01043 STXXL_MSG("BDB cache size set."); 01044 01045 // Open the database 01046 db.open(NULL, // Transaction pointer 01047 filename, // Database file name 01048 NULL, // Optional logical database name 01049 DB_BTREE, // Database access method 01050 DB_CREATE, // Open flags 01051 0); // File mode (using defaults) 01052 01053 db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR); 01054 01055 Timer.reset(); 01056 Timer.start(); 01057 01058 // DBD does not have bulk construction 01059 // however inserting in sorted order might help 01060 // to improve performance 01061 vector_type::const_iterator cit = SortedSeq.begin(); 01062 for (i = 0; i < n; ++i, ++cit) 01063 { 01064 key1_storage = cit->first; 01065 db.put(NULL, &key1, &data1, DB_NOOVERWRITE); 01066 } 01067 01068 Timer.stop(); 01069 01070 DB_BTREE_STAT * dbstat; 01071 db.stat(NULL, &dbstat, 0); 01072 STXXL_MSG("Records in map: " << dbstat->bt_ndata); 01073 STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) << 01074 " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 01075 01076 db.stat_print(0); 01077 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 01079 01080 01081 Timer.reset(); 01082 Timer.start(); 01083 01084 for (i = 0; i < n_inserts; ++i) 01085 { 01086 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 01087 rand_key(i, key1_storage); 01088 db.put(NULL, &key1, &data1, DB_NOOVERWRITE); 01089 } 01090 01091 Timer.stop(); 01092 db.stat(NULL, &dbstat, 0); 01093 STXXL_MSG("Records in map: " << dbstat->bt_ndata); 01094 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) << 01095 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 01096 01097 db.stat_print(0); 01098 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 01099 01101 Timer.reset(); 01102 Timer.start(); 01103 01104 01105 Dbc * cursorp; 01106 db.cursor(NULL, &cursorp, 0); 01107 01108 for (i = 0; i < n_locates; ++i) 01109 { 01110 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 01111 rand_key(i, key1_storage); 01112 Dbt keyx(key1_storage.keybuf, KEY_SIZE); 01113 Dbt datax(data1_storage.databuf, DATA_SIZE); 01114 01115 cursorp->get(&keyx, &datax, DB_SET_RANGE); 01116 } 01117 01118 Timer.stop(); 01119 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) << 01120 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 01121 01122 db.stat_print(0); 01123 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 01124 01126 Timer.reset(); 01127 01128 Timer.start(); 01129 01130 stxxl::int64 n_scanned = 0; 01131 01132 for (i = 0; i < n_range_queries; ++i) 01133 { 01134 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 01135 rand_key(i, key1_storage); 01136 my_key last_key = key1_storage; 01137 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 01138 rand_key(i, key1_storage); 01139 if (last_key < key1_storage) 01140 std::swap(last_key, key1_storage); 01141 01142 01143 //STXXL_MSG("Looking "<<key1_storage<<" scanned: "<<n_scanned); 01144 //STXXL_MSG("Upper bound "<<last_key); 01145 01146 Dbt keyx(key1_storage.keybuf, KEY_SIZE); 01147 #ifdef BDB_BULK_SCAN 01148 Dbt datax(bulk_buffer, DATA_SIZE); 01149 datax.set_ulen(logbufsize); 01150 datax.set_flags(DB_DBT_USERMEM); 01151 #else 01152 Dbt datax(data1_storage.databuf, DATA_SIZE); 01153 #endif 01154 01155 01156 #ifdef BDB_BULK_SCAN 01157 if (cursorp->get(&keyx, &datax, DB_SET_RANGE | DB_MULTIPLE_KEY) == DB_NOTFOUND) 01158 continue; 01159 01160 01161 do 01162 { 01163 DbMultipleKeyDataIterator BulkIterator(datax); 01164 Dbt key1, data1; 01165 while (BulkIterator.next(key1, data1) && 01166 *((my_key *)key1.get_data()) <= last_key) 01167 { 01168 ++n_scanned; 01169 //STXXL_MSG("Result "<<*((my_key *)key1.get_data())); 01170 } 01171 if (cursorp->get(&keyx, &datax, DB_NEXT | DB_MULTIPLE_KEY) == DB_NOTFOUND) 01172 break; 01173 01174 01175 if (*((my_key *)keyx.get_data()) > last_key) 01176 { 01177 break; 01178 } 01179 } while (1); 01180 01181 #else 01182 if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND) 01183 continue; 01184 01185 while (*((my_key *)keyx.get_data()) <= last_key) 01186 { 01187 ++n_scanned; 01188 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND) 01189 break; 01190 } 01191 #endif 01192 01193 01194 if (n_scanned >= SCAN_LIMIT(n)) 01195 { 01196 ++i; 01197 break; 01198 } 01199 } 01200 01201 n_range_queries = i; 01202 01203 Timer.stop(); 01204 if (cursorp != NULL) 01205 cursorp->close(); 01206 01207 01208 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) << 01209 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) << 01210 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned); 01211 01212 db.stat_print(0); 01213 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 01214 01216 01217 ran32State = 0xdeadbeef; 01218 memset(key1_storage.keybuf, 'a', KEY_SIZE); 01219 01220 Timer.reset(); 01221 Timer.start(); 01222 01223 for (i = 0; i < n_deletes; ++i) 01224 { 01225 //key1_storage.keybuf[KEYPOS] = letters[VALUE]; 01226 rand_key(i, key1_storage); 01227 Dbt keyx(key1_storage.keybuf, KEY_SIZE); 01228 db.del(NULL, &keyx, 0); 01229 } 01230 01231 Timer.stop(); 01232 db.stat(NULL, &dbstat, 0); 01233 STXXL_MSG("Records in map: " << dbstat->bt_ndata); 01234 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) << 01235 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec"); 01236 01237 db.stat_print(0); 01238 db.get_env()->memp_stat_print(DB_STAT_CLEAR); 01239 01240 db.close(0); 01241 } 01242 catch (DbException & e) 01243 { 01244 STXXL_ERRMSG("DbException happened"); 01245 } 01246 catch (std::exception & e) 01247 { 01248 STXXL_ERRMSG("std::exception happened"); 01249 } 01250 01251 unlink(filename); 01252 01253 #ifdef BDB_BULK_SCAN 01254 delete[] bulk_buffer; 01255 #endif 01256 } 01257 01258 01259 int main(int argc, char * argv[]) 01260 { 01261 STXXL_MSG("stxxl::map Real Node block size: " << REAL_NODE_BLOCK_SIZE << " bytes"); 01262 STXXL_MSG("stxxl::map Real Leaf block size: " << REAL_LEAF_BLOCK_SIZE << " bytes"); 01263 STXXL_MSG("stxxl::map Node max elements : " << REAL_NODE_MELEMENTS); 01264 STXXL_MSG("stxxl::map Leaf max elements : " << REAL_LEAF_MELEMENTS); 01265 #ifdef STXXL_DIRECT_IO_OFF 01266 STXXL_MSG("STXXL_DIRECT_IO_OFF is defined"); 01267 #else 01268 STXXL_MSG("STXXL_DIRECT_IO_OFF is NOT defined"); 01269 #endif 01270 01271 if (argc < 3) 01272 { 01273 STXXL_MSG("Usage: " << argv[0] << " version #ops"); 01274 STXXL_MSG("\t version = 1: test stxxl map"); 01275 STXXL_MSG("\t version = 2: test Berkeley DB btree"); 01276 STXXL_MSG("\t version = 3: big test stxxl map"); 01277 STXXL_MSG("\t version = 4: big test Berkeley DB btree"); 01278 STXXL_MSG("\t version = 5: big test TPIE btree"); 01279 return -1; 01280 } 01281 01282 init(); 01283 01284 int version = atoi(argv[1]); 01285 stxxl::int64 ops = stxxl::atoint64(argv[2]); 01286 01287 STXXL_MSG("Running version : " << version); 01288 STXXL_MSG("Operations to perform: " << ops); 01289 STXXL_MSG("Btree cache size : " << TOTAL_CACHE_SIZE << " bytes"); 01290 STXXL_MSG("Leaf block size : " << LEAF_BLOCK_SIZE << " bytes"); 01291 01292 01293 switch (version) 01294 { 01295 case 1: 01296 run_stxxl_map(ops); 01297 break; 01298 case 2: 01299 run_bdb_btree(ops); 01300 break; 01301 case 3: 01302 run_stxxl_map_big(ops, 100000); 01303 break; 01304 case 4: 01305 run_bdb_btree_big(ops, 100000); 01306 break; 01307 case 5: 01308 run_tpie_btree_big(ops, 100000); 01309 break; 01310 default: 01311 STXXL_MSG("Unsupported version " << version); 01312 } 01313 }