/* #define NDEBUG */
#include "hash_table.hpp"

// NOTE(review): the standard header names below were reconstructed from the
// identifiers this file uses; confirm against the original include list.
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

using namespace std;

// Longest word (in bytes) that read_word() keeps; longer words are cropped.
const int max_string_length = 256;

// D = ceil(max_string_length / 8): the number of 64-bit words needed to cover
// max_string_length bytes. It is also the number of 64-bit seed values loaded.
const int D = (max_string_length+7) >> 3;

My_string * read_word(const char * book, size_t * reading_progress);

// NOTE(review): main() does not call speedtest_using_hash_table(); the
// program currently exits immediately.
int main() {
  return EXIT_SUCCESS;
}

// Converts the distance between two clock() samples to seconds. The result is
// a float so the printed timings keep the original formatting.
static float elapsed_seconds(clock_t from, clock_t to) {
  return static_cast<float>(to - from) / CLOCKS_PER_SEC;
}

/**
 * Times the stages of hashing every word of a book, first through a
 * Hash_table (rehashing whenever the table asks for it) and then through a
 * bare Hash_function, and prints the word counts and timings to stdout.
 *
 * @param title      Currently unused.
 * @param algorithm  Currently unused. NOTE(review): the body passes the
 *                   global `hash_string` to Hash_function instead; confirm
 *                   whether `algorithm` was meant to be used there.
 * @param seed_file  Path of a text file of whitespace-separated 16-bit
 *                   numbers used as the seed. When empty, the original
 *                   default "random_org_16_bit_numbers.txt" is used.
 *
 * Prints a message and returns early when the seed file holds fewer than
 * 4*D numbers or when the book file cannot be opened.
 */
void speedtest_using_hash_table(string title, uint64_t(* algorithm)(const My_string *, const uint64_t *, uint64_t *, size_t), string seed_file) {
  (void)title;
  (void)algorithm;

  const clock_t clock_before_seed = clock();

  // load seed generated by random.org
  const string filename =
      seed_file.empty() ? string("random_org_16_bit_numbers.txt") : seed_file;
  std::ifstream seed_stream(filename);

  // Four 16-bit numbers make up one 64-bit seed value.
  constexpr size_t seed_part_count = static_cast<size_t>(D) << 2;
  uint16_t seed_parts[seed_part_count];
  size_t count = 0;
  uint16_t seed_part;
  // Stop as soon as the seed is large enough for max_string_length.
  while (count < seed_part_count && seed_stream >> seed_part) {
    seed_parts[count] = seed_part;
    count++;
  }
  if (count != seed_part_count) {
    // Also reached when the seed file could not be opened (count stays 0).
    cout << "The current seed is not large enough. Please extend it by appending the numbers at https://www.random.org/integers/?num=10000&min=0&max=65535&col=1&base=10&format=plain&rnd=new." << '\n';
    return;
  }

  // Get 64 bit seed from 16 bit seed
  uint64_t seed[D];
  static_assert(sizeof(seed) == sizeof(seed_parts),
                "the 16-bit parts must exactly fill the 64-bit seed");
  memcpy(seed, seed_parts, sizeof(seed));

  const clock_t clock_before_hash_table = clock();

  // l is handed to every Hash_function and grows by one per rehash.
  // NOTE(review): the original comment spoke of an initial table of up to
  // 1024 (2^10) distinct words, yet l starts at 1 — confirm the intended value.
  size_t l = 1;
  Hash_table ht = Hash_table(new Hash_function(seed, hash_string, l, D));

  const clock_t clock_before_loading_book = clock();

  // choose a book
  /* const char * book_path = "genji_monogatari_english.txt"; */
  /* const char * book_path = "Child_of_Light.txt"; */
  /* const char * book_path = "the_adventures_of_sherlock_holmes.txt"; */
  /* const char * book_path = "dracula.txt"; */
  const char * book_path = "the_complete_works_of_william_shakespeare.txt";
  std::ifstream ifs(book_path);
  if (!ifs) {
    cout << "Could not open the book file: " << book_path << '\n';
    return;
  }
  string book_string( (std::istreambuf_iterator<char>(ifs) ),
                      (std::istreambuf_iterator<char>()    ) );
  const char * book = book_string.c_str();
  const size_t book_length = book_string.size();

  const clock_t clock_before_reading = clock();

  std::vector<My_string *> words;
  size_t reading_progress = 0;
  // reading_progress is the index of the last consumed character, so keep
  // going while at least one character follows it. Written as "+ 1 <" so an
  // empty book cannot underflow the unsigned bound.
  while (reading_progress + 1 < book_length) {
    My_string * word = read_word(book, &reading_progress);
    if (word->size > 0) {
      words.push_back(word);
    } else {
      delete word;  // empty result at the end of the book: do not leak it
    }
  }

  const clock_t clock_after_reading = clock();

  size_t count_words = 0;
  for (My_string * word : words) {
    count_words++;
    ht.hash(word);
    if (ht.is_time_for_rehash()) {
      // Per the original author's note, incrementing l doubles the universe
      // that is hashed to.
      l++;
      ht.rehash(new Hash_function(seed, hash_string, l, D));
    }
  }

  const clock_t clock_after_hashing = clock();

  Hash_function hf = Hash_function(seed, hash_string, 10, D);

  const clock_t clock_after_init_hash_function = clock();

  // Accumulate the hash values so the results are used (the sum is printed).
  uint64_t hashed_value = 0;
  for (My_string * word : words) {
    hashed_value += hf.hash(word);
  }

  const clock_t clock_after_hashing_only = clock();

  cout << "Sum of the hashed values (after overflow): " << hashed_value << '\n';
  cout << "Nr of words: " << count_words << '\n';
  cout << "Distinct words: " << ht.get_distict_words() << '\n';
  cout << "Time: Load seed: " << elapsed_seconds(clock_before_seed, clock_before_hash_table) << '\n';
  cout << "Time: Init hash table: " << elapsed_seconds(clock_before_hash_table, clock_before_loading_book) << '\n';
  cout << "Time: Load book: " << elapsed_seconds(clock_before_loading_book, clock_before_reading) << '\n';
  cout << "Time: Read: " << elapsed_seconds(clock_before_reading, clock_after_reading) << '\n';
  cout << "Time: Hash & Table: " << elapsed_seconds(clock_after_reading, clock_after_hashing) << '\n';
  cout << "Time: Total: " << elapsed_seconds(clock_before_seed, clock_after_hashing) << '\n';
  cout << "Time: Init hash function: " << elapsed_seconds(clock_after_hashing, clock_after_init_hash_function) << '\n';
  cout << "Time: Hash, no table: " << elapsed_seconds(clock_after_init_hash_function, clock_after_hashing_only) << '\n';
  cout.flush();

  for (My_string * word : words) delete word;
}

/**
 * Extracts the next word (a maximal run of alphanumeric characters) from a
 * NUL-terminated text, lowercasing every character of it.
 *
 * @param book              NUL-terminated text to scan.
 * @param reading_progress  In/out cursor: the index of the last character
 *                          already consumed. Scanning starts at the character
 *                          after it. On return it is the index of the
 *                          delimiter that ended the word, or of the last
 *                          character of the text when the end was reached.
 * @return A heap-allocated My_string that the caller must delete. Its length
 *         is 0 when no alphanumeric character was found before the end of
 *         the text. Words longer than max_string_length are cropped.
 *
 * NOTE(review): the character at the incoming cursor position is never
 * examined, so a caller starting at 0 skips book[0].
 */
My_string * read_word(const char * book, size_t * reading_progress) {
  char word[max_string_length];
  size_t word_length = 0;

  while (book[*reading_progress+1] != '\0') {
    (*reading_progress)++;
    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    const unsigned char c = static_cast<unsigned char>(book[*reading_progress]);

    if (isalnum(c)) {
      // crop words longer than the max_string_length
      if (word_length < static_cast<size_t>(max_string_length)) {
        // Lowercase every character, including the first one of the word.
        word[word_length] = static_cast<char>(tolower(c));
        word_length++;
      }
    } else if (word_length > 0) {
      // First delimiter after a started word ends it.
      return new My_string(word, word_length);
    }
  }

  // End of text: return whatever was collected (possibly nothing).
  return new My_string(word, word_length);
}