/* #define NDEBUG */ #include #include #include #include #include #include #include #include #include "hash_function_library.hpp" // Initialization of constants int p = 7919; // the prime for our hash function int r = 11; // random int in \in F_p char T[] = "abcabcabcdabc"; // text /* char P[] = "abcabc"; // pattern */ char P[] = "abcabcabcabcabcabcddddddddddddddddddabc"; // pattern std::vector prehashed_values; std::vector shortest_periods; std::map prehashed_indices; // https://stackoverflow.com/questions/18620942/find-the-smallest-period-of-input-string-in-on // BEGIN stolen code std::vector calculateLPS(char * pat, int m) { /* int[] lps = new int[pat.length()]; */ int len = 0; int i = 1; std::vector lps = {0}; lps.resize(m); while (i < m) { if (pat[i] == pat[len]) { len++; lps[i] = len; i++; } else { if (len != 0) { len = lps[len - 1]; } else { lps[i] = len; i++; } } } return lps; } // calculates the length of the shortest period int len_of_shortest_period (char * pattern, int m) { std::vector lps = calculateLPS(pattern, m); //start at the end of the string int i = lps.size()-1; while (lps[i] != 0) { //shift back i -= lps[i]; } return i+1; } // END class porat_process { // TODO: use a different hash function. This one is BAD public: // we use the polynomial fingerprint void increment_hash (char c) { prev_pow = prev_pow*r % p; hash = (hash + c*prev_pow) % p; l++; } void subtract_hash (long pre_fingerprint, int i) { // i is the number of removed elements // pre_fingerprint is the fingerprint of those previous elements hash = (hash - pre_fingerprint)/(long)pow(r, i); // we are guaranteed that integer division will return a whole number prev_pow /= (long)pow(r, i); l -= i; // TODO: untested, especially prev_pow } bool should_spawn_child() { if (l == next_i_squared) { next_i_squared <<= 1; return true; } else return false; } long get_fingerprint() { return hash; } private: long prev_pow = 1; int l = 0; int next_i_squared = 1; long hash = 0; }; void print_map(std::string_view comment, const std::map& m) { std::cout << comment; for (const auto& [key, value] : m) { std::cout << key << " = " << value << "; "; } std::cout << "\n"; } void print_vector(std::string_view comment, const std::vector& m) { std::cout << comment << "["; for (const auto& a : m) { std::cout << a << ", "; } std::cout << "]\n"; } void print_vector_long(std::string_view comment, const std::vector& m) { std::cout << comment << "["; for (const auto& a : m) { std::cout << a << ", "; } std::cout << "]\n"; } int main() { int n = sizeof(T)/sizeof(char) - 1; int m = sizeof(P)/sizeof(char) - 1; { int i = 1; while (i < m) { // calculate shortest period length int period = len_of_shortest_period(P, i); shortest_periods.push_back(period); // calculate fingerprint of period if (!prehashed_indices.contains(period)) { porat_process process; for (int ii = 0; ii < period; ii++){ std::cout << P[ii]; process.increment_hash(P[ii]); } /* prehashed_indices[period] = process.get_fingerprint(); */ prehashed_values.push_back(process.get_fingerprint()); std::cout << period << " " << prehashed_values.size() << std::endl; prehashed_indices[period] = prehashed_values.size()-1; } i <<= 1; } if (i != m) { // so i>m, which means we skipped exactly m // calculate shortest period length int period = len_of_shortest_period(P, m); shortest_periods.push_back(period); // calculate fingerprint of period if (!prehashed_indices.contains(period)) { porat_process process; std::cout << "["; for (int ii = 0; ii < period; ii++) { std::cout << P[ii]; process.increment_hash(P[ii]); } std::cout << "]\n"; prehashed_values.push_back(process.get_fingerprint()); std::cout << period << " " << prehashed_values.size() << std::endl; prehashed_indices[period] = prehashed_values.size()-1; } /* // calculate fingerprint of phi(P_{2^i}) */ /* while (ii < m) { */ /* process.increment_hash(P[ii]); */ /* ii++; */ /* } */ /* prehashed_values.push_back(process.get_fingerprint()); */ } } { std::cout << P << std::endl; int i = 0; while ((1 << i) < m) { std::cout << "pattern: "; for (int ii = 0; ii < (1 << i); ii++) std::cout << P[ii]; std::cout << std::endl; std::cout << "period: "; for (int ii = 0; ii < shortest_periods[i]; ii++) std::cout << P[ii]; std::cout << std::endl; std::cout << "|prefix_{P_" << (1 << i) << "}| = " << shortest_periods[i] << std::endl; std::cout << prehashed_values[prehashed_indices[shortest_periods[i]]] << std::endl; i++; } if ((1 << i) != m) { // so i>m, which means we skipped exactly m std::cout << "pattern: "; for (int ii = 0; ii < m; ii++) std::cout << P[ii]; std::cout << std::endl; std::cout << "period: "; for (int ii = 0; ii < shortest_periods[i]; ii++) std::cout << P[ii]; std::cout << std::endl; std::cout << "|prefix_{P_" << m << "}| = " << shortest_periods[i] << std::endl; std::cout << prehashed_values[prehashed_indices[shortest_periods[i]]] << std::endl; std::cout << prehashed_values[0] << std::endl; std::cout << prehashed_values[1] << std::endl; std::cout << prehashed_values[2] << std::endl; std::cout << prehashed_values[3] << std::endl; std::cout << prehashed_values[4] << std::endl; } } print_map("Indices map: ", prehashed_indices); print_vector_long("Values vector: ", prehashed_values); print_vector("Periods vector: ", shortest_periods); return EXIT_SUCCESS; }