Bachelors_Thesis_Code/hash_function_library.cpp

107 lines
3.4 KiB
C++
Raw Normal View History

2021-11-14 14:35:05 +01:00
#include "hash_function_library.hpp"
// Returns the current value of `fingerprint`, converted to int.
int Rolling_hash::get_fingerprint() {
    const int fingerprint_as_int = static_cast<int>(fingerprint);
    return fingerprint_as_int;
}
/**
 * Builds a bitwise Rabin-Karp rolling hash over a window of `length` bytes.
 *
 * The window (`elements`) starts out filled with `length` zero bytes, and
 * `xi` is set to 2^(8*length) mod p. That is the weight the outgoing bit has
 * right after the fingerprint was doubled in slide_bit(): the window holds
 * 8*length bits, so its oldest bit sits at position 8*length - 1 before the
 * doubling.
 *
 * @param p      Modulus of the hash (expected to be prime). Must satisfy
 *               0 < p < 2^31 so that doubling a reduced fingerprint cannot
 *               overflow 32 bits.
 * @param length Window size in bytes.
 * @throws std::invalid_argument if p is 0 (every reduction would divide by zero).
 * @throws std::overflow_error   if p >= 2^31.
 */
Rabin_karp::Rabin_karp(uint32_t p, size_t length) : p(p) {
    if (p == 0)
        throw std::invalid_argument("A Rabin-Karp hash function has been initialized with modulus 0.");
    // Shift an unsigned one: `1 << 31` on a signed int overflows.
    if (p >= (uint32_t{1} << 31))
        throw std::overflow_error("A Rabin-Karp hash function has been initialized with too large a prime, such that we will encounter overflow errors.");

    // Fill the window with zero bytes and, in the same pass, build
    // 2^(8*length) mod p one byte (factor 2^8) at a time. Reducing after every
    // step keeps the value below 2^31, so `<< 8` always fits in 64 bits no
    // matter how large `length` is.
    uint64_t weight = 1 % p;
    for (size_t i = 0; i < length; i++) {
        elements.push(0);
        weight = (weight << 8) % p;
    }
    xi = static_cast<uint32_t>(weight);
}
/**
 * Slides the window forward by one byte.
 *
 * The front element of `elements` is removed, `c_in` is appended, and the
 * fingerprint is updated bit by bit via slide_bit(), pairing bit k of the
 * incoming byte with bit k of the outgoing byte, most significant bit first.
 *
 * @param c_in The byte entering the window.
 */
void Rabin_karp::slide(unsigned char c_in) {
    const unsigned char c_out = elements.front();
    elements.pop();
    elements.push(c_in);

    // The counter has to be signed: with an unsigned counter `bit >= 0` is
    // always true, so the loop would never end and would shift by a wrapped,
    // huge amount after bit 0.
    for (int bit = 7; bit >= 0; --bit) {
        const bool bit_in = ((c_in >> bit) & 1) != 0;
        const bool bit_out = ((c_out >> bit) & 1) != 0;
        slide_bit(bit_in, bit_out);
    }
}
/**
 * Advances the fingerprint by a single bit:
 *     fingerprint = (2 * fingerprint - xi * bit_out + bit_in) mod p
 *
 * @param bit_in  The bit entering the window.
 * @param bit_out The bit leaving the window.
 *
 * Assumes the fingerprint is already reduced (0 <= fingerprint < p) on entry,
 * which holds as long as it starts at 0 and is only changed through this
 * function.
 */
void Rabin_karp::slide_bit(bool bit_in, bool bit_out) {
    const uint64_t modulus = static_cast<uint64_t>(p);

    // Work in 64 bits: the intermediate value can reach 3p, which does not fit
    // in 32 bits for p close to 2^31.
    uint64_t next = (static_cast<uint64_t>(fingerprint) << 1) + (bit_in ? 1u : 0u);

    // Subtract xi by adding its additive inverse mod p, so the value can never
    // drop below zero (an unsigned subtraction would wrap around).
    if (bit_out)
        next += modulus - static_cast<uint64_t>(xi) % modulus;

    // fast mod p: next < 3p + 1, so a few subtractions are enough. The test is
    // `>=` so that a value of exactly p is reduced to 0.
    while (next >= modulus)
        next -= modulus;

    fingerprint = static_cast<decltype(fingerprint)>(next);
}
// Stores the modulus `p` and the base `r`; no other work is done here.
Polynomial_fingerprint::Polynomial_fingerprint(int32_t p, int32_t r)
    : p(p),
      r(r)
{
}
/**
 * Appends the character `c` to the fingerprinted sequence.
 *
 * With n = elements.size() before the call, the fingerprint becomes
 *     (fingerprint + c * r^n) mod p
 * and `c` is appended to `elements`.
 *
 * The exponent is reduced modulo p - 1 (Fermat's little theorem), which is
 * valid when p is prime and r is not a multiple of p.
 *
 * @param c The character to append.
 */
void Polynomial_fingerprint::push(unsigned char c) {
    const int64_t signed_modulus = p;
    const uint64_t modulus = static_cast<uint64_t>(signed_modulus);
    // r reduced into [0, p), also for a negative r.
    const uint64_t base = static_cast<uint64_t>(((r % signed_modulus) + signed_modulus) % signed_modulus);

    // Integer square-and-multiply. std::pow works in floating point: it loses
    // precision once r^n exceeds 2^53, and converting a value that does not fit
    // into `unsigned int` is undefined. All operands here stay below 2^31, so
    // every product fits in 64 bits.
    uint64_t exponent = elements.size() % (modulus - 1);
    uint64_t power = 1 % modulus;
    uint64_t square = base;
    while (exponent > 0) {
        if (exponent & 1)
            power = power * square % modulus;
        square = square * square % modulus;
        exponent >>= 1;
    }

    const uint64_t current = static_cast<uint64_t>(fingerprint) % modulus;
    fingerprint = static_cast<decltype(fingerprint)>((current + c * power) % modulus);
    elements.push(c);
}
/**
 * Removes the `i` oldest characters from the fingerprinted sequence.
 *
 * push() gives the k-th stored character (counting from 0 at the front) the
 * weight r^k. Removing the first i characters therefore means subtracting
 * sum_{k < i} c_k * r^k and dividing the remainder by r^i, so that the new
 * front character again has weight r^0:
 *     fingerprint = (fingerprint - removed) * (r^i)^-1 mod p
 *
 * @param i Number of characters to drop from the front. If it exceeds the
 *          number of stored characters, everything is dropped.
 *
 * NOTE(review): multiplicative_inverse is defined elsewhere; this assumes it
 * returns the inverse of its first argument modulo its second and accepts an
 * already reduced integer argument. Confirm against its declaration.
 */
void Polynomial_fingerprint::shift(size_t i) {
    const int64_t signed_modulus = p;
    const uint64_t modulus = static_cast<uint64_t>(signed_modulus);
    // r reduced into [0, p), also for a negative r.
    const uint64_t base = static_cast<uint64_t>(((r % signed_modulus) + signed_modulus) % signed_modulus);

    // Never pop more than is stored; front()/pop() on an empty queue is undefined.
    const size_t count = i < elements.size() ? i : elements.size();

    // Keep a running r^k mod p instead of calling std::pow, which works in
    // floating point and overflows/loses precision for realistic exponents.
    uint64_t removed = 0;
    uint64_t power = 1 % modulus;
    for (size_t k = 0; k < count; ++k) {
        const unsigned char c = elements.front();
        elements.pop();
        removed = (removed + c * power) % modulus;
        power = power * base % modulus;
    }
    // Here power == r^count mod p.

    // Modular subtraction: add p first so the difference cannot go negative.
    const uint64_t current = static_cast<uint64_t>(fingerprint) % modulus;
    const uint64_t remaining = (current + modulus - removed) % modulus;

    // Normalise the inverse into [0, p) in case a negative representative is returned.
    const int64_t raw_inverse = multiplicative_inverse(static_cast<int32_t>(power), p);
    const uint64_t inverse = static_cast<uint64_t>(((raw_inverse % signed_modulus) + signed_modulus) % signed_modulus);

    fingerprint = static_cast<decltype(fingerprint)>(remaining * inverse % modulus);
}
/**
 * Pre-processes the pattern `P`.
 *
 * 1. Stores in `shortest_periods` the value of len_of_shortest_period for
 *    every prefix whose length is a power of two smaller than |P|, followed
 *    by the value for the full pattern.
 * 2. Pushes the pattern character by character and records the fingerprint
 *    (get_fingerprint()) of every prefix whose length is a power of two or
 *    one of the period lengths found in step 1, plus that of the full
 *    pattern. `prehashed_indices` maps a prefix length to the position of its
 *    fingerprint in `prehashed_values`.
 * 3. Stores the pattern length in `m`.
 *
 * @param P The pattern.
 * @param p Modulus of the fingerprint.
 * @param r Base of the fingerprint.
 */
Porat_porat_polynomial_fingerprint::Porat_porat_polynomial_fingerprint(std::string P, int32_t p, int32_t r) : p(p), r(r) {
    // Calculate the shortest periods for all prefixes of length 2^i (and the full pattern)
    {
        const char * P_c_str = P.c_str();
        for (size_t prefix_length = 1; prefix_length < P.length(); prefix_length <<= 1)
            shortest_periods.push_back(len_of_shortest_period(P_c_str, prefix_length));
        shortest_periods.push_back(len_of_shortest_period(P_c_str, P.length()));
    }
    // Calculate the fingerprints of all prefixes of length 2^i, and of the shortest periods found before
    {
        size_t next_2_exponent = 1;
        size_t shortest_period_index = 0;
        for (size_t i = 0; i < P.length(); i++) {
            // Exactly i characters have been pushed so far, so the current
            // fingerprint is the one of the prefix of length i.
            bool record_prefix = false;

            if (i == next_2_exponent) {
                record_prefix = true;
                // Move on to the next power of two; otherwise only length 1 would ever match.
                next_2_exponent <<= 1;
            }

            // Consume every period length that is <= i, so that duplicates and
            // periods that coincide with a power of two cannot stall the index.
            // Assumes shortest_periods is non-decreasing, which holds if
            // len_of_shortest_period returns the true shortest period (a
            // period of a string is also a period of each of its prefixes).
            while (shortest_period_index < shortest_periods.size() && shortest_periods[shortest_period_index] <= i) {
                if (shortest_periods[shortest_period_index] == i)
                    record_prefix = true;
                shortest_period_index++;
            }

            if (record_prefix && !prehashed_indices.contains(i)) {
                prehashed_values.push_back(get_fingerprint());
                prehashed_indices[i] = prehashed_values.size()-1;
            }

            // Every character has to be pushed, including those at recorded
            // positions; skipping them would corrupt all later fingerprints.
            push(P[i]);
        }
        // We have now pushed the entire pattern
        prehashed_values.push_back(get_fingerprint());
        prehashed_indices[P.length()] = prehashed_values.size()-1;
    }
    // Save the length of the pattern before we throw the pattern away
    m = P.length();
}
// Placeholder: not implemented yet. The body is empty, so a call changes no
// state and `c` is ignored. The constructor of this class calls push() while
// walking the pattern, so until this is implemented those calls have no effect.
void Porat_porat_polynomial_fingerprint::push(unsigned char c){
// TODO: implement
}
// Placeholder: not implemented yet. The body is empty, so a call changes no
// state and `i` is ignored.
void Porat_porat_polynomial_fingerprint::shift(size_t i){
// TODO: implement
}