107 lines
3.4 KiB
C++
107 lines
3.4 KiB
C++
#include "hash_function_library.hpp"
|
|
|
|
int Rolling_hash::get_fingerprint() {
|
|
return (int)fingerprint;
|
|
}
|
|
|
|
// Builds a Rabin-Karp rolling hash over a window of `length` bytes, modulo `p`.
//
// The window starts out filled with `length` zero bytes. `xi` is set to
// 2^(8*length) mod p: slide() feeds the hash eight bits per byte, so this is
// the weight of the bit that leaves the window in slide_bit().
//
// Throws std::invalid_argument if p is 0 (every reduction would divide by zero).
// Throws std::overflow_error if p >= 2^31.
Rabin_karp::Rabin_karp(uint32_t p, size_t length) : p(p) {
    // Validate first so that no work is done for an unusable modulus.
    if (p == 0)
        throw std::invalid_argument("A Rabin-Karp hash function has been initialized with a modulus of zero.");

    // Build 2^31 in an unsigned type; `1 << 31` on a signed int shifts into the sign bit.
    if (p >= (uint32_t{1} << 31))
        throw std::overflow_error("A Rabin-Karp hash function has been initialized with too large a prime, such that we will encounter overflow errors.");

    // Fill the window with zero bytes and, in the same pass, accumulate
    // 2^(8*length) mod p one byte (a factor of 2^8) at a time. The running
    // value stays below p < 2^31, so multiplying by 256 cannot overflow 64 bits.
    uint64_t outgoing_weight = 1 % p;
    for (size_t i = 0; i < length; i++) {
        elements.push(0);
        outgoing_weight = (outgoing_weight * 256) % p;
    }

    xi = static_cast<decltype(xi)>(outgoing_weight);
}
|
|
|
|
void Rabin_karp::slide(unsigned char c_in) {
|
|
unsigned char c_out = elements.front();
|
|
elements.pop();
|
|
elements.push(c_in);
|
|
|
|
for (size_t i = 7; i >= 0; i--) {
|
|
bool bit_in = (c_in & (1 << i)) != 0;
|
|
bool bit_out = (c_out & (1 << i)) != 0;
|
|
slide_bit(bit_in, bit_out);
|
|
}
|
|
}
|
|
|
|
void Rabin_karp::slide_bit(bool bit_in, bool bit_out) {
|
|
fingerprint = ((fingerprint << 1) - xi*bit_out + bit_in);
|
|
// fast mod p
|
|
if (fingerprint > (uint32_t)p)
|
|
fingerprint -= p;
|
|
}
|
|
|
|
// Stores the modulus `p` and the base `r` used by push() and shift().
// No validation is performed here.
Polynomial_fingerprint::Polynomial_fingerprint(int32_t p, int32_t r)
    : p(p),
      r(r)
{
}
|
|
|
|
// Appends `c` to the fingerprinted sequence:
//     fingerprint <- (fingerprint + c * r^k) mod p
// where k is the number of characters stored before this call, so the first
// character gets exponent 0.
//
// r^k mod p is computed with exact integer square-and-multiply. A
// floating-point pow() loses exactness for large powers, and converting an
// out-of-range double to an integer is undefined behaviour.
//
// Precondition: p > 0.
void Polynomial_fingerprint::push(unsigned char c) {
    const int64_t modulus = p;

    // Bring r into [0, p) so that every product below fits in 64 bits.
    int64_t base = r % modulus;
    if (base < 0)
        base += modulus;

    // power = r^k mod p, k = elements.size()
    int64_t power = 1 % modulus;
    for (auto exponent = elements.size(); exponent > 0; exponent >>= 1) {
        if (exponent & 1)
            power = (power * base) % modulus;
        base = (base * base) % modulus;
    }

    int64_t current = static_cast<int64_t>(fingerprint) % modulus;
    if (current < 0)
        current += modulus;

    fingerprint = static_cast<decltype(fingerprint)>((current + c * power) % modulus);

    elements.push(c);
}
|
|
|
|
void Polynomial_fingerprint::shift(size_t i) {
|
|
int32_t subtract_fingerprint = 0;
|
|
for (size_t ii = 0; ii < i; ii++) {
|
|
unsigned char c = elements.front();
|
|
elements.pop();
|
|
int exp = (ii+1) % (p-1);
|
|
subtract_fingerprint += c*pow(r, exp);
|
|
}
|
|
fingerprint = (fingerprint-subtract_fingerprint)*multiplicative_inverse(pow(r, i), p) % p;
|
|
}
|
|
|
|
// Pre-processes the pattern `P`:
//   1. stores in `shortest_periods` the value of len_of_shortest_period() for
//      every prefix whose length is a power of two, and for the whole pattern;
//   2. pushes the pattern one character at a time and records get_fingerprint()
//      for every prefix whose length is a power of two, is one of the stored
//      period lengths, or is the whole pattern. The value goes into
//      `prehashed_values` and its position into `prehashed_indices`, keyed by
//      prefix length;
//   3. stores the pattern length in `m`.
//
// NOTE(review): step 2 walks `shortest_periods` front to back and therefore
// assumes its entries are non-decreasing — confirm against len_of_shortest_period.
Porat_porat_polynomial_fingerprint::Porat_porat_polynomial_fingerprint(std::string P, int32_t p, int32_t r) : p(p), r(r) {
    const size_t pattern_length = P.length();

    // Calculate the shortest periods for all prefixes of length 2^i (and the full pattern)
    {
        const char * P_c_str = P.c_str();
        for (size_t prefix_length = 1; prefix_length < pattern_length; prefix_length <<= 1)
            shortest_periods.push_back(len_of_shortest_period(P_c_str, prefix_length));
        shortest_periods.push_back(len_of_shortest_period(P_c_str, pattern_length));
    }

    // Records the fingerprint of the prefix pushed so far, at most once per length.
    auto record_prefix = [this](size_t prefix_length) {
        if (!prehashed_indices.contains(prefix_length)) {
            prehashed_values.push_back(get_fingerprint());
            prehashed_indices[prefix_length] = prehashed_values.size()-1;
        }
    };

    // Calculate the fingerprints of all prefixes of length 2^i, and of the shortest periods found before
    {
        size_t next_2_exponent = 1;
        size_t shortest_period_index = 0;
        for (size_t i = 0; i < pattern_length; i++) {
            // At this point exactly the first i characters have been pushed.
            bool is_checkpoint = false;

            if (i == next_2_exponent) {
                is_checkpoint = true;
                next_2_exponent <<= 1;  // move on to the next power of two
            }

            // Consume every period entry that is due, so that duplicate values
            // and values that coincide with a power of two cannot stall the index.
            while (shortest_period_index < shortest_periods.size() && shortest_periods[shortest_period_index] <= i) {
                if (shortest_periods[shortest_period_index] == i)
                    is_checkpoint = true;
                shortest_period_index++;
            }

            if (is_checkpoint)
                record_prefix(i);

            // Every character is pushed, whether or not a checkpoint was recorded.
            push(P[i]);
        }

        // We have now pushed the entire pattern
        record_prefix(pattern_length);
    }

    // Save the length of the pattern before we throw the pattern away
    m = pattern_length;
}
|
|
|
|
// Placeholder: not implemented yet. The argument is currently ignored and no
// state is changed.
void Porat_porat_polynomial_fingerprint::push(unsigned char c) {
    // TODO
    static_cast<void>(c);
}
|
|
|
|
// Placeholder: not implemented yet. The argument is currently ignored and no
// state is changed.
void Porat_porat_polynomial_fingerprint::shift(size_t i) {
    // TODO
    static_cast<void>(i);
}
|