// Bachelors_Thesis_Code/hash_function_library.hpp
// 2021-11-14 14:35:05 +01:00
//
// 109 lines
// 3.0 KiB
// C++

#ifndef HASH_LIBRARY_H
#define HASH_LIBRARY_H
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <map>
#include <queue>
#include <stdexcept>
#include <string>
#include <vector>
#include "general_library.hpp"
// Common base class for the rolling hash functions declared in this header.
// Only declarations appear here; no member function is defined in this header.
class Rolling_hash {
public:
    // Virtual so that an object of a derived hash class that is destroyed
    // through a Rolling_hash pointer or reference runs the derived destructor.
    virtual ~Rolling_hash() = default;

    // pushes element `c`
    virtual void push(unsigned char c);

    // shifts `i` indices
    // the hash function should already know the values
    virtual void shift(size_t i);

    // NOTE(review): presumably moves the window one position by taking in `c`
    // and dropping the oldest element -- confirm against the implementation.
    virtual void slide(unsigned char c);

    // Accessor for the fingerprint, returned as an int.
    // NOTE(review): presumably returns the private `fingerprint` below -- the
    // definition is not visible in this header.
    int get_fingerprint();

private:
    // Zero-initialised so that the member never holds an indeterminate value
    // before an implementation assigns it.
    int fingerprint = 0;
};
class Rabin_karp : Rolling_hash {
// Hash function: sum_{i=1}^n x_i*2^{n-i} mod p
// With X being a binary string
public:
// Let prime `p` be an int32_t, which ensures that it is small enough to avoid underflows
Rabin_karp(uint32_t p, size_t length);
void slide(unsigned char c);
private:
void slide_bit(bool bit_in, bool bit_out);
std::queue<unsigned char> elements;
uint32_t fingerprint = 0;
uint32_t xi; // 2^n mod p
uint32_t p; // prime
};
class Polynomial_fingerprint : Rolling_hash {
// Hash function: sum_{i=1}^l s_i*r^i mod p
// r in F_p
// TODO: Add some overflow warning
// TODO: Untested
public:
Polynomial_fingerprint(int32_t p, int32_t r);
void push(unsigned char c);
void shift(size_t i);
private:
std::queue<unsigned char> elements;
int32_t fingerprint = 0;
int32_t p; // prime
int32_t r; // r in F_p
};
class Porat_porat_polynomial_fingerprint : Rolling_hash {
// Hash function: sum_{i=1}^l s_i*r^i mod p
// r in F_p
// TODO: Add some overflow warning
// TODO: Untested
public:
Porat_porat_polynomial_fingerprint(std::string P, int32_t p, int32_t r);
void push(unsigned char c);
void shift(size_t i);
bool should_children_be_killed();
int get_generation();
int get_child();
protected:
// TODO: Optimize the code such that we don't copy the prehashed values, but instead share it between all instances.
// I guess we would have to store it in a separate object, and then pass a reference to it around (to avoid it getting destroyed early).
Porat_porat_polynomial_fingerprint(int32_t p, int32_t r, std::vector<int32_t> prehashed_values, std::vector<unsigned int> shortest_periods, std::map<unsigned int, unsigned int> prehashed_indices);
private:
std::queue<unsigned char> elements;
int32_t fingerprint = 0;
int32_t p; // prime
int32_t r; // r in F_p
size_t m; // pattern length
std::vector<int32_t> prehashed_values;
std::vector<unsigned int> shortest_periods;
std::map<unsigned int, unsigned int> prehashed_indices;
};
/* void push(const char * cs, size_t n) { */
/* for (size_t i = 0; i < n; i++) */
/* push(cs[i]); */
/* } */
/* void push(std::string s) { */
/* for (char c : s) */
/* push(c); */
/* } */
#endif