109 lines
3.0 KiB
C++
109 lines
3.0 KiB
C++
#ifndef HASH_LIBRARY_H
|
|
#define HASH_LIBRARY_H
|
|
|
|
#include <stdlib.h>
#include <stdint.h>
#include <math.h>

#include <map>
#include <queue>
#include <stdexcept>
#include <string>
#include <vector>

#include "general_library.hpp"
|
|
|
|
// Common interface for the rolling hashes declared in this header.
//
// Only declarations live here; the member functions are defined outside this
// header. The operations are virtual so that concrete hashes can replace them.
class Rolling_hash {
public:
    // Virtual destructor: this class has virtual functions, so a derived hash
    // may be destroyed through a Rolling_hash pointer; without this that
    // would be undefined behaviour.
    virtual ~Rolling_hash() = default;

    // pushes element
    virtual void push(unsigned char c);

    // shifts i indices
    // the hash function should already know the values
    virtual void shift(size_t i);

    // Takes one new element `c`.
    // NOTE(review): presumably this advances a fixed-size window by one
    // element (push + shift) -- confirm against the implementation.
    virtual void slide(unsigned char c);

    // Reports the current fingerprint.
    // Virtual so that hashes which keep their own fingerprint state can report
    // that state instead of the private member below.
    virtual int get_fingerprint();

private:
    // Zero-initialised so the member never holds an indeterminate value.
    int fingerprint = 0;
};
|
|
|
|
class Rabin_karp : Rolling_hash {
|
|
// Hash function: sum_{i=1}^n x_i*2^{n-i} mod p
|
|
// With X being a binary string
|
|
|
|
public:
|
|
// Let prime `p` be an int32_t, which ensures that it is small enough to avoid underflows
|
|
Rabin_karp(uint32_t p, size_t length);
|
|
void slide(unsigned char c);
|
|
|
|
private:
|
|
void slide_bit(bool bit_in, bool bit_out);
|
|
std::queue<unsigned char> elements;
|
|
uint32_t fingerprint = 0;
|
|
uint32_t xi; // 2^n mod p
|
|
uint32_t p; // prime
|
|
};
|
|
|
|
class Polynomial_fingerprint : Rolling_hash {
|
|
// Hash function: sum_{i=1}^l s_i*r^i mod p
|
|
// r in F_p
|
|
|
|
// TODO: Add some overflow warning
|
|
// TODO: Untested
|
|
|
|
public:
|
|
Polynomial_fingerprint(int32_t p, int32_t r);
|
|
void push(unsigned char c);
|
|
void shift(size_t i);
|
|
|
|
private:
|
|
std::queue<unsigned char> elements;
|
|
int32_t fingerprint = 0;
|
|
int32_t p; // prime
|
|
int32_t r; // r in F_p
|
|
};
|
|
|
|
class Porat_porat_polynomial_fingerprint : Rolling_hash {
|
|
// Hash function: sum_{i=1}^l s_i*r^i mod p
|
|
// r in F_p
|
|
|
|
// TODO: Add some overflow warning
|
|
// TODO: Untested
|
|
|
|
public:
|
|
Porat_porat_polynomial_fingerprint(std::string P, int32_t p, int32_t r);
|
|
void push(unsigned char c);
|
|
void shift(size_t i);
|
|
bool should_children_be_killed();
|
|
int get_generation();
|
|
int get_child();
|
|
|
|
protected:
|
|
// TODO: Optimize the code such that we don't copy the prehashed values, but instead share it between all instances.
|
|
// I guess we would have to store it in a separate object, and then pass a reference to it around (to avoid it getting destroyed early).
|
|
Porat_porat_polynomial_fingerprint(int32_t p, int32_t r, std::vector<int32_t> prehashed_values, std::vector<unsigned int> shortest_periods, std::map<unsigned int, unsigned int> prehashed_indices);
|
|
|
|
private:
|
|
std::queue<unsigned char> elements;
|
|
int32_t fingerprint = 0;
|
|
int32_t p; // prime
|
|
int32_t r; // r in F_p
|
|
|
|
size_t m; // pattern length
|
|
std::vector<int32_t> prehashed_values;
|
|
std::vector<unsigned int> shortest_periods;
|
|
std::map<unsigned int, unsigned int> prehashed_indices;
|
|
};
|
|
|
|
/* void push(const char * cs, size_t n) { */
/*     for (size_t i = 0; i < n; i++) */
/*         push(cs[i]); */
/* } */

/* void push(std::string s) { */
/*     for (char c : s) */
/*         push(c); */
/* } */
|
|
|
|
#endif
|