Init (add codebase)
This commit is contained in:
parent
d10424819c
commit
bacd7cff71
48
Makefile
Normal file
48
Makefile
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
CC = cc # C compiler
|
||||||
|
# cc and gcc is the same
|
||||||
|
CXX = c++ # C++ compiler
|
||||||
|
# c++
|
||||||
|
# g++
|
||||||
|
# clang++
|
||||||
|
CPPFLAGS = # preprocessor flags
|
||||||
|
CFLAGS = -Wall -Wfloat-equal -std=c99 -O0 -mtune=native -funroll-loops $(OPT) # C compiler flags
|
||||||
|
# -Wall: Warnings all
|
||||||
|
# -Wfloat-equal: Warn if comparing floats with ==
|
||||||
|
# -OX: Optimization level -O0 (none, default), -O1, -O2, -O3 (most, may increase file size), -Os (decrease file size), -ffast-math
|
||||||
|
# -mtune: Tune for specific/generic CPU
|
||||||
|
# -funrool-loops: Unroll loops when compiling
|
||||||
|
# -fopenmp: Enable OpenMP (When running, set the environmental variable OMP_NUM_THREADS)
|
||||||
|
# $(OPT): Passes any parameter given when calling make with 'make OPT=...'
|
||||||
|
CXXFLAGS = -Wall -std=c++20 -O0 # C++ compiler flags
|
||||||
|
LDFLAGS = # linker flags
|
||||||
|
LDLIBS = # library flags
|
||||||
|
# -lm (for <math.h>)
|
||||||
|
# -lblas (BASIC LINEAR ALGEBRA SUBPROGRAMS)
|
||||||
|
# -lcblas (for <cblas.h>)
|
||||||
|
# -lopenblas (NOT INSTALLED; blas and cblas in one; conflicts with blas and cblas)
|
||||||
|
# -llapack (LAPACKE)
|
||||||
|
# -llapacke (LAPACK) NOTE: The order of "-llapacke -llapack -lblas" is very important
|
||||||
|
#LINK.o = $(CXX) $(LDFLAGS) # use CXX for linking
|
||||||
|
|
||||||
|
simple_string_matching: simple_string_matching.o Rabin_fingerprint.o general_library.o
|
||||||
|
$(CXX) $(CXXFLAGS) simple_string_matching.o Rabin_fingerprint.o general_library.o -o simple_string_matching
|
||||||
|
|
||||||
|
simple_string_matching.o: simple_string_matching.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) -c simple_string_matching.cpp
|
||||||
|
|
||||||
|
Rabin_fingerprint.o: Rabin_fingerprint.cpp Rabin_fingerprint.hpp
|
||||||
|
$(CXX) $(CXXFLAGS) -c Rabin_fingerprint.cpp
|
||||||
|
|
||||||
|
general_library.o: general_library.cpp general_library.hpp
|
||||||
|
$(CXX) $(CXXFLAGS) -c general_library.cpp
|
||||||
|
|
||||||
|
porat-porat: porat-porat.cpp
|
||||||
|
|
||||||
|
# Tell the compiler that 'clean' isn't referring to a file
|
||||||
|
.PHONY: clean
|
||||||
|
|
||||||
|
# A make target that cleans (by deleting files)
|
||||||
|
clean:
|
||||||
|
$(RM) porat-porat
|
||||||
|
$(RM) simple_string_matching
|
||||||
|
$(RM) *.o
|
32
README.md
32
README.md
@ -1,3 +1,33 @@
|
|||||||
# Bachelors_Thesis_Code
|
# Bachelor's_Thesis_Code
|
||||||
|
|
||||||
Code used for my Bachelor's Thesis
|
Code used for my Bachelor's Thesis
|
||||||
|
|
||||||
|
# Versions
|
||||||
|
## Current Version
|
||||||
|
Compared to V1, this current state `generate_initial_irreducible_polynomials.sage`, `generate_random_irreducible_polynomial.sage`, and `multiply_polynomials_modulo_polynomial.sage` have been removed from the main folder, as they have already been used for implementing their corresponding features in `general_library.cpp`. They will be preserved in the V1 folder.
|
||||||
|
|
||||||
|
This version also contains code relating to Porat-Porat and other random code stumps. This code is going to be completely reimplemented, but is preserved for now to avoid redoing work.
|
||||||
|
|
||||||
|
## V1
|
||||||
|
#### `general_library.cpp`
|
||||||
|
- Exports functions for:
|
||||||
|
- Generating a random irreducible polynomial of degree up to 31.
|
||||||
|
- Contains code for:
|
||||||
|
- Contains code to multiply polynomials modulo another polynomial.
|
||||||
|
- Doesn't contain code to initially calculate modulo a polynomial. It requires the initial polynomials to all be at most of the same degree as the polynomial that you use for modulo.
|
||||||
|
- A very custom version of calculating Reduced Row Echelon Form assuming all values are in Z2.
|
||||||
|
|
||||||
|
#### `generate_initial_irreducible_polynomials.sage`
|
||||||
|
- Generates a list of irreducible polynomials, one of each degree. The polynomials are printed in a format that can be copy-pasted directly into `general_library.cpp`.
|
||||||
|
|
||||||
|
#### `generate_random_irreducible_polynomial.sage`
|
||||||
|
- Contains an general template of how `general_library.cpp` should implement generating a random irreducible polynomial. The output of this program can also be used for debugging, since it uses many (correctly implemented) built-in sagemath functions that have to manually implemented in `general_library.cpp`.
|
||||||
|
|
||||||
|
#### `multiply_polynomials_modulo_polynomial.sage`
|
||||||
|
- Contains a general template of how `general_library.cpp` should implement multiplying polynomials modulo a polynomial in Z2.
|
||||||
|
|
||||||
|
#### `compare_fingerprint_false_positive_probabilities.sage`
|
||||||
|
- Contains initial (i.e. very rouch) code, which compares the upper bounds of a false match occuring of the Rabin fingerprint and the fingerprint used in Karp-Rabin.
|
||||||
|
|
||||||
|
#### `test_rabin_fingerprint.sage`
|
||||||
|
- Contains initial (i.e. very rough) code which debunks my theory that modulo a prime is somewhat equivalent to modulo an irreducible polynomial.
|
||||||
|
73
Rabin_fingerprint.cpp
Normal file
73
Rabin_fingerprint.cpp
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
#include "Rabin_fingerprint.hpp"
|
||||||
|
|
||||||
|
Rabin_fingerprint::Rabin_fingerprint(uint32_t p, size_t window_size_in_bits) {
|
||||||
|
set_modulo_polynomial(p);
|
||||||
|
set_shift_polynomial(window_size_in_bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint::set_modulo_polynomial (uint32_t p) {
|
||||||
|
polynomial = p & ((uint32_t)pow(2, 31)-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint::set_shift_polynomial (size_t window_size_in_bits) {
|
||||||
|
#ifndef NDEBUG
|
||||||
|
if (polynomial == 0)
|
||||||
|
throw std::logic_error("Call set_modulo_polynomial first, as this function depends on the polynomial variable being set.");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
shift_polynomial = 1;
|
||||||
|
|
||||||
|
// NOTE: We shift the bit 1 space too long, since we are removing the bit that has been pushed outside the window
|
||||||
|
for (size_t i = 0; i < window_size_in_bits; i++) {
|
||||||
|
shift_polynomial <<= 1;
|
||||||
|
if ((shift_polynomial & (uint32_t)pow(2, 31)) != 0)
|
||||||
|
shift_polynomial ^= polynomial;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void Rabin_fingerprint::push_char (char c) {
|
||||||
|
std::bitset<8> b(c);
|
||||||
|
for (char i = 7; i >= 0; i--) {
|
||||||
|
push_bit((bool)b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint::push_bit (bool b) {
|
||||||
|
fingerprint <<= 1;
|
||||||
|
fingerprint |= b;
|
||||||
|
if ((fingerprint & (uint32_t)pow(2, 31)) != 0)
|
||||||
|
fingerprint ^= polynomial;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint::shift_bit (bool b) {
|
||||||
|
#ifndef NDEBUG
|
||||||
|
if (shift_polynomial == 0)
|
||||||
|
throw std::logic_error("Call set_shift_polynomial first, as this function depends on the shift_polynomial variable being set.");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (b)
|
||||||
|
fingerprint ^= shift_polynomial;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint::slide_char (char c_in, char c_out) {
|
||||||
|
std::bitset<8> b_in(c_in);
|
||||||
|
std::bitset<8> b_out(c_out);
|
||||||
|
for (char i = 7; i >= 0; i--) {
|
||||||
|
slide_bit((bool)b_in[i], (bool)b_out[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* #include <iostream> */
|
||||||
|
void Rabin_fingerprint::slide_bit (bool b_in, bool b_out) {
|
||||||
|
/* std::cout << "bitset b(c): " << b << std::endl; */
|
||||||
|
/* std::cout << "push bit " << b[i] << std::endl; */
|
||||||
|
/* std::cout << "push bit " << b_in << std::endl; */
|
||||||
|
push_bit(b_in);
|
||||||
|
/* std::cout << "shift bit " << b_out << std::endl; */
|
||||||
|
shift_bit(b_out);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t Rabin_fingerprint::get_fingerprint () {
|
||||||
|
return fingerprint;
|
||||||
|
}
|
30
Rabin_fingerprint.hpp
Normal file
30
Rabin_fingerprint.hpp
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#ifndef RABIN_FINGERPRINT_H
|
||||||
|
#define RABIN_FINGERPRINT_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <bitset>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
class Rabin_fingerprint {
|
||||||
|
public:
|
||||||
|
Rabin_fingerprint(uint32_t polynomial, size_t window_size_in_bits);
|
||||||
|
|
||||||
|
void push_char (char c);
|
||||||
|
void push_bit (bool b);
|
||||||
|
void shift_bit (bool b);
|
||||||
|
void slide_char (char c_in, char c_out);
|
||||||
|
void slide_bit (bool b1, bool b2);
|
||||||
|
|
||||||
|
uint32_t get_fingerprint();
|
||||||
|
|
||||||
|
private:
|
||||||
|
void set_modulo_polynomial (uint32_t p);
|
||||||
|
void set_shift_polynomial (size_t window_size_in_bits);
|
||||||
|
|
||||||
|
uint32_t fingerprint = 0;
|
||||||
|
uint32_t polynomial = 0;
|
||||||
|
uint32_t shift_polynomial = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
46
V1/Makefile
Normal file
46
V1/Makefile
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
CC = cc # C compiler
|
||||||
|
# cc and gcc is the same
|
||||||
|
CXX = c++ # C++ compiler
|
||||||
|
# c++
|
||||||
|
# g++
|
||||||
|
# clang++
|
||||||
|
CPPFLAGS = # preprocessor flags
|
||||||
|
CFLAGS = -Wall -Wfloat-equal -std=c99 -O0 -mtune=native -funroll-loops $(OPT) # C compiler flags
|
||||||
|
# -Wall: Warnings all
|
||||||
|
# -Wfloat-equal: Warn if comparing floats with ==
|
||||||
|
# -OX: Optimization level -O0 (none, default), -O1, -O2, -O3 (most, may increase file size), -Os (decrease file size), -ffast-math
|
||||||
|
# -mtune: Tune for specific/generic CPU
|
||||||
|
# -funrool-loops: Unroll loops when compiling
|
||||||
|
# -fopenmp: Enable OpenMP (When running, set the environmental variable OMP_NUM_THREADS)
|
||||||
|
# $(OPT): Passes any parameter given when calling make with 'make OPT=...'
|
||||||
|
CXXFLAGS = -Wall -std=c++20 -O0 # C++ compiler flags
|
||||||
|
LDFLAGS = # linker flags
|
||||||
|
LDLIBS = # library flags
|
||||||
|
# -lm (for <math.h>)
|
||||||
|
# -lblas (BASIC LINEAR ALGEBRA SUBPROGRAMS)
|
||||||
|
# -lcblas (for <cblas.h>)
|
||||||
|
# -lopenblas (NOT INSTALLED; blas and cblas in one; conflicts with blas and cblas)
|
||||||
|
# -llapack (LAPACKE)
|
||||||
|
# -llapacke (LAPACK) NOTE: The order of "-llapacke -llapack -lblas" is very important
|
||||||
|
#LINK.o = $(CXX) $(LDFLAGS) # use CXX for linking
|
||||||
|
|
||||||
|
simple_string_matching: simple_string_matching.o Rabin_fingerprint.o general_library.o
|
||||||
|
$(CXX) $(CXXFLAGS) simple_string_matching.o Rabin_fingerprint.o general_library.o -o simple_string_matching
|
||||||
|
|
||||||
|
simple_string_matching.o: simple_string_matching.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) -c simple_string_matching.cpp
|
||||||
|
|
||||||
|
Rabin_fingerprint.o: Rabin_fingerprint.cpp Rabin_fingerprint.hpp
|
||||||
|
$(CXX) $(CXXFLAGS) -c Rabin_fingerprint.cpp
|
||||||
|
|
||||||
|
general_library.o: general_library.cpp general_library.hpp
|
||||||
|
$(CXX) $(CXXFLAGS) -c general_library.cpp
|
||||||
|
|
||||||
|
|
||||||
|
# Tell the compiler that 'clean' isn't referring to a file
|
||||||
|
.PHONY: clean
|
||||||
|
|
||||||
|
# A make target that cleans (by deleting files)
|
||||||
|
clean:
|
||||||
|
$(RM) simple_string_matching
|
||||||
|
$(RM) *.o
|
68
V1/Rabin_fingerprint.cpp
Normal file
68
V1/Rabin_fingerprint.cpp
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
#include "Rabin_fingerprint.hpp"
|
||||||
|
|
||||||
|
Rabin_fingerprint::Rabin_fingerprint(uint32_t p, size_t window_size_in_bits) {
|
||||||
|
set_modulo_polynomial(p);
|
||||||
|
set_shift_polynomial(window_size_in_bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint::set_modulo_polynomial (uint32_t p) {
|
||||||
|
polynomial = p & ((uint32_t)pow(2, 31)-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint::set_shift_polynomial (size_t window_size_in_bits) {
|
||||||
|
#ifndef NDEBUG
|
||||||
|
if (polynomial == 0)
|
||||||
|
throw std::logic_error("Call set_modulo_polynomial first, as this function depends on the polynomial variable being set.");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
shift_polynomial = 1;
|
||||||
|
|
||||||
|
// NOTE: We shift the bit 1 space too long, since we are removing the bit that has been pushed outside the window
|
||||||
|
for (size_t i = 0; i < window_size_in_bits; i++) {
|
||||||
|
shift_polynomial <<= 1;
|
||||||
|
if ((shift_polynomial & (uint32_t)pow(2, 31)) != 0)
|
||||||
|
shift_polynomial ^= polynomial;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void Rabin_fingerprint::push_char (char c) {
|
||||||
|
std::bitset<8> b(c);
|
||||||
|
for (char i = 7; i >= 0; i--) {
|
||||||
|
push_bit((bool)b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint::push_bit (bool b) {
|
||||||
|
fingerprint <<= 1;
|
||||||
|
fingerprint |= b;
|
||||||
|
if ((fingerprint & (uint32_t)pow(2, 31)) != 0)
|
||||||
|
fingerprint ^= polynomial;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint::shift_bit (bool b) {
|
||||||
|
#ifndef NDEBUG
|
||||||
|
if (shift_polynomial == 0)
|
||||||
|
throw std::logic_error("Call set_shift_polynomial first, as this function depends on the shift_polynomial variable being set.");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (b)
|
||||||
|
fingerprint ^= shift_polynomial;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint::slide_char (char c_in, char c_out) {
|
||||||
|
std::bitset<8> b_in(c_in);
|
||||||
|
std::bitset<8> b_out(c_out);
|
||||||
|
for (char i = 7; i >= 0; i--) {
|
||||||
|
slide_bit((bool)b_in[i], (bool)b_out[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint::slide_bit (bool b_in, bool b_out) {
|
||||||
|
push_bit(b_in);
|
||||||
|
shift_bit(b_out);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t Rabin_fingerprint::get_fingerprint () {
|
||||||
|
return fingerprint;
|
||||||
|
}
|
30
V1/Rabin_fingerprint.hpp
Normal file
30
V1/Rabin_fingerprint.hpp
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#ifndef RABIN_FINGERPRINT_H
|
||||||
|
#define RABIN_FINGERPRINT_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <bitset>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
class Rabin_fingerprint {
|
||||||
|
public:
|
||||||
|
Rabin_fingerprint(uint32_t polynomial, size_t window_size_in_bits);
|
||||||
|
|
||||||
|
void push_char (char c);
|
||||||
|
void push_bit (bool b);
|
||||||
|
void shift_bit (bool b);
|
||||||
|
void slide_char (char c_in, char c_out);
|
||||||
|
void slide_bit (bool b1, bool b2);
|
||||||
|
|
||||||
|
uint32_t get_fingerprint();
|
||||||
|
|
||||||
|
private:
|
||||||
|
void set_modulo_polynomial (uint32_t p);
|
||||||
|
void set_shift_polynomial (size_t window_size_in_bits);
|
||||||
|
|
||||||
|
uint32_t fingerprint = 0;
|
||||||
|
uint32_t polynomial = 0;
|
||||||
|
uint32_t shift_polynomial = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
73
V1/compare_fingerprint_false_positive_probabilities.sage
Normal file
73
V1/compare_fingerprint_false_positive_probabilities.sage
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
# Rabin fingerprint: k > lg(n*m/e), e is the upper bound on the error probability
|
||||||
|
# Karp-Rabin fingerprint: <= pi(n(n-m+1))/pi(M)
|
||||||
|
|
||||||
|
# 32-bit
|
||||||
|
M = 2**32-1
|
||||||
|
k = 31
|
||||||
|
|
||||||
|
# 16-bit
|
||||||
|
# M = 2**16-1
|
||||||
|
# k = 13
|
||||||
|
|
||||||
|
def fpi(a):
|
||||||
|
p = Primes()
|
||||||
|
a_next = p.next(a)
|
||||||
|
i = 0
|
||||||
|
jump = 1
|
||||||
|
lastdirection = -2
|
||||||
|
while(True):
|
||||||
|
print(f'{jump:010d}', end='\r')
|
||||||
|
# print(f'{i}, {jump}')
|
||||||
|
val = p.unrank(i)
|
||||||
|
if val == a_next:
|
||||||
|
return i # i-1, is the value we search for, but we count from 0, so i is correct
|
||||||
|
elif val > a_next:
|
||||||
|
if lastdirection == 2:
|
||||||
|
# print('mul')
|
||||||
|
lastdirection -= 1
|
||||||
|
jump *= 2
|
||||||
|
else:
|
||||||
|
# print('div')
|
||||||
|
jump //= 2
|
||||||
|
lastdirection += 1
|
||||||
|
# print(jump)
|
||||||
|
i -= jump
|
||||||
|
else:
|
||||||
|
if lastdirection == -2:
|
||||||
|
# print('mul')
|
||||||
|
lastdirection += 1
|
||||||
|
jump *= 2
|
||||||
|
else:
|
||||||
|
# print('div')
|
||||||
|
jump //= 2
|
||||||
|
lastdirection -= 1
|
||||||
|
i += jump
|
||||||
|
|
||||||
|
# Rabin fingerprint
|
||||||
|
# k = lg(n*m/e) <=>
|
||||||
|
# 2^k = n*m/e <=>
|
||||||
|
# 2^k/(n*m) = 1/e <=>
|
||||||
|
# n*m/(2^k) = e
|
||||||
|
|
||||||
|
# Karp-Rabin fingerprint
|
||||||
|
# e = pi(n(n-m+1))/pi(M)
|
||||||
|
|
||||||
|
# print(pi(2**32-1))
|
||||||
|
|
||||||
|
# e_rabin = n*m/(2^k)
|
||||||
|
|
||||||
|
piM = fpi(M)
|
||||||
|
for m in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]:
|
||||||
|
for n in [10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000]:
|
||||||
|
if m>=n:
|
||||||
|
continue
|
||||||
|
print(f'(n, m) = ({n}, {m})')
|
||||||
|
e_rabin = n*m/(2**k)
|
||||||
|
print(f'Rabin: {float(e_rabin):.2e}')
|
||||||
|
e_karpr = fpi(n*(n-m+1))/piM
|
||||||
|
print(f'KarpR: {float(e_karpr):.2e}')
|
||||||
|
print()
|
||||||
|
print(f'{e_karpr} >= 1 is {e_karpr >= 1} | {e_rabin} >= 1 is {e_rabin >= 1}')
|
||||||
|
if float(e_karpr) >= 1 or float(e_rabin) >= 1:
|
||||||
|
break
|
||||||
|
|
184
V1/general_library.cpp
Normal file
184
V1/general_library.cpp
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
#include "general_library.hpp"
|
||||||
|
|
||||||
|
// SECTION: Functions related to calculating with polynomials in Z2[x]
|
||||||
|
|
||||||
|
uint32_t irreducible_polynomials[] {
|
||||||
|
0b11, // irreducible polynomial of degree 1
|
||||||
|
0b111, // irreducible polynomial of degree 2
|
||||||
|
0b1011, // irreducible polynomial of degree 3
|
||||||
|
0b10011, // irreducible polynomial of degree 4
|
||||||
|
0b100101, // irreducible polynomial of degree 5
|
||||||
|
0b1011011, // irreducible polynomial of degree 6
|
||||||
|
0b10000011, // irreducible polynomial of degree 7
|
||||||
|
0b100011101, // irreducible polynomial of degree 8
|
||||||
|
0b1000010001, // irreducible polynomial of degree 9
|
||||||
|
0b10001101111, // irreducible polynomial of degree 10
|
||||||
|
0b100000000101, // irreducible polynomial of degree 11
|
||||||
|
0b1000011101011, // irreducible polynomial of degree 12
|
||||||
|
0b10000000011011, // irreducible polynomial of degree 13
|
||||||
|
0b100000010101001, // irreducible polynomial of degree 14
|
||||||
|
0b1000000000110101, // irreducible polynomial of degree 15
|
||||||
|
0b10000000000101101, // irreducible polynomial of degree 16
|
||||||
|
0b100000000000001001, // irreducible polynomial of degree 17
|
||||||
|
0b1000001010000000011, // irreducible polynomial of degree 18
|
||||||
|
0b10000000000000100111, // irreducible polynomial of degree 19
|
||||||
|
0b100000000011011110011, // irreducible polynomial of degree 20
|
||||||
|
0b1000000000000001100101, // irreducible polynomial of degree 21
|
||||||
|
0b10000000001111101100001, // irreducible polynomial of degree 22
|
||||||
|
0b100000000000000000100001, // irreducible polynomial of degree 23
|
||||||
|
0b1000000011110011010101001, // irreducible polynomial of degree 24
|
||||||
|
0b10000000000000000101000101, // irreducible polynomial of degree 25
|
||||||
|
0b100000000000100010111010011, // irreducible polynomial of degree 26
|
||||||
|
0b1000000000000001011010101101, // irreducible polynomial of degree 27
|
||||||
|
0b10000000000000010000011100101, // irreducible polynomial of degree 28
|
||||||
|
0b100000000000000000000000000101, // irreducible polynomial of degree 29
|
||||||
|
0b1000000000000110010100010101111, // irreducible polynomial of degree 30
|
||||||
|
0b10000000000000000000000000001001 // irreducible polynomial of degree 31
|
||||||
|
};
|
||||||
|
|
||||||
|
size_t degree (uint32_t polynomial) {
|
||||||
|
return 31 - __builtin_clz(polynomial);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t multiply_modulo_polynomials_in_Z2 (uint32_t q1, uint32_t q2, uint32_t p) {
|
||||||
|
size_t p_degree = degree(p);
|
||||||
|
size_t q1_degree = degree(q1);
|
||||||
|
size_t q2_degree = degree(q2);
|
||||||
|
|
||||||
|
if (q1_degree > p_degree)
|
||||||
|
throw std::logic_error("We only support multiply-modulo whenever the multiplied polynomials are initially at most the same degree as the modulo polynomial.");
|
||||||
|
|
||||||
|
if (q1_degree == p_degree)
|
||||||
|
q1 ^= p;
|
||||||
|
|
||||||
|
uint32_t arr[q2_degree+1];
|
||||||
|
arr[0] = q1;
|
||||||
|
for (size_t i = 1; i <= q2_degree; i++) {
|
||||||
|
arr[i] = arr[i-1]<<1;
|
||||||
|
if (degree(arr[i]) == p_degree)
|
||||||
|
arr[i] ^= p;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t sum = 0;
|
||||||
|
|
||||||
|
std::bitset<32> b(q2);
|
||||||
|
for (size_t i = 0; i <= q2_degree; i++) {
|
||||||
|
if (b[i] == 1)
|
||||||
|
sum ^= arr[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t polynomial_power_in_Z2(uint32_t q, uint32_t exp, uint32_t p) {
|
||||||
|
if (exp == 0)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (degree(q) == degree(p))
|
||||||
|
q ^= p;
|
||||||
|
|
||||||
|
size_t log2exp = degree(exp) + 1;
|
||||||
|
|
||||||
|
uint32_t arr[log2exp];
|
||||||
|
arr[0] = q;
|
||||||
|
for (size_t i = 1; i < log2exp; i++)
|
||||||
|
arr[i] = multiply_modulo_polynomials_in_Z2(arr[i-1], arr[i-1], p);
|
||||||
|
|
||||||
|
uint32_t res = 1;
|
||||||
|
std::bitset<32> b(exp);
|
||||||
|
for (size_t i = 0; i <= log2exp; i++) {
|
||||||
|
if (b[i] == 1)
|
||||||
|
res = multiply_modulo_polynomials_in_Z2(res, arr[i], p);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
void rref_Z2(std::vector<std::bitset<32>> &A, size_t m, size_t n) {
|
||||||
|
if (m > 31 || n > 32)
|
||||||
|
throw std::logic_error("We only support rref_Z2 for up to a (31x32)-matrix.");
|
||||||
|
|
||||||
|
// calculate row echelon form
|
||||||
|
// https://en.wikipedia.org/wiki/Gaussian_elimination#Pseudocode
|
||||||
|
size_t h = 0; // Initialization of the pivot row
|
||||||
|
size_t k = 0; // Initialization of the pivot column
|
||||||
|
|
||||||
|
while (h < m && k < n) {
|
||||||
|
// Find the k-th pivot:
|
||||||
|
int i_max = -1;
|
||||||
|
for (size_t i = h; i < m; i++) {
|
||||||
|
if (A[i][k]) {
|
||||||
|
i_max = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i_max == -1) {
|
||||||
|
// No pivot in this column, pass to next column
|
||||||
|
k++;
|
||||||
|
} else {
|
||||||
|
auto tmp = A[i_max];
|
||||||
|
A[i_max] = A[h];
|
||||||
|
A[h] = tmp;
|
||||||
|
|
||||||
|
// Do for all rows below pivot:
|
||||||
|
for (size_t i = h+1; i < m; i++){
|
||||||
|
bool f = A[i][k];
|
||||||
|
if (f == 0)
|
||||||
|
continue;
|
||||||
|
// Fill with zeros the lower part of pivot column:
|
||||||
|
A[i][k] = 0;
|
||||||
|
// Do for all remaining elements in current row:
|
||||||
|
for (size_t j = k+1; j<n; j++) {
|
||||||
|
A[i][j] = A[i][j] ^ A[h][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Increase pivot row and column
|
||||||
|
h++;
|
||||||
|
k++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perform back substitution
|
||||||
|
for (size_t i = 1; i < m; i++) {
|
||||||
|
for (size_t j = 0; j < i; j++) {
|
||||||
|
if (A[j][i] == 1) {
|
||||||
|
for (size_t l = i; l < n; l++) // check this
|
||||||
|
A[j][l] = A[j][l] ^ A[i][l];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t get_random_irreducible_polynomial_in_Z2 (size_t k) {
|
||||||
|
if (k > 31)
|
||||||
|
throw std::logic_error("We only support polynomials of degree 31 or less.");
|
||||||
|
|
||||||
|
uint32_t init_p = irreducible_polynomials[k-1];
|
||||||
|
|
||||||
|
srand(time(0));
|
||||||
|
uint32_t gamma = (rand() % (((uint32_t)1 << (k+1)) - 2)) + 2; // gamma in GF(2^k)\GF(2)
|
||||||
|
|
||||||
|
uint32_t gamma_pow[k+1];
|
||||||
|
for (size_t i = 0; i <= k; i++)
|
||||||
|
gamma_pow[i] = polynomial_power_in_Z2(gamma, i, init_p);
|
||||||
|
|
||||||
|
// Create an array of the polynomials in gamma_pow, and transpose it at the same time
|
||||||
|
std::vector<std::bitset<32>> A(k); // NOTE: That 32 > 31 >= k
|
||||||
|
for (size_t i = 0; i < k; i++) {
|
||||||
|
for (size_t j = 0; j <= k; j++) {
|
||||||
|
A[i][k-j] = gamma_pow[j] & ((uint32_t)1<<i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rref_Z2(A, k, k+1);
|
||||||
|
|
||||||
|
std::bitset<32> bp;
|
||||||
|
for (size_t i = 0; i < k; i++) {
|
||||||
|
bp[i] = A[i][k];
|
||||||
|
}
|
||||||
|
bp[k] = 1;
|
||||||
|
uint32_t p = (uint32_t)bp.to_ulong();
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
14
V1/general_library.hpp
Normal file
14
V1/general_library.hpp
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
#ifndef GENERAL_LIBRARY_H
|
||||||
|
#define GENERAL_LIBRARY_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <vector>
|
||||||
|
#include <bitset>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
// SECTION: Functions related to calculating with polynomials in Z2[x]
|
||||||
|
uint32_t get_random_irreducible_polynomial_in_Z2 (size_t k);
|
||||||
|
|
||||||
|
#endif
|
12
V1/generate_initial_irreducible_polynomials.sage
Normal file
12
V1/generate_initial_irreducible_polynomials.sage
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
F = GF(2)
|
||||||
|
PR = PolynomialRing(F, 'x')
|
||||||
|
|
||||||
|
max_k = 31
|
||||||
|
whitespace = max_k+1
|
||||||
|
|
||||||
|
for i in range(1, max_k+1):
|
||||||
|
poly_int = '0b'+ ''.join(map(str, PR.irreducible_element(i).coefficients(sparse=False)[::-1]))
|
||||||
|
poly_int += ','
|
||||||
|
poly_int += ' '*(max_k-i)
|
||||||
|
print(f' {poly_int}// irreducible polynomial of degree {i}')
|
||||||
|
|
96
V1/generate_random_irreducible_polynomial.sage
Normal file
96
V1/generate_random_irreducible_polynomial.sage
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
k = 31
|
||||||
|
F = GF(2)
|
||||||
|
E = GF(2^k)
|
||||||
|
PR = PolynomialRing(F, 'x')
|
||||||
|
p = PR.irreducible_element(k)
|
||||||
|
|
||||||
|
while(True):
|
||||||
|
gamma = E.random_element()
|
||||||
|
if gamma not in F:
|
||||||
|
break
|
||||||
|
|
||||||
|
# gamma = PR('x^3+x')
|
||||||
|
# print('0b'+ ''.join(map(str, gamma.coefficients(sparse=False)[::-1])))
|
||||||
|
|
||||||
|
eqs = [PR(gamma)^i % p for i in range(k+1)]
|
||||||
|
for eq in eqs:
|
||||||
|
print(eq)
|
||||||
|
print()
|
||||||
|
|
||||||
|
eqs = list(map(lambda eq: eq.coefficients(sparse=False) + [0]*(k-eq.degree()-1), eqs))
|
||||||
|
for eq in eqs:
|
||||||
|
print(eq)
|
||||||
|
print()
|
||||||
|
eqs = matrix(eqs[::-1])
|
||||||
|
print(eqs)
|
||||||
|
print()
|
||||||
|
eqs = eqs.transpose()
|
||||||
|
print(eqs)
|
||||||
|
print()
|
||||||
|
|
||||||
|
rref = eqs.rref()
|
||||||
|
print(rref)
|
||||||
|
irrpol = PR(rref[:,-1].list() + [1])
|
||||||
|
print(irrpol)
|
||||||
|
print(irrpol.is_irreducible())
|
||||||
|
print()
|
||||||
|
|
||||||
|
# manual rref
|
||||||
|
def calc_rref_Z2(A, m, n):
|
||||||
|
"""Calculate the reduced row echelon form of a (mxn)-matrix"""
|
||||||
|
A = copy(A)
|
||||||
|
|
||||||
|
# calculate row echelon form
|
||||||
|
# https://en.wikipedia.org/wiki/Gaussian_elimination#Pseudocode
|
||||||
|
h = 0 # Initialization of the pivot row
|
||||||
|
k = 0 # Initialization of the pivot column
|
||||||
|
|
||||||
|
while h < m and k < n:
|
||||||
|
# Find the k-th pivot:
|
||||||
|
i_max = -1
|
||||||
|
for i in range(h, m):
|
||||||
|
if A[i,k] == 1:
|
||||||
|
i_max = i
|
||||||
|
break
|
||||||
|
|
||||||
|
if i_max == -1:
|
||||||
|
# No pivot in this column, pass to next column
|
||||||
|
k += 1
|
||||||
|
else:
|
||||||
|
tmp = A[i_max, :]
|
||||||
|
A[i_max, :] = A[h, :]
|
||||||
|
A[h, :] = tmp
|
||||||
|
|
||||||
|
|
||||||
|
# Do for all rows below pivot:
|
||||||
|
for i in range(h+1, m):
|
||||||
|
f = A[i][k];
|
||||||
|
if f == 0:
|
||||||
|
continue;
|
||||||
|
A[i, k] = 0;
|
||||||
|
for j in range(k+1, n):
|
||||||
|
A[i, j] = A[i, j] - A[h, j]
|
||||||
|
|
||||||
|
# Increase pivot row and column
|
||||||
|
h += 1
|
||||||
|
k += 1
|
||||||
|
|
||||||
|
# perform back substitution
|
||||||
|
for i in range(1, m):
|
||||||
|
for j in range(i):
|
||||||
|
if A[j, i] == 1:
|
||||||
|
A[j, :] -= A[i, :]
|
||||||
|
|
||||||
|
return A
|
||||||
|
|
||||||
|
print('Manual RREF:\n')
|
||||||
|
|
||||||
|
rref = calc_rref_Z2(eqs, k, k+1) # cols = rows+1, since we transposed the matrix
|
||||||
|
print(rref)
|
||||||
|
print()
|
||||||
|
irrpol = PR(rref[:,-1].list() + [1])
|
||||||
|
print(irrpol)
|
||||||
|
print(irrpol.is_irreducible())
|
||||||
|
print()
|
||||||
|
|
||||||
|
print('0b'+ ''.join(map(str, irrpol.coefficients(sparse=False)[::-1])))
|
68
V1/multiply_polynomials_modulo_polynomial.sage
Normal file
68
V1/multiply_polynomials_modulo_polynomial.sage
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
import math
|
||||||
|
|
||||||
|
k = 31
|
||||||
|
F = GF(2)
|
||||||
|
var = 'x'
|
||||||
|
PR = PolynomialRing(F, var)
|
||||||
|
p = PR.irreducible_element(k)
|
||||||
|
gamma = PR.random_element(k)
|
||||||
|
|
||||||
|
exp = 10
|
||||||
|
target = gamma^exp % p
|
||||||
|
|
||||||
|
def polynomial_power_in_Z2(q, exp, p, var):
|
||||||
|
if exp == 0:
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if q.degree() == p.degree():
|
||||||
|
q += p
|
||||||
|
|
||||||
|
res = q
|
||||||
|
|
||||||
|
for i in range(2, exp+1):
|
||||||
|
res = multiply_polynomials_in_Z2(res, q, p, var)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
def polynomial_power_in_Z2_V2(q, exp, p, var):
|
||||||
|
if exp == 0:
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if q.degree() == p.degree():
|
||||||
|
q += p
|
||||||
|
|
||||||
|
log2deg = math.floor(math.log2(exp)) + 1
|
||||||
|
arr = [0]*log2deg
|
||||||
|
arr[0] = q
|
||||||
|
for i in range(1, log2deg):
|
||||||
|
arr[i] = multiply_polynomials_in_Z2(arr[i-1], arr[i-1], p, var)
|
||||||
|
|
||||||
|
res = PR(1)
|
||||||
|
for i, b in enumerate(bin(exp)[:1:-1]):
|
||||||
|
if b == '1':
|
||||||
|
res = multiply_polynomials_in_Z2(res, arr[i], p, var)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def multiply_polynomials_in_Z2(q1, q2, p, var):
|
||||||
|
if q1.degree() > p.degree():
|
||||||
|
raise ValueError('Unsupported!')
|
||||||
|
|
||||||
|
if q1.degree() == p.degree():
|
||||||
|
q1 += p
|
||||||
|
|
||||||
|
arr = [0]*(q2.degree()+1)
|
||||||
|
arr[0] = q1
|
||||||
|
for i in range(1, q2.degree()+1):
|
||||||
|
arr[i] = arr[i-1]*PR(var)
|
||||||
|
if arr[i].degree() == p.degree():
|
||||||
|
arr[i] += p
|
||||||
|
|
||||||
|
return sum(arr[i] for i in q2.exponents())
|
||||||
|
|
||||||
|
print(target)
|
||||||
|
res = polynomial_power_in_Z2(gamma, exp, p, var)
|
||||||
|
print(res)
|
||||||
|
res = polynomial_power_in_Z2_V2(gamma, exp, p, var)
|
||||||
|
print(res)
|
BIN
V1/simple_string_matching
Executable file
BIN
V1/simple_string_matching
Executable file
Binary file not shown.
65
V1/simple_string_matching.cpp
Normal file
65
V1/simple_string_matching.cpp
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
/* #define NDEBUG */
|
||||||
|
#include "Rabin_fingerprint.hpp"
|
||||||
|
#include "general_library.hpp"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <string>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
void print_match (size_t index, size_t length, std::string &T) {
|
||||||
|
std::cout << "Match found at index " << index << " with the text \"";
|
||||||
|
for (size_t i = 0; i < length; i++)
|
||||||
|
std::cout << T[index + i];
|
||||||
|
std::cout << "\"" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
/* std::ifstream ifs("../books/the_complete_works_of_william_shakespeare.txt"); */
|
||||||
|
std::ifstream ifs("../books/genji_monogatari_english.txt");
|
||||||
|
std::string T( (std::istreambuf_iterator<char>(ifs) ),
|
||||||
|
(std::istreambuf_iterator<char>() ) );
|
||||||
|
|
||||||
|
/* std::string T = "Hello, this is my test string averylongword is a necessary word to exceed the 32 bit window."; */
|
||||||
|
// Test without the modulo polynomial - and two matches
|
||||||
|
std::string P = "word";
|
||||||
|
// Test with the modulo polynomial
|
||||||
|
/* std::string P = "averylongword"; */
|
||||||
|
|
||||||
|
std::cout << "Searching for pattern:" << std::endl;
|
||||||
|
std::cout << " " << P << std::endl;
|
||||||
|
/* std::cout << "in text:" << std::endl; */
|
||||||
|
/* std::cout << " " << T << std::endl; */
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
/* uint32_t polynomial = pow(2, 30) + pow(2, 2) + 1; // x^31 + x^3 + 1 */
|
||||||
|
uint32_t polynomial = get_random_irreducible_polynomial_in_Z2(31);
|
||||||
|
/* uint32_t polynomial = 0b11010011100100000111101011110111; */
|
||||||
|
// Test without the modulo polynomial
|
||||||
|
size_t window_size_in_bits = P.length()*8;
|
||||||
|
|
||||||
|
// Hash the pattern
|
||||||
|
Rabin_fingerprint fP(polynomial, window_size_in_bits);
|
||||||
|
for (char c : P)
|
||||||
|
fP.push_char(c);
|
||||||
|
|
||||||
|
// Hash the text
|
||||||
|
Rabin_fingerprint fT(polynomial, window_size_in_bits);
|
||||||
|
for (size_t i = 0; i < P.length(); i++)
|
||||||
|
fT.push_char(T[i]);
|
||||||
|
if (fT.get_fingerprint() == fP.get_fingerprint())
|
||||||
|
print_match(0, P.length(), T);
|
||||||
|
|
||||||
|
for (size_t i = P.length(); i < T.length(); i++) {
|
||||||
|
fT.slide_char(T[i], T[i-P.length()]);
|
||||||
|
if (fT.get_fingerprint() == fP.get_fingerprint())
|
||||||
|
print_match(i-P.length()+1, P.length(), T);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << std::endl;
|
||||||
|
std::cout << "Done!" << std::endl;
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
97
V1/test_rabin_fingerprint.sage
Normal file
97
V1/test_rabin_fingerprint.sage
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
from random import randint
|
||||||
|
|
||||||
|
F = GF(2)
|
||||||
|
PR = PolynomialRing(F, 'x')
|
||||||
|
k = 31
|
||||||
|
P = PR.irreducible_element(k)
|
||||||
|
m = 100
|
||||||
|
|
||||||
|
# Test the calculation for moving the left edge of the sliding window
|
||||||
|
for _ in range(1):
|
||||||
|
q = tuple(randint(0, 1) for _ in range(m))
|
||||||
|
for i in range(1, m+1):
|
||||||
|
for j in range(i+1, m+1):
|
||||||
|
assert (PR(q[:i]) % P) == (PR(q) % P) - (PR(f'x^{i}')*PR(q[i:]) % P)
|
||||||
|
|
||||||
|
# Moving the right side of the window is as described in the Rabin Fingerprint article
|
||||||
|
|
||||||
|
# Next step (TODO)
|
||||||
|
# pattern = (1,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1)
|
||||||
|
# period = (1,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0)
|
||||||
|
# m = len(pattern)
|
||||||
|
# s_period = tuple(list(period)+[0]*(m-len(period)))
|
||||||
|
# pattern_1 = (1)
|
||||||
|
# period_1 = (1)
|
||||||
|
# s_period_1 = tuple(list(period_1)+[0]*(m-len(period_1)))
|
||||||
|
# pattern_2 = (1,0)
|
||||||
|
# period_2 = (1,0)
|
||||||
|
# s_period_2 = tuple(list(period_2)+[0]*(m-len(period_2)))
|
||||||
|
# pattern_4 = (1,0,1,1)
|
||||||
|
# period_4 = (1,0,1)
|
||||||
|
# s_period_4 = tuple(list(period_4)+[0]*(m-len(period_4)))
|
||||||
|
# pattern_8 = (1,0,1,1,1,1,0,1)
|
||||||
|
# period_8 = (1,0,1,1,1)
|
||||||
|
# s_period_8 = tuple(list(period_8)+[0]*(m-len(period_8)))
|
||||||
|
# pattern_16 = (1,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1)
|
||||||
|
# period_16 = (1,0,1,1,1,1,0,1,1,0,1,0)
|
||||||
|
# s_period_16 = tuple(list(period_16)+[0]*(m-len(period_16)))
|
||||||
|
|
||||||
|
# for i in range(len(pattern)-len(pattern_4)-len(period_4)):
|
||||||
|
# assert PR(pattern[i+len(period_4):i+len(pattern_4)+len(period_4)]) == PR(pattern[i:i+len(pattern_4)] + pattern[i+len(pattern_4):i+len(pattern_4)+len(period_4)]) - PR(s_period_4)
|
||||||
|
# assert PR(pattern[i+len(period_4):i+len(pattern_4)+len(period_4)]) == pattern[i-len(period_4):i+len(pattern_4)-len(period_4)]+pattern[i+len(pattern_4)-len(period_4):i+len(pattern_4)-len(period_4)] - PR(s_period_4)
|
||||||
|
|
||||||
|
# Does the bits of a prime number correspond to the coefficients of an irreducible polynomial?
|
||||||
|
k = 31
|
||||||
|
reducible = 0
|
||||||
|
irreducible = 0
|
||||||
|
for p in Primes()[200000000:200010000]:#[100000:100000]:
|
||||||
|
# for p in range(2147483648,2147483648+10000):
|
||||||
|
poly = PR(tuple(bin(p)[2:][::-1]))
|
||||||
|
# print(f'{poly.degree()}')
|
||||||
|
if poly.degree() > k:
|
||||||
|
break
|
||||||
|
if poly.degree() != k:
|
||||||
|
continue
|
||||||
|
irr = poly.is_irreducible()
|
||||||
|
# print(f'The polynomial {poly}, corresponding to the prime {p}, being an irreducible polynomial is {irr}.')
|
||||||
|
if irr:
|
||||||
|
irreducible += 1
|
||||||
|
else:
|
||||||
|
reducible += 1
|
||||||
|
|
||||||
|
print(f'{irreducible}/{reducible+irreducible} of the primes correspond to irreducible polynomials. That is {float(irreducible/(reducible+irreducible))*100}%')
|
||||||
|
|
||||||
|
progress = 0
|
||||||
|
the_same = 0
|
||||||
|
different = 0
|
||||||
|
for p in Primes()[200000000:200010000]:
|
||||||
|
progress += 1
|
||||||
|
print(f'\r{progress}/10000', end='')
|
||||||
|
p_poly = PR(tuple(bin(p)[2:][::-1]))
|
||||||
|
if not p_poly.is_irreducible():
|
||||||
|
continue
|
||||||
|
# i = p + randint(1,2**29)
|
||||||
|
i = p + randint(1,p-1)
|
||||||
|
# print(f'{i}, {p}, {2*p}, {i%p}')
|
||||||
|
i_poly = PR(tuple(bin(i)[2:][::-1]))
|
||||||
|
i_mod_p = i % p
|
||||||
|
i_mod_p_poly = PR(tuple(bin(i_mod_p)[2:][::-1]))
|
||||||
|
# print(f'i_poly: {i_poly}')
|
||||||
|
# print(f'p_poly: {p_poly}')
|
||||||
|
# print(f'i_mod_p_poly: {i_mod_p_poly}')
|
||||||
|
if i_mod_p_poly == i_poly % p_poly:
|
||||||
|
the_same += 1
|
||||||
|
else:
|
||||||
|
different += 1
|
||||||
|
# if not i_mod_p_poly == i_poly - (p_poly - PR(f'x^{p_poly.degree()}')):
|
||||||
|
# if not i_mod_p_poly == i_poly % p_poly:
|
||||||
|
# print()
|
||||||
|
# print('ERROR')
|
||||||
|
# print(i_mod_p_poly)
|
||||||
|
# # print(i_poly - (p_poly - PR(f'x^{p_poly.degree()}')))
|
||||||
|
# print(i_poly % p_poly)
|
||||||
|
# break
|
||||||
|
# assert i_mod_p_poly == i_poly - (p_poly - PR(f'x^{p_poly.degree()}'))
|
||||||
|
|
||||||
|
print()
|
||||||
|
print(f'{the_same}/{the_same+different} are the same as if we did polynomial calculations. This is {float(the_same/(the_same+different))*100}%')
|
4097
books/genji_monogatari_english.txt
Normal file
4097
books/genji_monogatari_english.txt
Normal file
File diff suppressed because it is too large
Load Diff
166902
books/the_complete_works_of_william_shakespeare.txt
Normal file
166902
books/the_complete_works_of_william_shakespeare.txt
Normal file
File diff suppressed because it is too large
Load Diff
73
compare_fingerprint_false_positive_probabilities.sage
Normal file
73
compare_fingerprint_false_positive_probabilities.sage
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
# Rabin fingerprint: k > lg(n*m/e), e is the upper bound on the error probability
|
||||||
|
# Karp-Rabin fingerprint: <= pi(n(n-m+1))/pi(M)
|
||||||
|
|
||||||
|
# 32-bit
|
||||||
|
M = 2**32-1
|
||||||
|
k = 31
|
||||||
|
|
||||||
|
# 16-bit
|
||||||
|
# M = 2**16-1
|
||||||
|
# k = 13
|
||||||
|
|
||||||
|
def fpi(a):
|
||||||
|
p = Primes()
|
||||||
|
a_next = p.next(a)
|
||||||
|
i = 0
|
||||||
|
jump = 1
|
||||||
|
lastdirection = -2
|
||||||
|
while(True):
|
||||||
|
print(f'{jump:010d}', end='\r')
|
||||||
|
# print(f'{i}, {jump}')
|
||||||
|
val = p.unrank(i)
|
||||||
|
if val == a_next:
|
||||||
|
return i # i-1, is the value we search for, but we count from 0, so i is correct
|
||||||
|
elif val > a_next:
|
||||||
|
if lastdirection == 2:
|
||||||
|
# print('mul')
|
||||||
|
lastdirection -= 1
|
||||||
|
jump *= 2
|
||||||
|
else:
|
||||||
|
# print('div')
|
||||||
|
jump //= 2
|
||||||
|
lastdirection += 1
|
||||||
|
# print(jump)
|
||||||
|
i -= jump
|
||||||
|
else:
|
||||||
|
if lastdirection == -2:
|
||||||
|
# print('mul')
|
||||||
|
lastdirection += 1
|
||||||
|
jump *= 2
|
||||||
|
else:
|
||||||
|
# print('div')
|
||||||
|
jump //= 2
|
||||||
|
lastdirection -= 1
|
||||||
|
i += jump
|
||||||
|
|
||||||
|
# Rabin fingerprint
|
||||||
|
# k = lg(n*m/e) <=>
|
||||||
|
# 2^k = n*m/e <=>
|
||||||
|
# 2^k/(n*m) = 1/e <=>
|
||||||
|
# n*m/(2^k) = e
|
||||||
|
|
||||||
|
# Karp-Rabin fingerprint
|
||||||
|
# e = pi(n(n-m+1))/pi(M)
|
||||||
|
|
||||||
|
# print(pi(2**32-1))
|
||||||
|
|
||||||
|
# e_rabin = n*m/(2^k)
|
||||||
|
|
||||||
|
piM = fpi(M)
|
||||||
|
for m in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]:
|
||||||
|
for n in [10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000]:
|
||||||
|
if m>=n:
|
||||||
|
continue
|
||||||
|
print(f'(n, m) = ({n}, {m})')
|
||||||
|
e_rabin = n*m/(2**k)
|
||||||
|
print(f'Rabin: {float(e_rabin):.2e}')
|
||||||
|
e_karpr = fpi(n*(n-m+1))/piM
|
||||||
|
print(f'KarpR: {float(e_karpr):.2e}')
|
||||||
|
print()
|
||||||
|
print(f'{e_karpr} >= 1 is {e_karpr >= 1} | {e_rabin} >= 1 is {e_rabin >= 1}')
|
||||||
|
if float(e_karpr) >= 1 or float(e_rabin) >= 1:
|
||||||
|
break
|
||||||
|
|
175
ex53.cpp
Normal file
175
ex53.cpp
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
/* #define NDEBUG */
|
||||||
|
|
||||||
|
#include "hash_table.hpp"
|
||||||
|
#include "hashing_algorithms.hpp"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <chrono>
|
||||||
|
#include <iostream>
|
||||||
|
#include <bitset>
|
||||||
|
#include <fstream>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
|
const int max_string_length = 256;
|
||||||
|
const int D = (max_string_length+7) >> 3; // D = ceil(max_string_length/size(char))
|
||||||
|
|
||||||
|
My_string * read_word(const char * book, size_t * reading_progress);
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
const clock_t clock_before_seed = clock();
|
||||||
|
// load seed generated by random.org
|
||||||
|
string filename = "random_org_16_bit_numbers.txt";
|
||||||
|
std::ifstream seed_file(filename);
|
||||||
|
uint16_t seed_part;
|
||||||
|
uint16_t seed_parts[D << 2];
|
||||||
|
size_t count = 0;
|
||||||
|
while (seed_file >> seed_part) {
|
||||||
|
seed_parts[count] = seed_part;
|
||||||
|
count++;
|
||||||
|
if (count == D << 2)
|
||||||
|
break; // the seed is large enough for the max_string_length
|
||||||
|
}
|
||||||
|
if (count != D << 2) {
|
||||||
|
cout << "The current seed is not large enough. Please extend it by appending the numbers at https://www.random.org/integers/?num=10000&min=0&max=65535&col=1&base=10&format=plain&rnd=new." << endl;
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
// Get 64 bit seed from 16 bit seed
|
||||||
|
uint64_t seed[D];
|
||||||
|
memcpy(seed, seed_parts, D*sizeof(seed[0]));
|
||||||
|
|
||||||
|
const clock_t clock_before_hash_table = clock();
|
||||||
|
|
||||||
|
size_t l = 1; // initially, hash to a table of up to 1024 distinct words (2^10)
|
||||||
|
Hash_table ht = Hash_table(new Hash_function(seed, multiply_shift_string, l, D));
|
||||||
|
|
||||||
|
const clock_t clock_before_loading_book = clock();
|
||||||
|
|
||||||
|
// choose a book
|
||||||
|
/* std::ifstream ifs("genji_monogatari_english.txt"); */
|
||||||
|
/* std::ifstream ifs("Child_of_Light.txt"); */
|
||||||
|
/* std::ifstream ifs("the_adventures_of_sherlock_holmes.txt"); */
|
||||||
|
/* std::ifstream ifs("dracula.txt"); */
|
||||||
|
std::ifstream ifs("the_complete_works_of_william_shakespeare.txt");
|
||||||
|
|
||||||
|
string book_string( (std::istreambuf_iterator<char>(ifs) ),
|
||||||
|
(std::istreambuf_iterator<char>() ) );
|
||||||
|
const char * book = book_string.c_str();
|
||||||
|
const size_t book_length = book_string.size();
|
||||||
|
|
||||||
|
const clock_t clock_before_reading = clock();
|
||||||
|
|
||||||
|
std::vector<My_string *> words;
|
||||||
|
|
||||||
|
size_t reading_progress = 0;
|
||||||
|
while (reading_progress < book_length-1) { // book_length includes '\0' which reading_progress avoids
|
||||||
|
My_string * word = read_word(book, &reading_progress);
|
||||||
|
if (word->size > 0)
|
||||||
|
words.push_back(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
const clock_t clock_after_reading = clock();
|
||||||
|
|
||||||
|
size_t count_words = 0;
|
||||||
|
for (My_string * word : words) {
|
||||||
|
count_words++;
|
||||||
|
ht.hash(word);
|
||||||
|
if (ht.is_time_for_rehash()) {
|
||||||
|
l++; // double the universe which we hashes to
|
||||||
|
ht.rehash(new Hash_function(seed, multiply_shift_string, l, D));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const clock_t clock_after_hashing = clock();
|
||||||
|
|
||||||
|
Hash_function hf = Hash_function(seed, multiply_shift_string, 10, D);
|
||||||
|
|
||||||
|
const clock_t clock_after_init_hash_function = clock();
|
||||||
|
|
||||||
|
uint64_t hashed_value = 0;
|
||||||
|
|
||||||
|
for (My_string * word : words) {
|
||||||
|
/* hf.hash(word); */
|
||||||
|
hashed_value += hf.hash(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
const clock_t clock_after_hashing_only = clock();
|
||||||
|
|
||||||
|
cout << "Sum of the hashed values (after overflow): " << hashed_value << endl;
|
||||||
|
|
||||||
|
cout << "Nr of words: " << count_words << endl;
|
||||||
|
cout << "Distinct words: " << ht.get_distict_words() << endl;
|
||||||
|
|
||||||
|
cout << "Time: Load seed: " << float( clock_before_hash_table - clock_before_seed ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Init hash table: " << float( clock_before_loading_book - clock_before_hash_table ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Load book: " << float( clock_before_reading - clock_before_loading_book ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Read: " << float( clock_after_reading - clock_before_reading ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Hash & Table: " << float( clock_after_hashing - clock_after_reading ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Total: " << float( clock_after_hashing - clock_before_seed ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Init hash function: " << float( clock_after_init_hash_function - clock_after_hashing ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Hash, no table: " << float( clock_after_hashing_only - clock_after_init_hash_function ) / CLOCKS_PER_SEC << endl;
|
||||||
|
|
||||||
|
for (My_string * word : words)
|
||||||
|
delete word;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// BONUS: Polynomium hashing
|
||||||
|
uint32_t result_un;
|
||||||
|
size_t book_as_integer_length = (book_length+3)>>2; // ceil(book_length/4)
|
||||||
|
uint32_t book_as_integer[book_as_integer_length];
|
||||||
|
book_as_integer[book_as_integer_length-1] = 0;
|
||||||
|
memcpy(book_as_integer, book, book_length*sizeof(book[0]));
|
||||||
|
|
||||||
|
uint32_t a[3] = {12313,2212312,123332};
|
||||||
|
uint32_t b[3] = {5345,213213,123123};
|
||||||
|
uint32_t c[3] = {3231,213144,450022};
|
||||||
|
|
||||||
|
const clock_t begin_time_pol = clock();
|
||||||
|
result_un = polynomial_vector(book_as_integer, a, b, c, book_as_integer_length >> 2, 32);
|
||||||
|
const clock_t end_time_pol = clock();
|
||||||
|
cout << "polynomium hash function time: " << float( end_time_pol - begin_time_pol ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Result: " << result_un << endl;
|
||||||
|
|
||||||
|
const clock_t begin_time_pol_tun = clock();
|
||||||
|
result_un = polynomial_vector_tuned(book_as_integer, a, b, c, book_as_integer_length >> 2, 32, seed);
|
||||||
|
const clock_t end_time_pol_tun = clock();
|
||||||
|
cout << "tuned polynomium hash function time: " << float( end_time_pol_tun - begin_time_pol_tun ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Result: " << result_un << endl;
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
My_string * read_word(const char * book, size_t * reading_progress) {
|
||||||
|
bool word_started = false;
|
||||||
|
char word[max_string_length];
|
||||||
|
size_t word_length = 0;
|
||||||
|
char c;
|
||||||
|
while (book[*reading_progress+1] != '\0') {
|
||||||
|
(*reading_progress)++;
|
||||||
|
c = book[*reading_progress];
|
||||||
|
if (word_started) {
|
||||||
|
if (isalnum(c)) {
|
||||||
|
if (word_length != max_string_length) { // crop words longer than the max_string_length
|
||||||
|
word[word_length] = tolower(c);
|
||||||
|
word_length++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return new My_string(word, word_length);
|
||||||
|
}
|
||||||
|
} else if (isalnum(c)) {
|
||||||
|
word_started = true;
|
||||||
|
word[word_length] = c;
|
||||||
|
word_length++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new My_string(word, word_length);
|
||||||
|
}
|
48
fagprojekt_code/Makefile
Normal file
48
fagprojekt_code/Makefile
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
CC = cc # C compiler
|
||||||
|
# cc and gcc is the same
|
||||||
|
CXX = c++ # C++ compiler
|
||||||
|
# c++
|
||||||
|
# g++
|
||||||
|
# clang++
|
||||||
|
CPPFLAGS = # preprocessor flags
|
||||||
|
CFLAGS = -Wall -Wfloat-equal -std=c99 -O0 -mtune=native -funroll-loops $(OPT) # C compiler flags
|
||||||
|
# -Wall: Warnings all
|
||||||
|
# -Wfloat-equal: Warn if comparing floats with ==
|
||||||
|
# -OX: Optimization level -O0 (none, default), -O1, -O2, -O3 (most, may increase file size), -Os (decrease file size), -ffast-math
|
||||||
|
# -mtune: Tune for specific/generic CPU
|
||||||
|
# -funrool-loops: Unroll loops when compiling
|
||||||
|
# -fopenmp: Enable OpenMP (When running, set the environmental variable OMP_NUM_THREADS)
|
||||||
|
# $(OPT): Passes any parameter given when calling make with 'make OPT=...'
|
||||||
|
CXXFLAGS = -Wall -std=c++17 -O0 # C++ compiler flags
|
||||||
|
LDFLAGS = # linker flags
|
||||||
|
LDLIBS = # library flags
|
||||||
|
# -lm (for <math.h>)
|
||||||
|
# -lblas (BASIC LINEAR ALGEBRA SUBPROGRAMS)
|
||||||
|
# -lcblas (for <cblas.h>)
|
||||||
|
# -lopenblas (NOT INSTALLED; blas and cblas in one; conflicts with blas and cblas)
|
||||||
|
# -llapack (LAPACKE)
|
||||||
|
# -llapacke (LAPACK) NOTE: The order of "-llapacke -llapack -lblas" is very important
|
||||||
|
#LINK.o = $(CXX) $(LDFLAGS) # use CXX for linking
|
||||||
|
|
||||||
|
# What to do when making ex1.c
|
||||||
|
#ex1: datasize1.o
|
||||||
|
|
||||||
|
# What to do when making ex1.cpp (C++ style)
|
||||||
|
#ex1: datasize1.cpp
|
||||||
|
|
||||||
|
ex53: hashed_string.cpp hash_table.cpp hashing_algorithms.cpp
|
||||||
|
|
||||||
|
eq17: hashed_string.cpp hash_table.cpp hashing_algorithms.cpp
|
||||||
|
|
||||||
|
# Tell the compiler that 'clean' isn't referring to a file
|
||||||
|
#.PHONY: clean
|
||||||
|
# A make target that cleans (by deleting files)
|
||||||
|
|
||||||
|
clean:
|
||||||
|
$(RM) fast_modulo *.o
|
||||||
|
$(RM) eq17 *.o
|
||||||
|
$(RM) ex53 *.o
|
||||||
|
$(RM) hashed_string *.o
|
||||||
|
$(RM) hash_table *.o
|
||||||
|
|
||||||
|
fast_modulo:
|
57
fagprojekt_code/ascii_to_bits.cpp
Normal file
57
fagprojekt_code/ascii_to_bits.cpp
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
/* #include <chrono> */
|
||||||
|
#include <iostream>
|
||||||
|
/* #include <stdio.h> */
|
||||||
|
/* #include <cmath> */
|
||||||
|
/* #include <string.h> */
|
||||||
|
/* #include <stdint.h> */
|
||||||
|
#include <bitset>
|
||||||
|
#include <fstream>
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
|
||||||
|
string s = "test";
|
||||||
|
|
||||||
|
const char * c = s.c_str();
|
||||||
|
|
||||||
|
for (size_t i = 0; i < s.size(); i++) {
|
||||||
|
cout << std::bitset<__CHAR_BIT__>( c[i] ) << " ";
|
||||||
|
}
|
||||||
|
cout << endl;
|
||||||
|
|
||||||
|
std::ifstream ifs("genji_monogatari_english.txt");
|
||||||
|
string genji( (std::istreambuf_iterator<char>(ifs) ),
|
||||||
|
(std::istreambuf_iterator<char>() ) );
|
||||||
|
|
||||||
|
const char * c2 = genji.c_str();
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 1000; i++) {
|
||||||
|
/* cout << std::bitset<__CHAR_BIT__>( c2[i] ) << " "; */
|
||||||
|
cout << c2[i];
|
||||||
|
}
|
||||||
|
cout << endl;
|
||||||
|
|
||||||
|
/* cout << genji.size() << endl; */
|
||||||
|
|
||||||
|
/* unsigned char bitgenji[genji.size()]; */
|
||||||
|
|
||||||
|
/* for (size_t i = 0; i < genji.size(); ++i) */
|
||||||
|
/* { */
|
||||||
|
/* bitgenji[i] = static_cast<unsigned char>( bitset<8>(genji[i]).to_ulong() ); */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/* for (size_t i = 0; i < 4; ++i) */
|
||||||
|
/* { */
|
||||||
|
/* cout << (bitset<8>) bitgenji[i] << endl; */
|
||||||
|
/* cout << (unsigned int) bitgenji[i] << endl; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/* for (size_t i = 0; i < 100; ++i) */
|
||||||
|
/* { */
|
||||||
|
/* cout << bitgenji[i]; */
|
||||||
|
/* } */
|
||||||
|
/* cout << endl; */
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
138
fagprojekt_code/eq17.cpp
Normal file
138
fagprojekt_code/eq17.cpp
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
#include "hashing_algorithms.hpp"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <chrono>
|
||||||
|
#include <iostream>
|
||||||
|
/* #include <stdio.h> */
|
||||||
|
#include <cmath>
|
||||||
|
/* #include <string.h> */
|
||||||
|
/* #include <stdint.h> */
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
unsigned int M = 9299983;
|
||||||
|
unsigned int m = 3226097;
|
||||||
|
unsigned int result;
|
||||||
|
|
||||||
|
const clock_t begin_time1 = clock();
|
||||||
|
for (unsigned int y = 0; y < M; y++) {
|
||||||
|
result = (unsigned int) ((float)(y*m)/(float)M);
|
||||||
|
}
|
||||||
|
const clock_t end_time1 = clock();
|
||||||
|
cout << "Naive time: " << float( end_time1 - begin_time1 ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Result: " << result << endl;
|
||||||
|
|
||||||
|
const clock_t begin_time2 = clock();
|
||||||
|
for (unsigned int y = 0; y < M; y += 10) {
|
||||||
|
result = (unsigned int) ((float)(y*m)/(float)M);
|
||||||
|
result = (unsigned int) ((float)((y+1)*m)/(float)M);
|
||||||
|
result = (unsigned int) ((float)((y+2)*m)/(float)M);
|
||||||
|
result = (unsigned int) ((float)((y+3)*m)/(float)M);
|
||||||
|
result = (unsigned int) ((float)((y+4)*m)/(float)M);
|
||||||
|
result = (unsigned int) ((float)((y+5)*m)/(float)M);
|
||||||
|
result = (unsigned int) ((float)((y+6)*m)/(float)M);
|
||||||
|
result = (unsigned int) ((float)((y+7)*m)/(float)M);
|
||||||
|
result = (unsigned int) ((float)((y+8)*m)/(float)M);
|
||||||
|
result = (unsigned int) ((float)((y+9)*m)/(float)M);
|
||||||
|
}
|
||||||
|
const clock_t end_time2 = clock();
|
||||||
|
cout << "Naive time (unrolled): " << float( end_time2 - begin_time2 ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Result: " << result << endl;
|
||||||
|
|
||||||
|
const clock_t begin_time6 = clock();
|
||||||
|
for (unsigned int y = 0; y < M; y++) {
|
||||||
|
result = y*m/M;
|
||||||
|
}
|
||||||
|
const clock_t end_time6 = clock();
|
||||||
|
cout << "Naive time (no-typecasting): " << float( end_time6 - begin_time6 ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Result: " << result << endl;
|
||||||
|
|
||||||
|
const clock_t begin_time7 = clock();
|
||||||
|
for (unsigned int y = 0; y < M; y += 10) {
|
||||||
|
result = y*m/M;
|
||||||
|
result = ((y+1)*m)/M;
|
||||||
|
result = ((y+2)*m)/M;
|
||||||
|
result = ((y+3)*m)/M;
|
||||||
|
result = ((y+4)*m)/M;
|
||||||
|
result = ((y+5)*m)/M;
|
||||||
|
result = ((y+6)*m)/M;
|
||||||
|
result = ((y+7)*m)/M;
|
||||||
|
result = ((y+8)*m)/M;
|
||||||
|
result = ((y+9)*m)/M;
|
||||||
|
}
|
||||||
|
const clock_t end_time7 = clock();
|
||||||
|
cout << "Naive time (no-typecasting, unrolled): " << float( end_time7 - begin_time7 ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Result: " << result << endl;
|
||||||
|
|
||||||
|
float mM = (float) m/(float) M;
|
||||||
|
const clock_t begin_time3 = clock();
|
||||||
|
for (unsigned int y = 0; y < M; y ++) {
|
||||||
|
result = (unsigned int) (((float) y)*mM);
|
||||||
|
}
|
||||||
|
const clock_t end_time3 = clock();
|
||||||
|
cout << "Precalculation time: " << float( end_time3 - begin_time3 ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Result: " << result << endl;
|
||||||
|
|
||||||
|
const clock_t begin_time4 = clock();
|
||||||
|
for (unsigned int y = 0; y < M; y += 10) {
|
||||||
|
result = (unsigned int) ((float) y*mM);
|
||||||
|
result = (unsigned int) ((float) (y+1)*mM);
|
||||||
|
result = (unsigned int) ((float) (y+2)*mM);
|
||||||
|
result = (unsigned int) ((float) (y+3)*mM);
|
||||||
|
result = (unsigned int) ((float) (y+4)*mM);
|
||||||
|
result = (unsigned int) ((float) (y+5)*mM);
|
||||||
|
result = (unsigned int) ((float) (y+6)*mM);
|
||||||
|
result = (unsigned int) ((float) (y+7)*mM);
|
||||||
|
result = (unsigned int) ((float) (y+8)*mM);
|
||||||
|
result = (unsigned int) ((float) (y+9)*mM);
|
||||||
|
}
|
||||||
|
const clock_t end_time4 = clock();
|
||||||
|
cout << "Precalculation time (unrolled): " << float( end_time4 - begin_time4 ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Result: " << result << endl;
|
||||||
|
|
||||||
|
/* for (unsigned int y = 0; y < M; y++){ */
|
||||||
|
/* // Very often, differentiating by multiplying first or dividing first results in the result being off by 1 from each other */
|
||||||
|
/* /1* if (abs((int) ((float)y*((float)m/(float)M)) - (int) ((float) y*mM)) >= 1) *1/ */
|
||||||
|
/* /1* cout <<(unsigned int) ((float)y*(float)m/(float)M) << " != " << (unsigned int) ((float) y*mM) << endl; *1/ */
|
||||||
|
/* /1* if (abs((int)(y*m/M - (y*m)/M)) >= 1) *1/ */
|
||||||
|
/* /1* cout <<y*m/M << " != " << (y*m)/M << endl; *1/ */
|
||||||
|
/* /1* if (abs((int)(((float)y*((float)m/(float)M)) - (y*m)/M)) >= 1) *1/ */
|
||||||
|
/* /1* cout <<(unsigned int)((float)y*((float)m/(float)M)) << " != " << (y*m)/M << endl; *1/ */
|
||||||
|
/* if (abs((int)(((float)y*(float)m)/(float)M - (unsigned int)y*(unsigned int)m/(unsigned int)M)) >= 1) */
|
||||||
|
/* cout <<((float)(y*m)/(float)M << " != " << (unsigned int)y*(unsigned int)m/(unsigned int)M << endl; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
// The non-precalculation model is susceptible to overflow
|
||||||
|
/* for (unsigned int y = 0; y < 40; y++) { */
|
||||||
|
/* cout << "y: " << y << ", "; */
|
||||||
|
/* cout << "m: " << m << ", "; */
|
||||||
|
/* cout << "M: " << M << ", "; */
|
||||||
|
/* cout << "mM: " << mM << ", "; */
|
||||||
|
/* cout << "y*m: " << y*m << ", "; */
|
||||||
|
/* cout << "(y*m)/M: " << (y*m)/M << ", "; */
|
||||||
|
/* cout << "y*(m/M): " << y*(m/M) << ", "; */
|
||||||
|
/* cout << y*m/M; */
|
||||||
|
/* cout << " != "; */
|
||||||
|
/* cout << (unsigned int) ((float)(y*m)/(float)M); */
|
||||||
|
/* cout << " != "; */
|
||||||
|
/* cout << (unsigned int) ((float)y*(float)m/(float)M); */
|
||||||
|
/* cout << " != "; */
|
||||||
|
/* cout << (unsigned int) ((float) y*mM); */
|
||||||
|
/* cout << endl; */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
uint64_t result_un;
|
||||||
|
uint64_t p = pow(2, 61)-1;
|
||||||
|
uint64_t m_un = pow(2, 20)+2;
|
||||||
|
|
||||||
|
const clock_t begin_time_un = clock();
|
||||||
|
for (uint64_t y = 0; y < 100000000; y++) {
|
||||||
|
/* result_un = multiply_mod_prime_mersenne_overflow(y, m, M, p, m_un); */
|
||||||
|
result_un = multiply_shift_strongly_universal(y, p, m_un, 20);
|
||||||
|
}
|
||||||
|
const clock_t end_time_un = clock();
|
||||||
|
cout << "Unrelated hash function time: " << float( end_time_un - begin_time_un ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Result: " << result_un << endl;
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
175
fagprojekt_code/ex53.cpp
Normal file
175
fagprojekt_code/ex53.cpp
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
/* #define NDEBUG */
|
||||||
|
|
||||||
|
#include "hash_table.hpp"
|
||||||
|
#include "hashing_algorithms.hpp"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <chrono>
|
||||||
|
#include <iostream>
|
||||||
|
#include <bitset>
|
||||||
|
#include <fstream>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
|
const int max_string_length = 256;
|
||||||
|
const int D = (max_string_length+7) >> 3; // D = ceil(max_string_length/size(char))
|
||||||
|
|
||||||
|
My_string * read_word(const char * book, size_t * reading_progress);
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
const clock_t clock_before_seed = clock();
|
||||||
|
// load seed generated by random.org
|
||||||
|
string filename = "random_org_16_bit_numbers.txt";
|
||||||
|
std::ifstream seed_file(filename);
|
||||||
|
uint16_t seed_part;
|
||||||
|
uint16_t seed_parts[D << 2];
|
||||||
|
size_t count = 0;
|
||||||
|
while (seed_file >> seed_part) {
|
||||||
|
seed_parts[count] = seed_part;
|
||||||
|
count++;
|
||||||
|
if (count == D << 2)
|
||||||
|
break; // the seed is large enough for the max_string_length
|
||||||
|
}
|
||||||
|
if (count != D << 2) {
|
||||||
|
cout << "The current seed is not large enough. Please extend it by appending the numbers at https://www.random.org/integers/?num=10000&min=0&max=65535&col=1&base=10&format=plain&rnd=new." << endl;
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
// Get 64 bit seed from 16 bit seed
|
||||||
|
uint64_t seed[D];
|
||||||
|
memcpy(seed, seed_parts, D*sizeof(seed[0]));
|
||||||
|
|
||||||
|
const clock_t clock_before_hash_table = clock();
|
||||||
|
|
||||||
|
size_t l = 1; // initially, hash to a table of up to 1024 distinct words (2^10)
|
||||||
|
Hash_table ht = Hash_table(new Hash_function(seed, multiply_shift_string, l, D));
|
||||||
|
|
||||||
|
const clock_t clock_before_loading_book = clock();
|
||||||
|
|
||||||
|
// choose a book
|
||||||
|
/* std::ifstream ifs("genji_monogatari_english.txt"); */
|
||||||
|
/* std::ifstream ifs("Child_of_Light.txt"); */
|
||||||
|
/* std::ifstream ifs("the_adventures_of_sherlock_holmes.txt"); */
|
||||||
|
/* std::ifstream ifs("dracula.txt"); */
|
||||||
|
std::ifstream ifs("the_complete_works_of_william_shakespeare.txt");
|
||||||
|
|
||||||
|
string book_string( (std::istreambuf_iterator<char>(ifs) ),
|
||||||
|
(std::istreambuf_iterator<char>() ) );
|
||||||
|
const char * book = book_string.c_str();
|
||||||
|
const size_t book_length = book_string.size();
|
||||||
|
|
||||||
|
const clock_t clock_before_reading = clock();
|
||||||
|
|
||||||
|
std::vector<My_string *> words;
|
||||||
|
|
||||||
|
size_t reading_progress = 0;
|
||||||
|
while (reading_progress < book_length-1) { // book_length includes '\0' which reading_progress avoids
|
||||||
|
My_string * word = read_word(book, &reading_progress);
|
||||||
|
if (word->size > 0)
|
||||||
|
words.push_back(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
const clock_t clock_after_reading = clock();
|
||||||
|
|
||||||
|
size_t count_words = 0;
|
||||||
|
for (My_string * word : words) {
|
||||||
|
count_words++;
|
||||||
|
ht.hash(word);
|
||||||
|
if (ht.is_time_for_rehash()) {
|
||||||
|
l++; // double the universe which we hashes to
|
||||||
|
ht.rehash(new Hash_function(seed, multiply_shift_string, l, D));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const clock_t clock_after_hashing = clock();
|
||||||
|
|
||||||
|
Hash_function hf = Hash_function(seed, multiply_shift_string, 10, D);
|
||||||
|
|
||||||
|
const clock_t clock_after_init_hash_function = clock();
|
||||||
|
|
||||||
|
uint64_t hashed_value = 0;
|
||||||
|
|
||||||
|
for (My_string * word : words) {
|
||||||
|
/* hf.hash(word); */
|
||||||
|
hashed_value += hf.hash(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
const clock_t clock_after_hashing_only = clock();
|
||||||
|
|
||||||
|
cout << "Sum of the hashed values (after overflow): " << hashed_value << endl;
|
||||||
|
|
||||||
|
cout << "Nr of words: " << count_words << endl;
|
||||||
|
cout << "Distinct words: " << ht.get_distict_words() << endl;
|
||||||
|
|
||||||
|
cout << "Time: Load seed: " << float( clock_before_hash_table - clock_before_seed ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Init hash table: " << float( clock_before_loading_book - clock_before_hash_table ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Load book: " << float( clock_before_reading - clock_before_loading_book ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Read: " << float( clock_after_reading - clock_before_reading ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Hash & Table: " << float( clock_after_hashing - clock_after_reading ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Total: " << float( clock_after_hashing - clock_before_seed ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Init hash function: " << float( clock_after_init_hash_function - clock_after_hashing ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Hash, no table: " << float( clock_after_hashing_only - clock_after_init_hash_function ) / CLOCKS_PER_SEC << endl;
|
||||||
|
|
||||||
|
for (My_string * word : words)
|
||||||
|
delete word;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// BONUS: Polynomium hashing
|
||||||
|
uint32_t result_un;
|
||||||
|
size_t book_as_integer_length = (book_length+3)>>2; // ceil(book_length/4)
|
||||||
|
uint32_t book_as_integer[book_as_integer_length];
|
||||||
|
book_as_integer[book_as_integer_length-1] = 0;
|
||||||
|
memcpy(book_as_integer, book, book_length*sizeof(book[0]));
|
||||||
|
|
||||||
|
uint32_t a[3] = {12313,2212312,123332};
|
||||||
|
uint32_t b[3] = {5345,213213,123123};
|
||||||
|
uint32_t c[3] = {3231,213144,450022};
|
||||||
|
|
||||||
|
const clock_t begin_time_pol = clock();
|
||||||
|
result_un = polynomial_vector(book_as_integer, a, b, c, book_as_integer_length >> 2, 32);
|
||||||
|
const clock_t end_time_pol = clock();
|
||||||
|
cout << "polynomium hash function time: " << float( end_time_pol - begin_time_pol ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Result: " << result_un << endl;
|
||||||
|
|
||||||
|
const clock_t begin_time_pol_tun = clock();
|
||||||
|
result_un = polynomial_vector_tuned(book_as_integer, a, b, c, book_as_integer_length >> 2, 32, seed);
|
||||||
|
const clock_t end_time_pol_tun = clock();
|
||||||
|
cout << "tuned polynomium hash function time: " << float( end_time_pol_tun - begin_time_pol_tun ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Result: " << result_un << endl;
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
My_string * read_word(const char * book, size_t * reading_progress) {
|
||||||
|
bool word_started = false;
|
||||||
|
char word[max_string_length];
|
||||||
|
size_t word_length = 0;
|
||||||
|
char c;
|
||||||
|
while (book[*reading_progress+1] != '\0') {
|
||||||
|
(*reading_progress)++;
|
||||||
|
c = book[*reading_progress];
|
||||||
|
if (word_started) {
|
||||||
|
if (isalnum(c)) {
|
||||||
|
if (word_length != max_string_length) { // crop words longer than the max_string_length
|
||||||
|
word[word_length] = tolower(c);
|
||||||
|
word_length++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return new My_string(word, word_length);
|
||||||
|
}
|
||||||
|
} else if (isalnum(c)) {
|
||||||
|
word_started = true;
|
||||||
|
word[word_length] = c;
|
||||||
|
word_length++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new My_string(word, word_length);
|
||||||
|
}
|
202
fagprojekt_code/fast_modulo.cpp
Normal file
202
fagprojekt_code/fast_modulo.cpp
Normal file
@ -0,0 +1,202 @@
|
|||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
void printIntInBinary(unsigned int n);
|
||||||
|
void intToBinary(unsigned int n, char* p);
|
||||||
|
void strrev(char* p, unsigned int len);
|
||||||
|
unsigned int mod32BitMersenne(unsigned int x, unsigned int m);
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
|
||||||
|
unsigned int q = 2;
|
||||||
|
unsigned int x = pow(2, 2*q) - 1; // pow(2, 2*q) is the max value for x
|
||||||
|
unsigned int p = pow(2, q) - 1;
|
||||||
|
unsigned int y;
|
||||||
|
|
||||||
|
y=(x&p)+(x>>q);
|
||||||
|
if (y>=p) y-=p;
|
||||||
|
|
||||||
|
printf("q = %d, x = %d, p = %d, y = %d\n", q, x, p, y);
|
||||||
|
printf("x&p = %d, x>>q = %d\n", x&p, x>>q);
|
||||||
|
|
||||||
|
printIntInBinary(x);
|
||||||
|
printIntInBinary(p);
|
||||||
|
printIntInBinary(x&p);
|
||||||
|
printIntInBinary(x>>q);
|
||||||
|
|
||||||
|
|
||||||
|
printf("\n");
|
||||||
|
printIntInBinary( ((unsigned int)0b1111101111011101101<<26)>>26 );
|
||||||
|
printIntInBinary( ((unsigned int)0b1111101111011101101<<27)>>27 );
|
||||||
|
printIntInBinary( ((unsigned int)0B1111101111011101101<<28)>>28 );
|
||||||
|
printIntInBinary( ((unsigned int)0B1111101111011101101<<29)>>29 );
|
||||||
|
printIntInBinary( ((unsigned int)0B1111101111011101101<<30)>>30 );
|
||||||
|
|
||||||
|
unsigned int a = 0b0110000000;
|
||||||
|
unsigned int b = 0b1111111001;
|
||||||
|
printf("\n");
|
||||||
|
printIntInBinary(a);
|
||||||
|
printIntInBinary(b);
|
||||||
|
b = (b & ~0b110) | ((a & 0b110000000) >> 6);
|
||||||
|
printIntInBinary(a);
|
||||||
|
printIntInBinary(b);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* // Too general an approach */
|
||||||
|
/* // TODO: Special case where ax+b < p < m */
|
||||||
|
/* // ((ax+b) mod p) mod m, p=2^89-1, m=2^20 */
|
||||||
|
/* size_t r = 89; // aka q */
|
||||||
|
/* size_t bits = 32; */
|
||||||
|
/* size_t remainingBits = r; */
|
||||||
|
/* size_t lastIndex = 0; */
|
||||||
|
/* size_t len = 4; */
|
||||||
|
/* uint32_t data[len], tmp[len]; */
|
||||||
|
|
||||||
|
/* // mod p, p = 2^89-1 */
|
||||||
|
/* // x&p */
|
||||||
|
/* for (size_t i = 0; (i+1)*bits < r; i++ ) { */
|
||||||
|
/* tmp[i] = data[i]&0b1111'1111'1111'1111'1111'1111'1111'1111; // 32 ones. Standard since C++14 */
|
||||||
|
/* remainingBits -= 32; */
|
||||||
|
/* lastIndex = i+1; */
|
||||||
|
/* } */
|
||||||
|
/* size_t shiftLen = bits-remainingBits; // TODO: Check for 1-off errors */
|
||||||
|
/* tmp[lastIndex] = (data[lastIndex]<<shiftLen)>>shiftLen; // mod 2^remainingBits */
|
||||||
|
/* // x>>q */
|
||||||
|
/* /1* for (size_t i = 0; i < len; i++) { *1/ */
|
||||||
|
|
||||||
|
/* /1* } *1/ */
|
||||||
|
|
||||||
|
/* // mod m, m = 2^20 */
|
||||||
|
/* unsigned int mMinusOne = 0b1111'1111'1111'1111'1111; // 20 ones. Standard since C++14 */
|
||||||
|
/* data[len-1] = data[len-1]&mMinusOne; */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* // Can be optimized further */
|
||||||
|
/* size_t len = 4; // input parameter */
|
||||||
|
/* uint32_t data[len], tmp[len]; */
|
||||||
|
|
||||||
|
/* //// mod p, p = 2^89-1 */
|
||||||
|
/* // x&p */
|
||||||
|
/* tmp[len] = data[len]&0b1111'1111'1111'1111'1111'1111'1111'1111; // 32 ones. */
|
||||||
|
/* tmp[len] = data[len]&0b1111'1111'1111'1111'1111'1111'1111'1111; // 32 ones. */
|
||||||
|
/* tmp[len] = data[len]&0b1'1111'1111'1111'1111'1111'1111; // remaining bits = 89-32*2 = 25 ones. */
|
||||||
|
|
||||||
|
/* // x>>q */
|
||||||
|
|
||||||
|
/* //// mod m, m = 2^20 */
|
||||||
|
/* unsigned int mMinusOne = 0b1111'1111'1111'1111'1111; // 20 ones. */
|
||||||
|
/* data[len-1] = data[len-1]&mMinusOne; */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* // Optimize more! */
|
||||||
|
/* size_t len = 4; // input parameter */
|
||||||
|
/* uint32_t data[len]; // input parameter */
|
||||||
|
/* uint32_t tmp; */
|
||||||
|
|
||||||
|
/* //// mod p, p = 2^89-1 */
|
||||||
|
/* // x&p */
|
||||||
|
/* tmp = data[len-1]&0b1111'1111'1111'1111'1111; // 20 ones */
|
||||||
|
/* // x>>q */
|
||||||
|
/* // 89 - 2*32 = 25 so */
|
||||||
|
/* tmp = tmp + (data[len]>>5)&0b1111'1111'1111'1111'1111; // 20 ones */
|
||||||
|
|
||||||
|
/* //// mod m, m = 2^20 */
|
||||||
|
/* tmp = tmp&0b1111'1111'1111'1111'1111; // 20 ones = m - 1 */
|
||||||
|
|
||||||
|
/* // clean up before output */
|
||||||
|
/* for (size_t i = 0; i < len; i++) */
|
||||||
|
/* data[i] = 0; */
|
||||||
|
/* // output */
|
||||||
|
/* data[len-1] = tmp; */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* // More!!! */
|
||||||
|
/* size_t len = 4; // input parameter */
|
||||||
|
/* uint32_t data[len]; // input parameter */
|
||||||
|
|
||||||
|
/* //// mod p, p = 2^89-1 */
|
||||||
|
/* // x&p */
|
||||||
|
/* data[len-1] = data[len-1]&0b1111'1111'1111'1111'1111; // 20 ones */
|
||||||
|
/* // x>>q */
|
||||||
|
/* // 89 - 2*32 = 25 so */
|
||||||
|
/* data[len-1] = data[len-1] + ((data[len-3]>>5)&0b1111'1111'1111'1111'1111); // 20 ones */
|
||||||
|
|
||||||
|
/* //// mod m, m = 2^20 */
|
||||||
|
/* data[len-1] = data[len-1]&0b1111'1111'1111'1111'1111; // 20 ones = m - 1 */
|
||||||
|
|
||||||
|
/* // data[len-1] is the result */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
size_t len = 4; // input parameter
|
||||||
|
uint32_t data[len]; // input parameter
|
||||||
|
data[len-1] = (data[len-1] + (data[len-3]>>5))&0b1111'1111'1111'1111'1111; // 20 ones
|
||||||
|
// data[len-1] is the result
|
||||||
|
|
||||||
|
// Or as a function
|
||||||
|
// void mod89mod20 (uint32_t * data, size_t len) {
|
||||||
|
// data[len-1] = (data[len-1] + (data[len-3]>>5))&0b1111'1111'1111'1111'1111; // 20 ones
|
||||||
|
// }
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T mod (T x, T p) {
|
||||||
|
while (x >= p)
|
||||||
|
x -= p;
|
||||||
|
return x;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void printIntInBinary(unsigned int n) {
|
||||||
|
char b[100];
|
||||||
|
intToBinary(n, b);
|
||||||
|
printf("%5u = %10s\n", n, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
void intToBinary(unsigned int n, char* p) {
|
||||||
|
if (NULL == p)
|
||||||
|
return;
|
||||||
|
|
||||||
|
char* q = p;
|
||||||
|
|
||||||
|
int count = 0;
|
||||||
|
do {
|
||||||
|
if (n % 2 == 0)
|
||||||
|
*p = '0';
|
||||||
|
else
|
||||||
|
*p = '1';
|
||||||
|
|
||||||
|
p++;
|
||||||
|
|
||||||
|
n = n>>1;
|
||||||
|
count++;
|
||||||
|
} while (n > 0);
|
||||||
|
*p = '\0';
|
||||||
|
strrev(q, count);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void strrev(char* p, unsigned int len) {
|
||||||
|
char tmp;
|
||||||
|
int i, j = 0;
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
j = len - 1;
|
||||||
|
|
||||||
|
while (i < j) {
|
||||||
|
tmp = p[i];
|
||||||
|
p[i] = p[j];
|
||||||
|
p[j] = tmp;
|
||||||
|
i++;
|
||||||
|
j--;
|
||||||
|
}
|
||||||
|
}
|
56
fagprojekt_code/hash_table.cpp
Normal file
56
fagprojekt_code/hash_table.cpp
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
#include "hash_table.hpp"
|
||||||
|
#include "hashed_string.hpp"
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
Hash_table::Hash_table(Hash_function * hash_function) : hash_function(hash_function) {
|
||||||
|
m = hash_function->get_m();
|
||||||
|
|
||||||
|
words = new Hashed_string[m];
|
||||||
|
}
|
||||||
|
|
||||||
|
Hash_table::~Hash_table() {
|
||||||
|
delete hash_function;
|
||||||
|
delete[] words;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t Hash_table::get_distict_words() {
|
||||||
|
return distinct_words;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hash_table::hash(My_string * word) {
|
||||||
|
uint64_t hash = hash_function->hash(word);
|
||||||
|
distinct_words += words[hash].append(new Hashed_string(word));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hash_table::hash(Hashed_string * hs) {
|
||||||
|
uint64_t hash = hash_function->hash( &(hs->string) );
|
||||||
|
distinct_words += words[hash].append(hs);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hash_table::rehash(Hash_function * hash_function) {
|
||||||
|
delete this->hash_function;
|
||||||
|
this->hash_function = hash_function;
|
||||||
|
|
||||||
|
size_t old_m = m;
|
||||||
|
m = this->hash_function->get_m();
|
||||||
|
|
||||||
|
Hashed_string * old_words = this->words;
|
||||||
|
|
||||||
|
words = new Hashed_string[m];
|
||||||
|
|
||||||
|
distinct_words = 0; // reset, because we are hashing all the words once more
|
||||||
|
|
||||||
|
for (size_t i = 0; i < old_m; i++) {
|
||||||
|
Hashed_string * hs = &(old_words[i]);
|
||||||
|
// skip the initializer Hashed_string
|
||||||
|
while (hs->next != NULL) {
|
||||||
|
hs = hs->next;
|
||||||
|
hash(new Hashed_string(hs));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// delete the old stuff
|
||||||
|
delete[] old_words;
|
||||||
|
}
|
56
fagprojekt_code/hash_table.hpp
Normal file
56
fagprojekt_code/hash_table.hpp
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
#ifndef HASH_TABLE_H
|
||||||
|
#define HASH_TABLE_H
|
||||||
|
|
||||||
|
#include "hashed_string.hpp"
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
struct Hash_function {
|
||||||
|
private:
|
||||||
|
uint64_t * seed;
|
||||||
|
uint64_t(* hash_function)(const My_string *, const uint64_t *, uint64_t *, size_t);
|
||||||
|
size_t l;
|
||||||
|
uint64_t * x;
|
||||||
|
|
||||||
|
public:
|
||||||
|
Hash_function(uint64_t * seed, uint64_t(* hash_function)(const My_string *, const uint64_t *, uint64_t *, size_t), size_t l, size_t D) {
|
||||||
|
this->seed = seed;
|
||||||
|
this->hash_function = hash_function;
|
||||||
|
this->l = l;
|
||||||
|
this->x = new uint64_t[D];
|
||||||
|
}
|
||||||
|
|
||||||
|
~Hash_function() {
|
||||||
|
delete[] x;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t get_m() {
|
||||||
|
return 1 << l;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t hash(const My_string * string) {
|
||||||
|
return hash_function(string, seed, x, l);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Hash_table {
|
||||||
|
private:
|
||||||
|
Hash_function * hash_function;
|
||||||
|
Hashed_string * words;
|
||||||
|
size_t distinct_words = 0;
|
||||||
|
size_t m;
|
||||||
|
|
||||||
|
public:
|
||||||
|
Hash_table(Hash_function * hash_function);
|
||||||
|
~Hash_table();
|
||||||
|
|
||||||
|
size_t get_distict_words();
|
||||||
|
|
||||||
|
void hash(My_string * word);
|
||||||
|
void hash(Hashed_string * hs);
|
||||||
|
|
||||||
|
void rehash(Hash_function * hash_function);
|
||||||
|
int is_time_for_rehash() {return distinct_words >= m;}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
78
fagprojekt_code/hashed_string.cpp
Normal file
78
fagprojekt_code/hashed_string.cpp
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
#include "hashed_string.hpp"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string>
|
||||||
|
#include <string.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
My_string::My_string(char * chars, size_t size) {
|
||||||
|
this->size = size;
|
||||||
|
this->chars = new char[size];
|
||||||
|
if (size != 0) {
|
||||||
|
memcpy(this->chars, chars, size*sizeof(this->chars[0]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
My_string::~My_string() {
|
||||||
|
delete[] chars;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool My_string::operator==(const My_string& other)
|
||||||
|
{
|
||||||
|
if (size != other.size)
|
||||||
|
return false;
|
||||||
|
return strncmp(chars, other.chars, size) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Hashed_string::Hashed_string() : string(My_string(NULL, 0)) {
|
||||||
|
initializer = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
Hashed_string::Hashed_string(My_string * s) : string(My_string(s->chars, s->size)) {};
|
||||||
|
|
||||||
|
Hashed_string::Hashed_string(Hashed_string * hs) : string(My_string(hs->string.chars, hs->string.size)), appearances(hs->appearances) {};
|
||||||
|
|
||||||
|
Hashed_string::~Hashed_string() {
|
||||||
|
// if you delete the initializer, delete the whole chain
|
||||||
|
if (initializer) {
|
||||||
|
while (next != NULL)
|
||||||
|
next->remove(next);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int Hashed_string::append(Hashed_string * new_string) {
|
||||||
|
if (!initializer) {
|
||||||
|
if (string == new_string->string) {
|
||||||
|
appearances += new_string->appearances;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (last_element) {
|
||||||
|
new_string->previous = this;
|
||||||
|
next = new_string;
|
||||||
|
last_element = false;
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return next->append(new_string);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hashed_string::remove(Hashed_string * removed_string) {
|
||||||
|
if (initializer && last_element)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (string == removed_string->string) {
|
||||||
|
if (last_element) {
|
||||||
|
previous->last_element = true;
|
||||||
|
previous->next = NULL;
|
||||||
|
} else {
|
||||||
|
previous->next = next;
|
||||||
|
next->previous = previous;
|
||||||
|
}
|
||||||
|
|
||||||
|
delete this;
|
||||||
|
} else if (!last_element)
|
||||||
|
next->remove(removed_string);
|
||||||
|
}
|
34
fagprojekt_code/hashed_string.hpp
Normal file
34
fagprojekt_code/hashed_string.hpp
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
#ifndef HASHED_OBJECT_H
|
||||||
|
#define HASHED_OBJECT_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
struct My_string {
|
||||||
|
char * chars;
|
||||||
|
size_t size;
|
||||||
|
|
||||||
|
My_string(char *, size_t);
|
||||||
|
~My_string();
|
||||||
|
|
||||||
|
bool operator==(const My_string& other);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Hashed_string {
|
||||||
|
Hashed_string * next = NULL;
|
||||||
|
Hashed_string * previous = NULL;
|
||||||
|
bool last_element = true;
|
||||||
|
bool initializer = false; // signal the object to act as the "link" between the its reference and the linked list of hashed elemenets. This instance DOES NOT represent a hashed value.
|
||||||
|
|
||||||
|
My_string string;
|
||||||
|
unsigned int appearances = 1;
|
||||||
|
|
||||||
|
Hashed_string();
|
||||||
|
Hashed_string(My_string * s);
|
||||||
|
Hashed_string(Hashed_string * hs);
|
||||||
|
~Hashed_string();
|
||||||
|
|
||||||
|
int append(Hashed_string * new_string); // returns 1 if the word was new and 0 otherwise.
|
||||||
|
void remove(Hashed_string * removed_string);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
294
fagprojekt_code/hashing_algorithms.cpp
Normal file
294
fagprojekt_code/hashing_algorithms.cpp
Normal file
@ -0,0 +1,294 @@
|
|||||||
|
#include "hashing_algorithms.hpp"
|
||||||
|
|
||||||
|
#include "hashed_string.hpp"
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
|
||||||
|
// p is prime. p > m > 1, p > a > 0, p > b >= 0
|
||||||
|
uint64_t multiply_mod_prime(uint64_t x, uint64_t a, uint64_t b, uint64_t p, uint64_t m) {
|
||||||
|
return ((a*x+b) % p) % m;
|
||||||
|
}
|
||||||
|
|
||||||
|
// p is a Mersenne prime. p > m > 1, p > a > 0, p > b >= 0
|
||||||
|
uint64_t multiply_mod_prime_mersenne(uint64_t x, uint64_t a, uint64_t b, uint64_t p, uint64_t m) {
|
||||||
|
uint64_t y = a*x+b;
|
||||||
|
y = (y&p)+(y>>p);
|
||||||
|
if (y>=p) y-=p;
|
||||||
|
return y % m;
|
||||||
|
}
|
||||||
|
|
||||||
|
// p is a Mersenne prime. m=2^q. p > m > 1, p > a > 0, p > b >= 0
|
||||||
|
uint64_t multiply_mod_prime_mersenne_overflow(uint64_t x, uint64_t a, uint64_t b, uint64_t p, char q) {
|
||||||
|
uint64_t y = a*x+b;
|
||||||
|
y = (y&p)+(y>>p);
|
||||||
|
if (y>=p) y-=p;
|
||||||
|
return y & ~( ( ~(uint64_t)0 ) << q); // OBS: Behaviour undefined for shifting n-bit integers n times
|
||||||
|
}
|
||||||
|
|
||||||
|
// p is a Mersenne prime. m=2^q. p > m > 1, p > a > 0
|
||||||
|
uint64_t multiply_mod_prime_mersenne_overflow_no_b(uint64_t x, uint64_t a, uint64_t p, char q) {
|
||||||
|
uint64_t y = a*x;
|
||||||
|
y = (y&p)+(y>>p);
|
||||||
|
if (y>=p) y-=p;
|
||||||
|
return y & ~( ( ~(uint64_t)0 ) << q); // OBS: Behaviour undefined for shifting n-bit integers n times
|
||||||
|
}
|
||||||
|
|
||||||
|
// p=2^89-1 is a Mersenne prime. m=2^l. 32 >= l > 0. p > a > 0, p > b >= 0. x is an 64 bit integer. We assume x, a, and b are arrays of 32 bit integers.
|
||||||
|
uint32_t multiply_mod_prime_mersenne_overflow_high_bitcount(uint32_t * x, uint32_t * a, uint32_t * b, char l) {
|
||||||
|
// x is a 64 bit integer given as a 2 long array of 32 bit integers
|
||||||
|
// a is a 89 bit integer given as a 3 long array of 32 bit integers
|
||||||
|
// b is a 89 bit integer given as a 3 long array of 32 bit integers
|
||||||
|
|
||||||
|
// ax: array to hold the sub-calculations of a*x
|
||||||
|
// there are 2*3 sub-calculations with each result being split into the least significant 32 bits and the most significant 32 bits
|
||||||
|
uint32_t ax[12]; // 12 = 2*3*2
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 2; i++) { // index x
|
||||||
|
for (size_t ii = 0; ii < 3; ii++) { // index a
|
||||||
|
uint64_t tmp = (uint64_t)x[i] * (uint64_t)a[ii];
|
||||||
|
ax[6*i+2*ii] = (uint32_t)(tmp & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones
|
||||||
|
ax[6*i+2*ii+1] = (uint32_t)(tmp >> 32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculate y = ax+b
|
||||||
|
uint32_t * y = new uint32_t[5];
|
||||||
|
uint64_t sum = 0;
|
||||||
|
uint32_t carry = 0;
|
||||||
|
|
||||||
|
// calculate bits 0-32
|
||||||
|
sum = (uint64_t)ax[10]+(uint64_t)b[2];
|
||||||
|
carry = (uint32_t)(sum >> 32);
|
||||||
|
y[4] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones
|
||||||
|
// calculate bits 33-64
|
||||||
|
sum = (uint64_t)carry + (uint64_t)ax[11] + (uint64_t)ax[8] + (uint64_t)ax[4] + (uint64_t)b[1];
|
||||||
|
carry = (uint32_t)(sum >> 32);
|
||||||
|
y[3] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones
|
||||||
|
// calculate bits 65-96
|
||||||
|
sum = (uint64_t)carry + (uint64_t)ax[9] + (uint64_t)ax[5] + (uint64_t)ax[2] + (uint64_t)ax[6] + (uint64_t)b[0];
|
||||||
|
carry = (uint32_t)(sum >> 32);
|
||||||
|
y[2] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones
|
||||||
|
// calculate bits 97-128
|
||||||
|
sum = (uint64_t)carry + (uint64_t)ax[3] + (uint64_t)ax[7] + (uint64_t)ax[0];
|
||||||
|
carry = (uint32_t)(sum >> 32);
|
||||||
|
y[1] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones
|
||||||
|
// calculate bits 129-160
|
||||||
|
y[0] = ax[1];
|
||||||
|
|
||||||
|
//// calculate modulo p for p=2^89-1
|
||||||
|
// y&p
|
||||||
|
// we take the 89 first bits
|
||||||
|
uint32_t yandp[3];
|
||||||
|
yandp[2] = y[4];
|
||||||
|
yandp[1] = y[3];
|
||||||
|
yandp[0] = (y[2] & 0b11111'11111'11111'11111'11111); // 25 ones
|
||||||
|
|
||||||
|
// y>>q
|
||||||
|
// we bitshift 89 times, so we only keep the 5*32 - 89 = 71 most significant bits
|
||||||
|
uint32_t yshiftq[3];
|
||||||
|
yshiftq[2] = (y[2] >> 25) // keep 7 bits
|
||||||
|
| (y[1] << 7); // keep 25 bits
|
||||||
|
yshiftq[1] = (y[1] >> 25) | (y[0] << 7);
|
||||||
|
yshiftq[0] = y[0] >> 25;
|
||||||
|
|
||||||
|
// y = (y&p) + (y >> q)
|
||||||
|
// bits 0-32
|
||||||
|
sum = (uint64_t)yandp[2] + (uint64_t)yshiftq[2];
|
||||||
|
carry = (uint32_t)(sum >> 32);
|
||||||
|
y[4] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones
|
||||||
|
// bits 33-64
|
||||||
|
sum = (uint64_t)carry + (uint64_t)yandp[1] + (uint64_t)yshiftq[1];
|
||||||
|
carry = (uint32_t)(sum >> 32);
|
||||||
|
y[3] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones
|
||||||
|
// bits 65-71
|
||||||
|
y[2] = (uint64_t)carry + (uint64_t)yandp[0] + (uint64_t)yshiftq[0];
|
||||||
|
y[1] = 0;
|
||||||
|
// y[0] = 0;
|
||||||
|
|
||||||
|
//// if y >= p; y -= p
|
||||||
|
// y >= p if bits 90 to 96 are != 0 (actually if bit 90 is set, but this is prettier)
|
||||||
|
if ((y[2] >> 25) != 0) {
|
||||||
|
// subtracting 2^89-1 is equal to subtracting 2^89 and adding 1.
|
||||||
|
// - 2^89
|
||||||
|
y[2] = y[2] & 0b1'1111'1111'1111'1111'1111'1111; // 25 ones. We know that bit 91 to 96 are 0
|
||||||
|
// + 1
|
||||||
|
for (size_t i = 4; i > 1; i--) {
|
||||||
|
y[i] += 1;
|
||||||
|
if (y[i] != 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// mod 2^l (mod m)
|
||||||
|
return (y[4] >> (32-l));
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t multiply_shift_c_universal(uint32_t x, uint64_t a, char l) {
|
||||||
|
return (a*x) >> (64-l);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t multiply_shift_strongly_universal(uint32_t x, uint64_t a, uint64_t b, char l) {
|
||||||
|
return (a*x+b) >> (64-l);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t multiply_shift_vector(uint32_t * x, uint64_t * seed, size_t d, char l) {
|
||||||
|
uint64_t val = 0;
|
||||||
|
for (size_t i = 0; i < d; i++)
|
||||||
|
val += seed[i]*x[i];
|
||||||
|
return (val + seed[d-1]) >> (64-l);
|
||||||
|
}
|
||||||
|
|
||||||
|
// requires x to be of size D and the seed to be of size D
|
||||||
|
uint64_t multiply_shift_string(const My_string * string, const uint64_t * seed, uint64_t * x, size_t l) {
|
||||||
|
size_t d = (string->size+7) >> 3; // d = ceil(string->size/8)
|
||||||
|
|
||||||
|
x[d-1] = 0;
|
||||||
|
memcpy(x, string->chars, string->size*sizeof(string->chars[0]));
|
||||||
|
|
||||||
|
uint64_t val = 0;
|
||||||
|
for (size_t i = 0; i < d; i++)
|
||||||
|
val += (seed[2*i]+(uint32_t)(x[i]>>32))*(seed[2*i+1]+(uint32_t)x[i]);
|
||||||
|
return (val + seed[d]) >> (64-l);
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculate (ax+b) mod p
|
||||||
|
// p=2^89-1 is a Mersenne prime. p > a. p > x. a and x are size 3 arrays of uint32_t.
|
||||||
|
// b is a 64 bit integer given as a size 2 array of uint32_t.
|
||||||
|
// The result is saved in x.
|
||||||
|
void high_bitcount_ax_b_mod_p(uint32_t * x, uint32_t * a, uint32_t * b) {
|
||||||
|
// x, a, and b are 89 bit integers given as a 3 long arrays of 32 bit integers.
|
||||||
|
|
||||||
|
// ax: array to hold the sub-calculations of a*x
|
||||||
|
// there are 3*3 sub-calculations with each result being split into the least significant 32 bits and the most significant 32 bits
|
||||||
|
size_t ax_size = 18; // 18 = 3*3*2
|
||||||
|
uint32_t ax[ax_size];
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 3; i++) { // index x
|
||||||
|
for (size_t ii = 0; ii < 3; ii++) { // index a
|
||||||
|
uint64_t tmp = (uint64_t)x[i] * (uint64_t)a[ii];
|
||||||
|
ax[6*i+2*ii] = (uint32_t)(tmp & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones
|
||||||
|
ax[6*i+2*ii+1] = (uint32_t)(tmp >> 32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculate y = ax+b
|
||||||
|
size_t y_size = 6; // 6 = ceil( (89+89+1)/32 )
|
||||||
|
uint32_t * y = new uint32_t[y_size];
|
||||||
|
uint64_t sum = 0;
|
||||||
|
uint32_t carry = 0;
|
||||||
|
y[0] = ax[0]+b[0];
|
||||||
|
for (size_t index = 1; index < y_size-1; index++) {
|
||||||
|
sum = 0;
|
||||||
|
for (size_t i = 0; i <= index; i++) {
|
||||||
|
size_t ii = index - i;
|
||||||
|
sum += ax[6*i+2*ii];
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < index; i++) {
|
||||||
|
size_t ii = index - i - 1;
|
||||||
|
sum += ax[6*i+2*ii+1];
|
||||||
|
}
|
||||||
|
if (index < 2) {
|
||||||
|
sum += b[index];
|
||||||
|
}
|
||||||
|
carry = (uint32_t)(sum >> 32);
|
||||||
|
y[index] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones
|
||||||
|
}
|
||||||
|
y[y_size-1] = carry+ax[ax_size-1];
|
||||||
|
|
||||||
|
//// calculate modulo p for p=2^89-1
|
||||||
|
// y&p
|
||||||
|
// we take the 89 first bits
|
||||||
|
uint32_t yandp[3];
|
||||||
|
yandp[0] = y[0];
|
||||||
|
yandp[1] = y[1];
|
||||||
|
yandp[2] = (y[2] & 0b11111'11111'11111'11111'11111); // 25 ones
|
||||||
|
|
||||||
|
// y>>q
|
||||||
|
// we bitshift 89 times, and keep the 89 following bits
|
||||||
|
uint32_t yshiftq[3];
|
||||||
|
yshiftq[0] = (y[2] >> 25) // keep 7 bits
|
||||||
|
| (y[3] << 7); // keep 25 bits
|
||||||
|
yshiftq[1] = (y[3] >> 25) | (y[4] << 7);
|
||||||
|
yshiftq[2] = (y[4] >> 25) // keep 7 bits
|
||||||
|
| (y[5] << 7); // keep 25 bits, but only 18 of them can be nonzero in practice.
|
||||||
|
|
||||||
|
// y = (y&p) + (y >> q)
|
||||||
|
// bits 0-32
|
||||||
|
sum = (uint64_t)yandp[0] + (uint64_t)yshiftq[0];
|
||||||
|
carry = (uint32_t)(sum >> 32);
|
||||||
|
y[0] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones
|
||||||
|
// bits 33-64
|
||||||
|
sum = (uint64_t)carry + (uint64_t)yandp[1] + (uint64_t)yshiftq[1];
|
||||||
|
carry = (uint32_t)(sum >> 32);
|
||||||
|
y[1] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones
|
||||||
|
// bits 65-71
|
||||||
|
y[2] = (uint64_t)carry + (uint64_t)yandp[2] + (uint64_t)yshiftq[2];
|
||||||
|
// y[3] = 0; // we don't use it anyway
|
||||||
|
// y[4] = 0;
|
||||||
|
// y[5] = 0;
|
||||||
|
|
||||||
|
//// if y >= p; y -= p
|
||||||
|
// y >= p if bits 90 to 96 are != 0 (actually if bit 90 is set, but this is prettier)
|
||||||
|
if ((y[2] << 25) != 0) {
|
||||||
|
// subtracting 2^89-1 is equal to subtracting 2^89 and adding 1.
|
||||||
|
// - 2^89
|
||||||
|
y[2] = y[2] & 0b1'1111'1111'1111'1111'1111'1111; // 25 ones. We know that bit 91 to 96 are 0
|
||||||
|
// + 1
|
||||||
|
for (size_t i = 0; i < 3; i++) {
|
||||||
|
y[i] += 1;
|
||||||
|
if (y[i] != 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
x[0] = y[0];
|
||||||
|
x[1] = y[1];
|
||||||
|
x[2] = y[2];
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// p=2^89-1 is a Mersenne prime.
|
||||||
|
// p > a >= 0. p > b >= 0. p > c >= 0. x is a size 2d list of 64 bit integers split into 32 bit integers.
|
||||||
|
// We assume a, b, and c are size 3 arrays of 32 bit integers.
|
||||||
|
uint32_t polynomial_vector(uint32_t * x, uint32_t * a, uint32_t * b, uint32_t * c, size_t d, char l) {
|
||||||
|
uint32_t H[3];
|
||||||
|
H[0] = x[0];
|
||||||
|
H[1] = x[1];
|
||||||
|
|
||||||
|
for (size_t i = 1; i < d; i++) {
|
||||||
|
high_bitcount_ax_b_mod_p(H, c, x+2*i);
|
||||||
|
}
|
||||||
|
high_bitcount_ax_b_mod_p(H, a, b);
|
||||||
|
|
||||||
|
return H[0] >> (32-l);
|
||||||
|
}
|
||||||
|
|
||||||
|
// p=2^89-1 is a Mersenne prime.
|
||||||
|
// p > a >= 0. p > b >= 0. p > c >= 0. x is a size 2d list of 64 bit integers split into 32 bit integers.
|
||||||
|
// We assume a, b, and c are size 3 arrays of 32 bit integers.
|
||||||
|
// we assume the seed to be of size 4 (at minimum).
|
||||||
|
uint32_t polynomial_vector_tuned(uint32_t * x, uint32_t * a, uint32_t * b, uint32_t * c, size_t d, char l, const uint64_t * seed) {
|
||||||
|
size_t x_remainder = d - ((d >> 2) << 2); // abuse integer division and multiplication (via bitshifts) as modulo
|
||||||
|
size_t x_tuned_size = (d >> 2) + 1;
|
||||||
|
uint32_t x_tuned[x_tuned_size];
|
||||||
|
|
||||||
|
// prehash chunks of x using the bounded string algorithm
|
||||||
|
uint64_t buffer_memory[4];
|
||||||
|
char word[256];
|
||||||
|
for (size_t i = 0; i <= (d >> 4) - 1; i++ ) { // d 64 bit integers -> d/4 256 char strings
|
||||||
|
memcpy(word, x + i*4, 256*sizeof(word[0]));
|
||||||
|
My_string str = My_string(word, 256);
|
||||||
|
x_tuned[i] = (uint32_t) multiply_shift_string(&str, seed, buffer_memory, 32);
|
||||||
|
}
|
||||||
|
// prehash the leftovers
|
||||||
|
if (x_remainder != 0) {
|
||||||
|
memcpy(word, x + d-x_remainder, (x_remainder << 3)*sizeof(word[0]));
|
||||||
|
My_string str = My_string(word, (x_remainder << 3));
|
||||||
|
x_tuned[x_tuned_size-1] = (uint32_t) multiply_shift_string(&str, seed, buffer_memory, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
return polynomial_vector(x_tuned, a, b, c, x_tuned_size, l);
|
||||||
|
}
|
42
fagprojekt_code/hashing_algorithms.hpp
Normal file
42
fagprojekt_code/hashing_algorithms.hpp
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#ifndef HASHING_ALGORITHMS_H
|
||||||
|
#define HASHING_ALGORITHMS_H
|
||||||
|
|
||||||
|
#include "hashed_string.hpp"
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
// p is prime. p > m > 1, p > a > 0, p > b >= 0
|
||||||
|
uint64_t multiply_mod_prime(uint64_t x, uint64_t a, uint64_t b, uint64_t p, uint64_t m);
|
||||||
|
|
||||||
|
// p is a Mersenne prime. p > m > 1, p > a > 0, p > b >= 0
|
||||||
|
uint64_t multiply_mod_prime_mersenne(uint64_t x, uint64_t a, uint64_t b, uint64_t p, uint64_t m);
|
||||||
|
|
||||||
|
// p is a Mersenne prime. m=2^q. p > m > 1, p > a > 0, p > b >= 0
|
||||||
|
uint64_t multiply_mod_prime_mersenne_overflow(uint64_t x, uint64_t a, uint64_t b, uint64_t p, char q);
|
||||||
|
|
||||||
|
// p is a Mersenne prime. m=2^q. p > m > 1, p > a > 0
|
||||||
|
uint64_t multiply_mod_prime_mersenne_overflow_no_b(uint64_t x, uint64_t a, uint64_t p, char q);
|
||||||
|
|
||||||
|
// p=2^89-1 is a Mersenne prime. m=2^l. 32 >= l > 0. p > a > 0, p > b >= 0. x is an 64 bit integer. We assume x, a, and b are arrays of 32 bit integers.
|
||||||
|
uint32_t multiply_mod_prime_mersenne_overflow_high_bitcount(uint32_t * x, uint32_t * a, uint32_t * b, char l);
|
||||||
|
|
||||||
|
uint64_t multiply_shift_c_universal(uint32_t x, uint64_t a, char l);
|
||||||
|
|
||||||
|
uint64_t multiply_shift_strongly_universal(uint32_t x, uint64_t a, uint64_t b, char l);
|
||||||
|
|
||||||
|
uint64_t multiply_shift_vector(uint32_t * x, uint64_t * seed, size_t d, char l);
|
||||||
|
|
||||||
|
// requires x to be of size D and the seed to be of size D
|
||||||
|
uint64_t multiply_shift_string(const My_string * string, const uint64_t * seed, uint64_t * x, size_t l);
|
||||||
|
|
||||||
|
// p=2^89-1 is a Mersenne prime.
|
||||||
|
// p > a >= 0. p > b >= 0. p > c >= 0. x is a size 2d list of 64 bit integers split into 32 bit integers.
|
||||||
|
// We assume a, b, and c are size 3 arrays of 32 bit integers.
|
||||||
|
uint32_t polynomial_vector(uint32_t * x, uint32_t * a, uint32_t * b, uint32_t * c, size_t d, char l);
|
||||||
|
|
||||||
|
// p=2^89-1 is a Mersenne prime.
|
||||||
|
// p > a >= 0. p > b >= 0. p > c >= 0. x is a size 2d list of 64 bit integers split into 32 bit integers.
|
||||||
|
// We assume a, b, and c are size 3 arrays of 32 bit integers.
|
||||||
|
// we assume the seed to be of size 4 (at minimum).
|
||||||
|
uint32_t polynomial_vector_tuned(uint32_t * x, uint32_t * a, uint32_t * b, uint32_t * c, size_t d, char l, const uint64_t * seed);
|
||||||
|
#endif
|
152
fagprojekt_code/hashing_speed_comparisons.cpp
Normal file
152
fagprojekt_code/hashing_speed_comparisons.cpp
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
/* #define NDEBUG */
|
||||||
|
|
||||||
|
#include "hash_table.hpp"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <chrono>
|
||||||
|
#include <iostream>
|
||||||
|
#include <bitset>
|
||||||
|
#include <fstream>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
|
const int max_string_length = 256;
|
||||||
|
const int D = (max_string_length+7) >> 3; // D = ceil(max_string_length/size(char))
|
||||||
|
|
||||||
|
My_string * read_word(const char * book, size_t * reading_progress);
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
void speedtest_using_hash_table(string title, uint64_t(* algorithm)(const My_string *, const uint64_t *, uint64_t *, size_t), string seed_file) {
|
||||||
|
const clock_t clock_before_seed = clock();
|
||||||
|
// load seed generated by random.org
|
||||||
|
string filename = "random_org_16_bit_numbers.txt";
|
||||||
|
std::ifstream seed_file(filename);
|
||||||
|
uint16_t seed_part;
|
||||||
|
uint16_t seed_parts[D << 2];
|
||||||
|
size_t count = 0;
|
||||||
|
while (seed_file >> seed_part) {
|
||||||
|
seed_parts[count] = seed_part;
|
||||||
|
count++;
|
||||||
|
if (count == D << 2)
|
||||||
|
break; // the seed is large enough for the max_string_length
|
||||||
|
}
|
||||||
|
if (count != D << 2) {
|
||||||
|
cout << "The current seed is not large enough. Please extend it by appending the numbers at https://www.random.org/integers/?num=10000&min=0&max=65535&col=1&base=10&format=plain&rnd=new." << endl;
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
// Get 64 bit seed from 16 bit seed
|
||||||
|
uint64_t seed[D];
|
||||||
|
memcpy(seed, seed_parts, D*sizeof(seed[0]));
|
||||||
|
|
||||||
|
const clock_t clock_before_hash_table = clock();
|
||||||
|
|
||||||
|
size_t l = 1; // initially, hash to a table of up to 1024 distinct words (2^10)
|
||||||
|
Hash_table ht = Hash_table(new Hash_function(seed, hash_string, l, D));
|
||||||
|
|
||||||
|
const clock_t clock_before_loading_book = clock();
|
||||||
|
|
||||||
|
// choose a book
|
||||||
|
/* std::ifstream ifs("genji_monogatari_english.txt"); */
|
||||||
|
/* std::ifstream ifs("Child_of_Light.txt"); */
|
||||||
|
/* std::ifstream ifs("the_adventures_of_sherlock_holmes.txt"); */
|
||||||
|
/* std::ifstream ifs("dracula.txt"); */
|
||||||
|
std::ifstream ifs("the_complete_works_of_william_shakespeare.txt");
|
||||||
|
|
||||||
|
string book_string( (std::istreambuf_iterator<char>(ifs) ),
|
||||||
|
(std::istreambuf_iterator<char>() ) );
|
||||||
|
const char * book = book_string.c_str();
|
||||||
|
const size_t book_length = book_string.size();
|
||||||
|
|
||||||
|
const clock_t clock_before_reading = clock();
|
||||||
|
|
||||||
|
std::vector<My_string *> words;
|
||||||
|
|
||||||
|
size_t reading_progress = 0;
|
||||||
|
while (reading_progress < book_length-1) { // book_length includes '\0' which reading_progress avoids
|
||||||
|
My_string * word = read_word(book, &reading_progress);
|
||||||
|
if (word->size > 0)
|
||||||
|
words.push_back(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
const clock_t clock_after_reading = clock();
|
||||||
|
|
||||||
|
size_t count_words = 0;
|
||||||
|
for (My_string * word : words) {
|
||||||
|
count_words++;
|
||||||
|
ht.hash(word);
|
||||||
|
if (ht.is_time_for_rehash()) {
|
||||||
|
l++; // double the universe which we hashes to
|
||||||
|
ht.rehash(new Hash_function(seed, hash_string, l, D));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const clock_t clock_after_hashing = clock();
|
||||||
|
|
||||||
|
Hash_function hf = Hash_function(seed, hash_string, 10, D);
|
||||||
|
|
||||||
|
const clock_t clock_after_init_hash_function = clock();
|
||||||
|
|
||||||
|
uint64_t hashed_value = 0;
|
||||||
|
|
||||||
|
for (My_string * word : words) {
|
||||||
|
/* hf.hash(word); */
|
||||||
|
hashed_value += hf.hash(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
const clock_t clock_after_hashing_only = clock();
|
||||||
|
|
||||||
|
cout << "Sum of the hashed values (after overflow): " << hashed_value << endl;
|
||||||
|
|
||||||
|
cout << "Nr of words: " << count_words << endl;
|
||||||
|
cout << "Distinct words: " << ht.get_distict_words() << endl;
|
||||||
|
|
||||||
|
cout << "Time: Load seed: " << float( clock_before_hash_table - clock_before_seed ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Init hash table: " << float( clock_before_loading_book - clock_before_hash_table ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Load book: " << float( clock_before_reading - clock_before_loading_book ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Read: " << float( clock_after_reading - clock_before_reading ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Hash & Table: " << float( clock_after_hashing - clock_after_reading ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Total: " << float( clock_after_hashing - clock_before_seed ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Init hash function: " << float( clock_after_init_hash_function - clock_after_hashing ) / CLOCKS_PER_SEC << endl;
|
||||||
|
cout << "Time: Hash, no table: " << float( clock_after_hashing_only - clock_after_init_hash_function ) / CLOCKS_PER_SEC << endl;
|
||||||
|
|
||||||
|
for (My_string * word : words)
|
||||||
|
delete word;
|
||||||
|
}
|
||||||
|
|
||||||
|
My_string * read_word(const char * book, size_t * reading_progress) {
|
||||||
|
bool word_started = false;
|
||||||
|
char word[max_string_length];
|
||||||
|
size_t word_length = 0;
|
||||||
|
char c;
|
||||||
|
while (book[*reading_progress+1] != '\0') {
|
||||||
|
(*reading_progress)++;
|
||||||
|
c = book[*reading_progress];
|
||||||
|
if (word_started) {
|
||||||
|
if (isalnum(c)) {
|
||||||
|
if (word_length != max_string_length) { // crop words longer than the max_string_length
|
||||||
|
word[word_length] = tolower(c);
|
||||||
|
word_length++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return new My_string(word, word_length);
|
||||||
|
}
|
||||||
|
} else if (isalnum(c)) {
|
||||||
|
word_started = true;
|
||||||
|
word[word_length] = c;
|
||||||
|
word_length++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new My_string(word, word_length);
|
||||||
|
}
|
245
general_library.cpp
Normal file
245
general_library.cpp
Normal file
@ -0,0 +1,245 @@
|
|||||||
|
#include "general_library.hpp"
|
||||||
|
|
||||||
|
// https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm#Modular_integers
|
||||||
|
uint32_t multiplicative_inverse(int a, int n) {
|
||||||
|
int t = 0, newt = 1, r = n, newr = a;
|
||||||
|
while (newr != 0) {
|
||||||
|
int quotient = r/newr;
|
||||||
|
|
||||||
|
int tmp = newt;
|
||||||
|
newt = t-quotient*newt;
|
||||||
|
t = tmp;
|
||||||
|
|
||||||
|
tmp = newr;
|
||||||
|
newr = r-quotient*newr;
|
||||||
|
r = tmp;
|
||||||
|
}
|
||||||
|
if (r > 1)
|
||||||
|
throw std::runtime_error("Tried to inverse a noninvertible element!");
|
||||||
|
if (t < 0)
|
||||||
|
t = t + n;
|
||||||
|
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://stackoverflow.com/questions/18620942/find-the-smallest-period-of-input-string-in-on
|
||||||
|
std::vector<int> calculateLPS(const char * pat, int m) {
|
||||||
|
/* int[] lps = new int[pat.length()]; */
|
||||||
|
int len = 0;
|
||||||
|
int i = 1;
|
||||||
|
std::vector<int> lps = {0};
|
||||||
|
lps.resize(m);
|
||||||
|
|
||||||
|
while (i < m) {
|
||||||
|
if (pat[i] == pat[len]) {
|
||||||
|
len++;
|
||||||
|
lps[i] = len;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (len != 0) {
|
||||||
|
len = lps[len - 1];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
lps[i] = len;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return lps;
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculates the length of the shortest period
|
||||||
|
int len_of_shortest_period (const char * pattern, int m) {
|
||||||
|
std::vector<int> lps = calculateLPS(pattern, m);
|
||||||
|
//start at the end of the string
|
||||||
|
int i = lps.size()-1;
|
||||||
|
while (lps[i] != 0) {
|
||||||
|
//shift back
|
||||||
|
i -= lps[i];
|
||||||
|
}
|
||||||
|
return i+1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// SECTION: Functions related to calculating with polynomials in Z2[x]
|
||||||
|
|
||||||
|
uint32_t irreducible_polynomials[] {
|
||||||
|
0b11, // irreducible polynomial of degree 1
|
||||||
|
0b111, // irreducible polynomial of degree 2
|
||||||
|
0b1011, // irreducible polynomial of degree 3
|
||||||
|
0b10011, // irreducible polynomial of degree 4
|
||||||
|
0b100101, // irreducible polynomial of degree 5
|
||||||
|
0b1011011, // irreducible polynomial of degree 6
|
||||||
|
0b10000011, // irreducible polynomial of degree 7
|
||||||
|
0b100011101, // irreducible polynomial of degree 8
|
||||||
|
0b1000010001, // irreducible polynomial of degree 9
|
||||||
|
0b10001101111, // irreducible polynomial of degree 10
|
||||||
|
0b100000000101, // irreducible polynomial of degree 11
|
||||||
|
0b1000011101011, // irreducible polynomial of degree 12
|
||||||
|
0b10000000011011, // irreducible polynomial of degree 13
|
||||||
|
0b100000010101001, // irreducible polynomial of degree 14
|
||||||
|
0b1000000000110101, // irreducible polynomial of degree 15
|
||||||
|
0b10000000000101101, // irreducible polynomial of degree 16
|
||||||
|
0b100000000000001001, // irreducible polynomial of degree 17
|
||||||
|
0b1000001010000000011, // irreducible polynomial of degree 18
|
||||||
|
0b10000000000000100111, // irreducible polynomial of degree 19
|
||||||
|
0b100000000011011110011, // irreducible polynomial of degree 20
|
||||||
|
0b1000000000000001100101, // irreducible polynomial of degree 21
|
||||||
|
0b10000000001111101100001, // irreducible polynomial of degree 22
|
||||||
|
0b100000000000000000100001, // irreducible polynomial of degree 23
|
||||||
|
0b1000000011110011010101001, // irreducible polynomial of degree 24
|
||||||
|
0b10000000000000000101000101, // irreducible polynomial of degree 25
|
||||||
|
0b100000000000100010111010011, // irreducible polynomial of degree 26
|
||||||
|
0b1000000000000001011010101101, // irreducible polynomial of degree 27
|
||||||
|
0b10000000000000010000011100101, // irreducible polynomial of degree 28
|
||||||
|
0b100000000000000000000000000101, // irreducible polynomial of degree 29
|
||||||
|
0b1000000000000110010100010101111, // irreducible polynomial of degree 30
|
||||||
|
0b10000000000000000000000000001001 // irreducible polynomial of degree 31
|
||||||
|
};
|
||||||
|
|
||||||
|
size_t degree (uint32_t polynomial) {
|
||||||
|
return 31 - __builtin_clz(polynomial);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t multiply_modulo_polynomials_in_Z2 (uint32_t q1, uint32_t q2, uint32_t p) {
|
||||||
|
size_t p_degree = degree(p);
|
||||||
|
size_t q1_degree = degree(q1);
|
||||||
|
size_t q2_degree = degree(q2);
|
||||||
|
|
||||||
|
if (q1_degree > p_degree)
|
||||||
|
throw std::logic_error("We only support multiply-modulo whenever the multiplied polynomials are initially at most the same degree as the modulo polynomial.");
|
||||||
|
|
||||||
|
if (q1_degree == p_degree)
|
||||||
|
q1 ^= p;
|
||||||
|
|
||||||
|
uint32_t arr[q2_degree+1];
|
||||||
|
arr[0] = q1;
|
||||||
|
for (size_t i = 1; i <= q2_degree; i++) {
|
||||||
|
arr[i] = arr[i-1]<<1;
|
||||||
|
if (degree(arr[i]) == p_degree)
|
||||||
|
arr[i] ^= p;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t sum = 0;
|
||||||
|
|
||||||
|
std::bitset<32> b(q2);
|
||||||
|
for (size_t i = 0; i <= q2_degree; i++) {
|
||||||
|
if (b[i] == 1)
|
||||||
|
sum ^= arr[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t polynomial_power_in_Z2(uint32_t q, uint32_t exp, uint32_t p) {
|
||||||
|
if (exp == 0)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (degree(q) == degree(p))
|
||||||
|
q ^= p;
|
||||||
|
|
||||||
|
size_t log2exp = degree(exp) + 1;
|
||||||
|
|
||||||
|
uint32_t arr[log2exp];
|
||||||
|
arr[0] = q;
|
||||||
|
for (size_t i = 1; i < log2exp; i++)
|
||||||
|
arr[i] = multiply_modulo_polynomials_in_Z2(arr[i-1], arr[i-1], p);
|
||||||
|
|
||||||
|
uint32_t res = 1;
|
||||||
|
std::bitset<32> b(exp);
|
||||||
|
for (size_t i = 0; i <= log2exp; i++) {
|
||||||
|
if (b[i] == 1)
|
||||||
|
res = multiply_modulo_polynomials_in_Z2(res, arr[i], p);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
void rref_Z2(std::vector<std::bitset<32>> &A, size_t m, size_t n) {
|
||||||
|
if (m > 31 || n > 32)
|
||||||
|
throw std::logic_error("We only support rref_Z2 for up to a (31x32)-matrix.");
|
||||||
|
|
||||||
|
// calculate row echelon form
|
||||||
|
// https://en.wikipedia.org/wiki/Gaussian_elimination#Pseudocode
|
||||||
|
size_t h = 0; // Initialization of the pivot row
|
||||||
|
size_t k = 0; // Initialization of the pivot column
|
||||||
|
|
||||||
|
while (h < m && k < n) {
|
||||||
|
// Find the k-th pivot:
|
||||||
|
int i_max = -1;
|
||||||
|
for (size_t i = h; i < m; i++) {
|
||||||
|
if (A[i][k]) {
|
||||||
|
i_max = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i_max == -1) {
|
||||||
|
// No pivot in this column, pass to next column
|
||||||
|
k++;
|
||||||
|
} else {
|
||||||
|
auto tmp = A[i_max];
|
||||||
|
A[i_max] = A[h];
|
||||||
|
A[h] = tmp;
|
||||||
|
|
||||||
|
// Do for all rows below pivot:
|
||||||
|
for (size_t i = h+1; i < m; i++){
|
||||||
|
bool f = A[i][k];
|
||||||
|
if (f == 0)
|
||||||
|
continue;
|
||||||
|
// Fill with zeros the lower part of pivot column:
|
||||||
|
A[i][k] = 0;
|
||||||
|
// Do for all remaining elements in current row:
|
||||||
|
for (size_t j = k+1; j<n; j++) {
|
||||||
|
A[i][j] = A[i][j] ^ A[h][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Increase pivot row and column
|
||||||
|
h++;
|
||||||
|
k++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perform back substitution
|
||||||
|
for (size_t i = 1; i < m; i++) {
|
||||||
|
for (size_t j = 0; j < i; j++) {
|
||||||
|
if (A[j][i] == 1) {
|
||||||
|
for (size_t l = i; l < n; l++) // check this
|
||||||
|
A[j][l] = A[j][l] ^ A[i][l];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t get_random_irreducible_polynomial_in_Z2 (size_t k) {
|
||||||
|
if (k > 31)
|
||||||
|
throw std::logic_error("We only support polynomials of degree 31 or less.");
|
||||||
|
|
||||||
|
uint32_t init_p = irreducible_polynomials[k-1];
|
||||||
|
|
||||||
|
srand(time(0));
|
||||||
|
uint32_t gamma = (rand() % (((uint32_t)1 << (k+1)) - 2)) + 2; // gamma in GF(2^k)\GF(2)
|
||||||
|
|
||||||
|
uint32_t gamma_pow[k+1];
|
||||||
|
for (size_t i = 0; i <= k; i++)
|
||||||
|
gamma_pow[i] = polynomial_power_in_Z2(gamma, i, init_p);
|
||||||
|
|
||||||
|
// Create an array of the polynomials in gamma_pow, and transpose it at the same time
|
||||||
|
std::vector<std::bitset<32>> A(k); // NOTE: That 32 > 31 >= k
|
||||||
|
for (size_t i = 0; i < k; i++) {
|
||||||
|
for (size_t j = 0; j <= k; j++) {
|
||||||
|
A[i][k-j] = gamma_pow[j] & ((uint32_t)1<<i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rref_Z2(A, k, k+1);
|
||||||
|
|
||||||
|
std::bitset<32> bp;
|
||||||
|
for (size_t i = 0; i < k; i++) {
|
||||||
|
bp[i] = A[i][k];
|
||||||
|
}
|
||||||
|
bp[k] = 1;
|
||||||
|
uint32_t p = (uint32_t)bp.to_ulong();
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
17
general_library.hpp
Normal file
17
general_library.hpp
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
#ifndef GENERAL_LIBRARY_H
|
||||||
|
#define GENERAL_LIBRARY_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <vector>
|
||||||
|
#include <bitset>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
uint32_t multiplicative_inverse(int a, int n);
|
||||||
|
int len_of_shortest_period(const char * pattern, int m);
|
||||||
|
|
||||||
|
// SECTION: Functions related to calculating with polynomials in Z2[x]
|
||||||
|
uint32_t get_random_irreducible_polynomial_in_Z2 (size_t k);
|
||||||
|
|
||||||
|
#endif
|
12
generate_initial_irreducible_polynomials.sage
Normal file
12
generate_initial_irreducible_polynomials.sage
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
F = GF(2)
|
||||||
|
PR = PolynomialRing(F, 'x')
|
||||||
|
|
||||||
|
max_k = 31
|
||||||
|
whitespace = max_k+1
|
||||||
|
|
||||||
|
for i in range(1, max_k+1):
|
||||||
|
poly_int = '0b'+ ''.join(map(str, PR.irreducible_element(i).coefficients(sparse=False)[::-1]))
|
||||||
|
poly_int += ','
|
||||||
|
poly_int += ' '*(max_k-i)
|
||||||
|
print(f' {poly_int}// irreducible polynomial of degree {i}')
|
||||||
|
|
96
generate_random_irreducible_polynomial.sage
Normal file
96
generate_random_irreducible_polynomial.sage
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
k = 31
|
||||||
|
F = GF(2)
|
||||||
|
E = GF(2^k)
|
||||||
|
PR = PolynomialRing(F, 'x')
|
||||||
|
p = PR.irreducible_element(k)
|
||||||
|
|
||||||
|
while(True):
|
||||||
|
gamma = E.random_element()
|
||||||
|
if gamma not in F:
|
||||||
|
break
|
||||||
|
|
||||||
|
# gamma = PR('x^3+x')
|
||||||
|
# print('0b'+ ''.join(map(str, gamma.coefficients(sparse=False)[::-1])))
|
||||||
|
|
||||||
|
eqs = [PR(gamma)^i % p for i in range(k+1)]
|
||||||
|
for eq in eqs:
|
||||||
|
print(eq)
|
||||||
|
print()
|
||||||
|
|
||||||
|
eqs = list(map(lambda eq: eq.coefficients(sparse=False) + [0]*(k-eq.degree()-1), eqs))
|
||||||
|
for eq in eqs:
|
||||||
|
print(eq)
|
||||||
|
print()
|
||||||
|
eqs = matrix(eqs[::-1])
|
||||||
|
print(eqs)
|
||||||
|
print()
|
||||||
|
eqs = eqs.transpose()
|
||||||
|
print(eqs)
|
||||||
|
print()
|
||||||
|
|
||||||
|
rref = eqs.rref()
|
||||||
|
print(rref)
|
||||||
|
irrpol = PR(rref[:,-1].list() + [1])
|
||||||
|
print(irrpol)
|
||||||
|
print(irrpol.is_irreducible())
|
||||||
|
print()
|
||||||
|
|
||||||
|
# manual rref
|
||||||
|
def calc_rref_Z2(A, m, n):
|
||||||
|
"""Calculate the reduced row echelon form of a (mxn)-matrix"""
|
||||||
|
A = copy(A)
|
||||||
|
|
||||||
|
# calculate row echelon form
|
||||||
|
# https://en.wikipedia.org/wiki/Gaussian_elimination#Pseudocode
|
||||||
|
h = 0 # Initialization of the pivot row
|
||||||
|
k = 0 # Initialization of the pivot column
|
||||||
|
|
||||||
|
while h < m and k < n:
|
||||||
|
# Find the k-th pivot:
|
||||||
|
i_max = -1
|
||||||
|
for i in range(h, m):
|
||||||
|
if A[i,k] == 1:
|
||||||
|
i_max = i
|
||||||
|
break
|
||||||
|
|
||||||
|
if i_max == -1:
|
||||||
|
# No pivot in this column, pass to next column
|
||||||
|
k += 1
|
||||||
|
else:
|
||||||
|
tmp = A[i_max, :]
|
||||||
|
A[i_max, :] = A[h, :]
|
||||||
|
A[h, :] = tmp
|
||||||
|
|
||||||
|
|
||||||
|
# Do for all rows below pivot:
|
||||||
|
for i in range(h+1, m):
|
||||||
|
f = A[i][k];
|
||||||
|
if f == 0:
|
||||||
|
continue;
|
||||||
|
A[i, k] = 0;
|
||||||
|
for j in range(k+1, n):
|
||||||
|
A[i, j] = A[i, j] - A[h, j]
|
||||||
|
|
||||||
|
# Increase pivot row and column
|
||||||
|
h += 1
|
||||||
|
k += 1
|
||||||
|
|
||||||
|
# perform back substitution
|
||||||
|
for i in range(1, m):
|
||||||
|
for j in range(i):
|
||||||
|
if A[j, i] == 1:
|
||||||
|
A[j, :] -= A[i, :]
|
||||||
|
|
||||||
|
return A
|
||||||
|
|
||||||
|
print('Manual RREF:\n')
|
||||||
|
|
||||||
|
rref = calc_rref_Z2(eqs, k, k+1) # cols = rows+1, since we transposed the matrix
|
||||||
|
print(rref)
|
||||||
|
print()
|
||||||
|
irrpol = PR(rref[:,-1].list() + [1])
|
||||||
|
print(irrpol)
|
||||||
|
print(irrpol.is_irreducible())
|
||||||
|
print()
|
||||||
|
|
||||||
|
print('0b'+ ''.join(map(str, irrpol.coefficients(sparse=False)[::-1])))
|
106
hash_function_library.cpp
Normal file
106
hash_function_library.cpp
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
#include "hash_function_library.hpp"
|
||||||
|
|
||||||
|
int Rolling_hash::get_fingerprint() {
|
||||||
|
return (int)fingerprint;
|
||||||
|
}
|
||||||
|
|
||||||
|
Rabin_karp::Rabin_karp(uint32_t p, size_t length) : p(p) {
|
||||||
|
for (size_t i = 0; i < length; i++)
|
||||||
|
elements.push(0);
|
||||||
|
|
||||||
|
if (p >= (uint32_t)(1<<31))
|
||||||
|
throw std::overflow_error("A Rabin-Karp hash function has been initialized with too large a prime, such that we will encounter overflow errors.");
|
||||||
|
|
||||||
|
xi = (2 << length) % p;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_karp::slide(unsigned char c_in) {
|
||||||
|
unsigned char c_out = elements.front();
|
||||||
|
elements.pop();
|
||||||
|
elements.push(c_in);
|
||||||
|
|
||||||
|
for (size_t i = 7; i >= 0; i--) {
|
||||||
|
bool bit_in = (c_in & (1 << i)) != 0;
|
||||||
|
bool bit_out = (c_out & (1 << i)) != 0;
|
||||||
|
slide_bit(bit_in, bit_out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_karp::slide_bit(bool bit_in, bool bit_out) {
|
||||||
|
fingerprint = ((fingerprint << 1) - xi*bit_out + bit_in);
|
||||||
|
// fast mod p
|
||||||
|
if (fingerprint > (uint32_t)p)
|
||||||
|
fingerprint -= p;
|
||||||
|
}
|
||||||
|
|
||||||
|
Polynomial_fingerprint::Polynomial_fingerprint(int32_t p, int32_t r) : p(p), r(r) {}
|
||||||
|
|
||||||
|
void Polynomial_fingerprint::push(unsigned char c) {
|
||||||
|
int exp = elements.size() % (p-1);
|
||||||
|
|
||||||
|
fingerprint = (fingerprint + c*(unsigned int)pow(r, exp)) % p;
|
||||||
|
|
||||||
|
elements.push(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Polynomial_fingerprint::shift(size_t i) {
|
||||||
|
int32_t subtract_fingerprint = 0;
|
||||||
|
for (size_t ii = 0; ii < i; ii++) {
|
||||||
|
unsigned char c = elements.front();
|
||||||
|
elements.pop();
|
||||||
|
int exp = (ii+1) % (p-1);
|
||||||
|
subtract_fingerprint += c*pow(r, exp);
|
||||||
|
}
|
||||||
|
fingerprint = (fingerprint-subtract_fingerprint)*multiplicative_inverse(pow(r, i), p) % p;
|
||||||
|
}
|
||||||
|
|
||||||
|
Porat_porat_polynomial_fingerprint::Porat_porat_polynomial_fingerprint(std::string P, int32_t p, int32_t r) : p(p), r(r) {
|
||||||
|
// Calculate the shortest periods for all prefixes of length 2^i (and the full pattern)
|
||||||
|
{
|
||||||
|
size_t i = 1;
|
||||||
|
const char * P_c_str = P.c_str();
|
||||||
|
while (i < P.length()) {
|
||||||
|
shortest_periods.push_back(len_of_shortest_period(P_c_str, i));
|
||||||
|
i <<= 1;
|
||||||
|
}
|
||||||
|
shortest_periods.push_back(len_of_shortest_period(P_c_str, P.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the fingerprints of all prefixes of length 2^i, and of the shortest periods found before
|
||||||
|
{
|
||||||
|
size_t next_2_exponent = 1;
|
||||||
|
size_t shortest_period_index = 0;
|
||||||
|
for (size_t i = 0; i < P.length(); i++) {
|
||||||
|
if (i == next_2_exponent) {
|
||||||
|
if (!prehashed_indices.contains(i)) {
|
||||||
|
prehashed_values.push_back(get_fingerprint());
|
||||||
|
prehashed_indices[i] = prehashed_values.size()-1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (shortest_period_index < shortest_periods.size() && i == shortest_periods[shortest_period_index]) {
|
||||||
|
if (!prehashed_indices.contains(i)) {
|
||||||
|
prehashed_values.push_back(get_fingerprint());
|
||||||
|
prehashed_indices[i] = prehashed_values.size()-1;
|
||||||
|
}
|
||||||
|
shortest_period_index++;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
push(P[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// We have now pushed the entire pattern
|
||||||
|
prehashed_values.push_back(get_fingerprint());
|
||||||
|
prehashed_indices[P.length()] = prehashed_values.size()-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save the length of the pattern before we throw the pattern away
|
||||||
|
m = P.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Porat_porat_polynomial_fingerprint::push(unsigned char c){
|
||||||
|
// TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
void Porat_porat_polynomial_fingerprint::shift(size_t i){
|
||||||
|
// TODO
|
||||||
|
}
|
108
hash_function_library.hpp
Normal file
108
hash_function_library.hpp
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
#ifndef HASH_LIBRARY_H
|
||||||
|
#define HASH_LIBRARY_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <string>
|
||||||
|
#include <queue>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
#include "general_library.hpp"
|
||||||
|
|
||||||
|
class Rolling_hash {
|
||||||
|
public:
|
||||||
|
// pushes element
|
||||||
|
virtual void push(unsigned char c);
|
||||||
|
|
||||||
|
// shifts i indices
|
||||||
|
// the hash function should already know the values
|
||||||
|
virtual void shift(size_t i);
|
||||||
|
virtual void slide(unsigned char c);
|
||||||
|
|
||||||
|
int get_fingerprint();
|
||||||
|
|
||||||
|
private:
|
||||||
|
int fingerprint;
|
||||||
|
};
|
||||||
|
|
||||||
|
class Rabin_karp : Rolling_hash {
|
||||||
|
// Hash function: sum_{i=1}^n x_i*2^{n-i} mod p
|
||||||
|
// With X being a binary string
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Let prime `p` be an int32_t, which ensures that it is small enough to avoid underflows
|
||||||
|
Rabin_karp(uint32_t p, size_t length);
|
||||||
|
void slide(unsigned char c);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void slide_bit(bool bit_in, bool bit_out);
|
||||||
|
std::queue<unsigned char> elements;
|
||||||
|
uint32_t fingerprint = 0;
|
||||||
|
uint32_t xi; // 2^n mod p
|
||||||
|
uint32_t p; // prime
|
||||||
|
};
|
||||||
|
|
||||||
|
class Polynomial_fingerprint : Rolling_hash {
|
||||||
|
// Hash function: sum_{i=1}^l s_i*r^i mod p
|
||||||
|
// r in F_p
|
||||||
|
|
||||||
|
// TODO: Add some overflow warning
|
||||||
|
// TODO: Untested
|
||||||
|
|
||||||
|
public:
|
||||||
|
Polynomial_fingerprint(int32_t p, int32_t r);
|
||||||
|
void push(unsigned char c);
|
||||||
|
void shift(size_t i);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::queue<unsigned char> elements;
|
||||||
|
int32_t fingerprint = 0;
|
||||||
|
int32_t p; // prime
|
||||||
|
int32_t r; // r in F_p
|
||||||
|
};
|
||||||
|
|
||||||
|
class Porat_porat_polynomial_fingerprint : Rolling_hash {
|
||||||
|
// Hash function: sum_{i=1}^l s_i*r^i mod p
|
||||||
|
// r in F_p
|
||||||
|
|
||||||
|
// TODO: Add some overflow warning
|
||||||
|
// TODO: Untested
|
||||||
|
|
||||||
|
public:
|
||||||
|
Porat_porat_polynomial_fingerprint(std::string P, int32_t p, int32_t r);
|
||||||
|
void push(unsigned char c);
|
||||||
|
void shift(size_t i);
|
||||||
|
bool should_children_be_killed();
|
||||||
|
int get_generation();
|
||||||
|
int get_child();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
// TODO: Optimize the code such that we don't copy the prehashed values, but instead share it between all instances.
|
||||||
|
// I guess we would have to store it in a separate object, and then pass a reference to it around (to avoid it getting destroyed early).
|
||||||
|
Porat_porat_polynomial_fingerprint(int32_t p, int32_t r, std::vector<int32_t> prehashed_values, std::vector<unsigned int> shortest_periods, std::map<unsigned int, unsigned int> prehashed_indices);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::queue<unsigned char> elements;
|
||||||
|
int32_t fingerprint = 0;
|
||||||
|
int32_t p; // prime
|
||||||
|
int32_t r; // r in F_p
|
||||||
|
|
||||||
|
size_t m; // pattern length
|
||||||
|
std::vector<int32_t> prehashed_values;
|
||||||
|
std::vector<unsigned int> shortest_periods;
|
||||||
|
std::map<unsigned int, unsigned int> prehashed_indices;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* void push(const char * cs, size_t n) { */
|
||||||
|
/* for (size_t i = 0; i < n; i++) */
|
||||||
|
/* push(cs[i]); */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/* void push(std::string s) { */
|
||||||
|
/* for (char c : s) */
|
||||||
|
/* push(c); */
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
#endif
|
68
multiply_polynomials_modulo_polynomial.sage
Normal file
68
multiply_polynomials_modulo_polynomial.sage
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
import math
|
||||||
|
|
||||||
|
k = 31
|
||||||
|
F = GF(2)
|
||||||
|
var = 'x'
|
||||||
|
PR = PolynomialRing(F, var)
|
||||||
|
p = PR.irreducible_element(k)
|
||||||
|
gamma = PR.random_element(k)
|
||||||
|
|
||||||
|
exp = 10
|
||||||
|
target = gamma^exp % p
|
||||||
|
|
||||||
|
def polynomial_power_in_Z2(q, exp, p, var):
|
||||||
|
if exp == 0:
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if q.degree() == p.degree():
|
||||||
|
q += p
|
||||||
|
|
||||||
|
res = q
|
||||||
|
|
||||||
|
for i in range(2, exp+1):
|
||||||
|
res = multiply_polynomials_in_Z2(res, q, p, var)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
def polynomial_power_in_Z2_V2(q, exp, p, var):
|
||||||
|
if exp == 0:
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if q.degree() == p.degree():
|
||||||
|
q += p
|
||||||
|
|
||||||
|
log2deg = math.floor(math.log2(exp)) + 1
|
||||||
|
arr = [0]*log2deg
|
||||||
|
arr[0] = q
|
||||||
|
for i in range(1, log2deg):
|
||||||
|
arr[i] = multiply_polynomials_in_Z2(arr[i-1], arr[i-1], p, var)
|
||||||
|
|
||||||
|
res = PR(1)
|
||||||
|
for i, b in enumerate(bin(exp)[:1:-1]):
|
||||||
|
if b == '1':
|
||||||
|
res = multiply_polynomials_in_Z2(res, arr[i], p, var)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def multiply_polynomials_in_Z2(q1, q2, p, var):
|
||||||
|
if q1.degree() > p.degree():
|
||||||
|
raise ValueError('Unsupported!')
|
||||||
|
|
||||||
|
if q1.degree() == p.degree():
|
||||||
|
q1 += p
|
||||||
|
|
||||||
|
arr = [0]*(q2.degree()+1)
|
||||||
|
arr[0] = q1
|
||||||
|
for i in range(1, q2.degree()+1):
|
||||||
|
arr[i] = arr[i-1]*PR(var)
|
||||||
|
if arr[i].degree() == p.degree():
|
||||||
|
arr[i] += p
|
||||||
|
|
||||||
|
return sum(arr[i] for i in q2.exponents())
|
||||||
|
|
||||||
|
print(target)
|
||||||
|
res = polynomial_power_in_Z2(gamma, exp, p, var)
|
||||||
|
print(res)
|
||||||
|
res = polynomial_power_in_Z2_V2(gamma, exp, p, var)
|
||||||
|
print(res)
|
226
porat-porat.cpp
Normal file
226
porat-porat.cpp
Normal file
@ -0,0 +1,226 @@
|
|||||||
|
/* #define NDEBUG */
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
#include <thread>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#include <string_view>
|
||||||
|
|
||||||
|
#include "hash_function_library.hpp"
|
||||||
|
|
||||||
|
// Initialization of constants
|
||||||
|
int p = 7919; // the prime for our hash function
|
||||||
|
int r = 11; // random int in \in F_p
|
||||||
|
char T[] = "abcabcabcdabc"; // text
|
||||||
|
/* char P[] = "abcabc"; // pattern */
|
||||||
|
char P[] = "abcabcabcabcabcabcddddddddddddddddddabc"; // pattern
|
||||||
|
std::vector<long> prehashed_values;
|
||||||
|
std::vector<int> shortest_periods;
|
||||||
|
std::map<int, int> prehashed_indices;
|
||||||
|
|
||||||
|
// https://stackoverflow.com/questions/18620942/find-the-smallest-period-of-input-string-in-on
|
||||||
|
// BEGIN stolen code
|
||||||
|
std::vector<int> calculateLPS(char * pat, int m) {
|
||||||
|
/* int[] lps = new int[pat.length()]; */
|
||||||
|
int len = 0;
|
||||||
|
int i = 1;
|
||||||
|
std::vector<int> lps = {0};
|
||||||
|
lps.resize(m);
|
||||||
|
|
||||||
|
while (i < m) {
|
||||||
|
if (pat[i] == pat[len]) {
|
||||||
|
len++;
|
||||||
|
lps[i] = len;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (len != 0) {
|
||||||
|
len = lps[len - 1];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
lps[i] = len;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return lps;
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculates the length of the shortest period
|
||||||
|
int len_of_shortest_period (char * pattern, int m) {
|
||||||
|
std::vector<int> lps = calculateLPS(pattern, m);
|
||||||
|
//start at the end of the string
|
||||||
|
int i = lps.size()-1;
|
||||||
|
while (lps[i] != 0) {
|
||||||
|
//shift back
|
||||||
|
i -= lps[i];
|
||||||
|
}
|
||||||
|
return i+1;
|
||||||
|
}
|
||||||
|
// END
|
||||||
|
|
||||||
|
class porat_process {
|
||||||
|
// TODO: use a different hash function. This one is BAD
|
||||||
|
public:
|
||||||
|
// we use the polynomial fingerprint
|
||||||
|
void increment_hash (char c) {
|
||||||
|
prev_pow = prev_pow*r % p;
|
||||||
|
hash = (hash + c*prev_pow) % p;
|
||||||
|
l++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void subtract_hash (long pre_fingerprint, int i) {
|
||||||
|
// i is the number of removed elements
|
||||||
|
// pre_fingerprint is the fingerprint of those previous elements
|
||||||
|
hash = (hash - pre_fingerprint)/(long)pow(r, i); // we are guaranteed that integer division will return a whole number
|
||||||
|
prev_pow /= (long)pow(r, i);
|
||||||
|
l -= i;
|
||||||
|
// TODO: untested, especially prev_pow
|
||||||
|
}
|
||||||
|
|
||||||
|
bool should_spawn_child() {
|
||||||
|
if (l == next_i_squared) {
|
||||||
|
next_i_squared <<= 1;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
long get_fingerprint() {
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
long prev_pow = 1;
|
||||||
|
int l = 0;
|
||||||
|
int next_i_squared = 1;
|
||||||
|
long hash = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
void print_map(std::string_view comment, const std::map<int, int>& m)
|
||||||
|
{
|
||||||
|
std::cout << comment;
|
||||||
|
for (const auto& [key, value] : m) {
|
||||||
|
std::cout << key << " = " << value << "; ";
|
||||||
|
}
|
||||||
|
std::cout << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_vector(std::string_view comment, const std::vector<int>& m)
|
||||||
|
{
|
||||||
|
std::cout << comment << "[";
|
||||||
|
for (const auto& a : m) {
|
||||||
|
std::cout << a << ", ";
|
||||||
|
}
|
||||||
|
std::cout << "]\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_vector_long(std::string_view comment, const std::vector<long>& m)
|
||||||
|
{
|
||||||
|
std::cout << comment << "[";
|
||||||
|
for (const auto& a : m) {
|
||||||
|
std::cout << a << ", ";
|
||||||
|
}
|
||||||
|
std::cout << "]\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
int n = sizeof(T)/sizeof(char) - 1;
|
||||||
|
int m = sizeof(P)/sizeof(char) - 1;
|
||||||
|
|
||||||
|
{
|
||||||
|
int i = 1;
|
||||||
|
while (i < m) {
|
||||||
|
// calculate shortest period length
|
||||||
|
int period = len_of_shortest_period(P, i);
|
||||||
|
shortest_periods.push_back(period);
|
||||||
|
|
||||||
|
// calculate fingerprint of period
|
||||||
|
if (!prehashed_indices.contains(period)) {
|
||||||
|
porat_process process;
|
||||||
|
for (int ii = 0; ii < period; ii++){
|
||||||
|
std::cout << P[ii];
|
||||||
|
process.increment_hash(P[ii]);
|
||||||
|
}
|
||||||
|
/* prehashed_indices[period] = process.get_fingerprint(); */
|
||||||
|
prehashed_values.push_back(process.get_fingerprint());
|
||||||
|
std::cout << period << " " << prehashed_values.size() << std::endl;
|
||||||
|
prehashed_indices[period] = prehashed_values.size()-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
i <<= 1;
|
||||||
|
}
|
||||||
|
if (i != m) { // so i>m, which means we skipped exactly m
|
||||||
|
// calculate shortest period length
|
||||||
|
int period = len_of_shortest_period(P, m);
|
||||||
|
shortest_periods.push_back(period);
|
||||||
|
|
||||||
|
// calculate fingerprint of period
|
||||||
|
if (!prehashed_indices.contains(period)) {
|
||||||
|
porat_process process;
|
||||||
|
std::cout << "[";
|
||||||
|
for (int ii = 0; ii < period; ii++) {
|
||||||
|
std::cout << P[ii];
|
||||||
|
process.increment_hash(P[ii]);
|
||||||
|
}
|
||||||
|
std::cout << "]\n";
|
||||||
|
prehashed_values.push_back(process.get_fingerprint());
|
||||||
|
std::cout << period << " " << prehashed_values.size() << std::endl;
|
||||||
|
prehashed_indices[period] = prehashed_values.size()-1;
|
||||||
|
}
|
||||||
|
/* // calculate fingerprint of phi(P_{2^i}) */
|
||||||
|
/* while (ii < m) { */
|
||||||
|
/* process.increment_hash(P[ii]); */
|
||||||
|
/* ii++; */
|
||||||
|
/* } */
|
||||||
|
/* prehashed_values.push_back(process.get_fingerprint()); */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::cout << P << std::endl;
|
||||||
|
int i = 0;
|
||||||
|
while ((1 << i) < m) {
|
||||||
|
std::cout << "pattern: ";
|
||||||
|
for (int ii = 0; ii < (1 << i); ii++)
|
||||||
|
std::cout << P[ii];
|
||||||
|
std::cout << std::endl;
|
||||||
|
std::cout << "period: ";
|
||||||
|
for (int ii = 0; ii < shortest_periods[i]; ii++)
|
||||||
|
std::cout << P[ii];
|
||||||
|
std::cout << std::endl;
|
||||||
|
std::cout << "|prefix_{P_" << (1 << i) << "}| = " << shortest_periods[i] << std::endl;
|
||||||
|
std::cout << prehashed_values[prehashed_indices[shortest_periods[i]]] << std::endl;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
if ((1 << i) != m) { // so i>m, which means we skipped exactly m
|
||||||
|
std::cout << "pattern: ";
|
||||||
|
for (int ii = 0; ii < m; ii++)
|
||||||
|
std::cout << P[ii];
|
||||||
|
std::cout << std::endl;
|
||||||
|
std::cout << "period: ";
|
||||||
|
for (int ii = 0; ii < shortest_periods[i]; ii++)
|
||||||
|
std::cout << P[ii];
|
||||||
|
std::cout << std::endl;
|
||||||
|
std::cout << "|prefix_{P_" << m << "}| = " << shortest_periods[i] << std::endl;
|
||||||
|
std::cout << prehashed_values[prehashed_indices[shortest_periods[i]]] << std::endl;
|
||||||
|
std::cout << prehashed_values[0] << std::endl;
|
||||||
|
std::cout << prehashed_values[1] << std::endl;
|
||||||
|
std::cout << prehashed_values[2] << std::endl;
|
||||||
|
std::cout << prehashed_values[3] << std::endl;
|
||||||
|
std::cout << prehashed_values[4] << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print_map("Indices map: ", prehashed_indices);
|
||||||
|
print_vector_long("Values vector: ", prehashed_values);
|
||||||
|
print_vector("Periods vector: ", shortest_periods);
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
BIN
simple_string_matching
Executable file
BIN
simple_string_matching
Executable file
Binary file not shown.
65
simple_string_matching.cpp
Normal file
65
simple_string_matching.cpp
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
/* #define NDEBUG */
|
||||||
|
#include "Rabin_fingerprint.hpp"
|
||||||
|
#include "general_library.hpp"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <string>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
void print_match (size_t index, size_t length, std::string &T) {
|
||||||
|
std::cout << "Match found at index " << index << " with the text \"";
|
||||||
|
for (size_t i = 0; i < length; i++)
|
||||||
|
std::cout << T[index + i];
|
||||||
|
std::cout << "\"" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
/* std::ifstream ifs("books/the_complete_works_of_william_shakespeare.txt"); */
|
||||||
|
std::ifstream ifs("books/genji_monogatari_english.txt");
|
||||||
|
std::string T( (std::istreambuf_iterator<char>(ifs) ),
|
||||||
|
(std::istreambuf_iterator<char>() ) );
|
||||||
|
|
||||||
|
/* std::string T = "Hello, this is my test string averylongword is a necessary word to exceed the 32 bit window."; */
|
||||||
|
// Test without the modulo polynomial - and two matches
|
||||||
|
std::string P = "word";
|
||||||
|
// Test with the modulo polynomial
|
||||||
|
/* std::string P = "averylongword"; */
|
||||||
|
|
||||||
|
std::cout << "Searching for pattern:" << std::endl;
|
||||||
|
std::cout << " " << P << std::endl;
|
||||||
|
/* std::cout << "in text:" << std::endl; */
|
||||||
|
/* std::cout << " " << T << std::endl; */
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
/* uint32_t polynomial = pow(2, 30) + pow(2, 2) + 1; // x^31 + x^3 + 1 */
|
||||||
|
uint32_t polynomial = get_random_irreducible_polynomial_in_Z2(31);
|
||||||
|
/* uint32_t polynomial = 0b11010011100100000111101011110111; */
|
||||||
|
// Test without the modulo polynomial
|
||||||
|
size_t window_size_in_bits = P.length()*8;
|
||||||
|
|
||||||
|
// Hash the pattern
|
||||||
|
Rabin_fingerprint fP(polynomial, window_size_in_bits);
|
||||||
|
for (char c : P)
|
||||||
|
fP.push_char(c);
|
||||||
|
|
||||||
|
// Hash the text
|
||||||
|
Rabin_fingerprint fT(polynomial, window_size_in_bits);
|
||||||
|
for (size_t i = 0; i < P.length(); i++)
|
||||||
|
fT.push_char(T[i]);
|
||||||
|
if (fT.get_fingerprint() == fP.get_fingerprint())
|
||||||
|
print_match(0, P.length(), T);
|
||||||
|
|
||||||
|
for (size_t i = P.length(); i < T.length(); i++) {
|
||||||
|
fT.slide_char(T[i], T[i-P.length()]);
|
||||||
|
if (fT.get_fingerprint() == fP.get_fingerprint())
|
||||||
|
print_match(i-P.length()+1, P.length(), T);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << std::endl;
|
||||||
|
std::cout << "Done!" << std::endl;
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
97
test_rabin_fingerprint.sage
Normal file
97
test_rabin_fingerprint.sage
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
from random import randint
|
||||||
|
|
||||||
|
F = GF(2)
|
||||||
|
PR = PolynomialRing(F, 'x')
|
||||||
|
k = 31
|
||||||
|
P = PR.irreducible_element(k)
|
||||||
|
m = 100
|
||||||
|
|
||||||
|
# Test the calculation for moving the left edge of the sliding window
|
||||||
|
for _ in range(1):
|
||||||
|
q = tuple(randint(0, 1) for _ in range(m))
|
||||||
|
for i in range(1, m+1):
|
||||||
|
for j in range(i+1, m+1):
|
||||||
|
assert (PR(q[:i]) % P) == (PR(q) % P) - (PR(f'x^{i}')*PR(q[i:]) % P)
|
||||||
|
|
||||||
|
# Moving the right side of the window is as described in the Rabin Fingerprint article
|
||||||
|
|
||||||
|
# Next step (TODO)
|
||||||
|
# pattern = (1,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1)
|
||||||
|
# period = (1,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,1,0,0,0)
|
||||||
|
# m = len(pattern)
|
||||||
|
# s_period = tuple(list(period)+[0]*(m-len(period)))
|
||||||
|
# pattern_1 = (1)
|
||||||
|
# period_1 = (1)
|
||||||
|
# s_period_1 = tuple(list(period_1)+[0]*(m-len(period_1)))
|
||||||
|
# pattern_2 = (1,0)
|
||||||
|
# period_2 = (1,0)
|
||||||
|
# s_period_2 = tuple(list(period_2)+[0]*(m-len(period_2)))
|
||||||
|
# pattern_4 = (1,0,1,1)
|
||||||
|
# period_4 = (1,0,1)
|
||||||
|
# s_period_4 = tuple(list(period_4)+[0]*(m-len(period_4)))
|
||||||
|
# pattern_8 = (1,0,1,1,1,1,0,1)
|
||||||
|
# period_8 = (1,0,1,1,1)
|
||||||
|
# s_period_8 = tuple(list(period_8)+[0]*(m-len(period_8)))
|
||||||
|
# pattern_16 = (1,0,1,1,1,1,0,1,1,0,1,0,1,0,1,1)
|
||||||
|
# period_16 = (1,0,1,1,1,1,0,1,1,0,1,0)
|
||||||
|
# s_period_16 = tuple(list(period_16)+[0]*(m-len(period_16)))
|
||||||
|
|
||||||
|
# for i in range(len(pattern)-len(pattern_4)-len(period_4)):
|
||||||
|
# assert PR(pattern[i+len(period_4):i+len(pattern_4)+len(period_4)]) == PR(pattern[i:i+len(pattern_4)] + pattern[i+len(pattern_4):i+len(pattern_4)+len(period_4)]) - PR(s_period_4)
|
||||||
|
# assert PR(pattern[i+len(period_4):i+len(pattern_4)+len(period_4)]) == pattern[i-len(period_4):i+len(pattern_4)-len(period_4)]+pattern[i+len(pattern_4)-len(period_4):i+len(pattern_4)-len(period_4)] - PR(s_period_4)
|
||||||
|
|
||||||
|
# Does the bits of a prime number correspond to the coefficients of an irreducible polynomial?
|
||||||
|
k = 31
|
||||||
|
reducible = 0
|
||||||
|
irreducible = 0
|
||||||
|
for p in Primes()[200000000:200010000]:#[100000:100000]:
|
||||||
|
# for p in range(2147483648,2147483648+10000):
|
||||||
|
poly = PR(tuple(bin(p)[2:][::-1]))
|
||||||
|
# print(f'{poly.degree()}')
|
||||||
|
if poly.degree() > k:
|
||||||
|
break
|
||||||
|
if poly.degree() != k:
|
||||||
|
continue
|
||||||
|
irr = poly.is_irreducible()
|
||||||
|
# print(f'The polynomial {poly}, corresponding to the prime {p}, being an irreducible polynomial is {irr}.')
|
||||||
|
if irr:
|
||||||
|
irreducible += 1
|
||||||
|
else:
|
||||||
|
reducible += 1
|
||||||
|
|
||||||
|
print(f'{irreducible}/{reducible+irreducible} of the primes correspond to irreducible polynomials. That is {float(irreducible/(reducible+irreducible))*100}%')
|
||||||
|
|
||||||
|
progress = 0
|
||||||
|
the_same = 0
|
||||||
|
different = 0
|
||||||
|
for p in Primes()[200000000:200010000]:
|
||||||
|
progress += 1
|
||||||
|
print(f'\r{progress}/10000', end='')
|
||||||
|
p_poly = PR(tuple(bin(p)[2:][::-1]))
|
||||||
|
if not p_poly.is_irreducible():
|
||||||
|
continue
|
||||||
|
# i = p + randint(1,2**29)
|
||||||
|
i = p + randint(1,p-1)
|
||||||
|
# print(f'{i}, {p}, {2*p}, {i%p}')
|
||||||
|
i_poly = PR(tuple(bin(i)[2:][::-1]))
|
||||||
|
i_mod_p = i % p
|
||||||
|
i_mod_p_poly = PR(tuple(bin(i_mod_p)[2:][::-1]))
|
||||||
|
# print(f'i_poly: {i_poly}')
|
||||||
|
# print(f'p_poly: {p_poly}')
|
||||||
|
# print(f'i_mod_p_poly: {i_mod_p_poly}')
|
||||||
|
if i_mod_p_poly == i_poly % p_poly:
|
||||||
|
the_same += 1
|
||||||
|
else:
|
||||||
|
different += 1
|
||||||
|
# if not i_mod_p_poly == i_poly - (p_poly - PR(f'x^{p_poly.degree()}')):
|
||||||
|
# if not i_mod_p_poly == i_poly % p_poly:
|
||||||
|
# print()
|
||||||
|
# print('ERROR')
|
||||||
|
# print(i_mod_p_poly)
|
||||||
|
# # print(i_poly - (p_poly - PR(f'x^{p_poly.degree()}')))
|
||||||
|
# print(i_poly % p_poly)
|
||||||
|
# break
|
||||||
|
# assert i_mod_p_poly == i_poly - (p_poly - PR(f'x^{p_poly.degree()}'))
|
||||||
|
|
||||||
|
print()
|
||||||
|
print(f'{the_same}/{the_same+different} are the same as if we did polynomial calculations. This is {float(the_same/(the_same+different))*100}%')
|
Loading…
Reference in New Issue
Block a user