Compare commits

..

No commits in common. "b9645f15ff8956732f796d1f45fe4873002ae528" and "76c0d7f49ec37a7d5692b8cbc1f170002a91e65a" have entirely different histories.

9 changed files with 44 additions and 134 deletions

View File

@ -24,8 +24,8 @@ LDLIBS = # library flags
# -llapacke (LAPACK) NOTE: The order of "-llapacke -llapack -lblas" is very important # -llapacke (LAPACK) NOTE: The order of "-llapacke -llapack -lblas" is very important
#LINK.o = $(CXX) $(LDFLAGS) # use CXX for linking #LINK.o = $(CXX) $(LDFLAGS) # use CXX for linking
simple_string_matching: simple_string_matching.o Rabin_fingerprint.o general_library.o processes.o simple_string_matching: simple_string_matching.o Rabin_fingerprint.o general_library.o
$(CXX) $(CXXFLAGS) simple_string_matching.o Rabin_fingerprint.o general_library.o processes.o -o simple_string_matching $(CXX) $(CXXFLAGS) simple_string_matching.o Rabin_fingerprint.o general_library.o -o simple_string_matching
simple_string_matching.o: simple_string_matching.cpp simple_string_matching.o: simple_string_matching.cpp
$(CXX) $(CXXFLAGS) -c simple_string_matching.cpp $(CXX) $(CXXFLAGS) -c simple_string_matching.cpp
@ -36,9 +36,6 @@ Rabin_fingerprint.o: Rabin_fingerprint.cpp Rabin_fingerprint.hpp
general_library.o: general_library.cpp general_library.hpp general_library.o: general_library.cpp general_library.hpp
$(CXX) $(CXXFLAGS) -c general_library.cpp $(CXX) $(CXXFLAGS) -c general_library.cpp
processes.o: processes.cpp processes.hpp
$(CXX) $(CXXFLAGS) -c processes.cpp
porat-porat: porat-porat.cpp porat-porat: porat-porat.cpp
# Tell the compiler that 'clean' isn't referring to a file # Tell the compiler that 'clean' isn't referring to a file

View File

@ -2,20 +2,9 @@
Code used for my Bachelor's Thesis Code used for my Bachelor's Thesis
# Issues
According to UNIX conventions ([read details here](https://unix.stackexchange.com/questions/18743/whats-the-point-in-adding-a-new-line-to-the-end-of-a-file)), a "text file" has to end with a newline, so a pattern file saved the usual way contains one extra trailing character.
When the pattern is read from that file, the trailing newline becomes part of the pattern, which is not intended. To avoid this, write the file using:
`echo -n word > pattern.txt`
or if you use Vim, remove the newline using
```
:set binary
:set noeol
:wq
```
# Versions # Versions
## Current Version ## Current Version
Compared to V1, in this current state `generate_initial_irreducible_polynomials.sage`, `generate_random_irreducible_polynomial.sage`, and `multiply_polynomials_modulo_polynomial.sage` have been removed from the main folder, as they have already been used for implementing their corresponding features in `general_library.cpp`. They will be preserved in the V1 folder. Compared to V1, this current state `generate_initial_irreducible_polynomials.sage`, `generate_random_irreducible_polynomial.sage`, and `multiply_polynomials_modulo_polynomial.sage` have been removed from the main folder, as they have already been used for implementing their corresponding features in `general_library.cpp`. They will be preserved in the V1 folder.
This version also contains code relating to Porat-Porat and other random code stumps. This code is going to be completely reimplemented, but is preserved for now to avoid redoing work. This version also contains code relating to Porat-Porat and other random code stumps. This code is going to be completely reimplemented, but is preserved for now to avoid redoing work.

View File

@ -14,7 +14,7 @@ class Rabin_fingerprint {
void push_bit (bool b); void push_bit (bool b);
void shift_bit (bool b); void shift_bit (bool b);
void slide_char (char c_in, char c_out); void slide_char (char c_in, char c_out);
void slide_bit (bool b_in, bool b_out); void slide_bit (bool b1, bool b2);
uint32_t get_fingerprint(); uint32_t get_fingerprint();

Binary file not shown.

View File

@ -1 +0,0 @@
word

View File

@ -1,59 +0,0 @@
#include "processes.hpp"
// Creates a streaming fingerprint process for a sliding window of
// `window_size_in_bits` bits. The bit queue `window` is default-constructed,
// i.e. it starts out empty.
//
// irr_poly            forwarded unchanged to the Rabin_fingerprint member `phi`
//                     NOTE(review): presumably the irreducible modulus
//                     polynomial over Z2 - confirm against Rabin_fingerprint.
// window_size_in_bits stored as the window capacity and also forwarded to `phi`
Rabin_fingerprint_process::Rabin_fingerprint_process(uint32_t irr_poly, size_t window_size_in_bits)
// the parameter shadows the member of the same name; inside the parentheses the parameter is used
: window_size_in_bits(window_size_in_bits),
phi(irr_poly, window_size_in_bits)
{}
// Feeds the eight bits of `c` into the window, most significant bit first,
// by calling stream_bit() once per bit.
//
// The loop counter is deliberately an `int`: whether plain `char` is signed is
// implementation-defined, and with an unsigned `char` counter the condition
// `i >= 0` can never become false (endless loop, out-of-range bit index).
void Rabin_fingerprint_process::stream_char(char c) {
    // Work on the raw byte value so that negative chars need no special care.
    const unsigned char byte = static_cast<unsigned char>(c);
    for (int bit = 7; bit >= 0; --bit) {
        stream_bit(((byte >> bit) & 1u) != 0);
    }
}
// Streams a single bit into the process.
//
// While the window still has room, the bit is appended to the queue and pushed
// into the fingerprint. Once the window holds `window_size_in_bits` bits, the
// new bit is appended, the oldest bit is removed from the front, and the
// fingerprint is slid by that (incoming, outgoing) pair.
void Rabin_fingerprint_process::stream_bit(bool b) {
    // Decide before touching the queue: the push below changes its size.
    const bool window_was_full = (window.size() == window_size_in_bits);

    // The incoming bit is enqueued in either case, and always before the
    // front element is inspected.
    window.push(b);

    if (!window_was_full) {
        phi.push_bit(b);
        return;
    }

    const bool evicted = window.front();
    window.pop();
    phi.slide_bit(b, evicted);
}
// Reports the fingerprint value currently held by `phi`.
uint32_t Rabin_fingerprint_process::get_fingerprint() {
    const uint32_t current = phi.get_fingerprint();
    return current;
}
// Returns the contents of the sliding window as a string, packing every eight
// consecutive bits (oldest bit first, most significant bit first) into one char.
//
// The window itself is left untouched: the bits are read from a copy of the
// queue. Rotating the member queue in place only restores the original order
// when the number of stored bits is a multiple of eight, which is not
// guaranteed when NDEBUG disables the fill check below; a partially rotated
// window would corrupt every later slide.
//
// Throws std::logic_error if the window size is not a whole number of bytes
// and, unless NDEBUG is defined, if the window has not been filled yet.
std::string Rabin_fingerprint_process::get_string_in_window() {
    // check if window contains a whole number of chars
    if ((window_size_in_bits & 0b111) != 0)
        throw std::logic_error("The fingerprinting window doesn't contain a whole number of chars (counting the bits), so it doesn't make sense to return it as a string.");
#ifndef NDEBUG
    if (window.size() != window_size_in_bits)
        throw std::logic_error("False match! The sliding window isn't even filled yet, which means you matched the pattern of a substring shorter than the pattern. This case should be handled/avoided elsewhere, so we throw an error.");
#endif
    // std::queue offers no iteration, so consume a snapshot instead of the member.
    std::queue<bool> bits = window;
    // Only complete groups of eight bits are emitted; fix the count up front
    // because the snapshot shrinks while it is being read.
    const size_t char_count = bits.size() >> 3;

    std::string s;
    s.reserve(char_count);
    for (size_t i = 0; i < char_count; i++) {
        // Accumulate in an unsigned byte so no shift happens on a possibly signed char.
        unsigned char byte = 0;
        for (size_t j = 0; j < 8; j++) {
            byte = static_cast<unsigned char>((byte << 1) | (bits.front() ? 1u : 0u));
            bits.pop();
        }
        s.push_back(static_cast<char>(byte));
    }
    return s;
}

View File

@ -1,26 +0,0 @@
#ifndef PROCESSES_H
#define PROCESSES_H
#include "Rabin_fingerprint.hpp"
#include "general_library.hpp"
#include <stdint.h>
#include <queue>
#include <string>
#include <sstream>
// Streaming front end for Rabin_fingerprint: input is fed in bit by bit (or
// char by char) while a queue keeps the bits that are currently inside the
// sliding window.
class Rabin_fingerprint_process {
public:
// Creates a process with an empty window; both arguments are forwarded to the
// Rabin_fingerprint member, and window_size_in_bits is kept as the window capacity.
Rabin_fingerprint_process(uint32_t irr_poly, size_t window_size_in_bits);
// Streams the eight bits of `c`, most significant bit first.
void stream_char(char c);
// Streams one bit; once the window holds window_size_in_bits bits, each new
// bit pushes the oldest bit out of the window.
void stream_bit(bool b);
// Returns the fingerprint currently reported by the Rabin_fingerprint member.
uint32_t get_fingerprint();
// Returns the window contents packed into chars, eight bits per char.
// Throws std::logic_error if window_size_in_bits is not a multiple of 8 and,
// unless NDEBUG is defined, if the window is not completely filled yet.
std::string get_string_in_window();
private:
// NOTE: members are initialized in this declaration order.
std::queue<bool> window; // bits currently in the window, oldest at the front
size_t window_size_in_bits; // number of bits the window holds once it is full
Rabin_fingerprint phi; // fingerprint that is pushed/slid alongside the queue
};
#endif

Binary file not shown.

View File

@ -1,7 +1,6 @@
/* #define NDEBUG */ /* #define NDEBUG */
/* #include "Rabin_fingerprint.hpp" */ #include "Rabin_fingerprint.hpp"
#include "general_library.hpp" #include "general_library.hpp"
#include "processes.hpp"
#include <iostream> #include <iostream>
#include <stdint.h> #include <stdint.h>
@ -9,42 +8,53 @@
#include <string> #include <string>
#include <fstream> #include <fstream>
int main() { void print_match (size_t index, size_t length, std::string &T) {
/* std::ifstream T("books/the_complete_works_of_william_shakespeare.txt"); */ std::cout << "Match found at index " << index << " with the text \"";
std::ifstream T("books/genji_monogatari_english.txt"); for (size_t i = 0; i < length; i++)
std::ifstream P("books/pattern.txt"); std::cout << T[index + i];
std::cout << "\"" << std::endl;
char c;
size_t P_length = 0;
std::cout << "Searching for pattern:" << std::endl;
std::cout << " ";
while(P.get(c)) {
std::cout << c;
P_length++;
} }
std::cout << std::endl << std::endl;
P.clear(); // clear fail and eof bits int main() {
P.seekg(0, std::ios::beg); // back to the start! /* std::ifstream ifs("books/the_complete_works_of_william_shakespeare.txt"); */
std::ifstream ifs("books/genji_monogatari_english.txt");
std::string T( (std::istreambuf_iterator<char>(ifs) ),
(std::istreambuf_iterator<char>() ) );
uint32_t irreducible_polynomial = get_random_irreducible_polynomial_in_Z2(31); /* std::string T = "Hello, this is my test string averylongword is a necessary word to exceed the 32 bit window."; */
size_t window_size_in_bits = P_length*8; // Test without the modulo polynomial - and two matches
std::string P = "word";
// Test with the modulo polynomial
/* std::string P = "averylongword"; */
std::cout << "Searching for pattern:" << std::endl;
std::cout << " " << P << std::endl;
/* std::cout << "in text:" << std::endl; */
/* std::cout << " " << T << std::endl; */
std::cout << std::endl;
/* uint32_t polynomial = pow(2, 30) + pow(2, 2) + 1; // x^31 + x^3 + 1 */
uint32_t polynomial = get_random_irreducible_polynomial_in_Z2(31);
/* uint32_t polynomial = 0b11010011100100000111101011110111; */
// Test without the modulo polynomial
size_t window_size_in_bits = P.length()*8;
// Hash the pattern // Hash the pattern
Rabin_fingerprint_process phiP(irreducible_polynomial, (size_t)window_size_in_bits); Rabin_fingerprint fP(polynomial, window_size_in_bits);
while(P.get(c)) { for (char c : P)
phiP.stream_char(c); fP.push_char(c);
}
// Hash the text // Hash the text
Rabin_fingerprint_process phiT(irreducible_polynomial, window_size_in_bits); Rabin_fingerprint fT(polynomial, window_size_in_bits);
size_t index = 0; for (size_t i = 0; i < P.length(); i++)
while(T.get(c)) { fT.push_char(T[i]);
phiT.stream_char(c); if (fT.get_fingerprint() == fP.get_fingerprint())
index++; print_match(0, P.length(), T);
if (phiT.get_fingerprint() == phiP.get_fingerprint())
std::cout << "Match found at index " << index-P_length << " with the text \"" << phiT.get_string_in_window() << "\"" << std::endl; for (size_t i = P.length(); i < T.length(); i++) {
fT.slide_char(T[i], T[i-P.length()]);
if (fT.get_fingerprint() == fP.get_fingerprint())
print_match(i-P.length()+1, P.length(), T);
} }
std::cout << std::endl; std::cout << std::endl;