Compare commits
No commits in common. "b9645f15ff8956732f796d1f45fe4873002ae528" and "76c0d7f49ec37a7d5692b8cbc1f170002a91e65a" have entirely different histories.
b9645f15ff
...
76c0d7f49e
7
Makefile
7
Makefile
@ -24,8 +24,8 @@ LDLIBS = # library flags
|
||||
# -llapacke (LAPACK) NOTE: The order of "-llapacke -llapack -lblas" is very important
|
||||
#LINK.o = $(CXX) $(LDFLAGS) # use CXX for linking
|
||||
|
||||
simple_string_matching: simple_string_matching.o Rabin_fingerprint.o general_library.o processes.o
|
||||
$(CXX) $(CXXFLAGS) simple_string_matching.o Rabin_fingerprint.o general_library.o processes.o -o simple_string_matching
|
||||
simple_string_matching: simple_string_matching.o Rabin_fingerprint.o general_library.o
|
||||
$(CXX) $(CXXFLAGS) simple_string_matching.o Rabin_fingerprint.o general_library.o -o simple_string_matching
|
||||
|
||||
simple_string_matching.o: simple_string_matching.cpp
|
||||
$(CXX) $(CXXFLAGS) -c simple_string_matching.cpp
|
||||
@ -36,9 +36,6 @@ Rabin_fingerprint.o: Rabin_fingerprint.cpp Rabin_fingerprint.hpp
|
||||
general_library.o: general_library.cpp general_library.hpp
|
||||
$(CXX) $(CXXFLAGS) -c general_library.cpp
|
||||
|
||||
processes.o: processes.cpp processes.hpp
|
||||
$(CXX) $(CXXFLAGS) -c processes.cpp
|
||||
|
||||
porat-porat: porat-porat.cpp
|
||||
|
||||
# Tell the compiler that 'clean' isn't referring to a file
|
||||
|
13
README.md
13
README.md
@ -2,20 +2,9 @@
|
||||
|
||||
Code used for my Bachelor's Thesis
|
||||
|
||||
# Issues
|
||||
When reading the pattern from a file, according to UNIX standards ([read details here](https://unix.stackexchange.com/questions/18743/whats-the-point-in-adding-a-new-line-to-the-end-of-a-file)) a "text file" has to end on a newline.
|
||||
This adds an unintended newline to the end of the pattern, so to avoid adding said newline, write the file using:
|
||||
`echo -n word > pattern.txt`
|
||||
or if you use Vim, remove the newline using
|
||||
```
|
||||
:set binary
|
||||
:set noeol
|
||||
:wq
|
||||
```
|
||||
|
||||
# Versions
|
||||
## Current Version
|
||||
Compared to V1, in this current state `generate_initial_irreducible_polynomials.sage`, `generate_random_irreducible_polynomial.sage`, and `multiply_polynomials_modulo_polynomial.sage` have been removed from the main folder, as they have already been used for implementing their corresponding features in `general_library.cpp`. They will be preserved in the V1 folder.
|
||||
Compared to V1, in this current state `generate_initial_irreducible_polynomials.sage`, `generate_random_irreducible_polynomial.sage`, and `multiply_polynomials_modulo_polynomial.sage` have been removed from the main folder, as they have already been used for implementing their corresponding features in `general_library.cpp`. They will be preserved in the V1 folder.
|
||||
|
||||
This version also contains code relating to Porat-Porat and other miscellaneous code stubs. This code is going to be completely reimplemented, but is preserved for now to avoid redoing work.
|
||||
|
||||
|
@ -14,7 +14,7 @@ class Rabin_fingerprint {
|
||||
void push_bit (bool b);
|
||||
void shift_bit (bool b);
|
||||
void slide_char (char c_in, char c_out);
|
||||
void slide_bit (bool b_in, bool b_out);
|
||||
void slide_bit (bool b1, bool b2);
|
||||
|
||||
uint32_t get_fingerprint();
|
||||
|
||||
|
Binary file not shown.
@ -1 +0,0 @@
|
||||
word
|
@ -1,59 +0,0 @@
|
||||
#include "processes.hpp"
|
||||
|
||||
// Builds a streaming fingerprint process with an empty bit window.
//
// irr_poly             forwarded unchanged to the Rabin_fingerprint constructor
//                      (presumably an irreducible polynomial over Z2 - confirm
//                      against Rabin_fingerprint)
// window_size_in_bits  capacity of the sliding window, counted in bits; also
//                      forwarded to the Rabin_fingerprint constructor
Rabin_fingerprint_process::Rabin_fingerprint_process(uint32_t irr_poly, size_t window_size_in_bits)
    : window(),
      window_size_in_bits(window_size_in_bits),
      phi(irr_poly, window_size_in_bits)
{
}
|
||||
|
||||
void Rabin_fingerprint_process::stream_char(char c) {
|
||||
std::bitset<8> b(c);
|
||||
for (char i = 7; i >= 0; i--) {
|
||||
stream_bit((bool)b[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void Rabin_fingerprint_process::stream_bit(bool b) {
|
||||
if (window.size() == window_size_in_bits) {
|
||||
window.push(b);
|
||||
bool b_out = window.front();
|
||||
window.pop();
|
||||
phi.slide_bit(b, b_out);
|
||||
} else {
|
||||
window.push(b);
|
||||
phi.push_bit(b);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t Rabin_fingerprint_process::get_fingerprint() {
|
||||
return phi.get_fingerprint();
|
||||
}
|
||||
|
||||
std::string Rabin_fingerprint_process::get_string_in_window() {
|
||||
// check if window contains a whole number of chars
|
||||
if ((window_size_in_bits & 0b111) != 0)
|
||||
throw std::logic_error("The fingerprinting window doesn't contain a whole number of chars (counting the bits), so it doesn't make sense to return it as a string.");
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (window.size() != window_size_in_bits)
|
||||
throw std::logic_error("False match! The sliding window isn't even filled yet, which means you matched the pattern of a substring shorter than the pattern. This case should be handled/avoided elsewhere, so we throw an error.");
|
||||
#endif
|
||||
|
||||
std::ostringstream os;
|
||||
for (size_t i = 0; i < window.size()>>3; i++) {
|
||||
// cycle the char
|
||||
char c = 0;
|
||||
for (size_t j = 0; j < 8; j++) {
|
||||
bool b = window.front();
|
||||
window.pop();
|
||||
window.push(b);
|
||||
c <<= 1;
|
||||
c |= b;
|
||||
}
|
||||
|
||||
os << c;
|
||||
}
|
||||
|
||||
std::string s = os.str();
|
||||
|
||||
return s;
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
#ifndef PROCESSES_H
|
||||
#define PROCESSES_H
|
||||
|
||||
#include "Rabin_fingerprint.hpp"
|
||||
#include "general_library.hpp"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
class Rabin_fingerprint_process {
|
||||
public:
|
||||
Rabin_fingerprint_process(uint32_t irr_poly, size_t window_size_in_bits);
|
||||
void stream_char(char c);
|
||||
void stream_bit(bool b);
|
||||
uint32_t get_fingerprint();
|
||||
std::string get_string_in_window();
|
||||
|
||||
private:
|
||||
std::queue<bool> window;
|
||||
size_t window_size_in_bits;
|
||||
Rabin_fingerprint phi;
|
||||
};
|
||||
|
||||
#endif
|
Binary file not shown.
@ -1,7 +1,6 @@
|
||||
/* #define NDEBUG */
|
||||
/* #include "Rabin_fingerprint.hpp" */
|
||||
#include "Rabin_fingerprint.hpp"
|
||||
#include "general_library.hpp"
|
||||
#include "processes.hpp"
|
||||
|
||||
#include <iostream>
|
||||
#include <stdint.h>
|
||||
@ -9,42 +8,53 @@
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
int main() {
|
||||
/* std::ifstream T("books/the_complete_works_of_william_shakespeare.txt"); */
|
||||
std::ifstream T("books/genji_monogatari_english.txt");
|
||||
std::ifstream P("books/pattern.txt");
|
||||
|
||||
char c;
|
||||
size_t P_length = 0;
|
||||
std::cout << "Searching for pattern:" << std::endl;
|
||||
std::cout << " ";
|
||||
while(P.get(c)) {
|
||||
std::cout << c;
|
||||
P_length++;
|
||||
|
||||
// Prints one match report line to standard output:
//   Match found at index <index> with the text "<matched text>"
//
// index   position in T where the match starts; printed as given
// length  number of characters of T to show
// T       the text that was searched
//
// The shown text is clamped to the end of T, so an index/length pair that
// reaches past the text prints only the characters that exist instead of
// reading out of bounds.
void print_match (size_t index, size_t length, std::string &T) {
    const size_t start = index < T.length() ? index : T.length();
    const size_t available = T.length() - start;
    const size_t count = length < available ? length : available;

    std::cout << "Match found at index " << index << " with the text \"";
    std::cout.write(T.data() + start, static_cast<std::streamsize>(count));
    std::cout << "\"" << std::endl;
}
|
||||
std::cout << std::endl << std::endl;
|
||||
|
||||
P.clear(); // clear fail and eof bits
|
||||
P.seekg(0, std::ios::beg); // back to the start!
|
||||
int main() {
|
||||
/* std::ifstream ifs("books/the_complete_works_of_william_shakespeare.txt"); */
|
||||
std::ifstream ifs("books/genji_monogatari_english.txt");
|
||||
std::string T( (std::istreambuf_iterator<char>(ifs) ),
|
||||
(std::istreambuf_iterator<char>() ) );
|
||||
|
||||
uint32_t irreducible_polynomial = get_random_irreducible_polynomial_in_Z2(31);
|
||||
size_t window_size_in_bits = P_length*8;
|
||||
/* std::string T = "Hello, this is my test string averylongword is a necessary word to exceed the 32 bit window."; */
|
||||
// Test without the modulo polynomial - and two matches
|
||||
std::string P = "word";
|
||||
// Test with the modulo polynomial
|
||||
/* std::string P = "averylongword"; */
|
||||
|
||||
std::cout << "Searching for pattern:" << std::endl;
|
||||
std::cout << " " << P << std::endl;
|
||||
/* std::cout << "in text:" << std::endl; */
|
||||
/* std::cout << " " << T << std::endl; */
|
||||
std::cout << std::endl;
|
||||
|
||||
/* uint32_t polynomial = pow(2, 30) + pow(2, 2) + 1; // x^31 + x^3 + 1 */
|
||||
uint32_t polynomial = get_random_irreducible_polynomial_in_Z2(31);
|
||||
/* uint32_t polynomial = 0b11010011100100000111101011110111; */
|
||||
// Test without the modulo polynomial
|
||||
size_t window_size_in_bits = P.length()*8;
|
||||
|
||||
// Hash the pattern
|
||||
Rabin_fingerprint_process phiP(irreducible_polynomial, (size_t)window_size_in_bits);
|
||||
while(P.get(c)) {
|
||||
phiP.stream_char(c);
|
||||
}
|
||||
Rabin_fingerprint fP(polynomial, window_size_in_bits);
|
||||
for (char c : P)
|
||||
fP.push_char(c);
|
||||
|
||||
// Hash the text
|
||||
Rabin_fingerprint_process phiT(irreducible_polynomial, window_size_in_bits);
|
||||
size_t index = 0;
|
||||
while(T.get(c)) {
|
||||
phiT.stream_char(c);
|
||||
index++;
|
||||
if (phiT.get_fingerprint() == phiP.get_fingerprint())
|
||||
std::cout << "Match found at index " << index-P_length << " with the text \"" << phiT.get_string_in_window() << "\"" << std::endl;
|
||||
Rabin_fingerprint fT(polynomial, window_size_in_bits);
|
||||
for (size_t i = 0; i < P.length(); i++)
|
||||
fT.push_char(T[i]);
|
||||
if (fT.get_fingerprint() == fP.get_fingerprint())
|
||||
print_match(0, P.length(), T);
|
||||
|
||||
for (size_t i = P.length(); i < T.length(); i++) {
|
||||
fT.slide_char(T[i], T[i-P.length()]);
|
||||
if (fT.get_fingerprint() == fP.get_fingerprint())
|
||||
print_match(i-P.length()+1, P.length(), T);
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
|
Loading…
Reference in New Issue
Block a user