Abstract simple string matching into a process
The process contains the fingerprinting function along with a queue representing the sliding window. This means that the user no longer needs to remember the outgoing character when streaming the text.
This commit is contained in:
parent
76c0d7f49e
commit
c6b6329134
7
Makefile
7
Makefile
@ -24,8 +24,8 @@ LDLIBS = # library flags
|
|||||||
# -llapacke (LAPACK) NOTE: The order of "-llapacke -llapack -lblas" is very important
|
# -llapacke (LAPACK) NOTE: The order of "-llapacke -llapack -lblas" is very important
|
||||||
#LINK.o = $(CXX) $(LDFLAGS) # use CXX for linking
|
#LINK.o = $(CXX) $(LDFLAGS) # use CXX for linking
|
||||||
|
|
||||||
simple_string_matching: simple_string_matching.o Rabin_fingerprint.o general_library.o
|
simple_string_matching: simple_string_matching.o Rabin_fingerprint.o general_library.o processes.o
|
||||||
$(CXX) $(CXXFLAGS) simple_string_matching.o Rabin_fingerprint.o general_library.o -o simple_string_matching
|
$(CXX) $(CXXFLAGS) simple_string_matching.o Rabin_fingerprint.o general_library.o processes.o -o simple_string_matching
|
||||||
|
|
||||||
simple_string_matching.o: simple_string_matching.cpp
|
simple_string_matching.o: simple_string_matching.cpp
|
||||||
$(CXX) $(CXXFLAGS) -c simple_string_matching.cpp
|
$(CXX) $(CXXFLAGS) -c simple_string_matching.cpp
|
||||||
@ -36,6 +36,9 @@ Rabin_fingerprint.o: Rabin_fingerprint.cpp Rabin_fingerprint.hpp
|
|||||||
general_library.o: general_library.cpp general_library.hpp
|
general_library.o: general_library.cpp general_library.hpp
|
||||||
$(CXX) $(CXXFLAGS) -c general_library.cpp
|
$(CXX) $(CXXFLAGS) -c general_library.cpp
|
||||||
|
|
||||||
|
processes.o: processes.cpp processes.hpp
|
||||||
|
$(CXX) $(CXXFLAGS) -c processes.cpp
|
||||||
|
|
||||||
porat-porat: porat-porat.cpp
|
porat-porat: porat-porat.cpp
|
||||||
|
|
||||||
# Tell the compiler that 'clean' isn't referring to a file
|
# Tell the compiler that 'clean' isn't referring to a file
|
||||||
|
@ -14,7 +14,7 @@ class Rabin_fingerprint {
|
|||||||
void push_bit (bool b);
|
void push_bit (bool b);
|
||||||
void shift_bit (bool b);
|
void shift_bit (bool b);
|
||||||
void slide_char (char c_in, char c_out);
|
void slide_char (char c_in, char c_out);
|
||||||
void slide_bit (bool b1, bool b2);
|
void slide_bit (bool b_in, bool b_out);
|
||||||
|
|
||||||
uint32_t get_fingerprint();
|
uint32_t get_fingerprint();
|
||||||
|
|
||||||
|
Binary file not shown.
29
processes.cpp
Normal file
29
processes.cpp
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#include "processes.hpp"
|
||||||
|
|
||||||
|
/**
 * Build a streaming fingerprint process.
 *
 * @param irr_poly             Polynomial forwarded to the underlying
 *                             Rabin_fingerprint (presumably irreducible over
 *                             Z2 -- confirm against Rabin_fingerprint).
 * @param window_size_in_bits  Number of bits held in the sliding window
 *                             before the oldest bit starts to be evicted.
 */
Rabin_fingerprint_process::Rabin_fingerprint_process(uint32_t irr_poly, size_t window_size_in_bits)
    : window(),                                  // starts empty
      window_size_in_bits(window_size_in_bits),
      phi(irr_poly, window_size_in_bits)
{
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint_process::stream_char (char c) {
|
||||||
|
std::bitset<8> b(c);
|
||||||
|
for (char i = 7; i >= 0; i--) {
|
||||||
|
stream_bit((bool)b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Rabin_fingerprint_process::stream_bit (bool b) {
|
||||||
|
if (window.size() == window_size_in_bits) {
|
||||||
|
window.push(b);
|
||||||
|
bool b_out = window.front();
|
||||||
|
window.pop();
|
||||||
|
phi.slide_bit(b, b_out);
|
||||||
|
} else {
|
||||||
|
window.push(b);
|
||||||
|
phi.push_bit(b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t Rabin_fingerprint_process::get_fingerprint () {
|
||||||
|
return phi.get_fingerprint();
|
||||||
|
}
|
23
processes.hpp
Normal file
23
processes.hpp
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
#ifndef PROCESSES_H
|
||||||
|
#define PROCESSES_H
|
||||||
|
|
||||||
|
#include "Rabin_fingerprint.hpp"
|
||||||
|
#include "general_library.hpp"
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <queue>
|
||||||
|
|
||||||
|
class Rabin_fingerprint_process {
|
||||||
|
public:
|
||||||
|
Rabin_fingerprint_process(uint32_t irr_poly, size_t window_size_in_bits);
|
||||||
|
void stream_char(char c);
|
||||||
|
void stream_bit(bool b);
|
||||||
|
uint32_t get_fingerprint();
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::queue<bool> window;
|
||||||
|
size_t window_size_in_bits;
|
||||||
|
Rabin_fingerprint phi;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
BIN
simple_string_matching
Executable file
BIN
simple_string_matching
Executable file
Binary file not shown.
@ -1,6 +1,7 @@
|
|||||||
/* #define NDEBUG */
|
/* #define NDEBUG */
|
||||||
#include "Rabin_fingerprint.hpp"
|
/* #include "Rabin_fingerprint.hpp" */
|
||||||
#include "general_library.hpp"
|
/* #include "general_library.hpp" */
|
||||||
|
#include "processes.hpp"
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
@ -21,11 +22,7 @@ int main() {
|
|||||||
std::string T( (std::istreambuf_iterator<char>(ifs) ),
|
std::string T( (std::istreambuf_iterator<char>(ifs) ),
|
||||||
(std::istreambuf_iterator<char>() ) );
|
(std::istreambuf_iterator<char>() ) );
|
||||||
|
|
||||||
/* std::string T = "Hello, this is my test string averylongword is a necessary word to exceed the 32 bit window."; */
|
|
||||||
// Test without the modulo polynomial - and two matches
|
|
||||||
std::string P = "word";
|
std::string P = "word";
|
||||||
// Test with the modulo polynomial
|
|
||||||
/* std::string P = "averylongword"; */
|
|
||||||
|
|
||||||
std::cout << "Searching for pattern:" << std::endl;
|
std::cout << "Searching for pattern:" << std::endl;
|
||||||
std::cout << " " << P << std::endl;
|
std::cout << " " << P << std::endl;
|
||||||
@ -33,27 +30,24 @@ int main() {
|
|||||||
/* std::cout << " " << T << std::endl; */
|
/* std::cout << " " << T << std::endl; */
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
|
|
||||||
/* uint32_t polynomial = pow(2, 30) + pow(2, 2) + 1; // x^31 + x^3 + 1 */
|
uint32_t irreducible_polynomial = get_random_irreducible_polynomial_in_Z2(31);
|
||||||
uint32_t polynomial = get_random_irreducible_polynomial_in_Z2(31);
|
|
||||||
/* uint32_t polynomial = 0b11010011100100000111101011110111; */
|
|
||||||
// Test without the modulo polynomial
|
|
||||||
size_t window_size_in_bits = P.length()*8;
|
size_t window_size_in_bits = P.length()*8;
|
||||||
|
|
||||||
// Hash the pattern
|
// Hash the pattern
|
||||||
Rabin_fingerprint fP(polynomial, window_size_in_bits);
|
Rabin_fingerprint_process phiP(irreducible_polynomial, (size_t)window_size_in_bits);
|
||||||
for (char c : P)
|
for (char c : P)
|
||||||
fP.push_char(c);
|
phiP.stream_char(c);
|
||||||
|
|
||||||
// Hash the text
|
// Hash the text
|
||||||
Rabin_fingerprint fT(polynomial, window_size_in_bits);
|
Rabin_fingerprint_process phiT(irreducible_polynomial, window_size_in_bits);
|
||||||
for (size_t i = 0; i < P.length(); i++)
|
for (size_t i = 0; i < P.length(); i++)
|
||||||
fT.push_char(T[i]);
|
phiT.stream_char(T[i]);
|
||||||
if (fT.get_fingerprint() == fP.get_fingerprint())
|
if (phiT.get_fingerprint() == phiP.get_fingerprint())
|
||||||
print_match(0, P.length(), T);
|
print_match(0, P.length(), T);
|
||||||
|
|
||||||
for (size_t i = P.length(); i < T.length(); i++) {
|
for (size_t i = P.length(); i < T.length(); i++) {
|
||||||
fT.slide_char(T[i], T[i-P.length()]);
|
phiT.stream_char(T[i]);
|
||||||
if (fT.get_fingerprint() == fP.get_fingerprint())
|
if (phiT.get_fingerprint() == phiP.get_fingerprint())
|
||||||
print_match(i-P.length()+1, P.length(), T);
|
print_match(i-P.length()+1, P.length(), T);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user