Bachelors_Thesis_Code/compare_fingerprint_false_positive_probabilities.sage
2021-11-14 14:35:05 +01:00

74 lines
1.8 KiB
Python

# Upper bounds on the false-positive (error) probability e compared below:
#   Rabin fingerprint:      k > lg(n*m/e)
#   Karp-Rabin fingerprint: e <= pi(n(n-m+1))/pi(M)
# k is the exponent used in the Rabin bound, M the argument of pi in the
# denominator of the Karp-Rabin bound.
#
# Active configuration: 32-bit fingerprints.
k = 31
M = (1 << 32) - 1
# Alternative configuration: 16-bit fingerprints.
# M = 2**16-1
# k = 13
def fpi(a):
    """Return pi(a), the number of primes less than or equal to ``a``.

    Equivalently, this is the 0-based rank within ``Primes()`` of the
    smallest prime strictly greater than ``a``: exactly pi(a) primes precede
    that prime, so the two definitions give the same number.

    The value is obtained from Sage's built-in ``prime_pi`` rather than by
    probing ``Primes().unrank(i)`` repeatedly, because every such probe is a
    full n-th-prime computation and a hand-tuned jump search needs many of
    them. No progress output is written to stdout.

    Parameters:
        a: the bound up to which primes are counted (inclusive).

    Returns:
        The count of primes ``p`` with ``p <= a``; 0 when ``a < 2``.
    """
    # prime_pi is available in the Sage global namespace of a .sage script,
    # just like Primes().
    return prime_pi(a)
# Derivation of the two error bounds e that are printed below.
#
# Rabin fingerprint -- solve k = lg(n*m/e) for e:
#     k = lg(n*m/e)
#     2^k = n*m/e
#     2^k/(n*m) = 1/e
#     e = n*m/(2^k)
#
# Karp-Rabin fingerprint:
#     e = pi(n(n-m+1))/pi(M)

# Denominator of the Karp-Rabin bound; it does not depend on n or m, so it is
# computed a single time up front.
piM = fpi(M)

for m in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]:
    for n in [10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000]:
        # Only pairs with m strictly smaller than n are evaluated.
        if n > m:
            print(f'(n, m) = ({n}, {m})')

            # Rabin bound: e = n*m / 2^k
            e_rabin = (n * m) / 2**k
            print(f'Rabin: {float(e_rabin):.2e}')

            # Karp-Rabin bound: e = pi(n(n-m+1)) / pi(M)
            e_karpr = fpi(n * (n - m + 1)) / piM
            print(f'KarpR: {float(e_karpr):.2e}')

            print()
            print(f'{e_karpr} >= 1 is {e_karpr >= 1} | {e_rabin} >= 1 is {e_rabin >= 1}')

            # Once either bound has reached 1, stop trying larger n for
            # this m and move on to the next m.
            if float(e_karpr) >= 1 or float(e_rabin) >= 1:
                break