#include "hashing_algorithms.hpp"
#include "hashed_string.hpp"
#include <cstring> // memcpy (the header name was missing from this directive)

// Returns the number of significant bits in p.
// For a Mersenne prime p = 2^k - 1 this is the exponent k.
static inline unsigned mersenne_exponent_of(uint64_t p) {
    unsigned k = 0;
    while (p != 0) {
        ++k;
        p >>= 1;
    }
    return k;
}

// Reduces y modulo the Mersenne prime p = 2^k - 1 with one fold and one
// conditional subtraction: y mod p == (y & p) + (y >> k)  (mod p).
// The result is fully reduced whenever (y >> k) < p, which holds for
// y = a*x + b with a, x, b < p as long as the 64-bit arithmetic did not wrap.
static inline uint64_t fold_mod_mersenne(uint64_t y, uint64_t p) {
    const unsigned k = mersenne_exponent_of(p);
    // The shift amount must be the exponent k, not the prime itself, and a
    // shift by >= 64 would be undefined behaviour.
    const uint64_t high = (k < 64) ? (y >> k) : 0;
    y = (y & p) + high;
    if (y >= p) {
        y -= p;
    }
    return y;
}

// Keeps the q least significant bits of y (i.e. y mod 2^q).
// q <= 0 yields 0 and q >= 64 yields y unchanged, so no undefined shift occurs.
static inline uint64_t keep_low_bits(uint64_t y, char q) {
    const int bits = q;
    if (bits <= 0) {
        return 0;
    }
    if (bits >= 64) {
        return y;
    }
    return y & (((uint64_t)1 << bits) - 1);
}

// Computes ((a*x + b) mod p) mod m.
// p is prime. p > m > 1, p > a > 0, p > b >= 0.
// a*x + b is evaluated in 64-bit unsigned arithmetic and wraps on overflow.
uint64_t multiply_mod_prime(uint64_t x, uint64_t a, uint64_t b, uint64_t p, uint64_t m) {
    return ((a * x + b) % p) % m;
}

// Computes ((a*x + b) mod p) mod m using the Mersenne fold instead of a division by p.
// p is a Mersenne prime. p > m > 1, p > a > 0, p > b >= 0.
uint64_t multiply_mod_prime_mersenne(uint64_t x, uint64_t a, uint64_t b, uint64_t p, uint64_t m) {
    const uint64_t y = fold_mod_mersenne(a * x + b, p);
    return y % m;
}

// Computes ((a*x + b) mod p) mod 2^q, replacing the final modulo by a bit mask.
// p is a Mersenne prime. m = 2^q. p > m > 1, p > a > 0, p > b >= 0.
uint64_t multiply_mod_prime_mersenne_overflow(uint64_t x, uint64_t a, uint64_t b, uint64_t p, char q) {
    const uint64_t y = fold_mod_mersenne(a * x + b, p);
    return keep_low_bits(y, q);
}

// Computes ((a*x) mod p) mod 2^q, i.e. the variant above without the additive term.
// p is a Mersenne prime. m = 2^q. p > m > 1, p > a > 0.
uint64_t multiply_mod_prime_mersenne_overflow_no_b(uint64_t x, uint64_t a, uint64_t p, char q) {
    const uint64_t y = fold_mod_mersenne(a * x, p);
    return keep_low_bits(y, q);
}

// p=2^89-1 is a Mersenne prime. m=2^l. 32 >= l > 0. p > a > 0, p > b >= 0.
// x is a 64 bit integer. We assume x, a, and b are arrays of 32 bit integers.
uint32_t multiply_mod_prime_mersenne_overflow_high_bitcount(uint32_t * x, uint32_t * a, uint32_t * b, char l) { // x is a 64 bit integer given as a 2 long array of 32 bit integers // a is a 89 bit integer given as a 3 long array of 32 bit integers // b is a 89 bit integer given as a 3 long array of 32 bit integers // ax: array to hold the sub-calculations of a*x // there are 2*3 sub-calculations with each result being split into the least significant 32 bits and the most significant 32 bits uint32_t ax[12]; // 12 = 2*3*2 for (size_t i = 0; i < 2; i++) { // index x for (size_t ii = 0; ii < 3; ii++) { // index a uint64_t tmp = (uint64_t)x[i] * (uint64_t)a[ii]; ax[6*i+2*ii] = (uint32_t)(tmp & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones ax[6*i+2*ii+1] = (uint32_t)(tmp >> 32); } } // calculate y = ax+b uint32_t * y = new uint32_t[5]; uint64_t sum = 0; uint32_t carry = 0; // calculate bits 0-32 sum = (uint64_t)ax[10]+(uint64_t)b[2]; carry = (uint32_t)(sum >> 32); y[4] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones // calculate bits 33-64 sum = (uint64_t)carry + (uint64_t)ax[11] + (uint64_t)ax[8] + (uint64_t)ax[4] + (uint64_t)b[1]; carry = (uint32_t)(sum >> 32); y[3] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones // calculate bits 65-96 sum = (uint64_t)carry + (uint64_t)ax[9] + (uint64_t)ax[5] + (uint64_t)ax[2] + (uint64_t)ax[6] + (uint64_t)b[0]; carry = (uint32_t)(sum >> 32); y[2] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones // calculate bits 97-128 sum = (uint64_t)carry + (uint64_t)ax[3] + (uint64_t)ax[7] + (uint64_t)ax[0]; carry = (uint32_t)(sum >> 32); y[1] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones // calculate bits 129-160 y[0] = ax[1]; //// calculate modulo p for p=2^89-1 // y&p // we take the 89 first bits uint32_t yandp[3]; yandp[2] = y[4]; yandp[1] = y[3]; yandp[0] = (y[2] & 0b11111'11111'11111'11111'11111); // 25 ones // y>>q // we 
bitshift 89 times, so we only keep the 5*32 - 89 = 71 most significant bits uint32_t yshiftq[3]; yshiftq[2] = (y[2] >> 25) // keep 7 bits | (y[1] << 7); // keep 25 bits yshiftq[1] = (y[1] >> 25) | (y[0] << 7); yshiftq[0] = y[0] >> 25; // y = (y&p) + (y >> q) // bits 0-32 sum = (uint64_t)yandp[2] + (uint64_t)yshiftq[2]; carry = (uint32_t)(sum >> 32); y[4] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones // bits 33-64 sum = (uint64_t)carry + (uint64_t)yandp[1] + (uint64_t)yshiftq[1]; carry = (uint32_t)(sum >> 32); y[3] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones // bits 65-71 y[2] = (uint64_t)carry + (uint64_t)yandp[0] + (uint64_t)yshiftq[0]; y[1] = 0; // y[0] = 0; //// if y >= p; y -= p // y >= p if bits 90 to 96 are != 0 (actually if bit 90 is set, but this is prettier) if ((y[2] >> 25) != 0) { // subtracting 2^89-1 is equal to subtracting 2^89 and adding 1. // - 2^89 y[2] = y[2] & 0b1'1111'1111'1111'1111'1111'1111; // 25 ones. We know that bit 91 to 96 are 0 // + 1 for (size_t i = 4; i > 1; i--) { y[i] += 1; if (y[i] != 0) break; } } // mod 2^l (mod m) return (y[4] >> (32-l)); } uint64_t multiply_shift_c_universal(uint32_t x, uint64_t a, char l) { return (a*x) >> (64-l); } uint64_t multiply_shift_strongly_universal(uint32_t x, uint64_t a, uint64_t b, char l) { return (a*x+b) >> (64-l); } uint64_t multiply_shift_vector(uint32_t * x, uint64_t * seed, size_t d, char l) { uint64_t val = 0; for (size_t i = 0; i < d; i++) val += seed[i]*x[i]; return (val + seed[d-1]) >> (64-l); } // requires x to be of size D and the seed to be of size D uint64_t multiply_shift_string(const My_string * string, const uint64_t * seed, uint64_t * x, size_t l) { size_t d = (string->size+7) >> 3; // d = ceil(string->size/8) x[d-1] = 0; memcpy(x, string->chars, string->size*sizeof(string->chars[0])); uint64_t val = 0; for (size_t i = 0; i < d; i++) val += (seed[2*i]+(uint32_t)(x[i]>>32))*(seed[2*i+1]+(uint32_t)x[i]); return (val + 
seed[d]) >> (64-l); } // calculate (ax+b) mod p // p=2^89-1 is a Mersenne prime. p > a. p > x. a and x are size 3 arrays of uint32_t. // b is a 64 bit integer given as a size 2 array of uint32_t. // The result is saved in x. void high_bitcount_ax_b_mod_p(uint32_t * x, uint32_t * a, uint32_t * b) { // x, a, and b are 89 bit integers given as a 3 long arrays of 32 bit integers. // ax: array to hold the sub-calculations of a*x // there are 3*3 sub-calculations with each result being split into the least significant 32 bits and the most significant 32 bits size_t ax_size = 18; // 18 = 3*3*2 uint32_t ax[ax_size]; for (size_t i = 0; i < 3; i++) { // index x for (size_t ii = 0; ii < 3; ii++) { // index a uint64_t tmp = (uint64_t)x[i] * (uint64_t)a[ii]; ax[6*i+2*ii] = (uint32_t)(tmp & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones ax[6*i+2*ii+1] = (uint32_t)(tmp >> 32); } } // calculate y = ax+b size_t y_size = 6; // 6 = ceil( (89+89+1)/32 ) uint32_t * y = new uint32_t[y_size]; uint64_t sum = 0; uint32_t carry = 0; y[0] = ax[0]+b[0]; for (size_t index = 1; index < y_size-1; index++) { sum = 0; for (size_t i = 0; i <= index; i++) { size_t ii = index - i; sum += ax[6*i+2*ii]; } for (size_t i = 0; i < index; i++) { size_t ii = index - i - 1; sum += ax[6*i+2*ii+1]; } if (index < 2) { sum += b[index]; } carry = (uint32_t)(sum >> 32); y[index] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones } y[y_size-1] = carry+ax[ax_size-1]; //// calculate modulo p for p=2^89-1 // y&p // we take the 89 first bits uint32_t yandp[3]; yandp[0] = y[0]; yandp[1] = y[1]; yandp[2] = (y[2] & 0b11111'11111'11111'11111'11111); // 25 ones // y>>q // we bitshift 89 times, and keep the 89 following bits uint32_t yshiftq[3]; yshiftq[0] = (y[2] >> 25) // keep 7 bits | (y[3] << 7); // keep 25 bits yshiftq[1] = (y[3] >> 25) | (y[4] << 7); yshiftq[2] = (y[4] >> 25) // keep 7 bits | (y[5] << 7); // keep 25 bits, but only 18 of them can be nonzero in practice. 
// y = (y&p) + (y >> q) // bits 0-32 sum = (uint64_t)yandp[0] + (uint64_t)yshiftq[0]; carry = (uint32_t)(sum >> 32); y[0] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones // bits 33-64 sum = (uint64_t)carry + (uint64_t)yandp[1] + (uint64_t)yshiftq[1]; carry = (uint32_t)(sum >> 32); y[1] = (uint32_t)(sum & 0b1111'1111'1111'1111'1111'1111'1111'1111); // 32 ones // bits 65-71 y[2] = (uint64_t)carry + (uint64_t)yandp[2] + (uint64_t)yshiftq[2]; // y[3] = 0; // we don't use it anyway // y[4] = 0; // y[5] = 0; //// if y >= p; y -= p // y >= p if bits 90 to 96 are != 0 (actually if bit 90 is set, but this is prettier) if ((y[2] << 25) != 0) { // subtracting 2^89-1 is equal to subtracting 2^89 and adding 1. // - 2^89 y[2] = y[2] & 0b1'1111'1111'1111'1111'1111'1111; // 25 ones. We know that bit 91 to 96 are 0 // + 1 for (size_t i = 0; i < 3; i++) { y[i] += 1; if (y[i] != 0) break; } } x[0] = y[0]; x[1] = y[1]; x[2] = y[2]; return; } // p=2^89-1 is a Mersenne prime. // p > a >= 0. p > b >= 0. p > c >= 0. x is a size 2d list of 64 bit integers split into 32 bit integers. // We assume a, b, and c are size 3 arrays of 32 bit integers. uint32_t polynomial_vector(uint32_t * x, uint32_t * a, uint32_t * b, uint32_t * c, size_t d, char l) { uint32_t H[3]; H[0] = x[0]; H[1] = x[1]; for (size_t i = 1; i < d; i++) { high_bitcount_ax_b_mod_p(H, c, x+2*i); } high_bitcount_ax_b_mod_p(H, a, b); return H[0] >> (32-l); } // p=2^89-1 is a Mersenne prime. // p > a >= 0. p > b >= 0. p > c >= 0. x is a size 2d list of 64 bit integers split into 32 bit integers. // We assume a, b, and c are size 3 arrays of 32 bit integers. // we assume the seed to be of size 4 (at minimum). 
uint32_t polynomial_vector_tuned(uint32_t * x, uint32_t * a, uint32_t * b, uint32_t * c, size_t d, char l, const uint64_t * seed) { size_t x_remainder = d - ((d >> 2) << 2); // abuse integer division and multiplication (via bitshifts) as modulo size_t x_tuned_size = (d >> 2) + 1; uint32_t x_tuned[x_tuned_size]; // prehash chunks of x using the bounded string algorithm uint64_t buffer_memory[4]; char word[256]; for (size_t i = 0; i <= (d >> 4) - 1; i++ ) { // d 64 bit integers -> d/4 256 char strings memcpy(word, x + i*4, 256*sizeof(word[0])); My_string str = My_string(word, 256); x_tuned[i] = (uint32_t) multiply_shift_string(&str, seed, buffer_memory, 32); } // prehash the leftovers if (x_remainder != 0) { memcpy(word, x + d-x_remainder, (x_remainder << 3)*sizeof(word[0])); My_string str = My_string(word, (x_remainder << 3)); x_tuned[x_tuned_size-1] = (uint32_t) multiply_shift_string(&str, seed, buffer_memory, 32); } return polynomial_vector(x_tuned, a, b, c, x_tuned_size, l); }