#include "umash.h"
#if !defined(UMASH_TEST_ONLY) && !defined(NDEBUG)
#define NDEBUG
#endif
#include <assert.h>
#include <string.h>
#ifdef __PCLMUL__
#include <immintrin.h>
typedef __m128i v128;
#define V128_ZERO { 0 };
static inline v128
v128_create(uint64_t lo, uint64_t hi)
{
return _mm_set_epi64x(hi, lo);
}
static inline v128
v128_shift(v128 x)
{
return _mm_add_epi64(x, x);
}
static inline v128
v128_clmul(uint64_t x, uint64_t y)
{
return _mm_clmulepi64_si128(_mm_cvtsi64_si128(x), _mm_cvtsi64_si128(y), 0);
}
static inline v128
v128_clmul_cross(v128 x)
{
return _mm_clmulepi64_si128(x, x, 1);
}
#elif defined(__ARM_FEATURE_CRYPTO)
#include <arm_neon.h>
typedef uint64x2_t v128;
#define V128_ZERO { 0 };
static inline v128
v128_create(uint64_t lo, uint64_t hi)
{
return vcombine_u64(vcreate_u64(lo), vcreate_u64(hi));
}
static inline v128
v128_shift(v128 x)
{
return vshlq_n_u64(x, 1);
}
static inline v128
v128_clmul(uint64_t x, uint64_t y)
{
return vreinterpretq_u64_p128(vmull_p64(x, y));
}
static inline v128
v128_clmul_cross(v128 x)
{
return v128_clmul(vgetq_lane_u64(x, 0), vgetq_lane_u64(x, 1));
}
#else
#error \
"Unsupported platform: umash requires CLMUL (-mpclmul) on x86-64, or crypto (-march=...+crypto) extensions on aarch64."
#endif
#if defined(UMASH_STAP_PROBE) && UMASH_STAP_PROBE
#include <sys/sdt.h>
#else
#define DTRACE_PROBE1(lib, name, a0)
#define DTRACE_PROBE2(lib, name, a0, a1)
#define DTRACE_PROBE3(lib, name, a0, a1, a2)
#define DTRACE_PROBE4(lib, name, a0, a1, a2, a3)
#endif
#if defined(UMASH_SECTION) && defined(__GNUC__)
#define FN __attribute__((__section__(UMASH_SECTION)))
#else
#define FN
#endif
#ifdef UMASH_TEST_ONLY
#define TEST_DEF FN
#include "t/umash_test_only.h"
#else
#define TEST_DEF static FN
#endif
#ifdef __GNUC__
#define LIKELY(X) __builtin_expect(!!(X), 1)
#define UNLIKELY(X) __builtin_expect(!!(X), 0)
#else
#define LIKELY(X) X
#define UNLIKELY(X) X
#endif
#define ARRAY_SIZE(ARR) (sizeof(ARR) / sizeof(ARR[0]))
#define BLOCK_SIZE (sizeof(uint64_t) * UMASH_OH_PARAM_COUNT)
#define OH_SHORT_HASH_SHIFT 4
#define INCREMENTAL_GRANULARITY 16
TEST_DEF inline uint64_t
add_mod_fast(uint64_t x, uint64_t y)
{
unsigned long long sum;
return (__builtin_uaddll_overflow(x, y, &sum) ? sum + 8 : sum);
}
static FN uint64_t
add_mod_slow_slow_path(uint64_t sum, uint64_t fixup)
{
sum = (sum >= (uint64_t)-8) ? sum + 8 : sum;
sum += fixup;
sum = (sum >= (uint64_t)-8) ? sum + 8 : sum;
return sum;
}
TEST_DEF inline uint64_t
add_mod_slow(uint64_t x, uint64_t y)
{
unsigned long long sum;
uint64_t fixup = 0;
if (__builtin_uaddll_overflow(x, y, &sum))
fixup = 8;
if (LIKELY(sum < (uint64_t)-16))
return sum + fixup;
return add_mod_slow_slow_path(sum, fixup);
}
TEST_DEF inline uint64_t
mul_mod_fast(uint64_t m, uint64_t x)
{
__uint128_t product = m;
product *= x;
return add_mod_fast((uint64_t)product, 8 * (uint64_t)(product >> 64));
}
TEST_DEF inline uint64_t
horner_double_update(uint64_t acc, uint64_t m0, uint64_t m1, uint64_t x, uint64_t y)
{
acc = add_mod_fast(acc, x);
return add_mod_slow(mul_mod_fast(m0, acc), mul_mod_fast(m1, y));
}
static inline uint32_t
rotate(uint32_t u, int c)
{
return (u << c) | (u >> (32 - c));
}
static inline uint32_t
load_littleendian(const void *buf)
{
uint32_t ret = 0;
uint8_t x[4];
memcpy(x, buf, sizeof(x));
for (size_t i = 0; i < 4; i++)
ret |= (uint32_t)x[i] << (8 * i);
return ret;
}
static inline void
store_littleendian(void *dst, uint32_t u)
{
for (size_t i = 0; i < 4; i++) {
uint8_t lo = u;
memcpy(dst, &lo, 1);
u >>= 8;
dst = (char *)dst + 1;
}
return;
}
static FN void
core_salsa20(char *out, const uint8_t in[static 16], const uint8_t key[static 32],
const uint8_t constant[16])
{
enum { ROUNDS = 20 };
uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
j0 = x0 = load_littleendian(constant + 0);
j1 = x1 = load_littleendian(key + 0);
j2 = x2 = load_littleendian(key + 4);
j3 = x3 = load_littleendian(key + 8);
j4 = x4 = load_littleendian(key + 12);
j5 = x5 = load_littleendian(constant + 4);
j6 = x6 = load_littleendian(in + 0);
j7 = x7 = load_littleendian(in + 4);
j8 = x8 = load_littleendian(in + 8);
j9 = x9 = load_littleendian(in + 12);
j10 = x10 = load_littleendian(constant + 8);
j11 = x11 = load_littleendian(key + 16);
j12 = x12 = load_littleendian(key + 20);
j13 = x13 = load_littleendian(key + 24);
j14 = x14 = load_littleendian(key + 28);
j15 = x15 = load_littleendian(constant + 12);
for (size_t i = 0; i < ROUNDS; i += 2) {
x4 ^= rotate(x0 + x12, 7);
x8 ^= rotate(x4 + x0, 9);
x12 ^= rotate(x8 + x4, 13);
x0 ^= rotate(x12 + x8, 18);
x9 ^= rotate(x5 + x1, 7);
x13 ^= rotate(x9 + x5, 9);
x1 ^= rotate(x13 + x9, 13);
x5 ^= rotate(x1 + x13, 18);
x14 ^= rotate(x10 + x6, 7);
x2 ^= rotate(x14 + x10, 9);
x6 ^= rotate(x2 + x14, 13);
x10 ^= rotate(x6 + x2, 18);
x3 ^= rotate(x15 + x11, 7);
x7 ^= rotate(x3 + x15, 9);
x11 ^= rotate(x7 + x3, 13);
x15 ^= rotate(x11 + x7, 18);
x1 ^= rotate(x0 + x3, 7);
x2 ^= rotate(x1 + x0, 9);
x3 ^= rotate(x2 + x1, 13);
x0 ^= rotate(x3 + x2, 18);
x6 ^= rotate(x5 + x4, 7);
x7 ^= rotate(x6 + x5, 9);
x4 ^= rotate(x7 + x6, 13);
x5 ^= rotate(x4 + x7, 18);
x11 ^= rotate(x10 + x9, 7);
x8 ^= rotate(x11 + x10, 9);
x9 ^= rotate(x8 + x11, 13);
x10 ^= rotate(x9 + x8, 18);
x12 ^= rotate(x15 + x14, 7);
x13 ^= rotate(x12 + x15, 9);
x14 ^= rotate(x13 + x12, 13);
x15 ^= rotate(x14 + x13, 18);
}
x0 += j0;
x1 += j1;
x2 += j2;
x3 += j3;
x4 += j4;
x5 += j5;
x6 += j6;
x7 += j7;
x8 += j8;
x9 += j9;
x10 += j10;
x11 += j11;
x12 += j12;
x13 += j13;
x14 += j14;
x15 += j15;
store_littleendian(out + 0, x0);
store_littleendian(out + 4, x1);
store_littleendian(out + 8, x2);
store_littleendian(out + 12, x3);
store_littleendian(out + 16, x4);
store_littleendian(out + 20, x5);
store_littleendian(out + 24, x6);
store_littleendian(out + 28, x7);
store_littleendian(out + 32, x8);
store_littleendian(out + 36, x9);
store_littleendian(out + 40, x10);
store_littleendian(out + 44, x11);
store_littleendian(out + 48, x12);
store_littleendian(out + 52, x13);
store_littleendian(out + 56, x14);
store_littleendian(out + 60, x15);
return;
}
TEST_DEF void
salsa20_stream(
void *dst, size_t len, const uint8_t nonce[static 8], const uint8_t key[static 32])
{
static const uint8_t sigma[16] = "expand 32-byte k";
uint8_t in[16];
if (len == 0)
return;
memcpy(in, nonce, 8);
memset(in + 8, 0, 8);
while (len >= 64) {
unsigned int u;
core_salsa20(dst, in, key, sigma);
u = 1;
for (size_t i = 8; i < 16; i++) {
u += in[i];
in[i] = u;
u >>= 8;
}
dst = (char *)dst + 64;
len -= 64;
}
if (len > 0) {
char block[64];
core_salsa20(block, in, key, sigma);
memcpy(dst, block, len);
}
return;
}
TEST_DEF struct umash_oh
oh_one_block(const uint64_t *params, uint64_t tag, const void *block)
{
struct umash_oh ret;
v128 acc = V128_ZERO;
size_t i;
for (i = 0; i < UMASH_OH_PARAM_COUNT - 2; i += 2) {
v128 x, k;
memcpy(&x, block, sizeof(x));
block = (const char *)block + sizeof(x);
memcpy(&k, ¶ms[i], sizeof(k));
x ^= k;
acc ^= v128_clmul_cross(x);
}
memcpy(&ret, &acc, sizeof(ret));
{
__uint128_t enh = (__uint128_t)tag << 64;
uint64_t x, y;
memcpy(&x, block, sizeof(x));
block = (const char *)block + sizeof(x);
memcpy(&y, block, sizeof(y));
x += params[i];
y += params[i + 1];
enh += (__uint128_t)x * y;
ret.bits[0] ^= (uint64_t)enh;
ret.bits[1] ^= (uint64_t)(enh >> 64) ^ (uint64_t)enh;
}
return ret;
}
TEST_DEF void
oh_one_block_fprint(struct umash_oh dst[static 2], const uint64_t *restrict params,
uint64_t tag, const void *restrict block)
{
v128 acc = V128_ZERO;
v128 acc_shifted = V128_ZERO;
v128 lrc;
v128 prev = V128_ZERO;
size_t i;
lrc = v128_create(params[UMASH_OH_PARAM_COUNT], params[UMASH_OH_PARAM_COUNT + 1]);
for (i = 0; i < UMASH_OH_PARAM_COUNT - 2; i += 2) {
v128 x, k;
memcpy(&x, block, sizeof(x));
block = (const char *)block + sizeof(x);
memcpy(&k, ¶ms[i], sizeof(k));
x ^= k;
lrc ^= x;
x = v128_clmul_cross(x);
acc ^= x;
acc_shifted ^= prev;
acc_shifted = v128_shift(acc_shifted);
prev = x;
}
{
v128 x, k;
memcpy(&x, block, sizeof(x));
memcpy(&k, ¶ms[i], sizeof(k));
lrc ^= x ^ k;
}
acc_shifted ^= acc;
acc_shifted = v128_shift(acc_shifted);
acc_shifted ^= v128_clmul_cross(lrc);
memcpy(&dst[0], &acc, sizeof(dst[0]));
memcpy(&dst[1], &acc_shifted, sizeof(dst[0]));
{
__uint128_t enh;
uint64_t x, y, kx, ky;
memcpy(&x, block, sizeof(x));
block = (const char *)block + sizeof(x);
memcpy(&y, block, sizeof(y));
enh = (__uint128_t)tag << 64;
kx = x + params[i];
ky = y + params[i + 1];
enh += (__uint128_t)kx * ky;
enh ^= enh << 64;
dst[0].bits[0] ^= (uint64_t)enh;
dst[0].bits[1] ^= (uint64_t)(enh >> 64);
dst[1].bits[0] ^= (uint64_t)enh;
dst[1].bits[1] ^= (uint64_t)(enh >> 64);
}
return;
}
TEST_DEF struct umash_oh
oh_last_block(const uint64_t *params, uint64_t tag, const void *block, size_t n_bytes)
{
struct umash_oh ret;
v128 acc = V128_ZERO;
size_t remaining = 1 + ((n_bytes - 1) % sizeof(v128));
size_t end_full_pairs = (n_bytes - remaining) / sizeof(uint64_t);
const void *last_ptr = (const char *)block + n_bytes - sizeof(v128);
size_t i;
for (i = 0; i < end_full_pairs; i += 2) {
v128 x, k;
memcpy(&x, block, sizeof(x));
block = (const char *)block + sizeof(x);
memcpy(&k, ¶ms[i], sizeof(k));
x ^= k;
acc ^= v128_clmul_cross(x);
}
memcpy(&ret, &acc, sizeof(ret));
{
__uint128_t enh = (__uint128_t)tag << 64;
uint64_t x, y;
memcpy(&x, last_ptr, sizeof(x));
last_ptr = (const char *)last_ptr + sizeof(x);
memcpy(&y, last_ptr, sizeof(y));
x += params[i];
y += params[i + 1];
enh += (__uint128_t)x * y;
ret.bits[0] ^= (uint64_t)enh;
ret.bits[1] ^= (uint64_t)(enh >> 64) ^ (uint64_t)enh;
}
return ret;
}
TEST_DEF void
oh_last_block_fprint(struct umash_oh dst[static 2], const uint64_t *restrict params,
uint64_t tag, const void *restrict block, size_t n_bytes)
{
v128 acc = V128_ZERO;
v128 acc_shifted = V128_ZERO;
v128 lrc;
v128 prev = V128_ZERO;
size_t remaining = 1 + ((n_bytes - 1) % sizeof(v128));
size_t end_full_pairs = (n_bytes - remaining) / sizeof(uint64_t);
const void *last_ptr = (const char *)block + n_bytes - sizeof(v128);
size_t i;
lrc = v128_create(params[UMASH_OH_PARAM_COUNT], params[UMASH_OH_PARAM_COUNT + 1]);
for (i = 0; i < end_full_pairs; i += 2) {
v128 x, k;
memcpy(&x, block, sizeof(x));
block = (const char *)block + sizeof(x);
memcpy(&k, ¶ms[i], sizeof(k));
x ^= k;
lrc ^= x;
x = v128_clmul_cross(x);
acc ^= x;
acc_shifted ^= prev;
acc_shifted = v128_shift(acc_shifted);
prev = x;
}
{
v128 x, k;
memcpy(&x, last_ptr, sizeof(x));
memcpy(&k, ¶ms[end_full_pairs], sizeof(k));
lrc ^= x ^ k;
}
acc_shifted ^= acc;
acc_shifted = v128_shift(acc_shifted);
acc_shifted ^= v128_clmul_cross(lrc);
memcpy(&dst[0], &acc, sizeof(dst[0]));
memcpy(&dst[1], &acc_shifted, sizeof(dst[0]));
{
__uint128_t enh;
uint64_t x, y, kx, ky;
memcpy(&x, last_ptr, sizeof(x));
last_ptr = (const char *)last_ptr + sizeof(x);
memcpy(&y, last_ptr, sizeof(y));
enh = (__uint128_t)tag << 64;
kx = x + params[end_full_pairs];
ky = y + params[end_full_pairs + 1];
enh += (__uint128_t)kx * ky;
enh ^= enh << 64;
dst[0].bits[0] ^= (uint64_t)enh;
dst[0].bits[1] ^= (uint64_t)(enh >> 64);
dst[1].bits[0] ^= (uint64_t)enh;
dst[1].bits[1] ^= (uint64_t)(enh >> 64);
}
return;
}
static inline const void *
select_ptr(bool cond, const void *then, const void *otherwise)
{
const char *ret;
#ifdef __GNUC__
__asm__("" ::"r"(then), "r"(otherwise));
#endif
ret = (cond) ? then : otherwise;
#ifdef __GNUC__
__asm__("" : "+r"(ret));
#endif
return ret;
}
TEST_DEF inline uint64_t
vec_to_u64(const void *data, size_t n_bytes)
{
const char zeros[2] = { 0 };
uint32_t hi, lo;
if (LIKELY(n_bytes >= sizeof(lo))) {
memcpy(&lo, data, sizeof(lo));
memcpy(&hi, (const char *)data + n_bytes - sizeof(hi), sizeof(hi));
} else {
uint16_t word;
uint8_t byte;
memcpy(&byte, select_ptr(n_bytes & 1, data, zeros), 1);
lo = byte;
memcpy(&word,
select_ptr(n_bytes & 2, (const char *)data + n_bytes - 2, zeros), 2);
hi = word;
}
return ((uint64_t)hi << 32) | (lo + hi);
}
TEST_DEF uint64_t
umash_short(const uint64_t *params, uint64_t seed, const void *data, size_t n_bytes)
{
uint64_t h;
seed += params[n_bytes];
h = vec_to_u64(data, n_bytes);
h ^= h >> 30;
h *= 0xbf58476d1ce4e5b9ULL;
h = (h ^ seed) ^ (h >> 27);
h *= 0x94d049bb133111ebULL;
h ^= h >> 31;
return h;
}
static FN struct umash_fp
umash_fp_short(const uint64_t *params, uint64_t seed, const void *data, size_t n_bytes)
{
struct umash_fp ret;
uint64_t h;
ret.hash[0] = seed + params[n_bytes];
ret.hash[1] = seed + params[n_bytes + OH_SHORT_HASH_SHIFT];
h = vec_to_u64(data, n_bytes);
h ^= h >> 30;
h *= 0xbf58476d1ce4e5b9ULL;
h ^= h >> 27;
#define TAIL(i) \
do { \
ret.hash[i] ^= h; \
ret.hash[i] *= 0x94d049bb133111ebULL; \
ret.hash[i] ^= ret.hash[i] >> 31; \
} while (0)
TAIL(0);
TAIL(1);
#undef TAIL
return ret;
}
static inline uint64_t
rotl64(uint64_t x, int n)
{
return (x << n) | (x >> (64 - n));
}
TEST_DEF inline uint64_t
finalize(uint64_t x)
{
return (x ^ rotl64(x, 8)) ^ rotl64(x, 33);
}
TEST_DEF uint64_t
umash_medium(const uint64_t multipliers[static 2], const uint64_t *oh, uint64_t seed,
const void *data, size_t n_bytes)
{
union {
__uint128_t h;
uint64_t u64[2];
} acc = { .h = (__uint128_t)(seed ^ n_bytes) << 64 };
{
uint64_t x, y;
memcpy(&x, data, sizeof(x));
memcpy(&y, (const char *)data + n_bytes - sizeof(y), sizeof(y));
x += oh[0];
y += oh[1];
acc.h += (__uint128_t)x * y;
}
acc.u64[1] ^= acc.u64[0];
return finalize(horner_double_update(
0, multipliers[0], multipliers[1], acc.u64[0], acc.u64[1]));
}
static FN struct umash_fp
umash_fp_medium(const uint64_t multipliers[static 2][2], const uint64_t *oh,
uint64_t seed, const void *data, size_t n_bytes)
{
struct umash_fp ret;
const uint64_t offset = seed ^ n_bytes;
union {
__uint128_t h;
uint64_t u64[2];
} hash;
union {
v128 v;
uint64_t u64[2];
} mixed_lrc;
uint64_t lrc[2] = { oh[UMASH_OH_PARAM_COUNT], oh[UMASH_OH_PARAM_COUNT + 1] };
uint64_t x, y;
uint64_t a, b;
memcpy(&x, data, sizeof(x));
memcpy(&y, (const char *)data + n_bytes - sizeof(y), sizeof(y));
a = oh[0];
b = oh[1];
lrc[0] ^= x ^ a;
lrc[1] ^= y ^ b;
mixed_lrc.v = v128_clmul(lrc[0], lrc[1]);
hash.h = (__uint128_t)offset << 64;
a += x;
b += y;
hash.h += (__uint128_t)a * b;
hash.u64[1] ^= hash.u64[0];
ret.hash[0] = finalize(horner_double_update(
0, multipliers[0][0], multipliers[0][1], hash.u64[0], hash.u64[1]));
ret.hash[1] =
finalize(horner_double_update(0, multipliers[1][0], multipliers[1][1],
hash.u64[0] ^ mixed_lrc.u64[0], hash.u64[1] ^ mixed_lrc.u64[1]));
return ret;
}
TEST_DEF uint64_t
umash_long(const uint64_t multipliers[static 2], const uint64_t *oh, uint64_t seed,
const void *data, size_t n_bytes)
{
uint64_t acc = 0;
while (n_bytes > BLOCK_SIZE) {
struct umash_oh compressed;
compressed = oh_one_block(oh, seed, data);
data = (const char *)data + BLOCK_SIZE;
n_bytes -= BLOCK_SIZE;
acc = horner_double_update(acc, multipliers[0], multipliers[1],
compressed.bits[0], compressed.bits[1]);
}
{
struct umash_oh compressed;
seed ^= (uint8_t)n_bytes;
compressed = oh_last_block(oh, seed, data, n_bytes);
acc = horner_double_update(acc, multipliers[0], multipliers[1],
compressed.bits[0], compressed.bits[1]);
}
return finalize(acc);
}
static FN struct umash_fp
umash_fp_long(const uint64_t multipliers[static 2][2], const uint64_t *oh, uint64_t seed,
const void *data, size_t n_bytes)
{
struct umash_oh compressed[2];
struct umash_fp ret;
uint64_t acc[2] = { 0, 0 };
while (n_bytes > BLOCK_SIZE) {
oh_one_block_fprint(compressed, oh, seed, data);
#define UPDATE(i) \
do { \
acc[i] = horner_double_update(acc[i], multipliers[i][0], \
multipliers[i][1], compressed[i].bits[0], compressed[i].bits[1]); \
} while (0)
UPDATE(0);
UPDATE(1);
#undef UPDATE
data = (const char *)data + BLOCK_SIZE;
n_bytes -= BLOCK_SIZE;
}
oh_last_block_fprint(compressed, oh, seed ^ (uint8_t)n_bytes, data, n_bytes);
#define FINAL(i) \
do { \
acc[i] = horner_double_update(acc[i], multipliers[i][0], \
multipliers[i][1], compressed[i].bits[0], compressed[i].bits[1]); \
ret.hash[i] = finalize(acc[i]); \
} while (0)
FINAL(0);
FINAL(1);
#undef FINAL
return ret;
}
static FN bool
value_is_repeated(const uint64_t *values, size_t n, uint64_t needle)
{
for (size_t i = 0; i < n; i++) {
if (values[i] == needle)
return true;
}
return false;
}
FN bool
umash_params_prepare(struct umash_params *params)
{
static const uint64_t modulo = (1UL << 61) - 1;
uint64_t buf[] = { params->poly[0][0], params->poly[1][0] };
size_t buf_idx = 0;
#define GET_RANDOM(DST) \
do { \
if (buf_idx >= ARRAY_SIZE(buf)) \
return false; \
\
(DST) = buf[buf_idx++]; \
} while (0)
for (size_t i = 0; i < ARRAY_SIZE(params->poly); i++) {
uint64_t f = params->poly[i][1];
while (true) {
f &= (1UL << 61) - 1;
if (f != 0 && f < modulo)
break;
GET_RANDOM(f);
}
params->poly[i][0] = mul_mod_fast(f, f) % modulo;
params->poly[i][1] = f;
}
for (size_t i = 0; i < ARRAY_SIZE(params->oh); i++) {
while (value_is_repeated(params->oh, i, params->oh[i]))
GET_RANDOM(params->oh[i]);
}
return true;
}
FN void
umash_params_derive(struct umash_params *params, uint64_t bits, const void *key)
{
uint8_t umash_key[32] = "Do not use UMASH VS adversaries.";
if (key != NULL)
memcpy(umash_key, key, sizeof(umash_key));
while (true) {
uint8_t nonce[8];
for (size_t i = 0; i < 8; i++)
nonce[i] = bits >> (8 * i);
salsa20_stream(params, sizeof(*params), nonce, umash_key);
if (umash_params_prepare(params))
return;
bits++;
}
}
static FN void
sink_update_poly(struct umash_sink *sink)
{
uint64_t oh0, oh1;
oh0 = sink->oh_acc.bits[0];
oh1 = sink->oh_acc.bits[1];
sink->poly_state[0].acc = horner_double_update(sink->poly_state[0].acc,
sink->poly_state[0].mul[0], sink->poly_state[0].mul[1], oh0, oh1);
sink->oh_acc = (struct umash_oh) { .bits = { 0 } };
if (sink->hash_wanted == 0)
return;
oh0 = sink->oh_twisted.acc.bits[0];
oh1 = sink->oh_twisted.acc.bits[1];
sink->poly_state[1].acc = horner_double_update(sink->poly_state[1].acc,
sink->poly_state[1].mul[0], sink->poly_state[1].mul[1], oh0, oh1);
sink->oh_twisted =
(struct umash_twisted_oh) { .lrc = { sink->oh[UMASH_OH_PARAM_COUNT],
sink->oh[UMASH_OH_PARAM_COUNT + 1] } };
return;
}
static FN void
sink_consume_buf(
struct umash_sink *sink, const char buf[static INCREMENTAL_GRANULARITY], bool final)
{
const size_t buf_begin = sizeof(sink->buf) - INCREMENTAL_GRANULARITY;
const size_t param = sink->oh_iter;
const uint64_t k0 = sink->oh[param];
const uint64_t k1 = sink->oh[param + 1];
uint64_t x, y;
memcpy(&x, buf, sizeof(x));
memcpy(&y, buf + sizeof(x), sizeof(y));
if (sink->oh_iter < UMASH_OH_PARAM_COUNT - 2 && !final) {
v128 acc, h, twisted_acc, prev;
uint64_t m0, m1;
m0 = x ^ k0;
m1 = y ^ k1;
memcpy(&acc, &sink->oh_acc, sizeof(acc));
h = v128_clmul(m0, m1);
acc ^= h;
memcpy(&sink->oh_acc, &acc, sizeof(acc));
if (sink->hash_wanted == 0)
goto next;
sink->oh_twisted.lrc[0] ^= m0;
sink->oh_twisted.lrc[1] ^= m1;
memcpy(&twisted_acc, &sink->oh_twisted.acc, sizeof(twisted_acc));
memcpy(&prev, sink->oh_twisted.prev, sizeof(prev));
twisted_acc ^= prev;
twisted_acc = v128_shift(twisted_acc);
memcpy(&sink->oh_twisted.acc, &twisted_acc, sizeof(twisted_acc));
memcpy(&sink->oh_twisted.prev, &h, sizeof(h));
} else {
__uint128_t enh;
uint64_t tag = sink->seed ^ (uint8_t)(sink->block_size + sink->bufsz);
enh = (__uint128_t)tag << 64;
enh += (__uint128_t)(x + k0) * (y + k1);
enh ^= enh << 64;
if (sink->hash_wanted != 0) {
union {
v128 vec;
uint64_t h[2];
} lrc_hash;
uint64_t lrc0, lrc1;
uint64_t oh0, oh1;
uint64_t oh_twisted0, oh_twisted1;
lrc0 = sink->oh_twisted.lrc[0] ^ x ^ k0;
lrc1 = sink->oh_twisted.lrc[1] ^ y ^ k1;
lrc_hash.vec = v128_clmul(lrc0, lrc1);
oh_twisted0 = sink->oh_twisted.acc.bits[0];
oh_twisted1 = sink->oh_twisted.acc.bits[1];
oh0 = sink->oh_acc.bits[0];
oh1 = sink->oh_acc.bits[1];
oh0 ^= oh_twisted0;
oh0 <<= 1;
oh1 ^= oh_twisted1;
oh1 <<= 1;
oh0 ^= lrc_hash.h[0];
oh1 ^= lrc_hash.h[1];
sink->oh_twisted.acc.bits[0] = oh0 ^ (uint64_t)enh;
sink->oh_twisted.acc.bits[1] = oh1 ^ (uint64_t)(enh >> 64);
}
sink->oh_acc.bits[0] ^= (uint64_t)enh;
sink->oh_acc.bits[1] ^= (uint64_t)(enh >> 64);
}
next:
memmove(&sink->buf, buf, buf_begin);
sink->block_size += sink->bufsz;
sink->bufsz = 0;
sink->oh_iter += 2;
if (sink->oh_iter == UMASH_OH_PARAM_COUNT || final) {
sink_update_poly(sink);
sink->block_size = 0;
sink->oh_iter = 0;
}
return;
}
static FN size_t
block_sink_update(struct umash_sink *sink, const void *data, size_t n_bytes)
{
size_t consumed = 0;
(void)n_bytes;
assert(n_bytes >= BLOCK_SIZE);
assert(sink->bufsz == 0);
assert(sink->block_size == 0);
assert(sink->oh_iter == 0);
while (n_bytes > BLOCK_SIZE) {
if (sink->hash_wanted != 0) {
struct umash_oh hashes[2];
oh_one_block_fprint(hashes, sink->oh, sink->seed, data);
sink->oh_acc = hashes[0];
sink->oh_twisted.acc = hashes[1];
} else {
sink->oh_acc = oh_one_block(sink->oh, sink->seed, data);
}
sink_update_poly(sink);
consumed += BLOCK_SIZE;
data = (const char *)data + BLOCK_SIZE;
n_bytes -= BLOCK_SIZE;
}
return consumed;
}
FN void
umash_sink_update(struct umash_sink *sink, const void *data, size_t n_bytes)
{
const size_t buf_begin = sizeof(sink->buf) - INCREMENTAL_GRANULARITY;
size_t remaining = INCREMENTAL_GRANULARITY - sink->bufsz;
DTRACE_PROBE4(libumash, umash_sink_update, sink, remaining, data, n_bytes);
if (n_bytes < remaining) {
memcpy(&sink->buf[buf_begin + sink->bufsz], data, n_bytes);
sink->bufsz += n_bytes;
return;
}
memcpy(&sink->buf[buf_begin + sink->bufsz], data, remaining);
data = (const char *)data + remaining;
n_bytes -= remaining;
sink->large_umash = true;
sink->bufsz = INCREMENTAL_GRANULARITY;
if (n_bytes == 0)
return;
sink_consume_buf(sink, sink->buf + buf_begin, false);
while (n_bytes > INCREMENTAL_GRANULARITY) {
size_t consumed;
if (sink->oh_iter == 0 && n_bytes > BLOCK_SIZE) {
consumed = block_sink_update(sink, data, n_bytes);
assert(consumed >= BLOCK_SIZE);
memcpy(sink->buf,
(const char *)data + (consumed - INCREMENTAL_GRANULARITY),
buf_begin);
} else {
consumed = INCREMENTAL_GRANULARITY;
sink->bufsz = INCREMENTAL_GRANULARITY;
sink_consume_buf(sink, data, false);
}
n_bytes -= consumed;
data = (const char *)data + consumed;
}
memcpy(&sink->buf[buf_begin], data, n_bytes);
sink->bufsz = n_bytes;
return;
}
FN uint64_t
umash_full(const struct umash_params *params, uint64_t seed, int which, const void *data,
size_t n_bytes)
{
which = (which == 0) ? 0 : 1;
DTRACE_PROBE4(libumash, umash_full, params, which, data, n_bytes);
if (which == 1) {
struct umash_fp fp;
fp = umash_fprint(params, seed, data, n_bytes);
return fp.hash[1];
}
if (LIKELY(n_bytes <= sizeof(v128))) {
if (LIKELY(n_bytes <= sizeof(uint64_t)))
return umash_short(params->oh, seed, data, n_bytes);
return umash_medium(params->poly[0], params->oh, seed, data, n_bytes);
}
return umash_long(params->poly[0], params->oh, seed, data, n_bytes);
}
FN struct umash_fp
umash_fprint(
const struct umash_params *params, uint64_t seed, const void *data, size_t n_bytes)
{
DTRACE_PROBE3(libumash, umash_fprint, params, data, n_bytes);
if (LIKELY(n_bytes <= sizeof(v128))) {
if (LIKELY(n_bytes <= sizeof(uint64_t)))
return umash_fp_short(params->oh, seed, data, n_bytes);
return umash_fp_medium(params->poly, params->oh, seed, data, n_bytes);
}
return umash_fp_long(params->poly, params->oh, seed, data, n_bytes);
}
FN void
umash_init(struct umash_state *state, const struct umash_params *params, uint64_t seed,
int which)
{
which = (which == 0) ? 0 : 1;
DTRACE_PROBE3(libumash, umash_init, state, params, which);
state->sink = (struct umash_sink) {
.poly_state[0] = {
.mul = {
params->poly[0][0],
params->poly[0][1],
},
},
.poly_state[1]= {
.mul = {
params->poly[1][0],
params->poly[1][1],
},
},
.oh = params->oh,
.hash_wanted = which,
.oh_twisted.lrc = { params->oh[UMASH_OH_PARAM_COUNT],
params->oh[UMASH_OH_PARAM_COUNT + 1] },
.seed = seed,
};
return;
}
FN void
umash_fp_init(
struct umash_fp_state *state, const struct umash_params *params, uint64_t seed)
{
DTRACE_PROBE2(libumash, umash_fp_init, state, params);
state->sink = (struct umash_sink) {
.poly_state[0] = {
.mul = {
params->poly[0][0],
params->poly[0][1],
},
},
.poly_state[1]= {
.mul = {
params->poly[1][0],
params->poly[1][1],
},
},
.oh = params->oh,
.hash_wanted = 2,
.oh_twisted.lrc = { params->oh[UMASH_OH_PARAM_COUNT],
params->oh[UMASH_OH_PARAM_COUNT + 1] },
.seed = seed,
};
return;
}
static FN void
digest_flush(struct umash_sink *sink)
{
if (sink->bufsz > 0)
sink_consume_buf(sink, &sink->buf[sink->bufsz], true);
return;
}
static FN uint64_t
digest(const struct umash_sink *sink, int index)
{
const size_t buf_begin = sizeof(sink->buf) - INCREMENTAL_GRANULARITY;
const size_t shift = (index == 0) ? 0 : OH_SHORT_HASH_SHIFT;
if (sink->large_umash)
return finalize(sink->poly_state[index].acc);
if (sink->bufsz <= sizeof(uint64_t))
return umash_short(
&sink->oh[shift], sink->seed, &sink->buf[buf_begin], sink->bufsz);
return umash_medium(sink->poly_state[index].mul, sink->oh, sink->seed,
&sink->buf[buf_begin], sink->bufsz);
}
static FN struct umash_fp
fp_digest_sink(const struct umash_sink *sink)
{
struct umash_sink copy;
struct umash_fp ret;
const size_t buf_begin = sizeof(sink->buf) - INCREMENTAL_GRANULARITY;
if (sink->large_umash) {
copy = *sink;
digest_flush(©);
sink = ©
} else if (sink->bufsz <= sizeof(uint64_t)) {
return umash_fp_short(
sink->oh, sink->seed, &sink->buf[buf_begin], sink->bufsz);
} else {
const struct umash_params *params;
params = (const void *)((const char *)sink->oh -
__builtin_offsetof(struct umash_params, oh));
return umash_fp_medium(params->poly, sink->oh, sink->seed,
&sink->buf[buf_begin], sink->bufsz);
}
for (size_t i = 0; i < ARRAY_SIZE(ret.hash); i++)
ret.hash[i] = digest(sink, i);
return ret;
}
FN uint64_t
umash_digest(const struct umash_state *state)
{
struct umash_sink copy;
const struct umash_sink *sink = &state->sink;
DTRACE_PROBE1(libumash, umash_digest, state);
if (sink->hash_wanted == 1) {
struct umash_fp fp;
fp = fp_digest_sink(sink);
return fp.hash[1];
}
if (sink->large_umash) {
copy = *sink;
digest_flush(©);
sink = ©
}
return digest(sink, 0);
}
FN struct umash_fp
umash_fp_digest(const struct umash_fp_state *state)
{
DTRACE_PROBE1(libumash, umash_fp_digest, state);
return fp_digest_sink(&state->sink);
}