#![allow(clippy::len_zero)]
#![allow(clippy::needless_borrow)]
#![allow(clippy::identity_op)]
#![allow(unused)]
#![allow(clippy::undocumented_unsafe_blocks)]
use crate::xxh3_vendored::xxh3::Xxh3Default;
use std::hash::Hasher;
pub(crate) mod xxh32_common {
    //! Prime constants and mixing helpers for the 32-bit hash variant.
    #![allow(unused)]
    use core::mem;

    /// Size in bytes of four `u32` values (16).
    pub const CHUNK_SIZE: usize = 4 * mem::size_of::<u32>();
    pub const PRIME_1: u32 = 0x9E37_79B1;
    pub const PRIME_2: u32 = 0x85EB_CA77;
    pub const PRIME_3: u32 = 0xC2B2_AE3D;
    pub const PRIME_4: u32 = 0x27D4_EB2F;
    pub const PRIME_5: u32 = 0x1656_67B1;

    /// Folds `input` into `acc`: multiply by `PRIME_2`, add, rotate left by 13,
    /// multiply by `PRIME_1`. All arithmetic wraps.
    #[inline]
    pub const fn round(acc: u32, input: u32) -> u32 {
        let scaled = input.wrapping_mul(PRIME_2);
        let summed = acc.wrapping_add(scaled);
        summed.rotate_left(13).wrapping_mul(PRIME_1)
    }

    /// Final bit mixer: three xor-shift steps (15, 13, 16) interleaved with
    /// multiplications by `PRIME_2` and `PRIME_3`.
    #[inline]
    pub const fn avalanche(input: u32) -> u32 {
        let first = (input ^ (input >> 15)).wrapping_mul(PRIME_2);
        let second = (first ^ (first >> 13)).wrapping_mul(PRIME_3);
        second ^ (second >> 16)
    }
}
pub(crate) mod xxh64_common {
    //! Prime constants and mixing helpers for the 64-bit hash variant.
    #![allow(unused)]
    use core::mem;

    /// Size in bytes of four `u64` values (32).
    pub const CHUNK_SIZE: usize = 4 * mem::size_of::<u64>();
    pub const PRIME_1: u64 = 0x9E37_79B1_85EB_CA87;
    pub const PRIME_2: u64 = 0xC2B2_AE3D_27D4_EB4F;
    pub const PRIME_3: u64 = 0x1656_67B1_9E37_79F9;
    pub const PRIME_4: u64 = 0x85EB_CA77_C2B2_AE63;
    pub const PRIME_5: u64 = 0x27D4_EB2F_1656_67C5;

    /// Folds `input` into `acc`: multiply by `PRIME_2`, add, rotate left by 31,
    /// multiply by `PRIME_1`. All arithmetic wraps.
    #[inline]
    pub const fn round(acc: u64, input: u64) -> u64 {
        let scaled = input.wrapping_mul(PRIME_2);
        let summed = acc.wrapping_add(scaled);
        summed.rotate_left(31).wrapping_mul(PRIME_1)
    }

    /// Xors `round(0, val)` into `acc`, then multiplies by `PRIME_1` and adds
    /// `PRIME_4`.
    #[inline]
    pub const fn merge_round(acc: u64, val: u64) -> u64 {
        let folded = acc ^ round(0, val);
        folded.wrapping_mul(PRIME_1).wrapping_add(PRIME_4)
    }

    /// Final bit mixer: three xor-shift steps (33, 29, 32) interleaved with
    /// multiplications by `PRIME_2` and `PRIME_3`.
    #[inline]
    pub const fn avalanche(input: u64) -> u64 {
        let first = (input ^ (input >> 33)).wrapping_mul(PRIME_2);
        let second = (first ^ (first >> 29)).wrapping_mul(PRIME_3);
        second ^ (second >> 32)
    }
}
pub(crate) mod xxh3_common {
    //! Sizes, the built-in secret, and small arithmetic helpers used by the
    //! XXH3 routines in this file.
    use core::mem;

    /// Bytes of input consumed per stripe.
    pub const STRIPE_LEN: usize = 64;
    /// Bytes the secret offset advances between consecutive stripes.
    pub const SECRET_CONSUME_RATE: usize = 8;
    /// Number of `u64` lanes that fit in one stripe (8).
    pub const ACC_NB: usize = STRIPE_LEN / mem::size_of::<u64>();
    /// Secret offset used when merging the accumulators.
    pub const SECRET_MERGEACCS_START: usize = 11;
    /// Distance from the end of the secret (minus one stripe) used for the last stripe.
    pub const SECRET_LASTACC_START: usize = 7;
    /// Largest input length routed to the mid-size path.
    pub const MID_SIZE_MAX: usize = 240;
    /// Smallest accepted secret length.
    pub const SECRET_SIZE_MIN: usize = 136;
    /// Length of [`DEFAULT_SECRET`].
    pub const DEFAULT_SECRET_SIZE: usize = 192;
    /// Built-in secret bytes.
    pub const DEFAULT_SECRET: [u8; DEFAULT_SECRET_SIZE] = [
        0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad,
        0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3,
        0x67, 0x1f, 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc,
        0xff, 0x72, 0x21, 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6,
        0x81, 0x3a, 0x26, 0x4c, 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65,
        0x8b, 0x1b, 0x53, 0x2e, 0xa3, 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19,
        0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9,
        0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31,
        0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb,
        0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0,
        0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, 0x2b, 0x16, 0xbe, 0x58, 0x7d,
        0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, 0x45, 0xcb, 0x3a, 0x8f,
        0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
    ];

    /// Returns `value` xored with itself shifted right by `shift` bits.
    #[inline(always)]
    pub const fn xorshift64(value: u64, shift: u64) -> u64 {
        let shifted = value >> shift;
        value ^ shifted
    }

    /// Mixer: xor-shift by 37, wrapping multiply, xor-shift by 32.
    #[inline(always)]
    pub const fn avalanche(value: u64) -> u64 {
        const MULTIPLIER: u64 = 0x165667919E3779F9;
        let scrambled = xorshift64(value, 37).wrapping_mul(MULTIPLIER);
        xorshift64(scrambled, 32)
    }

    /// Heavier mixer that also folds `len` into the value between two wrapping
    /// multiplications.
    #[inline(always)]
    pub const fn strong_avalanche(value: u64, len: u64) -> u64 {
        const MULTIPLIER: u64 = 0x9FB21C651E98DF25;
        let rotated = value ^ (value.rotate_left(49) ^ value.rotate_left(24));
        let first = rotated.wrapping_mul(MULTIPLIER);
        let with_len = first ^ (first >> 35).wrapping_add(len);
        let second = with_len.wrapping_mul(MULTIPLIER);
        xorshift64(second, 28)
    }

    /// Full 64x64 -> 128-bit product, returned as `(low 64 bits, high 64 bits)`.
    #[inline(always)]
    pub const fn mul64_to128(left: u64, right: u64) -> (u64, u64) {
        let wide = (left as u128) * (right as u128);
        let low = wide as u64;
        let high = (wide >> 64) as u64;
        (low, high)
    }

    /// Xor of the low and high halves of the 128-bit product of `left` and `right`.
    #[inline(always)]
    pub const fn mul128_fold64(left: u64, right: u64) -> u64 {
        let halves = mul64_to128(left, right);
        halves.0 ^ halves.1
    }
}
pub(crate) mod utils {
    //! Raw byte-slice reinterpretation and copy helpers.
    //!
    //! Every bounds check in this module is a `debug_assert!`, so release
    //! builds rely entirely on the caller passing valid offsets and lengths.
    use core::{mem, ptr};

    /// Reinterprets the bytes of `input` starting at `offset` as a `&T`.
    ///
    /// The caller must guarantee that `offset + size_of::<T>() <= input.len()`
    /// and that the resulting address satisfies `T`'s alignment (byte-array
    /// types, whose alignment is 1, always do).
    #[inline(always)]
    pub const fn get_aligned_chunk_ref<T: Copy>(input: &[u8], offset: usize) -> &T {
        debug_assert!(mem::size_of::<T>() > 0);
        debug_assert!(mem::size_of::<T>() <= input.len().saturating_sub(offset));
        // SAFETY: in-bounds and alignment are the caller's responsibility (see
        // above); the returned reference borrows from `input`.
        unsafe { &*(input.as_ptr().add(offset) as *const T) }
    }

    /// Copies a `T` out of `input` at `offset`; same requirements as
    /// [`get_aligned_chunk_ref`].
    #[allow(unused)]
    #[inline(always)]
    pub const fn get_aligned_chunk<T: Copy>(input: &[u8], offset: usize) -> T {
        *get_aligned_chunk_ref(input, offset)
    }

    /// Reads a `T` from `input` at `offset` without any alignment requirement.
    ///
    /// The caller must guarantee that `offset + size_of::<T>() <= input.len()`.
    #[inline(always)]
    pub fn get_unaligned_chunk<T: Copy>(input: &[u8], offset: usize) -> T {
        debug_assert!(mem::size_of::<T>() > 0);
        debug_assert!(mem::size_of::<T>() <= input.len().saturating_sub(offset));
        // SAFETY: the caller guarantees the read stays inside `input`;
        // `read_unaligned` tolerates any address alignment.
        unsafe { ptr::read_unaligned(input.as_ptr().add(offset) as *const T) }
    }

    /// A pointer plus the total length of its allocation and a write offset
    /// into it.
    #[derive(Debug)]
    pub struct Buffer<T> {
        pub ptr: T,
        pub len: usize,
        pub offset: usize,
    }

    impl Buffer<*mut u8> {
        /// Copies all of `src` to `ptr + offset`.
        #[inline(always)]
        pub fn copy_from_slice(&self, src: &[u8]) {
            self.copy_from_slice_by_size(src, src.len())
        }

        /// Copies the first `len` bytes of `src` to `ptr + offset`.
        ///
        /// `len` must not exceed `src.len()` nor the space remaining after
        /// `offset`; both are checked in debug builds only.
        #[inline(always)]
        pub fn copy_from_slice_by_size(&self, src: &[u8], len: usize) {
            debug_assert!(self.len.saturating_sub(self.offset) >= len);
            // Guard the read side as well: without it an over-long `len` would
            // silently read past the end of `src`.
            debug_assert!(src.len() >= len);
            // SAFETY: the assertions above state the required bounds; the
            // caller must also ensure `ptr` is valid for `self.len` bytes and
            // does not overlap `src`.
            unsafe {
                ptr::copy_nonoverlapping(src.as_ptr(), self.ptr.add(self.offset), len);
            }
        }
    }
}
pub mod xxh3 {
use core::{hash, mem, ptr, slice};
use super::utils::{get_aligned_chunk_ref, get_unaligned_chunk, Buffer};
use super::xxh32_common as xxh32;
use super::xxh3_common::*;
use super::xxh64_common as xxh64;
/// Accumulator lanes (`ACC_NB` x `u64`), 16-byte aligned.
/// Selected when SSE2, NEON or wasm `simd128` is enabled and AVX2 is not.
#[cfg(all(
any(
target_feature = "sse2",
target_feature = "neon",
all(target_family = "wasm", target_feature = "simd128")
),
not(target_feature = "avx2")
))]
#[repr(align(16))]
#[derive(Clone)]
struct Acc([u64; ACC_NB]);
/// Accumulator lanes (`ACC_NB` x `u64`), 32-byte aligned; selected when AVX2 is enabled.
#[cfg(target_feature = "avx2")]
#[repr(align(32))]
#[derive(Clone)]
struct Acc([u64; ACC_NB]);
/// Accumulator lanes (`ACC_NB` x `u64`), 8-byte aligned; selected when none of
/// the SIMD target features above is enabled.
#[cfg(not(any(
target_feature = "avx2",
target_feature = "neon",
all(target_family = "wasm", target_feature = "simd128"),
target_feature = "sse2"
)))]
#[repr(align(8))]
#[derive(Clone)]
struct Acc([u64; ACC_NB]);
/// Starting value of the accumulator lanes: a fixed mix of the 32-bit primes
/// (widened to `u64`) and the 64-bit primes.
const INITIAL_ACC: Acc = Acc([
xxh32::PRIME_3 as u64,
xxh64::PRIME_1,
xxh64::PRIME_2,
xxh64::PRIME_3,
xxh64::PRIME_4,
xxh32::PRIME_2 as u64,
xxh64::PRIME_5,
xxh32::PRIME_1 as u64,
]);
/// Signature of the long-input 64-bit hash routines: `(input, seed, secret) -> hash`.
type LongHashFn = fn(&[u8], u64, &[u8]) -> u64;
/// Same shape as [`LongHashFn`] but returning a 128-bit hash.
type LongHashFn128 = fn(&[u8], u64, &[u8]) -> u128;
// `StripeLanes` views one `STRIPE_LEN`-byte stripe as an array of byte lanes,
// each lane as wide as the SIMD register type of the selected backend.
#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
type StripeLanes = [[u8; mem::size_of::<core::arch::wasm32::v128>()];
    STRIPE_LEN / mem::size_of::<core::arch::wasm32::v128>()];
#[cfg(all(target_arch = "x86", target_feature = "avx2"))]
type StripeLanes = [[u8; mem::size_of::<core::arch::x86::__m256i>()];
    STRIPE_LEN / mem::size_of::<core::arch::x86::__m256i>()];
#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
type StripeLanes = [[u8; mem::size_of::<core::arch::x86_64::__m256i>()];
    STRIPE_LEN / mem::size_of::<core::arch::x86_64::__m256i>()];
#[cfg(all(
    target_arch = "x86",
    target_feature = "sse2",
    not(target_feature = "avx2")
))]
type StripeLanes = [[u8; mem::size_of::<core::arch::x86::__m128i>()];
    STRIPE_LEN / mem::size_of::<core::arch::x86::__m128i>()];
#[cfg(all(
    target_arch = "x86_64",
    target_feature = "sse2",
    not(target_feature = "avx2")
))]
type StripeLanes = [[u8; mem::size_of::<core::arch::x86_64::__m128i>()];
    STRIPE_LEN / mem::size_of::<core::arch::x86_64::__m128i>()];
// 32-bit ARM has no `core::arch::aarch64`, so it needs its own alias; the NEON
// kernels in this file already import `core::arch::arm` on that architecture.
#[cfg(all(target_arch = "arm", target_feature = "neon"))]
type StripeLanes = [[u8; mem::size_of::<core::arch::arm::uint8x16_t>()];
    STRIPE_LEN / mem::size_of::<core::arch::arm::uint8x16_t>()];
// Every other NEON target keeps the original `aarch64` definition unchanged.
#[cfg(all(target_feature = "neon", not(target_arch = "arm")))]
type StripeLanes = [[u8; mem::size_of::<core::arch::aarch64::uint8x16_t>()];
    STRIPE_LEN / mem::size_of::<core::arch::aarch64::uint8x16_t>()];
/// Packs four 2-bit selectors into a shuffle immediate: `z` in bits 7-6, `y`
/// in bits 5-4, `x` in bits 3-2 and `w` in bits 1-0.
#[cfg(any(target_feature = "sse2", target_feature = "avx2"))]
#[inline]
const fn _mm_shuffle(z: u32, y: u32, x: u32, w: u32) -> i32 {
    let packed = w | (x << 2) | (y << 4) | (z << 6);
    packed as i32
}
/// Multiplies two 32-bit values into a 64-bit product.
#[inline(always)]
const fn mult32_to64(left: u32, right: u32) -> u64 {
    let wide_left = left as u64;
    let wide_right = right as u64;
    wide_left.wrapping_mul(wide_right)
}
// Combines a low and a high 64-bit half into one `u128`
// (`$low` fills bits 0..64, `$high` fills bits 64..128).
macro_rules! to_u128 {
    ($low:expr, $high:expr) => {
        (($low) as u128) | ((($high) as u128) << 64)
    };
}
// Yields a `*const u8` pointing `$offset` elements past the start of `$slice`.
// The offset is validated against the slice length in debug builds only.
macro_rules! slice_offset_ptr {
    ($slice:expr, $offset:expr) => {{
        let source = $slice;
        let skip = $offset;
        debug_assert!(skip <= source.len());
        // `unused_unsafe` is silenced because the macro may be expanded inside
        // an existing `unsafe` block.
        #[allow(unused_unsafe)]
        unsafe {
            (source.as_ptr() as *const u8).add(skip)
        }
    }};
}
/// Reads four bytes of `data` at `offset` as a little-endian `u32`.
/// Bounds are only checked by the debug assertions in `get_aligned_chunk_ref`.
#[inline(always)]
fn read_32le_unaligned(data: &[u8], offset: usize) -> u32 {
    let bytes: &[u8; 4] = get_aligned_chunk_ref(data, offset);
    u32::from_le_bytes(*bytes)
}
/// Reads eight bytes of `data` at `offset` as a little-endian `u64`.
/// Bounds are only checked by the debug assertions in `get_aligned_chunk_ref`.
#[inline(always)]
fn read_64le_unaligned(data: &[u8], offset: usize) -> u64 {
    let bytes: &[u8; 8] = get_aligned_chunk_ref(data, offset);
    u64::from_le_bytes(*bytes)
}
/// Xors accumulator lanes `offset` and `offset + 1` with the two little-endian
/// secret words and folds their 128-bit product into 64 bits.
#[inline(always)]
fn mix_two_accs(acc: &mut Acc, offset: usize, secret: &[[u8; 8]; 2]) -> u64 {
    let [key_lo, key_hi] = *secret;
    let lane_lo = acc.0[offset] ^ u64::from_le_bytes(key_lo);
    let lane_hi = acc.0[offset + 1] ^ u64::from_le_bytes(key_hi);
    mul128_fold64(lane_lo, lane_hi)
}
#[inline]
fn merge_accs(acc: &mut Acc, secret: &[[[u8; 8]; 2]; 4], mut result: u64) -> u64 {
macro_rules! mix_two_accs {
($idx:literal) => {
result = result.wrapping_add(mix_two_accs(acc, $idx * 2, &secret[$idx]))
};
}
mix_two_accs!(0);
mix_two_accs!(1);
mix_two_accs!(2);
mix_two_accs!(3);
avalanche(result)
}
/// Mixes 16 input bytes with 16 secret bytes: the low word is xored with
/// `secret + seed`, the high word with `secret - seed` (both wrapping), and
/// the 128-bit product of the two is folded to 64 bits.
#[inline(always)]
fn mix16_b(input: &[[u8; 8]; 2], secret: &[[u8; 8]; 2], seed: u64) -> u64 {
    let key_lo = u64::from_le_bytes(secret[0]).wrapping_add(seed);
    let key_hi = u64::from_le_bytes(secret[1]).wrapping_sub(seed);
    let keyed_lo = u64::from_le_bytes(input[0]) ^ key_lo;
    let keyed_hi = u64::from_le_bytes(input[1]) ^ key_hi;
    mul128_fold64(keyed_lo, keyed_hi)
}
/// Updates the `lo`/`hi` running values from two 16-byte input chunks.
///
/// `lo` gains the mix of `input_1` and is xored with the word sum of
/// `input_2`; `hi` gains the mix of `input_2` and is xored with the word sum
/// of `input_1`.
#[inline(always)]
fn mix32_b(
    lo: &mut u64,
    hi: &mut u64,
    input_1: &[[u8; 8]; 2],
    input_2: &[[u8; 8]; 2],
    secret: &[[[u8; 8]; 2]; 2],
    seed: u64,
) {
    let word_sum_1 = u64::from_le_bytes(input_1[0]).wrapping_add(u64::from_le_bytes(input_1[1]));
    let word_sum_2 = u64::from_le_bytes(input_2[0]).wrapping_add(u64::from_le_bytes(input_2[1]));

    let mixed_lo = lo.wrapping_add(mix16_b(input_1, &secret[0], seed));
    *lo = mixed_lo ^ word_sum_2;

    let mixed_hi = hi.wrapping_add(mix16_b(input_2, &secret[1], seed));
    *hi = mixed_hi ^ word_sum_1;
}
#[inline(always)]
fn custom_default_secret(seed: u64) -> [u8; DEFAULT_SECRET_SIZE] {
let mut result = mem::MaybeUninit::<[u8; DEFAULT_SECRET_SIZE]>::uninit();
let nb_rounds = DEFAULT_SECRET_SIZE / 16;
for idx in 0..nb_rounds {
let low = get_unaligned_chunk::<u64>(&DEFAULT_SECRET, idx * 16)
.to_le()
.wrapping_add(seed);
let hi = get_unaligned_chunk::<u64>(&DEFAULT_SECRET, idx * 16 + 8)
.to_le()
.wrapping_sub(seed);
Buffer {
ptr: result.as_mut_ptr() as *mut u8,
len: DEFAULT_SECRET_SIZE,
offset: idx * 16,
}
.copy_from_slice(&low.to_le_bytes());
Buffer {
ptr: result.as_mut_ptr() as *mut u8,
len: DEFAULT_SECRET_SIZE,
offset: idx * 16 + 8,
}
.copy_from_slice(&hi.to_le_bytes());
}
unsafe { result.assume_init() }
}
// wasm `simd128` kernel: folds one 64-byte stripe of `input`, keyed with
// `secret`, into the accumulator. Two 128-bit lanes are handled per iteration.
#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
fn accumulate_512_wasm(acc: &mut Acc, input: &StripeLanes, secret: &StripeLanes) {
const LANES: usize = ACC_NB;
use core::arch::wasm32::*;
let mut idx = 0usize;
// View the eight u64 accumulator lanes as four v128 vectors.
let xacc = acc.0.as_mut_ptr() as *mut v128;
unsafe {
// idx takes the values 0 and 2, covering vectors 0..4 in pairs.
while idx.wrapping_add(1) < LANES / 2 {
let data_vec_1 = v128_load(input[idx].as_ptr() as _);
let data_vec_2 = v128_load(input[idx.wrapping_add(1)].as_ptr() as _);
let key_vec_1 = v128_load(secret[idx].as_ptr() as _);
let key_vec_2 = v128_load(secret[idx.wrapping_add(1)].as_ptr() as _);
let data_key_1 = v128_xor(data_vec_1, key_vec_1);
let data_key_2 = v128_xor(data_vec_2, key_vec_2);
// Swap the two 64-bit halves of the raw input vectors.
let data_swap_1 = i64x2_shuffle::<1, 0>(data_vec_1, data_vec_1);
let data_swap_2 = i64x2_shuffle::<1, 0>(data_vec_2, data_vec_2);
// Gather the even (low) and odd (high) 32-bit words of both keyed vectors.
let mixed_lo = i32x4_shuffle::<0, 2, 4, 6>(data_key_1, data_key_2);
let mixed_hi = i32x4_shuffle::<1, 3, 5, 7>(data_key_1, data_key_2);
// Widening low x high products: the first two belong to vector 1, the last two to vector 2.
let prod_1 = u64x2_extmul_low_u32x4(mixed_lo, mixed_hi);
let prod_2 = u64x2_extmul_high_u32x4(mixed_lo, mixed_hi);
let sum_1 = i64x2_add(prod_1, data_swap_1);
let sum_2 = i64x2_add(prod_2, data_swap_2);
xacc.add(idx).write(i64x2_add(sum_1, *xacc.add(idx)));
xacc.add(idx.wrapping_add(1))
.write(i64x2_add(sum_2, *xacc.add(idx.wrapping_add(1))));
idx = idx.wrapping_add(2);
}
}
}
// 16-byte vector load helper. On aarch64 it forwards to the `vld1q_u8` intrinsic.
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
macro_rules! vld1q_u8 {
($ptr:expr) => {
core::arch::aarch64::vld1q_u8($ptr)
};
}
// On 32-bit ARM the same load is done with an unaligned pointer read of a `uint8x16_t`.
#[cfg(all(target_arch = "arm", target_feature = "neon"))]
macro_rules! vld1q_u8 {
($ptr:expr) => {
core::ptr::read_unaligned($ptr as *const core::arch::arm::uint8x16_t)
};
}
// NEON kernel: folds one 64-byte stripe of `input`, keyed with `secret`, into
// the accumulator. Two 128-bit lanes are handled per iteration.
#[cfg(target_feature = "neon")]
fn accumulate_512_neon(acc: &mut Acc, input: &StripeLanes, secret: &StripeLanes) {
const NEON_LANES: usize = ACC_NB;
unsafe {
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
let mut idx = 0usize;
// View the eight u64 accumulator lanes as four 128-bit vectors.
let xacc = acc.0.as_mut_ptr() as *mut uint64x2_t;
// idx takes the values 0 and 2, covering vectors 0..4 in pairs.
while idx.wrapping_add(1) < NEON_LANES / 2 {
let data_vec_1 = vreinterpretq_u64_u8(vld1q_u8!(input[idx].as_ptr()));
let data_vec_2 =
vreinterpretq_u64_u8(vld1q_u8!(input[idx.wrapping_add(1)].as_ptr()));
let key_vec_1 = vreinterpretq_u64_u8(vld1q_u8!(secret[idx].as_ptr()));
let key_vec_2 =
vreinterpretq_u64_u8(vld1q_u8!(secret[idx.wrapping_add(1)].as_ptr()));
// Swap the two 64-bit halves of the raw input vectors.
let data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
let data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
let data_key_1 = veorq_u64(data_vec_1, key_vec_1);
let data_key_2 = veorq_u64(data_vec_2, key_vec_2);
// De-interleave the keyed vectors into their low and high 32-bit words.
let unzipped = vuzpq_u32(
vreinterpretq_u32_u64(data_key_1),
vreinterpretq_u32_u64(data_key_2),
);
let data_key_lo = unzipped.0;
let data_key_hi = unzipped.1;
// Widening multiply-accumulate: swapped input + low * high.
let sum_1 = vmlal_u32(
data_swap_1,
vget_low_u32(data_key_lo),
vget_low_u32(data_key_hi),
);
#[cfg(target_arch = "aarch64")]
let sum_2 = vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
#[cfg(target_arch = "arm")]
let sum_2 = vmlal_u32(
data_swap_2,
vget_high_u32(data_key_lo),
vget_high_u32(data_key_hi),
);
xacc.add(idx).write(vaddq_u64(*xacc.add(idx), sum_1));
xacc.add(idx.wrapping_add(1))
.write(vaddq_u64(*xacc.add(idx.wrapping_add(1)), sum_2));
idx = idx.wrapping_add(2);
}
}
}
// SSE2 kernel: folds one 64-byte stripe of `input`, keyed with `secret`, into
// the accumulator, one 128-bit lane per iteration.
#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
fn accumulate_512_sse2(acc: &mut Acc, input: &StripeLanes, secret: &StripeLanes) {
unsafe {
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
// View the accumulator as 128-bit vectors; `Acc` is 16-byte aligned in this configuration.
let xacc = acc.0.as_mut_ptr() as *mut __m128i;
for idx in 0..secret.len() {
let data_vec = _mm_loadu_si128(input[idx].as_ptr() as _);
let key_vec = _mm_loadu_si128(secret[idx].as_ptr() as _);
let data_key = _mm_xor_si128(data_vec, key_vec);
// Move the upper 32 bits of each 64-bit lane into the lower position so
// that `_mm_mul_epu32` computes low * high for every lane.
let data_key_lo = _mm_shuffle_epi32(data_key, _mm_shuffle(0, 3, 0, 1));
let product = _mm_mul_epu32(data_key, data_key_lo);
// Swap the two 64-bit halves of the raw input before adding it.
let data_swap = _mm_shuffle_epi32(data_vec, _mm_shuffle(1, 0, 3, 2));
let sum = _mm_add_epi64(*xacc.add(idx), data_swap);
xacc.add(idx).write(_mm_add_epi64(product, sum));
}
}
}
// AVX2 kernel: folds one 64-byte stripe of `input`, keyed with `secret`, into
// the accumulator, one 256-bit lane per iteration.
#[cfg(target_feature = "avx2")]
fn accumulate_512_avx2(acc: &mut Acc, input: &StripeLanes, secret: &StripeLanes) {
unsafe {
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
// View the accumulator as 256-bit vectors; `Acc` is 32-byte aligned in this configuration.
let xacc = acc.0.as_mut_ptr() as *mut __m256i;
for idx in 0..secret.len() {
let data_vec = _mm256_loadu_si256(input[idx].as_ptr() as _);
let key_vec = _mm256_loadu_si256(secret[idx].as_ptr() as _);
let data_key = _mm256_xor_si256(data_vec, key_vec);
// Shift the upper 32 bits of each 64-bit lane down so that
// `_mm256_mul_epu32` computes low * high for every lane.
let data_key_lo = _mm256_srli_epi64(data_key, 32);
let product = _mm256_mul_epu32(data_key, data_key_lo);
// Swap adjacent 64-bit words of the raw input before adding it.
let data_swap = _mm256_shuffle_epi32(data_vec, _mm_shuffle(1, 0, 3, 2));
let sum = _mm256_add_epi64(*xacc.add(idx), data_swap);
xacc.add(idx).write(_mm256_add_epi64(product, sum));
}
}
}
/// Portable kernel: folds one stripe of `input`, keyed with `secret`, into the
/// accumulator.
///
/// For each lane the raw input word is added to the neighbouring lane
/// (`idx ^ 1`), and the product of the low and high 32-bit halves of
/// `input ^ secret` is added to the lane itself. All additions wrap.
#[cfg(not(any(
    target_feature = "avx2",
    target_feature = "sse2",
    target_feature = "neon",
    all(target_family = "wasm", target_feature = "simd128")
)))]
fn accumulate_512_scalar(acc: &mut Acc, input: &[[u8; 8]; ACC_NB], secret: &[[u8; 8]; ACC_NB]) {
    for (lane, (data_bytes, key_bytes)) in input.iter().zip(secret.iter()).enumerate() {
        let data_val = u64::from_le_bytes(*data_bytes);
        let data_key = data_val ^ u64::from_le_bytes(*key_bytes);
        let product = mult32_to64(data_key as u32, (data_key >> 32) as u32);

        let neighbour = lane ^ 1;
        acc.0[neighbour] = acc.0[neighbour].wrapping_add(data_val);
        acc.0[lane] = acc.0[lane].wrapping_add(product);
    }
}
#[cfg(target_feature = "avx2")]
use accumulate_512_avx2 as accumulate_512;
#[cfg(target_feature = "neon")]
use accumulate_512_neon as accumulate_512;
#[cfg(not(any(
target_feature = "avx2",
target_feature = "sse2",
target_feature = "neon",
all(target_family = "wasm", target_feature = "simd128")
)))]
use accumulate_512_scalar as accumulate_512;
#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
use accumulate_512_sse2 as accumulate_512;
#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
use accumulate_512_wasm as accumulate_512;
// wasm `simd128` scramble: each accumulator lane becomes
// `(lane ^ (lane >> 47) ^ secret) * PRIME_1` (the 32-bit prime, widened).
#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
fn scramble_acc_wasm(acc: &mut Acc, secret: &StripeLanes) {
use core::arch::wasm32::*;
let xacc = acc.0.as_mut_ptr() as *mut v128;
let prime = u64x2_splat(xxh32::PRIME_1 as _);
unsafe {
for idx in 0..secret.len() {
let acc_vec = v128_load(xacc.add(idx) as _);
let shifted = u64x2_shr(acc_vec, 47);
let data_vec = v128_xor(acc_vec, shifted);
let key_vec = v128_load(secret[idx].as_ptr() as _);
let mixed = v128_xor(data_vec, key_vec);
xacc.add(idx).write(i64x2_mul(mixed, prime));
}
}
}
// NEON scramble: each accumulator lane becomes
// `(lane ^ (lane >> 47) ^ secret) * PRIME_1` (the 32-bit prime, widened).
// The 64-bit multiply is assembled from 32-bit multiplies.
#[cfg(target_feature = "neon")]
fn scramble_acc_neon(acc: &mut Acc, secret: &StripeLanes) {
unsafe {
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;
#[cfg(target_arch = "arm")]
use core::arch::arm::*;
let xacc = acc.0.as_mut_ptr() as *mut uint64x2_t;
let prime_low = vdup_n_u32(xxh32::PRIME_1);
// The prime sits in the upper 32 bits of every 64-bit lane; the lower 32 bits are zero.
let prime_hi = vreinterpretq_u32_u64(vdupq_n_u64((xxh32::PRIME_1 as u64) << 32));
for idx in 0..secret.len() {
let acc_vec = *xacc.add(idx);
let shifted = vshrq_n_u64(acc_vec, 47);
let data_vec = veorq_u64(acc_vec, shifted);
let key_vec = vreinterpretq_u64_u8(vld1q_u8!(secret[idx].as_ptr()));
let data_key = veorq_u64(data_vec, key_vec);
// high half * prime, left in the upper 32 bits of each lane.
let prod_hi = vmulq_u32(vreinterpretq_u32_u64(data_key), prime_hi);
// Narrow each lane to its low 32 bits, then widening multiply-accumulate with the prime.
let data_key_lo = vmovn_u64(data_key);
xacc.add(idx).write(vmlal_u32(
vreinterpretq_u64_u32(prod_hi),
data_key_lo,
prime_low,
));
}
}
}
// SSE2 scramble: each accumulator lane becomes
// `(lane ^ (lane >> 47) ^ secret) * PRIME_1` (the 32-bit prime, widened).
// The 64-bit multiply is assembled from two 32x32 -> 64 multiplies.
#[allow(clippy::needless_range_loop)]
#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
fn scramble_acc_sse2(acc: &mut Acc, secret: &StripeLanes) {
unsafe {
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
let xacc = acc.0.as_mut_ptr() as *mut __m128i;
let prime32 = _mm_set1_epi32(xxh32::PRIME_1 as i32);
for idx in 0..secret.len() {
let acc_vec = *xacc.add(idx);
let shifted = _mm_srli_epi64(acc_vec, 47);
let data_vec = _mm_xor_si128(acc_vec, shifted);
let key_vec = _mm_loadu_si128(secret[idx].as_ptr() as _);
let data_key = _mm_xor_si128(data_vec, key_vec);
// Bring the upper 32 bits of each 64-bit lane into the lower position.
let data_key_hi = _mm_shuffle_epi32(data_key, _mm_shuffle(0, 3, 0, 1));
let prod_lo = _mm_mul_epu32(data_key, prime32);
let prod_hi = _mm_mul_epu32(data_key_hi, prime32);
// low * prime + ((high * prime) << 32)
xacc.add(idx)
.write(_mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)));
}
}
}
// AVX2 scramble: each accumulator lane becomes
// `(lane ^ (lane >> 47) ^ secret) * PRIME_1` (the 32-bit prime, widened).
// The 64-bit multiply is assembled from two 32x32 -> 64 multiplies.
#[cfg(target_feature = "avx2")]
fn scramble_acc_avx2(acc: &mut Acc, secret: &StripeLanes) {
unsafe {
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
let xacc = acc.0.as_mut_ptr() as *mut __m256i;
let prime32 = _mm256_set1_epi32(xxh32::PRIME_1 as i32);
for idx in 0..secret.len() {
let acc_vec = *xacc.add(idx);
let shifted = _mm256_srli_epi64(acc_vec, 47);
let data_vec = _mm256_xor_si256(acc_vec, shifted);
let key_vec = _mm256_loadu_si256(secret[idx].as_ptr() as _);
let data_key = _mm256_xor_si256(data_vec, key_vec);
// Bring the upper 32 bits of each 64-bit lane into the lower position.
let data_key_hi = _mm256_srli_epi64(data_key, 32);
let prod_lo = _mm256_mul_epu32(data_key, prime32);
let prod_hi = _mm256_mul_epu32(data_key_hi, prime32);
// low * prime + ((high * prime) << 32)
xacc.add(idx)
.write(_mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)));
}
}
}
/// Portable scramble: each accumulator lane becomes
/// `(lane ^ (lane >> 47) ^ secret_word) * PRIME_1`, using the 32-bit prime
/// widened to `u64` and wrapping multiplication.
#[cfg(not(any(
    target_feature = "avx2",
    target_feature = "sse2",
    target_feature = "neon",
    all(target_family = "wasm", target_feature = "simd128")
)))]
fn scramble_acc_scalar(acc: &mut Acc, secret: &[[u8; 8]; ACC_NB]) {
    let prime = xxh32::PRIME_1 as u64;
    for (lane, key_bytes) in acc.0.iter_mut().zip(secret.iter()) {
        let key = u64::from_le_bytes(*key_bytes);
        let keyed = xorshift64(*lane, 47) ^ key;
        *lane = keyed.wrapping_mul(prime);
    }
}
#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
use scramble_acc_wasm as scramble_acc;
#[cfg(target_feature = "neon")]
use scramble_acc_neon as scramble_acc;
#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
use scramble_acc_sse2 as scramble_acc;
#[cfg(target_feature = "avx2")]
use scramble_acc_avx2 as scramble_acc;
#[cfg(not(any(
target_feature = "avx2",
target_feature = "sse2",
target_feature = "neon",
all(target_family = "wasm", target_feature = "simd128")
)))]
use scramble_acc_scalar as scramble_acc;
/// Runs `accumulate_512` over `nb_stripes` consecutive stripes.
///
/// Stripe `n` reads input at `input + n * STRIPE_LEN` and secret at
/// `secret + n * SECRET_CONSUME_RATE`. The caller must make sure both pointers
/// stay valid for every stripe touched; nothing is checked here.
#[inline(always)]
fn accumulate_loop(acc: &mut Acc, input: *const u8, secret: *const u8, nb_stripes: usize) {
    for stripe in 0..nb_stripes {
        // SAFETY: relies on the caller's guarantee described above.
        unsafe {
            let stripe_input = input.add(stripe * STRIPE_LEN);
            let stripe_secret = secret.add(stripe * SECRET_CONSUME_RATE);
            accumulate_512(
                acc,
                &*(stripe_input as *const _),
                &*(stripe_secret as *const _),
            );
        }
    }
}
/// Accumulates a long `input` into `acc`.
///
/// The input is processed in blocks of as many stripes as the secret allows,
/// scrambling the accumulator after every full block. The stripes left over
/// after the last full block are accumulated next, and finally the last
/// `STRIPE_LEN` bytes of the input are accumulated with a dedicated secret
/// offset.
#[inline]
fn hash_long_internal_loop(acc: &mut Acc, input: &[u8], secret: &[u8]) {
    let scramble_key_offset = secret.len() - STRIPE_LEN;
    let stripes_per_block = scramble_key_offset / SECRET_CONSUME_RATE;
    let block_len = STRIPE_LEN * stripes_per_block;
    // `len - 1` keeps the final stripe out of the full-block count.
    let full_blocks = (input.len() - 1) / block_len;

    for block in 0..full_blocks {
        accumulate_loop(
            acc,
            slice_offset_ptr!(input, block * block_len),
            secret.as_ptr(),
            stripes_per_block,
        );
        scramble_acc(acc, get_aligned_chunk_ref(secret, scramble_key_offset));
    }

    debug_assert!(input.len() > STRIPE_LEN);
    let tail_start = block_len * full_blocks;
    let tail_stripes = ((input.len() - 1) - tail_start) / STRIPE_LEN;
    debug_assert!(tail_stripes <= (secret.len() / SECRET_CONSUME_RATE));
    accumulate_loop(
        acc,
        slice_offset_ptr!(input, tail_start),
        secret.as_ptr(),
        tail_stripes,
    );

    // Last stripe: the final STRIPE_LEN bytes of the input, which may overlap
    // bytes that were already accumulated above.
    let last_input_offset = input.len() - STRIPE_LEN;
    let last_secret_offset = secret.len() - STRIPE_LEN - SECRET_LASTACC_START;
    accumulate_512(
        acc,
        get_aligned_chunk_ref(input, last_input_offset),
        get_aligned_chunk_ref(secret, last_secret_offset),
    );
}
/// Hashes an input of 1 to 3 bytes.
///
/// The first, middle and last bytes plus the length are packed into one
/// 32-bit word, xored with a seeded secret word and avalanched.
#[inline(always)]
fn xxh3_64_1to3(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
    let len = input.len();
    // SAFETY: indices 0, len / 2 and len - 1 are in bounds for any non-empty
    // input; the caller is expected to pass at least one byte.
    let (first, middle, last) = unsafe {
        (
            *input.get_unchecked(0),
            *input.get_unchecked(len >> 1),
            *input.get_unchecked(len - 1),
        )
    };

    let combo = ((first as u32) << 16)
        | ((middle as u32) << 24)
        | (last as u32)
        | ((len as u32) << 8);
    let secret_word = read_32le_unaligned(secret, 0) ^ read_32le_unaligned(secret, 4);
    let flip = (secret_word as u64).wrapping_add(seed);
    xxh64::avalanche((combo as u64) ^ flip)
}
/// Hashes an input of 4 to 8 bytes.
///
/// The first and last four bytes (which overlap for short inputs) form one
/// 64-bit word that is xored with a seeded secret word and run through
/// `strong_avalanche` together with the length.
#[inline(always)]
fn xxh3_64_4to8(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
    let len = input.len();
    debug_assert!(len >= 4 && len <= 8);

    // Spread the seed: byte-swapped low half goes into the upper 32 bits.
    let seed = seed ^ (((seed as u32).swap_bytes() as u64) << 32);
    let head = read_32le_unaligned(input, 0);
    let tail = read_32le_unaligned(input, len - 4);
    let secret_word = read_64le_unaligned(secret, 8) ^ read_64le_unaligned(secret, 16);
    let flip = secret_word.wrapping_sub(seed);

    let input64 = (tail as u64).wrapping_add((head as u64) << 32);
    strong_avalanche(input64 ^ flip, len as u64)
}
/// Hashes an input of 9 to 16 bytes.
///
/// The first and last eight bytes (which overlap for short inputs) are each
/// xored with a seeded secret word, then combined with the length and their
/// folded 128-bit product before the final avalanche.
#[inline(always)]
fn xxh3_64_9to16(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
    let len = input.len();
    debug_assert!(len >= 9 && len <= 16);

    let secret_lo = read_64le_unaligned(secret, 24) ^ read_64le_unaligned(secret, 32);
    let secret_hi = read_64le_unaligned(secret, 40) ^ read_64le_unaligned(secret, 48);
    let flip_lo = secret_lo.wrapping_add(seed);
    let flip_hi = secret_hi.wrapping_sub(seed);

    let input_lo = read_64le_unaligned(input, 0) ^ flip_lo;
    let input_hi = read_64le_unaligned(input, len - 8) ^ flip_hi;

    let folded = mul128_fold64(input_lo, input_hi);
    let acc = (len as u64)
        .wrapping_add(input_lo.swap_bytes())
        .wrapping_add(input_hi)
        .wrapping_add(folded);
    avalanche(acc)
}
/// Dispatches inputs of at most 16 bytes to the routine for their length
/// class; the empty input is hashed directly from the seed and two secret
/// words.
#[inline(always)]
fn xxh3_64_0to16(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
    match input.len() {
        0 => {
            let secret_word = read_64le_unaligned(secret, 56) ^ read_64le_unaligned(secret, 64);
            xxh64::avalanche(seed ^ secret_word)
        }
        1..=3 => xxh3_64_1to3(input, seed, secret),
        4..=8 => xxh3_64_4to8(input, seed, secret),
        _ => xxh3_64_9to16(input, seed, secret),
    }
}
/// Hashes an input that the dispatcher routes here for lengths 17 to 128.
///
/// The input is consumed in up to four tiers, outermost first. Tier `t` mixes
/// the 16 bytes at `16 * t` from the front and the 16 bytes ending
/// `16 * t` bytes before the end, using secret offsets `32 * t` and
/// `32 * t + 16`. Longer inputs enable more tiers.
#[inline(always)]
fn xxh3_64_7to128(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
    let len = input.len();
    let tiers: usize = if len > 96 {
        4
    } else if len > 64 {
        3
    } else if len > 32 {
        2
    } else {
        1
    };

    let mut acc = (len as u64).wrapping_mul(xxh64::PRIME_1);
    for tier in (0..tiers).rev() {
        let front = mix16_b(
            get_aligned_chunk_ref(input, 16 * tier),
            get_aligned_chunk_ref(secret, 32 * tier),
            seed,
        );
        acc = acc.wrapping_add(front);
        let back = mix16_b(
            get_aligned_chunk_ref(input, len - 16 * (tier + 1)),
            get_aligned_chunk_ref(secret, 32 * tier + 16),
            seed,
        );
        acc = acc.wrapping_add(back);
    }
    avalanche(acc)
}
/// Hashes an input that the dispatcher routes here for lengths 129 to
/// `MID_SIZE_MAX`.
///
/// The first eight 16-byte rounds use the start of the secret and are
/// avalanched; the remaining full rounds use the secret shifted by
/// `START_OFFSET`; the last 16 bytes of the input are mixed with a fixed
/// secret position near `SECRET_SIZE_MIN`.
#[inline(never)]
fn xxh3_64_129to240(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
    const START_OFFSET: usize = 3;
    const LAST_OFFSET: usize = 17;
    const FIRST_PHASE_ROUNDS: usize = 8;

    let len = input.len();
    let nb_rounds = len / 16;
    debug_assert!(nb_rounds >= 8);

    let mut acc = (len as u64).wrapping_mul(xxh64::PRIME_1);
    for round in 0..FIRST_PHASE_ROUNDS {
        acc = acc.wrapping_add(mix16_b(
            get_aligned_chunk_ref(input, 16 * round),
            get_aligned_chunk_ref(secret, 16 * round),
            seed,
        ));
    }
    acc = avalanche(acc);

    for round in FIRST_PHASE_ROUNDS..nb_rounds {
        acc = acc.wrapping_add(mix16_b(
            get_aligned_chunk_ref(input, 16 * round),
            get_aligned_chunk_ref(secret, 16 * (round - FIRST_PHASE_ROUNDS) + START_OFFSET),
            seed,
        ));
    }

    let last = mix16_b(
        get_aligned_chunk_ref(input, len - 16),
        get_aligned_chunk_ref(secret, SECRET_SIZE_MIN - LAST_OFFSET),
        seed,
    );
    avalanche(acc.wrapping_add(last))
}
/// Picks the hashing routine for `input` by length: up to 16 bytes, up to 128,
/// up to `MID_SIZE_MAX`, and `long_hash_fn` for anything longer.
///
/// `secret` must hold at least `SECRET_SIZE_MIN` bytes (debug-asserted only).
#[inline(always)]
fn xxh3_64_internal(input: &[u8], seed: u64, secret: &[u8], long_hash_fn: LongHashFn) -> u64 {
    debug_assert!(secret.len() >= SECRET_SIZE_MIN);
    let len = input.len();
    if len > MID_SIZE_MAX {
        return long_hash_fn(input, seed, secret);
    }
    if len > 128 {
        return xxh3_64_129to240(input, seed, secret);
    }
    if len > 16 {
        return xxh3_64_7to128(input, seed, secret);
    }
    xxh3_64_0to16(input, seed, secret)
}
/// Long-input hash: accumulates the whole input starting from `INITIAL_ACC`,
/// then merges the accumulator with the secret at `SECRET_MERGEACCS_START`,
/// starting from `len * PRIME_1`.
#[inline(always)]
fn xxh3_64_long_impl(input: &[u8], secret: &[u8]) -> u64 {
    let mut acc = INITIAL_ACC;
    hash_long_internal_loop(&mut acc, input, secret);

    let merge_secret: &[[[u8; 8]; 2]; 4] = get_aligned_chunk_ref(secret, SECRET_MERGEACCS_START);
    let start = (input.len() as u64).wrapping_mul(xxh64::PRIME_1);
    merge_accs(&mut acc, merge_secret, start)
}
/// Long-input hash for the seeded API. A zero seed uses `DEFAULT_SECRET`
/// as is; any other seed hashes with a secret derived from it. The `_secret`
/// argument is ignored.
#[inline(never)]
fn xxh3_64_long_with_seed(input: &[u8], seed: u64, _secret: &[u8]) -> u64 {
    if seed == 0 {
        return xxh3_64_long_impl(input, &DEFAULT_SECRET);
    }
    let derived = custom_default_secret(seed);
    xxh3_64_long_impl(input, &derived)
}
/// Long-input hash with `DEFAULT_SECRET`; `_seed` and `_secret` are ignored.
/// The unused parameters keep the signature compatible with `LongHashFn`.
#[inline(never)]
fn xxh3_64_long_default(input: &[u8], _seed: u64, _secret: &[u8]) -> u64 {
xxh3_64_long_impl(input, &DEFAULT_SECRET)
}
/// Long-input hash with a caller-provided `secret`; `_seed` is ignored.
/// The unused parameter keeps the signature compatible with `LongHashFn`.
#[inline(never)]
fn xxh3_64_long_with_secret(input: &[u8], _seed: u64, secret: &[u8]) -> u64 {
xxh3_64_long_impl(input, secret)
}
/// Returns the 64-bit XXH3 hash of `input`, using seed 0 and `DEFAULT_SECRET`.
#[inline]
pub fn xxh3_64(input: &[u8]) -> u64 {
xxh3_64_internal(input, 0, &DEFAULT_SECRET, xxh3_64_long_default)
}
/// Returns the 64-bit XXH3 hash of `input` for the given `seed`.
///
/// Inputs up to `MID_SIZE_MAX` bytes combine `seed` with `DEFAULT_SECRET`;
/// longer inputs go through `xxh3_64_long_with_seed`.
#[inline]
pub fn xxh3_64_with_seed(input: &[u8], seed: u64) -> u64 {
xxh3_64_internal(input, seed, &DEFAULT_SECRET, xxh3_64_long_with_seed)
}
/// Returns the 64-bit XXH3 hash of `input`, using seed 0 and the caller's `secret`.
///
/// `secret` must be at least `SECRET_SIZE_MIN` bytes long. This is only
/// checked by a `debug_assert!`, and the hashing code reads the secret
/// without bounds checks in release builds.
#[inline]
pub fn xxh3_64_with_secret(input: &[u8], secret: &[u8]) -> u64 {
xxh3_64_internal(input, 0, secret, xxh3_64_long_with_secret)
}
/// Capacity in bytes of the streaming hasher's internal buffer (four stripes).
const INTERNAL_BUFFER_SIZE: usize = 256;
/// Number of stripes accumulated with the default secret before the
/// accumulator is scrambled.
const STRIPES_PER_BLOCK: usize = (DEFAULT_SECRET_SIZE - STRIPE_LEN) / SECRET_CONSUME_RATE;
/// Wrapper that forces 64-byte alignment on the wrapped value.
#[derive(Clone)]
#[repr(align(64))]
struct Aligned64<T>(T);
/// Accumulates `nb_stripes` stripes starting at `input`, continuing a block in
/// which `nb_stripes_acc` stripes were already consumed.
///
/// If the block boundary (`STRIPES_PER_BLOCK`) is reached, the accumulator is
/// scrambled and the remaining stripes start a new block from the beginning
/// of the secret. Returns the number of stripes consumed in the current block
/// afterwards. The caller must keep `input` valid for
/// `nb_stripes * STRIPE_LEN` bytes.
#[inline]
fn xxh3_stateful_consume_stripes(
    acc: &mut Acc,
    nb_stripes: usize,
    nb_stripes_acc: usize,
    input: *const u8,
    secret: &[u8; DEFAULT_SECRET_SIZE],
) -> usize {
    let stripes_to_end = STRIPES_PER_BLOCK - nb_stripes_acc;
    let secret_cursor = slice_offset_ptr!(secret, nb_stripes_acc * SECRET_CONSUME_RATE);

    // Everything fits in the current block: no scramble needed.
    if nb_stripes < stripes_to_end {
        accumulate_loop(acc, input, secret_cursor, nb_stripes);
        return nb_stripes_acc.wrapping_add(nb_stripes);
    }

    // Finish the current block, scramble, then start the next one.
    let stripes_after_end = nb_stripes - stripes_to_end;
    accumulate_loop(acc, input, secret_cursor, stripes_to_end);
    scramble_acc(
        acc,
        get_aligned_chunk_ref(secret, DEFAULT_SECRET_SIZE - STRIPE_LEN),
    );
    accumulate_loop(
        acc,
        unsafe { input.add(stripes_to_end * STRIPE_LEN) },
        secret.as_ptr(),
        stripes_after_end,
    );
    stripes_after_end
}
// Feeds `input` into the streaming state.
//
// `total_len` is bumped by the input length, then:
//   1. if the input fits in the free space of `buffer`, it is only copied;
//   2. otherwise any buffered bytes are topped up to a full buffer and consumed;
//   3. while more than one buffer's worth remains, stripes are consumed
//      straight from `input`;
//   4. the remaining 1..=INTERNAL_BUFFER_SIZE bytes are stored in `buffer`.
#[allow(clippy::assign_op_pattern)]
fn xxh3_stateful_update(
input: &[u8],
total_len: &mut u64,
acc: &mut Acc,
buffer: &mut Aligned64<[mem::MaybeUninit<u8>; INTERNAL_BUFFER_SIZE]>,
buffered_size: &mut u16,
nb_stripes_acc: &mut usize,
secret: &Aligned64<[u8; DEFAULT_SECRET_SIZE]>,
) {
const INTERNAL_BUFFER_STRIPES: usize = INTERNAL_BUFFER_SIZE / STRIPE_LEN;
let mut input_ptr = input.as_ptr();
let mut input_len = input.len();
*total_len = total_len.wrapping_add(input_len as u64);
// Step 1: the whole input fits behind the bytes already buffered.
if (input_len + *buffered_size as usize) <= INTERNAL_BUFFER_SIZE {
unsafe {
ptr::copy_nonoverlapping(
input_ptr,
(buffer.0.as_mut_ptr() as *mut u8).offset(*buffered_size as isize),
input_len,
)
}
// Cannot overflow u16: the sum was just checked to be <= INTERNAL_BUFFER_SIZE.
*buffered_size += input_len as u16;
return;
}
// Step 2: complete the partially filled buffer and consume it entirely.
if *buffered_size > 0 {
let fill_len = INTERNAL_BUFFER_SIZE - *buffered_size as usize;
unsafe {
ptr::copy_nonoverlapping(
input_ptr,
(buffer.0.as_mut_ptr() as *mut u8).offset(*buffered_size as isize),
fill_len,
);
input_ptr = input_ptr.add(fill_len);
input_len -= fill_len;
}
*nb_stripes_acc = xxh3_stateful_consume_stripes(
acc,
INTERNAL_BUFFER_STRIPES,
*nb_stripes_acc,
buffer.0.as_ptr() as *const u8,
&secret.0,
);
*buffered_size = 0;
}
debug_assert_ne!(input_len, 0);
// Step 3: consume full buffer-sized chunks directly from the input, always
// leaving between 1 and INTERNAL_BUFFER_SIZE bytes unconsumed.
if input_len > INTERNAL_BUFFER_SIZE {
loop {
*nb_stripes_acc = xxh3_stateful_consume_stripes(
acc,
INTERNAL_BUFFER_STRIPES,
*nb_stripes_acc,
input_ptr,
&secret.0,
);
input_ptr = unsafe { input_ptr.add(INTERNAL_BUFFER_SIZE) };
input_len = input_len - INTERNAL_BUFFER_SIZE;
if input_len <= INTERNAL_BUFFER_SIZE {
break;
}
}
// Keep a copy of the last consumed stripe at the end of the buffer; the
// digest reads bytes from the end of the buffer when fewer than
// STRIPE_LEN bytes are buffered.
unsafe {
ptr::copy_nonoverlapping(
input_ptr.offset(-(STRIPE_LEN as isize)),
(buffer.0.as_mut_ptr() as *mut u8).add(buffer.0.len() - STRIPE_LEN),
STRIPE_LEN,
)
}
}
debug_assert_ne!(input_len, 0);
debug_assert_eq!(*buffered_size, 0);
// Step 4: stash the unconsumed remainder at the start of the buffer.
unsafe { ptr::copy_nonoverlapping(input_ptr, buffer.0.as_mut_ptr() as *mut u8, input_len) }
*buffered_size = input_len as u16;
}
// Folds the still-buffered bytes into `acc` ahead of the final merge.
//
// `buffer` holds the bytes not yet consumed; `old_buffer` is the rest of the
// internal buffer, whose tail supplies the bytes that precede `buffer` in the
// stream when fewer than STRIPE_LEN bytes are buffered.
#[inline(always)]
fn xxh3_stateful_digest_internal(
acc: &mut Acc,
nb_stripes_acc: usize,
buffer: &[u8],
old_buffer: &[mem::MaybeUninit<u8>],
secret: &Aligned64<[u8; DEFAULT_SECRET_SIZE]>,
) {
if buffer.len() >= STRIPE_LEN {
// Consume every stripe that ends before the last buffered byte, then
// accumulate the final STRIPE_LEN bytes with the last-stripe secret offset.
let nb_stripes = (buffer.len() - 1) / STRIPE_LEN;
xxh3_stateful_consume_stripes(
acc,
nb_stripes,
nb_stripes_acc,
buffer.as_ptr(),
&secret.0,
);
accumulate_512(
acc,
get_aligned_chunk_ref(buffer, buffer.len() - STRIPE_LEN),
get_aligned_chunk_ref(
&secret.0,
DEFAULT_SECRET_SIZE - STRIPE_LEN - SECRET_LASTACC_START,
),
);
} else {
// Fewer than STRIPE_LEN bytes are buffered: build the last stripe from the
// final `catchup_size` bytes of the internal buffer followed by `buffer`.
let mut last_stripe = mem::MaybeUninit::<[u8; STRIPE_LEN]>::uninit();
let catchup_size = STRIPE_LEN - buffer.len();
debug_assert!(buffer.len() > 0);
let last_stripe = unsafe {
// `old_buffer` starts right after `buffer`, so this offset lands
// `catchup_size` bytes before the end of the internal buffer.
ptr::copy_nonoverlapping(
(old_buffer.as_ptr() as *const u8)
.add(INTERNAL_BUFFER_SIZE - buffer.len() - catchup_size),
last_stripe.as_mut_ptr() as _,
catchup_size,
);
ptr::copy_nonoverlapping(
buffer.as_ptr(),
(last_stripe.as_mut_ptr() as *mut u8).add(catchup_size),
buffer.len(),
);
slice::from_raw_parts(
last_stripe.as_ptr() as *const u8,
buffer.len() + catchup_size,
)
};
accumulate_512(
acc,
get_aligned_chunk_ref(&last_stripe, 0),
get_aligned_chunk_ref(
&secret.0,
DEFAULT_SECRET_SIZE - STRIPE_LEN - SECRET_LASTACC_START,
),
);
}
}
/// Streaming XXH3 hasher state that always uses the default secret.
#[derive(Clone)]
pub struct Xxh3Default {
// Running accumulator lanes.
acc: Acc,
// Input bytes received but not yet consumed; only the first `buffered_size` bytes are meaningful.
buffer: Aligned64<[mem::MaybeUninit<u8>; INTERNAL_BUFFER_SIZE]>,
// Number of valid bytes at the start of `buffer`.
buffered_size: u16,
// Stripes consumed so far in the current block.
nb_stripes_acc: usize,
// Total number of input bytes fed so far (wrapping).
total_len: u64,
}
impl Xxh3Default {
    /// The default secret wrapped in `Aligned64`, as expected by the streaming routines.
    const DEFAULT_SECRET: Aligned64<[u8; DEFAULT_SECRET_SIZE]> = Aligned64(DEFAULT_SECRET);

    /// Creates a hasher with the initial accumulator, nothing buffered and a zero length.
    #[inline(always)]
    pub const fn new() -> Self {
        Self {
            total_len: 0,
            nb_stripes_acc: 0,
            buffered_size: 0,
            buffer: Aligned64([mem::MaybeUninit::uninit(); INTERNAL_BUFFER_SIZE]),
            acc: INITIAL_ACC,
        }
    }

    /// Returns the hasher to its freshly constructed state.
    ///
    /// The buffer contents are left as they are; only the bookkeeping is cleared.
    #[inline(always)]
    pub fn reset(&mut self) {
        self.nb_stripes_acc = 0;
        self.buffered_size = 0;
        self.total_len = 0;
        self.acc = INITIAL_ACC;
    }

    /// The pending input: the first `buffered_size` bytes of the internal buffer.
    #[inline(always)]
    fn buffered_input(&self) -> &[u8] {
        let pending = usize::from(self.buffered_size);
        let start = self.buffer.0.as_ptr() as *const u8;
        // NOTE(review): relies on the update routine having initialised the first
        // `buffered_size` bytes of the buffer.
        unsafe { slice::from_raw_parts(start, pending) }
    }

    /// The part of the internal buffer that follows the pending input.
    #[inline(always)]
    fn processed_buffer(&self) -> &[mem::MaybeUninit<u8>] {
        let pending = usize::from(self.buffered_size);
        let base = self.buffer.0.as_ptr();
        unsafe { slice::from_raw_parts(base.add(pending), self.buffer.0.len() - pending) }
    }

    /// Feeds `input` into the hasher.
    #[inline(always)]
    pub fn update(&mut self, input: &[u8]) {
        let Self {
            acc,
            buffer,
            buffered_size,
            nb_stripes_acc,
            total_len,
        } = self;
        xxh3_stateful_update(
            input,
            total_len,
            acc,
            buffer,
            buffered_size,
            nb_stripes_acc,
            &Self::DEFAULT_SECRET,
        );
    }

    /// 64-bit digest for streams longer than `MID_SIZE_MAX` bytes.
    ///
    /// Works on a copy of the accumulator, so the hasher itself is not modified.
    #[inline(never)]
    fn digest_mid_sized(&self) -> u64 {
        let mut scratch = self.acc.clone();
        let pending = self.buffered_input();
        let consumed = self.processed_buffer();
        xxh3_stateful_digest_internal(
            &mut scratch,
            self.nb_stripes_acc,
            pending,
            consumed,
            &Self::DEFAULT_SECRET,
        );

        let start = self.total_len.wrapping_mul(xxh64::PRIME_1);
        merge_accs(
            &mut scratch,
            get_aligned_chunk_ref(&Self::DEFAULT_SECRET.0, SECRET_MERGEACCS_START),
            start,
        )
    }

    /// 128-bit digest for streams longer than `MID_SIZE_MAX` bytes.
    ///
    /// The low half merges with the secret at `SECRET_MERGEACCS_START`; the high half uses
    /// the mirrored offset counted from the end of the secret.
    #[inline(never)]
    fn digest_mid_sized_128(&self) -> u128 {
        let mut scratch = self.acc.clone();
        let pending = self.buffered_input();
        let consumed = self.processed_buffer();
        xxh3_stateful_digest_internal(
            &mut scratch,
            self.nb_stripes_acc,
            pending,
            consumed,
            &Self::DEFAULT_SECRET,
        );

        let low_start = self.total_len.wrapping_mul(xxh64::PRIME_1);
        let low = merge_accs(
            &mut scratch,
            get_aligned_chunk_ref(&Self::DEFAULT_SECRET.0, SECRET_MERGEACCS_START),
            low_start,
        );

        let high_offset =
            DEFAULT_SECRET_SIZE - mem::size_of_val(&self.acc) - SECRET_MERGEACCS_START;
        let high_start = !self.total_len.wrapping_mul(xxh64::PRIME_2);
        let high = merge_accs(
            &mut scratch,
            get_aligned_chunk_ref(&Self::DEFAULT_SECRET.0, high_offset),
            high_start,
        );

        (low as u128) | ((high as u128) << 64)
    }

    /// Computes the 64-bit hash of everything fed so far without consuming the hasher.
    pub fn digest(&self) -> u64 {
        // Short streams are still fully buffered and are hashed in one shot.
        if self.total_len <= MID_SIZE_MAX as u64 {
            return xxh3_64_internal(
                self.buffered_input(),
                0,
                &Self::DEFAULT_SECRET.0,
                xxh3_64_long_default,
            );
        }
        self.digest_mid_sized()
    }

    /// Computes the 128-bit hash of everything fed so far without consuming the hasher.
    pub fn digest128(&self) -> u128 {
        // Short streams are still fully buffered and are hashed in one shot.
        if self.total_len <= MID_SIZE_MAX as u64 {
            return xxh3_128_internal(
                self.buffered_input(),
                0,
                &Self::DEFAULT_SECRET.0,
                xxh3_128_long_default,
            );
        }
        self.digest_mid_sized_128()
    }
}
impl Default for Xxh3Default {
#[inline(always)]
fn default() -> Self {
Self::new()
}
}
impl hash::Hasher for Xxh3Default {
#[inline(always)]
fn finish(&self) -> u64 {
self.digest()
}
#[inline(always)]
fn write(&mut self, input: &[u8]) {
self.update(input)
}
}
impl std::io::Write for Xxh3Default {
    /// Hashes all of `buf`; never fails and always reports the whole slice as written.
    #[inline]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let consumed = buf.len();
        self.update(buf);
        Ok(consumed)
    }

    /// Nothing is held back outside the hasher state, so flushing is a no-op.
    #[inline]
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
/// Streaming XXH3 hasher state that carries its own secret and seed.
#[derive(Clone)]
pub struct Xxh3 {
/// Running accumulator; starts from `INITIAL_ACC`.
acc: Acc,
/// Secret used by the streaming update and by the digest of long inputs.
custom_secret: Aligned64<[u8; DEFAULT_SECRET_SIZE]>,
/// Internal staging buffer; only the first `buffered_size` bytes are exposed as input.
buffer: Aligned64<[mem::MaybeUninit<u8>; INTERNAL_BUFFER_SIZE]>,
/// Number of pending input bytes at the start of `buffer`.
buffered_size: u16,
/// Stripe counter handed to the update and digest routines.
nb_stripes_acc: usize,
/// Total length counter maintained by the update routine; selects the digest path.
total_len: u64,
/// Seed supplied at construction; `0` when the hasher was built from a secret only.
seed: u64,
}
impl Xxh3 {
#[inline(always)]
pub const fn new() -> Self {
Self::with_custom_ops(0, DEFAULT_SECRET)
}
#[inline]
const fn with_custom_ops(seed: u64, secret: [u8; DEFAULT_SECRET_SIZE]) -> Self {
Self {
acc: INITIAL_ACC,
custom_secret: Aligned64(secret),
buffer: Aligned64([mem::MaybeUninit::uninit(); INTERNAL_BUFFER_SIZE]),
buffered_size: 0,
nb_stripes_acc: 0,
total_len: 0,
seed,
}
}
#[inline(always)]
pub const fn with_secret(secret: [u8; DEFAULT_SECRET_SIZE]) -> Self {
Self::with_custom_ops(0, secret)
}
#[inline(always)]
pub fn with_seed(seed: u64) -> Self {
Self::with_custom_ops(seed, custom_default_secret(seed))
}
#[inline(always)]
pub fn reset(&mut self) {
self.acc = INITIAL_ACC;
self.total_len = 0;
self.buffered_size = 0;
self.nb_stripes_acc = 0;
}
#[inline(always)]
fn buffered_input(&self) -> &[u8] {
let ptr = self.buffer.0.as_ptr();
unsafe { slice::from_raw_parts(ptr as *const u8, self.buffered_size as usize) }
}
#[inline(always)]
fn processed_buffer(&self) -> &[mem::MaybeUninit<u8>] {
let ptr = self.buffer.0.as_ptr();
unsafe {
slice::from_raw_parts(
ptr.add(self.buffered_size as usize),
self.buffer.0.len() - self.buffered_size as usize,
)
}
}
#[inline]
pub fn update(&mut self, input: &[u8]) {
xxh3_stateful_update(
input,
&mut self.total_len,
&mut self.acc,
&mut self.buffer,
&mut self.buffered_size,
&mut self.nb_stripes_acc,
&self.custom_secret,
);
}
#[inline(never)]
fn digest_mid_sized(&self) -> u64 {
let mut acc = self.acc.clone();
xxh3_stateful_digest_internal(
&mut acc,
self.nb_stripes_acc,
self.buffered_input(),
self.processed_buffer(),
&self.custom_secret,
);
merge_accs(
&mut acc,
get_aligned_chunk_ref(&self.custom_secret.0, SECRET_MERGEACCS_START),
self.total_len.wrapping_mul(xxh64::PRIME_1),
)
}
#[inline(never)]
fn digest_mid_sized_128(&self) -> u128 {
let mut acc = self.acc.clone();
xxh3_stateful_digest_internal(
&mut acc,
self.nb_stripes_acc,
self.buffered_input(),
self.processed_buffer(),
&self.custom_secret,
);
let low = merge_accs(
&mut acc,
get_aligned_chunk_ref(&self.custom_secret.0, SECRET_MERGEACCS_START),
self.total_len.wrapping_mul(xxh64::PRIME_1),
);
let high = merge_accs(
&mut acc,
get_aligned_chunk_ref(
&self.custom_secret.0,
self.custom_secret.0.len()
- mem::size_of_val(&self.acc)
- SECRET_MERGEACCS_START,
),
!self.total_len.wrapping_mul(xxh64::PRIME_2),
);
((high as u128) << 64) | (low as u128)
}
pub fn digest(&self) -> u64 {
if self.total_len > MID_SIZE_MAX as u64 {
self.digest_mid_sized()
} else if self.seed > 0 {
xxh3_64_internal(
self.buffered_input(),
self.seed,
&DEFAULT_SECRET,
xxh3_64_long_with_seed,
)
} else {
xxh3_64_internal(
self.buffered_input(),
self.seed,
&self.custom_secret.0,
xxh3_64_long_with_secret,
)
}
}
pub fn digest128(&self) -> u128 {
if self.total_len > MID_SIZE_MAX as u64 {
self.digest_mid_sized_128()
} else if self.seed > 0 {
xxh3_128_internal(
self.buffered_input(),
self.seed,
&DEFAULT_SECRET,
xxh3_128_long_with_seed,
)
} else {
xxh3_128_internal(
self.buffered_input(),
self.seed,
&self.custom_secret.0,
xxh3_128_long_with_secret,
)
}
}
}
impl Default for Xxh3 {
#[inline(always)]
fn default() -> Self {
Self::new()
}
}
impl core::hash::Hasher for Xxh3 {
#[inline(always)]
fn finish(&self) -> u64 {
self.digest()
}
#[inline(always)]
fn write(&mut self, input: &[u8]) {
self.update(input)
}
}
impl std::io::Write for Xxh3 {
    /// Hashes all of `buf`; never fails and always reports the whole slice as written.
    #[inline]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let consumed = buf.len();
        self.update(buf);
        Ok(consumed)
    }

    /// Nothing is held back outside the hasher state, so flushing is a no-op.
    #[inline]
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
/// One-shot 128-bit hash of a long input using `secret`.
///
/// Runs the long-input accumulation loop, then merges the accumulators twice: once with
/// the secret at `SECRET_MERGEACCS_START` for the low half, and once with the mirrored
/// offset counted from the end of the secret for the high half.
#[inline]
fn xxh3_128_long_impl(input: &[u8], secret: &[u8]) -> u128 {
    let mut acc = INITIAL_ACC;
    hash_long_internal_loop(&mut acc, input, secret);
    debug_assert!(secret.len() >= mem::size_of::<Acc>() + SECRET_MERGEACCS_START);

    let len = input.len() as u64;

    let low = merge_accs(
        &mut acc,
        get_aligned_chunk_ref(secret, SECRET_MERGEACCS_START),
        len.wrapping_mul(xxh64::PRIME_1),
    );

    let high_offset = secret.len() - mem::size_of::<Acc>() - SECRET_MERGEACCS_START;
    let high = merge_accs(
        &mut acc,
        get_aligned_chunk_ref(secret, high_offset),
        !len.wrapping_mul(xxh64::PRIME_2),
    );

    ((high as u128) << 64) | (low as u128)
}
/// 128-bit hash for inputs of 9 to 16 bytes (the dispatcher calls this when `input.len() > 8`).
///
/// * `input` - bytes to hash; the first and the last 8 bytes are read and may overlap.
/// * `seed` - subtracted from the low key and added to the high key.
/// * `secret` - secret material; bytes 32..64 are read.
#[inline(always)]
fn xxh3_128_9to16(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
// Derive the two 64-bit keys from the secret and the seed.
let flip_lo =
(read_64le_unaligned(secret, 32) ^ read_64le_unaligned(secret, 40)).wrapping_sub(seed);
let flip_hi =
(read_64le_unaligned(secret, 48) ^ read_64le_unaligned(secret, 56)).wrapping_add(seed);
// Leading and trailing 8 bytes of the input.
let input_lo = read_64le_unaligned(input, 0);
let mut input_hi = read_64le_unaligned(input, input.len() - 8);
// First widening multiply over the combined, keyed input.
let (mut mul_low, mut mul_high) =
mul64_to128(input_lo ^ input_hi ^ flip_lo, xxh64::PRIME_1);
// Mix the length into the top bits of the low half.
mul_low = mul_low.wrapping_add((input.len() as u64 - 1) << 54);
input_hi ^= flip_hi;
// Adds `input_hi` plus its low 32 bits multiplied by `PRIME_2 - 1` to the high half.
mul_high = mul_high
.wrapping_add(input_hi.wrapping_add(mult32_to64(input_hi as u32, xxh32::PRIME_2 - 1)));
mul_low ^= mul_high.swap_bytes();
// Second widening multiply; the high half additionally folds in `mul_high * PRIME_2`.
let (result_low, mut result_hi) = mul64_to128(mul_low, xxh64::PRIME_2);
result_hi = result_hi.wrapping_add(mul_high.wrapping_mul(xxh64::PRIME_2));
to_u128!(avalanche(result_low), avalanche(result_hi))
}
/// 128-bit hash for inputs of 4 to 8 bytes.
///
/// Reads the first and the last 4 bytes of `input` (they may overlap), keys them with
/// secret bytes 16..32 and the seed, and scrambles the result of a widening multiply.
#[inline(always)]
fn xxh3_128_4to8(input: &[u8], mut seed: u64, secret: &[u8]) -> u128 {
    // Fold the byte-swapped low half of the seed into its upper 32 bits.
    seed ^= ((seed as u32).swap_bytes() as u64) << 32;

    let head = read_32le_unaligned(input, 0);
    let tail = read_32le_unaligned(input, input.len() - 4);
    let combined = (head as u64).wrapping_add((tail as u64) << 32);

    let bitflip =
        (read_64le_unaligned(secret, 16) ^ read_64le_unaligned(secret, 24)).wrapping_add(seed);
    let multiplier = xxh64::PRIME_1.wrapping_add((input.len() as u64) << 2);
    let (mut low, mut high) = mul64_to128(combined ^ bitflip, multiplier);

    high = high.wrapping_add(low << 1);
    low ^= high >> 3;

    low = xorshift64(low, 35).wrapping_mul(0x9FB21C651E98DF25);
    low = xorshift64(low, 28);
    high = avalanche(high);

    ((high as u128) << 64) | (low as u128)
}
/// 128-bit hash for inputs of 1 to 3 bytes.
///
/// Packs the first, middle and last byte together with the length into one 32-bit word,
/// derives a second word from it, keys both with secret bytes 0..16 and the seed, and
/// avalanches each half.
#[inline(always)]
fn xxh3_128_1to3(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
    let len = input.len();
    // The dispatcher only calls this for non-empty input, so all three indices are in range.
    let (first, middle, last) = unsafe {
        (
            *input.get_unchecked(0),
            *input.get_unchecked(len >> 1),
            *input.get_unchecked(len - 1),
        )
    };

    let packed_lo =
        ((first as u32) << 16) | ((middle as u32) << 24) | (last as u32) | ((len as u32) << 8);
    let packed_hi = packed_lo.swap_bytes().rotate_left(13);

    let secret_lo =
        (read_32le_unaligned(secret, 0) as u64) ^ (read_32le_unaligned(secret, 4) as u64);
    let secret_hi =
        (read_32le_unaligned(secret, 8) as u64) ^ (read_32le_unaligned(secret, 12) as u64);

    let keyed_lo = (packed_lo as u64) ^ secret_lo.wrapping_add(seed);
    let keyed_hi = (packed_hi as u64) ^ secret_hi.wrapping_sub(seed);

    let low = xxh64::avalanche(keyed_lo) as u128;
    let high = xxh64::avalanche(keyed_hi) as u128;
    (high << 64) | low
}
/// 128-bit hash for inputs of at most 16 bytes; picks the routine matching the length.
///
/// The empty input is handled inline by avalanching the seed keyed with secret bytes 64..96.
#[inline(always)]
fn xxh3_128_0to16(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
    match input.len() {
        0 => {
            let key_lo = read_64le_unaligned(secret, 64) ^ read_64le_unaligned(secret, 72);
            let key_hi = read_64le_unaligned(secret, 80) ^ read_64le_unaligned(secret, 88);
            let low = xxh64::avalanche(seed ^ key_lo) as u128;
            let high = xxh64::avalanche(seed ^ key_hi) as u128;
            (high << 64) | low
        }
        1..=3 => xxh3_128_1to3(input, seed, secret),
        4..=8 => xxh3_128_4to8(input, seed, secret),
        _ => xxh3_128_9to16(input, seed, secret),
    }
}
/// 128-bit hash for inputs of 17 to 128 bytes (the range the dispatcher routes here).
///
/// Mixes pairs of chunks, one counted from the front and one from the back of `input`,
/// working from the innermost pair outwards; longer inputs go through more pairs.
///
/// * `input` - bytes to hash.
/// * `seed` - forwarded to every `mix32_b` call and folded into the high half at the end.
/// * `secret` - secret material, read at offsets 0, 32, 64 and 96.
#[inline(always)]
fn xxh3_128_7to128(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
let mut lo = (input.len() as u64).wrapping_mul(xxh64::PRIME_1);
let mut hi = 0;
// The nested conditions add one extra front/back pair per additional 32 bytes of input.
if input.len() > 32 {
if input.len() > 64 {
if input.len() > 96 {
mix32_b(
&mut lo,
&mut hi,
get_aligned_chunk_ref(input, 48),
get_aligned_chunk_ref(input, input.len() - 64),
get_aligned_chunk_ref(secret, 96),
seed,
);
}
mix32_b(
&mut lo,
&mut hi,
get_aligned_chunk_ref(input, 32),
get_aligned_chunk_ref(input, input.len() - 48),
get_aligned_chunk_ref(secret, 64),
seed,
);
}
mix32_b(
&mut lo,
&mut hi,
get_aligned_chunk_ref(input, 16),
get_aligned_chunk_ref(input, input.len() - 32),
get_aligned_chunk_ref(secret, 32),
seed,
);
}
// The outermost pair is mixed for every length.
mix32_b(
&mut lo,
&mut hi,
get_aligned_chunk_ref(input, 0),
get_aligned_chunk_ref(input, input.len() - 16),
get_aligned_chunk_ref(secret, 0),
seed,
);
// Low half: avalanche of the sum. High half: negated avalanche of a weighted combination
// of both lanes and the seed-adjusted length.
to_u128!(
avalanche(lo.wrapping_add(hi)),
0u64.wrapping_sub(avalanche(
lo.wrapping_mul(xxh64::PRIME_1)
.wrapping_add(hi.wrapping_mul(xxh64::PRIME_4))
.wrapping_add(
(input.len() as u64)
.wrapping_sub(seed)
.wrapping_mul(xxh64::PRIME_2)
)
))
)
}
/// 128-bit hash for inputs of 129 to `MID_SIZE_MAX` bytes.
///
/// Mixes the input in 32-byte rounds: the first four rounds use the secret at the same
/// offset as the input, both lanes are then avalanched, and the remaining rounds use the
/// secret shifted by `START_OFFSET`. A final round mixes the last 32 bytes with the
/// negated seed.
#[inline(never)]
fn xxh3_128_129to240(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
    const START_OFFSET: usize = 3;
    const LAST_OFFSET: usize = 17;
    const LEADING_ROUNDS: usize = 4;

    let len = input.len();
    let nb_rounds = len / 32;
    debug_assert!(nb_rounds >= 4);

    let mut lo = (len as u64).wrapping_mul(xxh64::PRIME_1);
    let mut hi = 0;

    for round in 0..LEADING_ROUNDS {
        let at = 32 * round;
        mix32_b(
            &mut lo,
            &mut hi,
            get_aligned_chunk_ref(input, at),
            get_aligned_chunk_ref(input, at + 16),
            get_aligned_chunk_ref(secret, at),
            seed,
        );
    }

    lo = avalanche(lo);
    hi = avalanche(hi);

    for round in LEADING_ROUNDS..nb_rounds {
        let at = 32 * round;
        let secret_at = START_OFFSET.wrapping_add(32 * (round - LEADING_ROUNDS));
        mix32_b(
            &mut lo,
            &mut hi,
            get_aligned_chunk_ref(input, at),
            get_aligned_chunk_ref(input, at + 16),
            get_aligned_chunk_ref(secret, secret_at),
            seed,
        );
    }

    // Last 32 bytes, fed back half first, keyed near the end of the minimal secret.
    mix32_b(
        &mut lo,
        &mut hi,
        get_aligned_chunk_ref(input, len - 16),
        get_aligned_chunk_ref(input, len - 32),
        get_aligned_chunk_ref(secret, SECRET_SIZE_MIN - LAST_OFFSET - 16),
        0u64.wrapping_sub(seed),
    );

    let low = avalanche(lo.wrapping_add(hi));
    let weighted = lo
        .wrapping_mul(xxh64::PRIME_1)
        .wrapping_add(hi.wrapping_mul(xxh64::PRIME_4))
        .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(xxh64::PRIME_2));
    let high = 0u64.wrapping_sub(avalanche(weighted));
    to_u128!(low, high)
}
/// Length-based dispatcher for the one-shot 128-bit hash.
///
/// Inputs up to `MID_SIZE_MAX` bytes go to the dedicated short and mid-size routines;
/// anything longer is handed to `long_hash_fn`. The secret length is only checked in
/// debug builds.
#[inline(always)]
fn xxh3_128_internal(
    input: &[u8],
    seed: u64,
    secret: &[u8],
    long_hash_fn: LongHashFn128,
) -> u128 {
    debug_assert!(secret.len() >= SECRET_SIZE_MIN);

    match input.len() {
        0..=16 => xxh3_128_0to16(input, seed, secret),
        17..=128 => xxh3_128_7to128(input, seed, secret),
        len if len <= MID_SIZE_MAX => xxh3_128_129to240(input, seed, secret),
        _ => long_hash_fn(input, seed, secret),
    }
}
/// Long-input 128-bit hash with the default secret; the seed and secret arguments are
/// ignored and only exist to match the long-hash callback signature.
#[inline(never)]
fn xxh3_128_long_default(input: &[u8], _seed: u64, _secret: &[u8]) -> u128 {
    let secret = &DEFAULT_SECRET;
    xxh3_128_long_impl(input, secret)
}
/// Long-input 128-bit hash for a seed: seed `0` uses the default secret directly, any
/// other seed hashes with the secret derived by `custom_default_secret`. The passed
/// secret is ignored.
#[inline(never)]
fn xxh3_128_long_with_seed(input: &[u8], seed: u64, _secret: &[u8]) -> u128 {
    if seed == 0 {
        xxh3_128_long_impl(input, &DEFAULT_SECRET)
    } else {
        xxh3_128_long_impl(input, &custom_default_secret(seed))
    }
}
/// Long-input 128-bit hash with a caller-provided secret; the seed argument is ignored
/// and only exists to match the long-hash callback signature.
#[inline(never)]
fn xxh3_128_long_with_secret(input: &[u8], _seed: u64, secret: &[u8]) -> u128 {
    let custom = secret;
    xxh3_128_long_impl(input, custom)
}
/// Returns the 128-bit XXH3 hash of `input` using seed `0` and the default secret.
#[inline]
pub fn xxh3_128(input: &[u8]) -> u128 {
    const NO_SEED: u64 = 0;
    xxh3_128_internal(input, NO_SEED, &DEFAULT_SECRET, xxh3_128_long_default)
}
/// Returns the 128-bit XXH3 hash of `input` for the given `seed`.
///
/// Short and mid-size inputs are hashed with the default secret plus the seed; long
/// inputs go through `xxh3_128_long_with_seed`.
#[inline]
pub fn xxh3_128_with_seed(input: &[u8], seed: u64) -> u128 {
    let secret = &DEFAULT_SECRET;
    xxh3_128_internal(input, seed, secret, xxh3_128_long_with_seed)
}
#[inline]
pub fn xxh3_128_with_secret(input: &[u8], secret: &[u8]) -> u128 {
xxh3_128_internal(input, 0, secret, xxh3_128_long_with_secret)
}
}
/// Hasher that collects short inputs in an inline buffer and switches to a streaming
/// XXH3 state once the collected bytes no longer fit.
#[allow(clippy::large_enum_variant)]
pub enum NoatunHasher {
/// Inline mode: a 64-byte buffer and the number of bytes of it that are in use.
Small([u8; 64], usize),
/// Streaming mode: all bytes written so far have been fed into this hasher.
Large(Xxh3Default),
}
impl Default for NoatunHasher {
#[inline]
fn default() -> Self {
NoatunHasher::Small([0; 64], 0)
}
}
impl NoatunHasher {
    /// Creates an empty hasher in inline mode; same as [`Default::default`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Switches the hasher to streaming mode and feeds `more_bytes` into it.
    ///
    /// In inline mode the bytes collected so far are replayed into a fresh streaming
    /// hasher before `more_bytes` is appended. If the hasher is already in streaming mode,
    /// `more_bytes` is simply fed to the existing state, so calling this public method in
    /// either state is safe.
    #[cold]
    pub fn transform(&mut self, more_bytes: &[u8]) {
        match self {
            NoatunHasher::Small(data, ptr) => {
                let mut streaming = Xxh3Default::default();
                if *ptr > 0 {
                    streaming.update(&data[..*ptr]);
                }
                streaming.update(more_bytes);
                *self = NoatunHasher::Large(streaming);
            }
            // Already streaming: there is nothing to convert, just keep hashing.
            NoatunHasher::Large(streaming) => streaming.update(more_bytes),
        }
    }
}
impl Hasher for NoatunHasher {
    /// Hashes the inline bytes in one shot, or asks the streaming hasher for its digest.
    #[inline]
    fn finish(&self) -> u64 {
        match self {
            NoatunHasher::Large(streaming) => streaming.digest(),
            NoatunHasher::Small(data, used) => xxh3::xxh3_64(&data[..*used]),
        }
    }

    /// Appends `a_bytes`: into the inline buffer while it still fits, otherwise via the
    /// streaming hasher (converting to streaming mode first if necessary).
    #[inline]
    fn write(&mut self, a_bytes: &[u8]) {
        match self {
            NoatunHasher::Large(streaming) => {
                streaming.update(a_bytes);
            }
            NoatunHasher::Small(data, used) => {
                let end = *used + a_bytes.len();
                if end > data.len() {
                    self.transform(a_bytes);
                } else {
                    data[*used..end].copy_from_slice(a_bytes);
                    *used = end;
                }
            }
        }
    }
}
#[cfg(test)]
mod xxh3_tests {
    use crate::xxh3_vendored::NoatunHasher;
    use std::hash::Hasher;

    /// `NoatunHasher` must agree with the one-shot hash, both for input that stays in the
    /// inline buffer and for input that forces the switch to streaming mode.
    #[test]
    fn test_hasher() {
        {
            let mut inline_only = NoatunHasher::new();
            inline_only.write(b"1234");
            assert_eq!(inline_only.finish(), 9777848219803310049);
        }
        {
            let mut spilled = NoatunHasher::new();
            spilled.write(b"5678");
            spilled.write(b" (Nyctereutes procyonoides) ar ett hunddjur som placeras som ensam art i slaktet Nyctereutes. Den har sitt ursprungliga utbredningsomrade. 5678 (Nyctereutes procyonoides) ar ett hunddjur som placeras som ensam art i slaktet Nyctereutes. Den har sitt ursprungliga utbredningsomrade. 5678 (Nyctereutes procyonoides) ar ett hunddjur som placeras som ensam art i slaktet Nyctereutes. Den har sitt ursprungliga utbredningsomrade");
            assert_eq!(spilled.finish(), 2773116291557897730);
        }
    }

    /// Pins the one-shot 64-bit hash for a few known inputs.
    #[test]
    fn test_xxh3() {
        use crate::xxh3_vendored::xxh3::xxh3_64;

        {
            let result = xxh3_64(b"1234");
            println!("{result:?}");
            assert_eq!(result, 9777848219803310049);
        }
        {
            let result = xxh3_64(b"5678");
            println!("{result:?}");
            assert_eq!(result, 14901654952795293208);
        }
        {
            let result = xxh3_64(b"5678 (Nyctereutes procyonoides) ar ett hunddjur som placeras som ensam art i slaktet Nyctereutes. Den har sitt ursprungliga utbredningsomrade. 5678 (Nyctereutes procyonoides) ar ett hunddjur som placeras som ensam art i slaktet Nyctereutes. Den har sitt ursprungliga utbredningsomrade. 5678 (Nyctereutes procyonoides) ar ett hunddjur som placeras som ensam art i slaktet Nyctereutes. Den har sitt ursprungliga utbredningsomrade");
            println!("{result:?}");
            assert_eq!(result, 2773116291557897730);
        }
    }
}