#![allow(unsafe_code)]
#![allow(clippy::indexing_slicing)]
use core::arch::aarch64::*;
use super::{
ACC_NB, DEFAULT_SECRET, DEFAULT_SECRET_SIZE, INITIAL_ACC, PRIME32_1, PRIME64_1, PRIME64_2, SECRET_CONSUME_RATE,
SECRET_LASTACC_START, SECRET_MERGEACCS_START, STRIPE_LEN,
};
// Stripes per block: the secret window advances by SECRET_CONSUME_RATE each
// stripe, so this is how many stripes fit before the window (STRIPE_LEN wide)
// would run off the end of the default secret.
const STRIPES_PER_BLOCK: usize = (DEFAULT_SECRET_SIZE - STRIPE_LEN) / SECRET_CONSUME_RATE;
// Input bytes consumed per block (between accumulator scrambles).
const BLOCK_LEN: usize = STRIPE_LEN * STRIPES_PER_BLOCK;
// Secret offset used by `scramble_acc`: the last STRIPE_LEN bytes of the secret.
const SCRAMBLE_SECRET_OFFSET: usize = DEFAULT_SECRET_SIZE - STRIPE_LEN;
// Secret offset for the final last-stripe accumulate, SECRET_LASTACC_START
// bytes back from the scramble offset.
const LAST_ACC_SECRET_OFFSET: usize = DEFAULT_SECRET_SIZE - STRIPE_LEN - SECRET_LASTACC_START;
#[inline]
#[target_feature(enable = "neon")]
unsafe fn load_acc(initial: &[u64; ACC_NB]) -> [uint64x2_t; 4] {
unsafe {
[
vld1q_u64(initial.as_ptr()),
vld1q_u64(initial.as_ptr().add(2)),
vld1q_u64(initial.as_ptr().add(4)),
vld1q_u64(initial.as_ptr().add(6)),
]
}
}
#[inline]
#[target_feature(enable = "neon")]
unsafe fn store_acc(acc: &[uint64x2_t; 4]) -> [u64; ACC_NB] {
unsafe {
let mut out = [0u64; ACC_NB];
vst1q_u64(out.as_mut_ptr(), acc[0]);
vst1q_u64(out.as_mut_ptr().add(2), acc[1]);
vst1q_u64(out.as_mut_ptr().add(4), acc[2]);
vst1q_u64(out.as_mut_ptr().add(6), acc[3]);
out
}
}
/// XXH3 `accumulate_512` (NEON): folds one 64-byte input stripe into the
/// accumulators, processing two 128-bit vectors (32 bytes) per iteration.
///
/// Per pair of 64-bit lanes: `acc += swap64(data) +
/// widen_mul(lo32(data ^ key), hi32(data ^ key))`, where `swap64` exchanges
/// the two 64-bit halves of a vector.
///
/// # Safety
/// Caller must guarantee NEON support and that `stripe` and `secret` are each
/// readable for at least 64 bytes.
#[inline]
#[target_feature(enable = "neon")]
unsafe fn accumulate_512(acc: &mut [uint64x2_t; 4], stripe: *const u8, secret: *const u8) {
    unsafe {
        let mut i = 0usize;
        while i < 4 {
            // Unaligned loads of two adjacent 16-byte chunks of input/secret.
            let data_vec_1 = vreinterpretq_u64_u8(vld1q_u8(stripe.add(i.strict_mul(16))));
            let data_vec_2 = vreinterpretq_u64_u8(vld1q_u8(stripe.add(i.strict_add(1).strict_mul(16))));
            let key_vec_1 = vreinterpretq_u64_u8(vld1q_u8(secret.add(i.strict_mul(16))));
            let key_vec_2 = vreinterpretq_u64_u8(vld1q_u8(secret.add(i.strict_add(1).strict_mul(16))));
            // Swap the 64-bit halves of each data vector; the swapped data is
            // the additive base of the multiply-accumulate below.
            let data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
            let data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
            let data_key_1 = veorq_u64(data_vec_1, key_vec_1);
            let data_key_2 = veorq_u64(data_vec_2, key_vec_2);
            // De-interleave the 32-bit halves of every 64-bit `data ^ key`
            // lane across both vectors: `.0` holds the low u32s, `.1` the
            // high u32s.
            let unzipped = vuzpq_u32(vreinterpretq_u32_u64(data_key_1), vreinterpretq_u32_u64(data_key_2));
            let data_key_lo = unzipped.0;
            let data_key_hi = unzipped.1;
            // Widening multiply-accumulate: `vmlal_u32` consumes the low
            // halves of the unzipped pair (vector 1), `vmlal_high_u32` the
            // high halves (vector 2).
            let sum_1 = vmlal_u32(data_swap_1, vget_low_u32(data_key_lo), vget_low_u32(data_key_hi));
            let sum_2 = vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
            acc[i] = vaddq_u64(acc[i], sum_1);
            acc[i.strict_add(1)] = vaddq_u64(acc[i.strict_add(1)], sum_2);
            i = i.strict_add(2);
        }
    }
}
/// XXH3 `scrambleAcc` (NEON): for each 64-bit accumulator lane,
/// `acc = ((acc ^ (acc >> 47)) ^ secret) * PRIME32_1 (mod 2^64)`.
///
/// # Safety
/// Caller must guarantee NEON support and that `secret` is readable for at
/// least 64 bytes.
#[inline]
#[target_feature(enable = "neon")]
unsafe fn scramble_acc(acc: &mut [uint64x2_t; 4], secret: *const u8) {
    unsafe {
        // PRIME32_1 broadcast for the widening low-half multiply below...
        let prime_lo = vdup_n_u32(PRIME32_1);
        // ...and viewed as u32 lanes [0, P, 0, P] (little-endian view of
        // P << 32 per 64-bit lane), so a plain 32-bit multiply yields
        // `(hi32 * P mod 2^32) << 32` in each 64-bit lane.
        let prime_hi = vreinterpretq_u32_u64(vdupq_n_u64((PRIME32_1 as u64) << 32));
        let mut i = 0usize;
        while i < 4 {
            let acc_vec = acc[i];
            // xorshift: acc ^= acc >> 47
            let shifted = vshrq_n_u64::<47>(acc_vec);
            let data_vec = veorq_u64(acc_vec, shifted);
            // Mix in 16 secret bytes per vector.
            let key_vec = vreinterpretq_u64_u8(vld1q_u8(secret.add(i.strict_mul(16))));
            let data_key = veorq_u64(data_vec, key_vec);
            // 64x32 product split as lo32*P + ((hi32*P mod 2^32) << 32),
            // which equals data_key * PRIME32_1 mod 2^64.
            let prod_hi = vmulq_u32(vreinterpretq_u32_u64(data_key), prime_hi);
            // vmovn keeps the low 32 bits of each 64-bit lane.
            let data_key_lo = vmovn_u64(data_key);
            acc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, prime_lo);
            i = i.strict_add(1);
        }
    }
}
/// Miri stub: the real `prefetch_stripe` below uses inline asm, which Miri
/// cannot execute. A prefetch is only a performance hint, so a no-op is
/// behavior-preserving.
#[inline(always)]
#[cfg(miri)]
unsafe fn prefetch_stripe(_input_ptr: *const u8) {}
/// Issues an AArch64 `prfm pldl1keep` — a hint to fetch the cache line at
/// `input_ptr` into L1 for a load, with "keep" (temporal) retention — so the
/// next stripe is warm when `accumulate_512` reads it.
///
/// # Safety
/// `prfm` is a hint and does not dereference the pointer, so any address
/// value is acceptable; the pointer is never read here.
#[inline(always)]
#[cfg(not(miri))]
unsafe fn prefetch_stripe(input_ptr: *const u8) {
    unsafe {
        // `nostack`/`preserves_flags`: the instruction writes no registers,
        // flags, or stack memory.
        core::arch::asm!(
            "prfm pldl1keep, [{ptr}]",
            ptr = in(reg) input_ptr,
            options(nostack, preserves_flags)
        );
    }
}
/// Core long-input loop: folds `input` into the eight 64-bit accumulators,
/// mirroring the XXH3 long-hash structure.
///
/// The input is split into blocks of `STRIPES_PER_BLOCK` 64-byte stripes;
/// each stripe is accumulated with a secret window that slides by
/// `SECRET_CONSUME_RATE`, and the accumulators are scrambled after every full
/// block. The trailing partial block is then accumulated without a scramble,
/// and finally the last `STRIPE_LEN` bytes of the input are always
/// accumulated once more with a dedicated secret offset.
///
/// `PREFETCH` selects whether each stripe issues a software prefetch for the
/// stripe after it.
///
/// # Safety
/// Caller must guarantee NEON support, `input.len() >= STRIPE_LEN` (the final
/// accumulate reads the last `STRIPE_LEN` bytes, and the `len - 1` accounting
/// needs a non-empty input), and `secret.len() >= DEFAULT_SECRET_SIZE`.
#[target_feature(enable = "neon")]
unsafe fn hash_long_internal_loop<const PREFETCH: bool>(input: &[u8], secret: &[u8]) -> [u64; ACC_NB] {
    unsafe {
        let mut acc = load_acc(&INITIAL_ACC);
        // `len - 1`: an input ending exactly on a block boundary keeps its
        // final block for the partial-block handling below, where the very
        // last stripe needs special treatment.
        let nb_blocks = (input.len().strict_sub(1)) / BLOCK_LEN;
        let mut block = 0usize;
        while block < nb_blocks {
            let mut stripe = 0usize;
            while stripe < STRIPES_PER_BLOCK {
                let input_off = block.strict_mul(BLOCK_LEN).strict_add(stripe.strict_mul(STRIPE_LEN));
                let input_ptr = input.as_ptr().add(input_off);
                // Secret window slides by SECRET_CONSUME_RATE per stripe.
                let secret_off = stripe.strict_mul(SECRET_CONSUME_RATE);
                if PREFETCH {
                    // `wrapping_add`: the next-stripe address may point past
                    // the end of `input`; it is only a prefetch hint and is
                    // never dereferenced.
                    prefetch_stripe(input_ptr.wrapping_add(STRIPE_LEN));
                }
                accumulate_512(&mut acc, input_ptr, secret.as_ptr().add(secret_off));
                stripe = stripe.strict_add(1);
            }
            // Scramble after every complete block, keyed by the secret's tail.
            scramble_acc(&mut acc, secret.as_ptr().add(SCRAMBLE_SECRET_OFFSET));
            block = block.strict_add(1);
        }
        // Whole stripes remaining in the final partial block; `len - 1` again
        // excludes the very last stripe, which is handled separately below.
        let nb_stripes_final = (input.len().strict_sub(1).strict_sub(BLOCK_LEN.strict_mul(nb_blocks))) / STRIPE_LEN;
        let mut stripe = 0usize;
        while stripe < nb_stripes_final {
            let input_off = nb_blocks
                .strict_mul(BLOCK_LEN)
                .strict_add(stripe.strict_mul(STRIPE_LEN));
            let input_ptr = input.as_ptr().add(input_off);
            let secret_off = stripe.strict_mul(SECRET_CONSUME_RATE);
            if PREFETCH {
                prefetch_stripe(input_ptr.wrapping_add(STRIPE_LEN));
            }
            accumulate_512(&mut acc, input_ptr, secret.as_ptr().add(secret_off));
            stripe = stripe.strict_add(1);
        }
        // The last STRIPE_LEN bytes are always accumulated once more (they
        // may overlap the stripes above) with a dedicated secret offset.
        accumulate_512(
            &mut acc,
            input.as_ptr().add(input.len().strict_sub(STRIPE_LEN)),
            secret.as_ptr().add(LAST_ACC_SECRET_OFFSET),
        );
        store_acc(&acc)
    }
}
/// Picks a `hash_long_internal_loop` monomorphization by input size: inputs
/// of at most 512 bytes take the no-prefetch variant, larger ones enable the
/// software-prefetch variant.
///
/// # Safety
/// Same contract as `hash_long_internal_loop`: NEON support,
/// `input.len() >= STRIPE_LEN`, and a sufficiently long `secret`.
#[inline]
#[target_feature(enable = "neon")]
unsafe fn hash_long_internal_loop_for_len(input: &[u8], secret: &[u8]) -> [u64; ACC_NB] {
    let use_prefetch = input.len() > 512;
    unsafe {
        if use_prefetch {
            hash_long_internal_loop::<true>(input, secret)
        } else {
            hash_long_internal_loop::<false>(input, secret)
        }
    }
}
/// 64-bit XXH3 long-input hash with the built-in default secret.
pub fn xxh3_64_long_default(input: &[u8]) -> u64 {
    let len = input.len();
    // SAFETY: NEON is a baseline feature of AArch64, so the
    // `target_feature(enable = "neon")` helpers are always callable here.
    let acc = unsafe {
        if len > 1024 {
            hash_long_internal_loop_for_len(input, &DEFAULT_SECRET)
        } else {
            // Small long-path inputs skip the prefetch-selection indirection.
            hash_long_internal_loop::<false>(input, &DEFAULT_SECRET)
        }
    };
    let init = (len as u64).wrapping_mul(PRIME64_1);
    super::merge_accs(&acc, &DEFAULT_SECRET, SECRET_MERGEACCS_START, init)
}
/// 64-bit XXH3 long-input hash with a seed; seed zero takes the default
/// (stock-secret) path.
pub fn xxh3_64_long(input: &[u8], seed: u64) -> u64 {
    if seed == 0 {
        return xxh3_64_long_default(input);
    }
    // Non-zero seed: derive a per-seed secret and hash with it.
    let secret = super::custom_default_secret(seed);
    let len = input.len();
    // SAFETY: NEON is a baseline feature of AArch64, so the
    // `target_feature(enable = "neon")` helpers are always callable here.
    let acc = unsafe {
        if len > 1024 {
            hash_long_internal_loop_for_len(input, &secret)
        } else {
            hash_long_internal_loop::<false>(input, &secret)
        }
    };
    let init = (len as u64).wrapping_mul(PRIME64_1);
    super::merge_accs(&acc, &secret, SECRET_MERGEACCS_START, init)
}
/// Test/diagnostic entry point: full 64-bit XXH3 dispatch by input length.
/// Short and mid-size inputs go to the specialized small-input routines
/// (which take the seed alongside the default secret); anything longer than
/// `MID_SIZE_MAX` takes the long-hash path.
#[cfg(any(test, feature = "diag"))]
pub fn xxh3_64_with_seed(input: &[u8], seed: u64) -> u64 {
    let len = input.len();
    match len {
        _ if len <= 16 => super::xxh3_64_0to16(input, seed, &DEFAULT_SECRET),
        _ if len <= 128 => super::xxh3_64_7to128(input, seed, &DEFAULT_SECRET),
        _ if len <= super::MID_SIZE_MAX => super::xxh3_64_129to240(input, seed, &DEFAULT_SECRET),
        _ => xxh3_64_long(input, seed),
    }
}
/// 128-bit XXH3 long-input hash with the built-in default secret.
pub fn xxh3_128_long_default(input: &[u8]) -> u128 {
    // SAFETY: NEON is a baseline feature of AArch64, so the
    // `target_feature(enable = "neon")` helper is always callable here.
    let accumulators = unsafe { hash_long_internal_loop_for_len(input, &DEFAULT_SECRET) };
    xxh3_128_long_finalize(&accumulators, &DEFAULT_SECRET, input.len())
}
/// 128-bit XXH3 long-input hash with a seed; seed zero takes the default
/// (stock-secret) path.
pub fn xxh3_128_long(input: &[u8], seed: u64) -> u128 {
    if seed == 0 {
        return xxh3_128_long_default(input);
    }
    // Non-zero seed: derive a per-seed secret and hash with it.
    let secret = super::custom_default_secret(seed);
    // SAFETY: NEON is a baseline feature of AArch64, so the
    // `target_feature(enable = "neon")` helper is always callable here.
    let accumulators = unsafe { hash_long_internal_loop_for_len(input, &secret) };
    xxh3_128_long_finalize(&accumulators, &secret, input.len())
}
/// Combines the eight accumulators into the 128-bit digest. The low half
/// merges at the standard secret offset with `len * PRIME64_1`; the high half
/// merges at a mirrored offset counted back from the secret's end, seeded
/// with the complement of `len * PRIME64_2`.
#[inline(always)]
fn xxh3_128_long_finalize(acc: &[u64; ACC_NB], secret: &[u8], len: usize) -> u128 {
    let len64 = len as u64;
    // High-half secret offset: from the end of the secret, leaving room for
    // the ACC_NB * 8 accumulator bytes the merge reads.
    let hi_offset = secret
        .len()
        .strict_sub(ACC_NB.strict_mul(core::mem::size_of::<u64>()))
        .strict_sub(SECRET_MERGEACCS_START);
    let lo_half = super::merge_accs(acc, secret, SECRET_MERGEACCS_START, len64.wrapping_mul(PRIME64_1));
    let hi_half = super::merge_accs(acc, secret, hi_offset, !(len64.wrapping_mul(PRIME64_2)));
    ((hi_half as u128) << 64) | (lo_half as u128)
}