#![allow(unsafe_code)]
#![allow(clippy::indexing_slicing)]
use core::simd::i64x2;
use super::{
ACC_NB, DEFAULT_SECRET, INITIAL_ACC, PRIME32_1, PRIME64_1, PRIME64_2, SECRET_CONSUME_RATE, SECRET_LASTACC_START,
SECRET_MERGEACCS_START, STRIPE_LEN,
};
// Byte-selection mask for `vperm`: indices 7..=0 then 15..=8 pick the bytes of
// each 64-bit lane in reverse order, i.e. a per-doubleword byte swap
// (big-endian <-> little-endian reinterpretation of each lane).
const BSWAP_MASK: [u8; 16] = [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8];
/// Lane-wise 64-bit addition via the z/Architecture `VAG`
/// (Vector Add, doubleword) instruction; vector adds are modular,
/// which is exactly the wrapping addition XXH3 requires.
///
/// # Safety
/// Caller must ensure the vector facility is available, per the
/// `target_feature(enable = "vector")` contract.
#[inline]
#[target_feature(enable = "vector")]
unsafe fn vag(a: i64x2, b: i64x2) -> i64x2 {
    let out: i64x2;
    // SAFETY: pure register-to-register instruction; no memory or stack
    // access, as declared by `options(nomem, nostack, pure)`.
    unsafe {
        core::arch::asm!(
            "vag {out}, {a}, {b}",
            out = lateout(vreg) out,
            a = in(vreg) a,
            b = in(vreg) b,
            options(nomem, nostack, pure)
        );
    }
    out
}
/// Logical (zero-filling) right shift of each 64-bit lane by the
/// compile-time constant `SHIFT`, via `VESRLG`
/// (Vector Element Shift Right Logical, doubleword).
///
/// # Safety
/// Caller must ensure the vector facility is available, per the
/// `target_feature(enable = "vector")` contract.
#[inline]
#[target_feature(enable = "vector")]
unsafe fn vesrlg<const SHIFT: u32>(a: i64x2) -> i64x2 {
    let out: i64x2;
    // SAFETY: pure register-to-register instruction with an immediate shift
    // amount; no memory or stack access (`nomem, nostack, pure`).
    unsafe {
        core::arch::asm!(
            "vesrlg {out}, {a}, {shift}",
            out = lateout(vreg) out,
            a = in(vreg) a,
            shift = const SHIFT,
            options(nomem, nostack, pure)
        );
    }
    out
}
/// Left shift of each 64-bit lane by the compile-time constant `SHIFT`,
/// via `VESLG` (Vector Element Shift Left, doubleword).
///
/// # Safety
/// Caller must ensure the vector facility is available, per the
/// `target_feature(enable = "vector")` contract.
#[inline]
#[target_feature(enable = "vector")]
unsafe fn veslg<const SHIFT: u32>(a: i64x2) -> i64x2 {
    let out: i64x2;
    // SAFETY: pure register-to-register instruction with an immediate shift
    // amount; no memory or stack access (`nomem, nostack, pure`).
    unsafe {
        core::arch::asm!(
            "veslg {out}, {a}, {shift}",
            out = lateout(vreg) out,
            a = in(vreg) a,
            shift = const SHIFT,
            options(nomem, nostack, pure)
        );
    }
    out
}
/// `VMLOF` (Vector Multiply Logical Odd, fullword): multiplies the
/// odd-indexed 32-bit elements of `a` and `b`, yielding a full 64-bit
/// product per doubleword. With big-endian element numbering the odd
/// fullwords are the low 32 bits of each 64-bit lane, so this computes
/// `low32(a) * low32(b)` per lane — the 32x32->64 multiply XXH3 uses.
///
/// # Safety
/// Caller must ensure the vector facility is available, per the
/// `target_feature(enable = "vector")` contract.
#[inline]
#[target_feature(enable = "vector")]
unsafe fn vmlof(a: i64x2, b: i64x2) -> i64x2 {
    let out: i64x2;
    // SAFETY: pure register-to-register instruction; no memory or stack
    // access (`nomem, nostack, pure`).
    unsafe {
        core::arch::asm!(
            "vmlof {out}, {a}, {b}",
            out = lateout(vreg) out,
            a = in(vreg) a,
            b = in(vreg) b,
            options(nomem, nostack, pure)
        );
    }
    out
}
/// `VPERM` (Vector Permute): each byte of `mask` selects one byte from the
/// concatenation of the two source operands. Both sources are `a` here, so
/// this is an arbitrary byte shuffle of `a` (used with [`BSWAP_MASK`] to
/// byte-swap each 64-bit lane).
///
/// # Safety
/// Caller must ensure the vector facility is available, per the
/// `target_feature(enable = "vector")` contract.
#[inline]
#[target_feature(enable = "vector")]
unsafe fn vperm(a: i64x2, mask: i64x2) -> i64x2 {
    let out: i64x2;
    // SAFETY: pure register-to-register instruction; no memory or stack
    // access (`nomem, nostack, pure`).
    unsafe {
        core::arch::asm!(
            "vperm {out}, {a}, {a}, {mask}",
            out = lateout(vreg) out,
            a = in(vreg) a,
            mask = in(vreg) mask,
            options(nomem, nostack, pure)
        );
    }
    out
}
/// Swaps the two 64-bit lanes of `a` using `VPDI`
/// (Vector Permute Doubleword Immediate) with immediate 4, which selects
/// doubleword 1 of the first operand and doubleword 0 of the second
/// (both `a` here).
///
/// # Safety
/// Caller must ensure the vector facility is available, per the
/// `target_feature(enable = "vector")` contract.
#[inline]
#[target_feature(enable = "vector")]
unsafe fn vpdi_swap(a: i64x2) -> i64x2 {
    let out: i64x2;
    // SAFETY: pure register-to-register instruction; no memory or stack
    // access (`nomem, nostack, pure`).
    unsafe {
        core::arch::asm!(
            "vpdi {out}, {a}, {a}, 4",
            out = lateout(vreg) out,
            a = in(vreg) a,
            options(nomem, nostack, pure)
        );
    }
    out
}
/// Loads 16 bytes from `ptr` as a vector, with no alignment requirement
/// and no byte reordering.
///
/// # Safety
/// `ptr` must be valid for a 16-byte read.
#[inline(always)]
unsafe fn vload_raw(ptr: *const u8) -> i64x2 {
    // SAFETY: the caller guarantees `ptr` is readable for 16 bytes;
    // `read_unaligned` imposes no alignment requirement.
    unsafe { ptr.cast::<i64x2>().read_unaligned() }
}
/// Stores the 16 bytes of `val` to `ptr`, with no alignment requirement
/// and no byte reordering.
///
/// # Safety
/// `ptr` must be valid for a 16-byte write.
#[inline(always)]
unsafe fn vstore(ptr: *mut u8, val: i64x2) {
    // SAFETY: the caller guarantees `ptr` is writable for 16 bytes;
    // `write_unaligned` imposes no alignment requirement.
    unsafe { ptr.cast::<i64x2>().write_unaligned(val) }
}
/// Loads 16 bytes from `ptr` and byte-reverses each 64-bit lane (using the
/// precomputed `bswap` permute mask), so lanes hold the little-endian
/// interpretation of the input bytes as the XXH3 specification requires.
///
/// # Safety
/// `ptr` must be valid for a 16-byte read and the vector facility must be
/// available.
#[inline]
#[target_feature(enable = "vector")]
unsafe fn vload_le(ptr: *const u8, bswap: i64x2) -> i64x2 {
    // SAFETY: read validity is the caller's obligation; the permute is a
    // pure register operation.
    let native = unsafe { vload_raw(ptr) };
    unsafe { vperm(native, bswap) }
}
/// Materialises [`BSWAP_MASK`] in a vector register for use with `vperm`.
#[inline(always)]
unsafe fn load_bswap_mask() -> i64x2 {
    let mask_ptr: *const u8 = BSWAP_MASK.as_ptr();
    // SAFETY: `BSWAP_MASK` is a 16-byte static, exactly one vector wide.
    unsafe { vload_raw(mask_ptr) }
}
/// Loads the 8-lane u64 accumulator into four vector registers
/// (two 64-bit lanes each), in native byte order.
///
/// # Safety
/// Vector facility must be available.
#[inline]
#[target_feature(enable = "vector")]
unsafe fn load_acc(initial: &[u64; ACC_NB]) -> [i64x2; 4] {
    let base = initial.as_ptr().cast::<u8>();
    // SAFETY: `initial` is ACC_NB (8) u64s = 64 bytes, so offsets
    // 0, 16, 32, 48 each leave room for a full 16-byte read.
    core::array::from_fn(|idx| unsafe { vload_raw(base.add(idx * 16)) })
}
#[inline]
#[target_feature(enable = "vector")]
unsafe fn store_acc(acc: &[i64x2; 4]) -> [u64; ACC_NB] {
unsafe {
let mut out = [0u64; ACC_NB];
let p = out.as_mut_ptr() as *mut u8;
vstore(p, acc[0]);
vstore(p.add(16), acc[1]);
vstore(p.add(32), acc[2]);
vstore(p.add(48), acc[3]);
out
}
}
/// XXH3 accumulate step: folds one 64-byte stripe into the accumulator.
/// Per 64-bit lane: `acc += swap64(data) + low32(data^key) * high32(data^key)`.
///
/// # Safety
/// `stripe` and `secret` must each be valid for a 64-byte read; vector
/// facility must be available.
#[inline]
#[target_feature(enable = "vector")]
unsafe fn accumulate_512(acc: &mut [i64x2; 4], stripe: *const u8, secret: *const u8) {
    unsafe {
        let bswap = load_bswap_mask();
        for lane in 0..4usize {
            let off = lane.strict_mul(16);
            let data = vload_le(stripe.add(off), bswap);
            let key = vload_le(secret.add(off), bswap);
            let mixed = data ^ key;
            // 32x32->64 multiply of the low and high halves of each lane.
            let product = vmlof(mixed, vesrlg::<32>(mixed));
            // The reference algorithm adds the lane-swapped input data.
            let swapped = vpdi_swap(data);
            acc[lane] = vag(product, vag(acc[lane], swapped));
        }
    }
}
/// XXH3 scramble step, run once per block:
/// `acc = (acc ^ (acc >> 47) ^ key) * PRIME32_1` per 64-bit lane.
/// The 64x32 multiply is built from two `vmlof` 32x32->64 multiplies:
/// `lo32 * P + ((hi32 * P) << 32)`.
///
/// # Safety
/// `secret` must be valid for a 64-byte read; vector facility must be
/// available.
#[inline]
#[target_feature(enable = "vector")]
unsafe fn scramble_acc(acc: &mut [i64x2; 4], secret: *const u8) {
    unsafe {
        let bswap = load_bswap_mask();
        // PRIME32_1 sits in the low 32 bits of each lane, where vmlof reads.
        let prime = i64x2::splat(PRIME32_1 as i64);
        for lane in 0..4usize {
            let v = acc[lane];
            let folded = v ^ vesrlg::<47>(v);
            let key = vload_le(secret.add(lane.strict_mul(16)), bswap);
            let mixed = folded ^ key;
            let lo = vmlof(mixed, prime);
            let hi = vmlof(vesrlg::<32>(mixed), prime);
            acc[lane] = vag(lo, veslg::<32>(hi));
        }
    }
}
#[target_feature(enable = "vector")]
unsafe fn hash_long_internal_loop(input: &[u8], secret: &[u8]) -> [u64; ACC_NB] {
unsafe {
let mut acc = load_acc(&INITIAL_ACC);
let nb_stripes = (secret.len().strict_sub(STRIPE_LEN)) / SECRET_CONSUME_RATE;
let block_len = STRIPE_LEN.strict_mul(nb_stripes);
let nb_blocks = (input.len().strict_sub(1)) / block_len;
let mut block = 0usize;
while block < nb_blocks {
let mut stripe = 0usize;
while stripe < nb_stripes {
let input_off = block.strict_mul(block_len).strict_add(stripe.strict_mul(STRIPE_LEN));
let secret_off = stripe.strict_mul(SECRET_CONSUME_RATE);
accumulate_512(&mut acc, input.as_ptr().add(input_off), secret.as_ptr().add(secret_off));
stripe = stripe.strict_add(1);
}
scramble_acc(&mut acc, secret.as_ptr().add(secret.len().strict_sub(STRIPE_LEN)));
block = block.strict_add(1);
}
let nb_stripes_final = (input.len().strict_sub(1).strict_sub(block_len.strict_mul(nb_blocks))) / STRIPE_LEN;
let mut stripe = 0usize;
while stripe < nb_stripes_final {
let input_off = nb_blocks
.strict_mul(block_len)
.strict_add(stripe.strict_mul(STRIPE_LEN));
let secret_off = stripe.strict_mul(SECRET_CONSUME_RATE);
accumulate_512(&mut acc, input.as_ptr().add(input_off), secret.as_ptr().add(secret_off));
stripe = stripe.strict_add(1);
}
accumulate_512(
&mut acc,
input.as_ptr().add(input.len().strict_sub(STRIPE_LEN)),
secret
.as_ptr()
.add(secret.len().strict_sub(STRIPE_LEN).strict_sub(SECRET_LASTACC_START)),
);
store_acc(&acc)
}
}
/// 64-bit XXH3 for long inputs (past the mid-size cutoff). A zero seed uses
/// the built-in secret; a non-zero seed derives a custom secret first.
pub fn xxh3_64_long(input: &[u8], seed: u64) -> u64 {
    // Length contribution fed into the final merge, shared by both paths.
    let start = (input.len() as u64).wrapping_mul(PRIME64_1);
    if seed == 0 {
        // SAFETY: this backend is only compiled where the vector facility
        // is available.
        let acc = unsafe { hash_long_internal_loop(input, &DEFAULT_SECRET) };
        super::merge_accs(&acc, &DEFAULT_SECRET, SECRET_MERGEACCS_START, start)
    } else {
        let secret = super::custom_default_secret(seed);
        // SAFETY: as above.
        let acc = unsafe { hash_long_internal_loop(input, &secret) };
        super::merge_accs(&acc, &secret, SECRET_MERGEACCS_START, start)
    }
}
/// Full 64-bit XXH3 entry point (test/diagnostic builds only): dispatches
/// on input length to the short, mid, and long implementations.
#[cfg(any(test, feature = "diag"))]
pub fn xxh3_64_with_seed(input: &[u8], seed: u64) -> u64 {
    let len = input.len();
    if len <= 16 {
        super::xxh3_64_0to16(input, seed, &DEFAULT_SECRET)
    } else if len <= 128 {
        super::xxh3_64_7to128(input, seed, &DEFAULT_SECRET)
    } else if len <= super::MID_SIZE_MAX {
        super::xxh3_64_129to240(input, seed, &DEFAULT_SECRET)
    } else {
        xxh3_64_long(input, seed)
    }
}
/// 128-bit XXH3 for long inputs. Shares the accumulator loop with the
/// 64-bit path; only the finalization differs.
pub fn xxh3_128_long(input: &[u8], seed: u64) -> u128 {
    if seed == 0 {
        // SAFETY: this backend is only compiled where the vector facility
        // is available.
        let acc = unsafe { hash_long_internal_loop(input, &DEFAULT_SECRET) };
        xxh3_128_long_finalize(&acc, &DEFAULT_SECRET, input.len())
    } else {
        let secret = super::custom_default_secret(seed);
        // SAFETY: as above.
        let acc = unsafe { hash_long_internal_loop(input, &secret) };
        xxh3_128_long_finalize(&acc, &secret, input.len())
    }
}
/// Merges the accumulator into the two 64-bit halves of the 128-bit digest:
/// the low half keyed at `SECRET_MERGEACCS_START`, the high half keyed from
/// the mirrored offset at the end of the secret, per the XXH3 reference.
#[inline(always)]
fn xxh3_128_long_finalize(acc: &[u64; ACC_NB], secret: &[u8], len: usize) -> u128 {
    let len64 = len as u64;
    let lo = super::merge_accs(acc, secret, SECRET_MERGEACCS_START, len64.wrapping_mul(PRIME64_1));
    // Mirrored secret offset: end of secret minus the 64-byte accumulator
    // footprint minus the merge-start slack.
    let hi_offset = secret
        .len()
        .strict_sub(ACC_NB.strict_mul(core::mem::size_of::<u64>()))
        .strict_sub(SECRET_MERGEACCS_START);
    let hi = super::merge_accs(acc, secret, hi_offset, !len64.wrapping_mul(PRIME64_2));
    (u128::from(hi) << 64) | u128::from(lo)
}