#![allow(unsafe_code)]
#![allow(clippy::indexing_slicing)]
use core::simd::i64x2;
use super::{
ACC_NB, DEFAULT_SECRET, INITIAL_ACC, PRIME32_1, PRIME64_1, PRIME64_2, SECRET_CONSUME_RATE, SECRET_LASTACC_START,
SECRET_MERGEACCS_START, STRIPE_LEN,
};
#[inline]
#[target_feature(enable = "altivec", enable = "vsx", enable = "power8-vector")]
/// Lane-wise wrapping 64-bit addition via the POWER8 `vaddudm`
/// (Vector Add Unsigned Doubleword Modulo) instruction.
///
/// # Safety
/// Caller must ensure the CPU actually supports the enabled target
/// features (AltiVec / VSX / POWER8 vector); otherwise this is UB.
unsafe fn vadd_u64(a: i64x2, b: i64x2) -> i64x2 {
    let out: i64x2;
    unsafe {
        core::arch::asm!(
            "vaddudm {out}, {a}, {b}",
            out = lateout(vreg) out,
            a = in(vreg) a,
            b = in(vreg) b,
            // nomem/nostack/pure: no memory access, no stack use, and the
            // result depends only on the register inputs.
            options(nomem, nostack, pure)
        );
    }
    out
}
#[inline]
#[target_feature(enable = "altivec", enable = "vsx", enable = "power8-vector")]
/// Lane-wise logical right shift of each 64-bit lane via the POWER8
/// `vsrd` (Vector Shift Right Doubleword) instruction. Each lane of `a`
/// is shifted by the corresponding lane of `shift` (low 6 bits used).
///
/// # Safety
/// Caller must ensure the CPU supports the enabled target features.
unsafe fn vshr_u64(a: i64x2, shift: i64x2) -> i64x2 {
    let out: i64x2;
    unsafe {
        core::arch::asm!(
            "vsrd {out}, {a}, {shift}",
            out = lateout(vreg) out,
            a = in(vreg) a,
            shift = in(vreg) shift,
            options(nomem, nostack, pure)
        );
    }
    out
}
#[inline]
#[target_feature(enable = "altivec", enable = "vsx", enable = "power8-vector")]
/// Lane-wise left shift of each 64-bit lane via the POWER8 `vsld`
/// (Vector Shift Left Doubleword) instruction. Each lane of `a` is
/// shifted by the corresponding lane of `shift` (low 6 bits used).
///
/// # Safety
/// Caller must ensure the CPU supports the enabled target features.
unsafe fn vshl_u64(a: i64x2, shift: i64x2) -> i64x2 {
    let out: i64x2;
    unsafe {
        core::arch::asm!(
            "vsld {out}, {a}, {shift}",
            out = lateout(vreg) out,
            a = in(vreg) a,
            shift = in(vreg) shift,
            options(nomem, nostack, pure)
        );
    }
    out
}
#[inline]
#[target_feature(enable = "altivec", enable = "vsx", enable = "power8-vector")]
/// 32x32 -> 64-bit multiply per lane via `vmulouw` (Vector Multiply
/// Odd Unsigned Word): multiplies one 32-bit word of each doubleword of
/// `a` and `b`, producing a full 64-bit product per lane.
/// NOTE(review): the "odd" word is expected to map to the low half of
/// each 64-bit lane on this target's endianness — confirm against the
/// Power ISA for the supported build targets.
///
/// # Safety
/// Caller must ensure the CPU supports the enabled target features.
unsafe fn vmul_low32(a: i64x2, b: i64x2) -> i64x2 {
    let out: i64x2;
    unsafe {
        core::arch::asm!(
            "vmulouw {out}, {a}, {b}",
            out = lateout(vreg) out,
            a = in(vreg) a,
            b = in(vreg) b,
            options(nomem, nostack, pure)
        );
    }
    out
}
#[inline(always)]
/// Load 16 bytes from `ptr` as a vector; `read_unaligned` imposes no
/// alignment requirement on the pointer.
///
/// # Safety
/// `ptr` must be valid for a 16-byte read.
unsafe fn vload(ptr: *const u8) -> i64x2 {
    unsafe { ptr.cast::<i64x2>().read_unaligned() }
}
#[inline(always)]
/// Store a vector to `ptr` as 16 raw bytes, with no alignment requirement.
///
/// # Safety
/// `ptr` must be valid for a 16-byte write.
unsafe fn vstore(ptr: *mut u8, val: i64x2) {
    unsafe { ptr.cast::<i64x2>().write_unaligned(val) }
}
#[inline(always)]
/// Exchange the two 64-bit lanes of the vector.
fn vswap(a: i64x2) -> i64x2 {
    // For a 2-lane vector, reversing the lanes is exactly a swap.
    a.reverse()
}
#[inline]
#[target_feature(enable = "altivec", enable = "vsx", enable = "power8-vector")]
unsafe fn load_acc(initial: &[u64; ACC_NB]) -> [i64x2; 4] {
unsafe {
let p = initial.as_ptr() as *const u8;
[vload(p), vload(p.add(16)), vload(p.add(32)), vload(p.add(48))]
}
}
#[inline]
#[target_feature(enable = "altivec", enable = "vsx", enable = "power8-vector")]
unsafe fn store_acc(acc: &[i64x2; 4]) -> [u64; ACC_NB] {
unsafe {
let mut out = [0u64; ACC_NB];
let p = out.as_mut_ptr() as *mut u8;
vstore(p, acc[0]);
vstore(p.add(16), acc[1]);
vstore(p.add(32), acc[2]);
vstore(p.add(48), acc[3]);
out
}
}
#[inline]
#[target_feature(enable = "altivec", enable = "vsx", enable = "power8-vector")]
/// One xxh3 accumulation round over a 64-byte stripe: each 16-byte lane
/// pair of the stripe is XORed with the secret, its halves are multiplied
/// (32x32 -> 64), and the result plus the lane-swapped raw data is folded
/// into the accumulator.
///
/// # Safety
/// `stripe` and `secret` must each be valid for a 64-byte read, and the
/// CPU must support the enabled target features.
unsafe fn accumulate_512(acc: &mut [i64x2; 4], stripe: *const u8, secret: *const u8) {
    unsafe {
        let thirty_two = i64x2::splat(32);
        for (lane, slot) in acc.iter_mut().enumerate() {
            let off = lane.strict_mul(16);
            let data = vload(stripe.add(off));
            let key = vload(secret.add(off));
            let mixed = data ^ key;
            // Multiply the low 32 bits of each lane by the high 32 bits.
            let mixed_hi = vshr_u64(mixed, thirty_two);
            let product = vmul_low32(mixed, mixed_hi);
            // Add the swapped raw data first, then the product — same
            // wrapping-add sequence as the reference implementation.
            let partial = vadd_u64(*slot, vswap(data));
            *slot = vadd_u64(product, partial);
        }
    }
}
#[inline]
#[target_feature(enable = "altivec", enable = "vsx", enable = "power8-vector")]
/// xxh3 accumulator scramble, run once per block:
/// `acc = (acc ^ (acc >> 47) ^ secret) * PRIME32_1` per 64-bit lane,
/// with the 64-bit product synthesized from two 32-bit multiplies.
///
/// # Safety
/// `secret` must be valid for a 64-byte read, and the CPU must support
/// the enabled target features.
unsafe fn scramble_acc(acc: &mut [i64x2; 4], secret: *const u8) {
    unsafe {
        let prime_vec = i64x2::splat(PRIME32_1 as i64);
        let shift_47 = i64x2::splat(47);
        let shift_32 = i64x2::splat(32);
        let mut i = 0usize;
        while i < 4 {
            let acc_vec = acc[i];
            // xorshift: fold the high bits down.
            let shifted = vshr_u64(acc_vec, shift_47);
            let data_vec = acc_vec ^ shifted;
            let key_vec = vload(secret.add(i.strict_mul(16)));
            let data_key = data_vec ^ key_vec;
            // 64-bit multiply by the 32-bit prime, built from two 32x32
            // halves: lo = low32(dk)*p, hi = high32(dk)*p, result = lo + (hi << 32).
            let data_key_hi = vshr_u64(data_key, shift_32);
            let prod_lo = vmul_low32(data_key, prime_vec);
            let prod_hi = vmul_low32(data_key_hi, prime_vec);
            acc[i] = vadd_u64(prod_lo, vshl_u64(prod_hi, shift_32));
            i = i.strict_add(1);
        }
    }
}
#[target_feature(enable = "altivec", enable = "vsx", enable = "power8-vector")]
/// Core xxh3 striping loop: processes `input` in full blocks of
/// `nb_stripes` 64-byte stripes (scrambling the accumulator after each
/// block), then the remaining whole stripes, then one final stripe taken
/// from the very end of the input with a dedicated secret offset.
///
/// # Safety
/// Caller must guarantee `input.len() > STRIPE_LEN` and that `secret` is
/// at least the default secret size, and that the CPU supports the
/// enabled target features.
unsafe fn hash_long_internal_loop(input: &[u8], secret: &[u8]) -> [u64; ACC_NB] {
    unsafe {
        let mut acc = load_acc(&INITIAL_ACC);
        // Stripes per block: each stripe consumes SECRET_CONSUME_RATE bytes
        // of secret, and the last stripe still needs STRIPE_LEN secret bytes.
        let nb_stripes = (secret.len().strict_sub(STRIPE_LEN)) / SECRET_CONSUME_RATE;
        let block_len = STRIPE_LEN.strict_mul(nb_stripes);
        // `len - 1` so an input that is an exact multiple of block_len
        // leaves its last block for the partial-stripe handling below.
        let nb_blocks = (input.len().strict_sub(1)) / block_len;
        let mut block = 0usize;
        while block < nb_blocks {
            let mut stripe = 0usize;
            while stripe < nb_stripes {
                let input_off = block.strict_mul(block_len).strict_add(stripe.strict_mul(STRIPE_LEN));
                let secret_off = stripe.strict_mul(SECRET_CONSUME_RATE);
                accumulate_512(&mut acc, input.as_ptr().add(input_off), secret.as_ptr().add(secret_off));
                stripe = stripe.strict_add(1);
            }
            // Scramble once per completed block, using the tail of the secret.
            scramble_acc(&mut acc, secret.as_ptr().add(secret.len().strict_sub(STRIPE_LEN)));
            block = block.strict_add(1);
        }
        // Remaining whole stripes after the last full block (may be zero).
        let nb_stripes_final = (input.len().strict_sub(1).strict_sub(block_len.strict_mul(nb_blocks))) / STRIPE_LEN;
        let mut stripe = 0usize;
        while stripe < nb_stripes_final {
            let input_off = nb_blocks
                .strict_mul(block_len)
                .strict_add(stripe.strict_mul(STRIPE_LEN));
            let secret_off = stripe.strict_mul(SECRET_CONSUME_RATE);
            accumulate_512(&mut acc, input.as_ptr().add(input_off), secret.as_ptr().add(secret_off));
            stripe = stripe.strict_add(1);
        }
        // Final stripe: always the LAST 64 bytes of input (may overlap the
        // previous stripe), mixed with a dedicated secret offset.
        accumulate_512(
            &mut acc,
            input.as_ptr().add(input.len().strict_sub(STRIPE_LEN)),
            secret
                .as_ptr()
                .add(secret.len().strict_sub(STRIPE_LEN).strict_sub(SECRET_LASTACC_START)),
        );
        store_acc(&acc)
    }
}
/// 64-bit xxh3 for the long-input path. A zero seed reuses the baked-in
/// default secret; any other seed derives a per-seed secret first.
pub fn xxh3_64_long(input: &[u8], seed: u64) -> u64 {
    // Same starting value for the merge in both branches.
    let merge_start = (input.len() as u64).wrapping_mul(PRIME64_1);
    if seed == 0 {
        // SAFETY: relies on caller routing only long inputs here and on the
        // target supporting the required vector features, as the original did.
        let acc = unsafe { hash_long_internal_loop(input, &DEFAULT_SECRET) };
        super::merge_accs(&acc, &DEFAULT_SECRET, SECRET_MERGEACCS_START, merge_start)
    } else {
        let secret = super::custom_default_secret(seed);
        let acc = unsafe { hash_long_internal_loop(input, &secret) };
        super::merge_accs(&acc, &secret, SECRET_MERGEACCS_START, merge_start)
    }
}
#[cfg(any(test, feature = "diag"))]
/// Seeded 64-bit xxh3 entry point: dispatches to the size-specialized
/// kernels by input length, falling through to the long path.
pub fn xxh3_64_with_seed(input: &[u8], seed: u64) -> u64 {
    let len = input.len();
    match len {
        0..=16 => super::xxh3_64_0to16(input, seed, &DEFAULT_SECRET),
        17..=128 => super::xxh3_64_7to128(input, seed, &DEFAULT_SECRET),
        _ if len <= super::MID_SIZE_MAX => super::xxh3_64_129to240(input, seed, &DEFAULT_SECRET),
        _ => xxh3_64_long(input, seed),
    }
}
/// 128-bit xxh3 for the long-input path; mirrors `xxh3_64_long` but
/// finalizes into a 128-bit result.
pub fn xxh3_128_long(input: &[u8], seed: u64) -> u128 {
    if seed != 0 {
        let secret = super::custom_default_secret(seed);
        let acc = unsafe { hash_long_internal_loop(input, &secret) };
        return xxh3_128_long_finalize(&acc, &secret, input.len());
    }
    let acc = unsafe { hash_long_internal_loop(input, &DEFAULT_SECRET) };
    xxh3_128_long_finalize(&acc, &DEFAULT_SECRET, input.len())
}
#[inline(always)]
/// Merge the accumulators into the final 128-bit digest: the low 64 bits
/// come from a merge at the front of the secret, the high 64 bits from a
/// merge near its back, each with its own starting value.
fn xxh3_128_long_finalize(acc: &[u64; ACC_NB], secret: &[u8], len: usize) -> u128 {
    let len64 = len as u64;
    // Secret offset for the high half: mirrored from the end of the secret.
    let hi_offset = secret
        .len()
        .strict_sub(ACC_NB.strict_mul(core::mem::size_of::<u64>()))
        .strict_sub(SECRET_MERGEACCS_START);
    let low = super::merge_accs(acc, secret, SECRET_MERGEACCS_START, len64.wrapping_mul(PRIME64_1));
    // NB: `!` binds after the method call, i.e. this is !(len * PRIME64_2).
    let high = super::merge_accs(acc, secret, hi_offset, !len64.wrapping_mul(PRIME64_2));
    u128::from(low) | (u128::from(high) << 64)
}