#![allow(unsafe_code)]
#![allow(clippy::indexing_slicing)]
#![allow(dead_code)]
use super::{
ACC_NB, DEFAULT_SECRET, INITIAL_ACC, PRIME32_1, PRIME64_1, PRIME64_2, SECRET_CONSUME_RATE, SECRET_LASTACC_START,
SECRET_MERGEACCS_START, STRIPE_LEN,
};
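/// One XXH3 accumulation step over a 64-byte stripe, written in RISC-V
/// vector (RVV) inline assembly with two 64-bit lanes per register
/// (`vsetivli` with VL = 2, e64); v16-v19 hold the eight accumulators.
///
/// # Safety
///
/// The `V` extension must be available at runtime, and `stripe` and
/// `secret` must each be valid for reads of `STRIPE_LEN` bytes.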
#[inline]
#[target_feature(enable = "v")]
unsafe fn accumulate_512(acc: &mut [u64; ACC_NB], stripe: *const u8, secret: *const u8) {
unsafe {
let mask: u64 = 0xFFFF_FFFF;
let shift32: u64 = 32;
core::arch::asm!(
"vsetivli zero, 2, e64, m1, ta, ma",
"vle64.v v16, ({acc})",
"addi {t1}, {acc}, 16",
"vle64.v v17, ({t1})",
"addi {t1}, {acc}, 32",
"vle64.v v18, ({t1})",
"addi {t1}, {acc}, 48",
"vle64.v v19, ({t1})",
"vle64.v v1, ({stripe})",
"vle64.v v2, ({secret})",
"vmv.x.s {t0}, v1", "vslide1down.vx v4, v1, {t0}", "vxor.vv v3, v1, v2", "vsrl.vx v2, v3, {shift32}", "vand.vx v5, v3, {mask}", "vmul.vv v3, v5, v2", "vadd.vv v3, v3, v4", "vadd.vv v16, v16, v3",
"addi {t1}, {stripe}, 16",
"vle64.v v1, ({t1})",
"addi {t1}, {secret}, 16",
"vle64.v v2, ({t1})",
"vmv.x.s {t0}, v1",
"vslide1down.vx v4, v1, {t0}",
"vxor.vv v3, v1, v2",
"vsrl.vx v2, v3, {shift32}",
"vand.vx v5, v3, {mask}",
"vmul.vv v3, v5, v2",
"vadd.vv v3, v3, v4",
"vadd.vv v17, v17, v3",
"addi {t1}, {stripe}, 32",
"vle64.v v1, ({t1})",
"addi {t1}, {secret}, 32",
"vle64.v v2, ({t1})",
"vmv.x.s {t0}, v1",
"vslide1down.vx v4, v1, {t0}",
"vxor.vv v3, v1, v2",
"vsrl.vx v2, v3, {shift32}",
"vand.vx v5, v3, {mask}",
"vmul.vv v3, v5, v2",
"vadd.vv v3, v3, v4",
"vadd.vv v18, v18, v3",
"addi {t1}, {stripe}, 48",
"vle64.v v1, ({t1})",
"addi {t1}, {secret}, 48",
"vle64.v v2, ({t1})",
"vmv.x.s {t0}, v1",
"vslide1down.vx v4, v1, {t0}",
"vxor.vv v3, v1, v2",
"vsrl.vx v2, v3, {shift32}",
"vand.vx v5, v3, {mask}",
"vmul.vv v3, v5, v2",
"vadd.vv v3, v3, v4",
"vadd.vv v19, v19, v3",
"vse64.v v16, ({acc})",
"addi {t1}, {acc}, 16",
"vse64.v v17, ({t1})",
"addi {t1}, {acc}, 32",
"vse64.v v18, ({t1})",
"addi {t1}, {acc}, 48",
"vse64.v v19, ({t1})",
acc = in(reg) acc.as_mut_ptr(),
stripe = in(reg) stripe,
secret = in(reg) secret,
mask = in(reg) mask,
shift32 = in(reg) shift32,
t0 = out(reg) _,
t1 = out(reg) _,
out("v1") _, out("v2") _, out("v3") _, out("v4") _, out("v5") _,
out("v16") _, out("v17") _, out("v18") _, out("v19") _,
options(nostack)
);
}
}
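/// Scrambles the accumulators between blocks:
/// `acc = (acc ^ (acc >> 47) ^ secret) * PRIME32_1`, two lanes per
/// vector register.
///
/// # Safety
///
/// The `V` extension must be available at runtime, and `secret` must be
/// valid for reads of `STRIPE_LEN` bytes.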
#[inline]
#[target_feature(enable = "v")]
unsafe fn scramble_acc(acc: &mut [u64; ACC_NB], secret: *const u8) {
unsafe {
let shift47: u64 = 47;
let prime: u64 = PRIME32_1 as u64;
core::arch::asm!(
"vsetivli zero, 2, e64, m1, ta, ma",
"vle64.v v16, ({acc})",
"addi {t1}, {acc}, 16",
"vle64.v v17, ({t1})",
"addi {t1}, {acc}, 32",
"vle64.v v18, ({t1})",
"addi {t1}, {acc}, 48",
"vle64.v v19, ({t1})",
"vsrl.vx v1, v16, {shift47}",
"vxor.vv v1, v16, v1", "vle64.v v2, ({secret})",
"vxor.vv v1, v1, v2", "vmul.vx v16, v1, {prime}",
"vsrl.vx v1, v17, {shift47}",
"vxor.vv v1, v17, v1",
"addi {t1}, {secret}, 16",
"vle64.v v2, ({t1})",
"vxor.vv v1, v1, v2",
"vmul.vx v17, v1, {prime}",
"vsrl.vx v1, v18, {shift47}",
"vxor.vv v1, v18, v1",
"addi {t1}, {secret}, 32",
"vle64.v v2, ({t1})",
"vxor.vv v1, v1, v2",
"vmul.vx v18, v1, {prime}",
"vsrl.vx v1, v19, {shift47}",
"vxor.vv v1, v19, v1",
"addi {t1}, {secret}, 48",
"vle64.v v2, ({t1})",
"vxor.vv v1, v1, v2",
"vmul.vx v19, v1, {prime}",
"vse64.v v16, ({acc})",
"addi {t1}, {acc}, 16",
"vse64.v v17, ({t1})",
"addi {t1}, {acc}, 32",
"vse64.v v18, ({t1})",
"addi {t1}, {acc}, 48",
"vse64.v v19, ({t1})",
acc = in(reg) acc.as_mut_ptr(),
secret = in(reg) secret,
shift47 = in(reg) shift47,
prime = in(reg) prime,
t1 = out(reg) _,
out("v1") _, out("v2") _,
out("v16") _, out("v17") _, out("v18") _, out("v19") _,
options(nostack)
);
}
}
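/// Long-input stripe loop: full blocks of `nb_stripes` stripes, each
/// followed by a scramble, then the remaining stripes of the partial
/// last block, then one final stripe read from the end of the input.
///
/// # Safety
///
/// The `V` extension must be available at runtime. `input` must be at
/// least `STRIPE_LEN` bytes and `secret` at least
/// `STRIPE_LEN + SECRET_CONSUME_RATE` bytes, so that `nb_stripes` (and
/// hence `block_len`) is non-zero.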
#[target_feature(enable = "v")]
unsafe fn hash_long_internal_loop(input: &[u8], secret: &[u8]) -> [u64; ACC_NB] {
unsafe {
let mut acc = INITIAL_ACC;
let nb_stripes = (secret.len().strict_sub(STRIPE_LEN)) / SECRET_CONSUME_RATE;
let block_len = STRIPE_LEN.strict_mul(nb_stripes);
let nb_blocks = (input.len().strict_sub(1)) / block_len;
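// Full blocks: nb_stripes accumulate steps, then one scramble keyed by
// the last STRIPE_LEN bytes of the secret.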
let mut block = 0usize;
while block < nb_blocks {
let mut stripe = 0usize;
while stripe < nb_stripes {
let input_off = block.strict_mul(block_len).strict_add(stripe.strict_mul(STRIPE_LEN));
let secret_off = stripe.strict_mul(SECRET_CONSUME_RATE);
debug_assert!(input_off.strict_add(STRIPE_LEN) <= input.len());
debug_assert!(secret_off.strict_add(STRIPE_LEN) <= secret.len());
accumulate_512(&mut acc, input.as_ptr().add(input_off), secret.as_ptr().add(secret_off));
stripe = stripe.strict_add(1);
}
scramble_acc(&mut acc, secret.as_ptr().add(secret.len().strict_sub(STRIPE_LEN)));
block = block.strict_add(1);
}
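// Partial last block: the remaining whole stripes after the full blocks.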
let nb_stripes_final = (input.len().strict_sub(1).strict_sub(block_len.strict_mul(nb_blocks))) / STRIPE_LEN;
let mut stripe = 0usize;
while stripe < nb_stripes_final {
let input_off = nb_blocks
.strict_mul(block_len)
.strict_add(stripe.strict_mul(STRIPE_LEN));
let secret_off = stripe.strict_mul(SECRET_CONSUME_RATE);
debug_assert!(input_off.strict_add(STRIPE_LEN) <= input.len());
debug_assert!(secret_off.strict_add(STRIPE_LEN) <= secret.len());
accumulate_512(&mut acc, input.as_ptr().add(input_off), secret.as_ptr().add(secret_off));
stripe = stripe.strict_add(1);
}
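// Final stripe, read from the last STRIPE_LEN bytes of the input; it may
// overlap the stripe above. The secret offset backs off by
// SECRET_LASTACC_START.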
accumulate_512(
&mut acc,
input.as_ptr().add(input.len().strict_sub(STRIPE_LEN)),
secret
.as_ptr()
.add(secret.len().strict_sub(STRIPE_LEN).strict_sub(SECRET_LASTACC_START)),
);
acc
}
}
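/// 64-bit XXH3 for long inputs (longer than `MID_SIZE_MAX` bytes),
/// driving the RVV stripe loop above. A non-zero seed first derives a
/// custom secret; seed zero uses `DEFAULT_SECRET` directly.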
pub fn xxh3_64_long(input: &[u8], seed: u64) -> u64 {
if seed == 0 {
let acc = unsafe { hash_long_internal_loop(input, &DEFAULT_SECRET) };
super::merge_accs(
&acc,
&DEFAULT_SECRET,
SECRET_MERGEACCS_START,
(input.len() as u64).wrapping_mul(PRIME64_1),
)
} else {
let secret = super::custom_default_secret(seed);
let acc = unsafe { hash_long_internal_loop(input, &secret) };
super::merge_accs(
&acc,
&secret,
SECRET_MERGEACCS_START,
(input.len() as u64).wrapping_mul(PRIME64_1),
)
}
}
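/// Full 64-bit dispatch for tests and diagnostics: 0-16 bytes, 17-128
/// bytes, 129 bytes up to `MID_SIZE_MAX`, and the long-input path above.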
#[cfg(any(test, feature = "diag"))]
pub fn xxh3_64_with_seed(input: &[u8], seed: u64) -> u64 {
if input.len() <= 16 {
return super::xxh3_64_0to16(input, seed, &DEFAULT_SECRET);
}
if input.len() <= 128 {
return super::xxh3_64_7to128(input, seed, &DEFAULT_SECRET);
}
if input.len() <= super::MID_SIZE_MAX {
return super::xxh3_64_129to240(input, seed, &DEFAULT_SECRET);
}
xxh3_64_long(input, seed)
}
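/// 128-bit XXH3 for long inputs, reusing the same accumulator loop as
/// the 64-bit variant and deriving both output words from it.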
pub fn xxh3_128_long(input: &[u8], seed: u64) -> u128 {
if seed == 0 {
let acc = unsafe { hash_long_internal_loop(input, &DEFAULT_SECRET) };
xxh3_128_long_finalize(&acc, &DEFAULT_SECRET, input.len())
} else {
let secret = super::custom_default_secret(seed);
let acc = unsafe { hash_long_internal_loop(input, &secret) };
xxh3_128_long_finalize(&acc, &secret, input.len())
}
}
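/// Merges the accumulators into the two words of the 128-bit result.
/// The low word is the same merge as the 64-bit hash; the high word
/// merges from a secret offset near the end, with
/// `!(len * PRIME64_2)` as the starting value.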
#[inline(always)]
fn xxh3_128_long_finalize(acc: &[u64; ACC_NB], secret: &[u8], len: usize) -> u128 {
let lo = super::merge_accs(
acc,
secret,
SECRET_MERGEACCS_START,
(len as u64).wrapping_mul(PRIME64_1),
);
let hi = super::merge_accs(
acc,
secret,
secret
.len()
.strict_sub(ACC_NB.strict_mul(core::mem::size_of::<u64>()))
.strict_sub(SECRET_MERGEACCS_START),
!(len as u64).wrapping_mul(PRIME64_2),
);
(lo as u128) | ((hi as u128) << 64)
}