// noatun 0.1.3 - Docs.rs

// This is a vendored version of xxhash-rust:
/*
License
Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
// The reason for vendoring is that this makes it possible for us to
// guarantee that its behavior will never change.
#![allow(clippy::len_zero)]
#![allow(clippy::needless_borrow)]
#![allow(clippy::identity_op)]
#![allow(unused)]
#![allow(clippy::undocumented_unsafe_blocks)] // This is vendored code

use crate::xxh3_vendored::xxh3::Xxh3Default;
use std::hash::Hasher;

pub(crate) mod xxh32_common {
    //! Prime constants and mixing primitives of the 32-bit xxHash variant.
    #![allow(unused)]

    use core::mem;

    /// Size in bytes of four `u32` values.
    pub const CHUNK_SIZE: usize = 4 * mem::size_of::<u32>();
    pub const PRIME_1: u32 = 0x9E3779B1;
    pub const PRIME_2: u32 = 0x85EBCA77;
    pub const PRIME_3: u32 = 0xC2B2AE3D;
    pub const PRIME_4: u32 = 0x27D4EB2F;
    pub const PRIME_5: u32 = 0x165667B1;

    /// Folds `input` into `acc`: multiply-add with `PRIME_2`, rotate left by 13,
    /// then multiply by `PRIME_1`. All arithmetic wraps.
    #[inline]
    pub const fn round(acc: u32, input: u32) -> u32 {
        let scaled_input = input.wrapping_mul(PRIME_2);
        let rotated = acc.wrapping_add(scaled_input).rotate_left(13);
        rotated.wrapping_mul(PRIME_1)
    }

    /// Final bit mixer: alternating xor-shift (15, 13, 16) and wrapping
    /// multiplication by `PRIME_2` and `PRIME_3`.
    #[inline]
    pub const fn avalanche(mut input: u32) -> u32 {
        input = (input ^ (input >> 15)).wrapping_mul(PRIME_2);
        input = (input ^ (input >> 13)).wrapping_mul(PRIME_3);
        input ^ (input >> 16)
    }
}
pub(crate) mod xxh64_common {
    //! Prime constants and mixing primitives of the 64-bit xxHash variant.
    #![allow(unused)]

    use core::mem;

    /// Size in bytes of four `u64` values.
    pub const CHUNK_SIZE: usize = 4 * mem::size_of::<u64>();
    pub const PRIME_1: u64 = 0x9E3779B185EBCA87;
    pub const PRIME_2: u64 = 0xC2B2AE3D27D4EB4F;
    pub const PRIME_3: u64 = 0x165667B19E3779F9;
    pub const PRIME_4: u64 = 0x85EBCA77C2B2AE63;
    pub const PRIME_5: u64 = 0x27D4EB2F165667C5;

    /// Folds `input` into `acc`: multiply-add with `PRIME_2`, rotate left by 31,
    /// then multiply by `PRIME_1`. All arithmetic wraps.
    #[inline]
    pub const fn round(acc: u64, input: u64) -> u64 {
        let scaled_input = input.wrapping_mul(PRIME_2);
        let rotated = acc.wrapping_add(scaled_input).rotate_left(31);
        rotated.wrapping_mul(PRIME_1)
    }

    /// Xors `round(0, val)` into `acc`, then applies `acc * PRIME_1 + PRIME_4`
    /// with wrapping arithmetic.
    #[inline]
    pub const fn merge_round(mut acc: u64, val: u64) -> u64 {
        acc = (acc ^ round(0, val)).wrapping_mul(PRIME_1);
        acc.wrapping_add(PRIME_4)
    }

    /// Final bit mixer: alternating xor-shift (33, 29, 32) and wrapping
    /// multiplication by `PRIME_2` and `PRIME_3`.
    #[inline]
    pub const fn avalanche(mut input: u64) -> u64 {
        input = (input ^ (input >> 33)).wrapping_mul(PRIME_2);
        input = (input ^ (input >> 29)).wrapping_mul(PRIME_3);
        input ^ (input >> 32)
    }
}

pub(crate) mod xxh3_common {
    //! Constants and scalar helpers shared by the XXH3 code paths.
    use core::mem;

    /// Length in bytes of one input stripe.
    pub const STRIPE_LEN: usize = 64;
    /// Number of secret bytes the secret window advances per stripe.
    pub const SECRET_CONSUME_RATE: usize = 8;
    /// Number of `u64` lanes that make up one stripe.
    pub const ACC_NB: usize = STRIPE_LEN / mem::size_of::<u64>();

    pub const SECRET_MERGEACCS_START: usize = 11;
    pub const SECRET_LASTACC_START: usize = 7; //not aligned on 8, last secret is different from acc & scrambler

    pub const MID_SIZE_MAX: usize = 240;
    pub const SECRET_SIZE_MIN: usize = 136;
    pub const DEFAULT_SECRET_SIZE: usize = 192;
    /// Built-in secret, `DEFAULT_SECRET_SIZE` bytes long.
    pub const DEFAULT_SECRET: [u8; DEFAULT_SECRET_SIZE] = [
        0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad,
        0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3,
        0x67, 0x1f, 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc,
        0xff, 0x72, 0x21, 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6,
        0x81, 0x3a, 0x26, 0x4c, 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65,
        0x8b, 0x1b, 0x53, 0x2e, 0xa3, 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19,
        0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9,
        0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31,
        0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb,
        0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0,
        0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, 0x2b, 0x16, 0xbe, 0x58, 0x7d,
        0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, 0x45, 0xcb, 0x3a, 0x8f,
        0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
    ];

    // Multipliers used by the two avalanche mixers below.
    const AVALANCHE_MULTIPLIER: u64 = 0x165667919E3779F9;
    const STRONG_AVALANCHE_MULTIPLIER: u64 = 0x9FB21C651E98DF25;

    /// Xors `value` with itself shifted right by `shift` bits.
    #[inline(always)]
    pub const fn xorshift64(value: u64, shift: u64) -> u64 {
        (value >> shift) ^ value
    }

    /// Bit mixer: xor-shift by 37, wrapping multiply, xor-shift by 32.
    #[inline(always)]
    pub const fn avalanche(mut value: u64) -> u64 {
        value = xorshift64(value, 37).wrapping_mul(AVALANCHE_MULTIPLIER);
        xorshift64(value, 32)
    }

    /// Stronger bit mixer that also folds `len` into the state.
    #[inline(always)]
    pub const fn strong_avalanche(mut value: u64, len: u64) -> u64 {
        let rotations = value.rotate_left(49) ^ value.rotate_left(24);
        value = (value ^ rotations).wrapping_mul(STRONG_AVALANCHE_MULTIPLIER);
        let shifted_plus_len = (value >> 35).wrapping_add(len);
        value = (value ^ shifted_plus_len).wrapping_mul(STRONG_AVALANCHE_MULTIPLIER);
        xorshift64(value, 28)
    }

    /// Full 64x64 -> 128 bit multiplication, returned as `(low, high)` halves.
    #[inline(always)]
    pub const fn mul64_to128(left: u64, right: u64) -> (u64, u64) {
        let wide = (left as u128) * (right as u128);
        let low = wide as u64;
        let high = (wide >> 64) as u64;
        (low, high)
    }

    /// 128-bit product of `left` and `right` with its two halves xored together.
    #[inline(always)]
    pub const fn mul128_fold64(left: u64, right: u64) -> u64 {
        let halves = mul64_to128(left, right);
        halves.0 ^ halves.1
    }
}

pub(crate) mod utils {
    //! Utilities of the crate
    //!
    //! Thin helpers around raw-pointer reads and writes. Their preconditions are
    //! verified with `debug_assert!` only, so release builds rely entirely on the
    //! callers upholding them.
    use core::{mem, ptr};

    /// Reinterprets the bytes of `input` starting at `offset` as a `&T`.
    ///
    /// Requirements (checked in debug builds only): `T` is not zero-sized and
    /// `size_of::<T>()` bytes are available at `offset`.
    ///
    /// The pointer is dereferenced as-is, so `input.as_ptr() + offset` must also
    /// satisfy the alignment of `T`; this is not checked here.
    #[inline(always)]
    pub const fn get_aligned_chunk_ref<T: Copy>(input: &[u8], offset: usize) -> &T {
        debug_assert!(mem::size_of::<T>() > 0); //Size MUST be positive
        debug_assert!(mem::size_of::<T>() <= input.len().saturating_sub(offset)); //Must fit

        // SAFETY: the caller guarantees `offset + size_of::<T>() <= input.len()` and
        // that the resulting address is suitably aligned for `T`.
        unsafe { &*(input.as_ptr().add(offset) as *const T) }
    }

    /// Same as [`get_aligned_chunk_ref`], but returns a copy of the value.
    #[allow(unused)]
    #[inline(always)]
    pub const fn get_aligned_chunk<T: Copy>(input: &[u8], offset: usize) -> T {
        *get_aligned_chunk_ref(input, offset)
    }

    /// Copies a `T` out of `input` at `offset` without any alignment requirement.
    ///
    /// Requirements (checked in debug builds only): `T` is not zero-sized and
    /// `size_of::<T>()` bytes are available at `offset`.
    #[inline(always)]
    pub fn get_unaligned_chunk<T: Copy>(input: &[u8], offset: usize) -> T {
        debug_assert!(mem::size_of::<T>() > 0); //Size MUST be positive
        debug_assert!(mem::size_of::<T>() <= input.len().saturating_sub(offset)); //Must fit

        // SAFETY: the caller guarantees `offset + size_of::<T>() <= input.len()`;
        // `read_unaligned` imposes no alignment requirement.
        unsafe { ptr::read_unaligned(input.as_ptr().add(offset) as *const T) }
    }

    /// A raw view used as a copy destination: base pointer, total length in
    /// bytes and the offset at which writing starts.
    #[derive(Debug)]
    pub struct Buffer<T> {
        pub ptr: T,
        pub len: usize,
        pub offset: usize,
    }

    impl Buffer<*mut u8> {
        /// Copies all of `src` to `self.ptr + self.offset`.
        #[inline(always)]
        pub fn copy_from_slice(&self, src: &[u8]) {
            self.copy_from_slice_by_size(src, src.len())
        }

        /// Copies the first `len` bytes of `src` to `self.ptr + self.offset`.
        ///
        /// Requirements (checked in debug builds only): `len` bytes fit between
        /// `self.offset` and `self.len`, and `src` holds at least `len` bytes.
        #[inline(always)]
        pub fn copy_from_slice_by_size(&self, src: &[u8], len: usize) {
            debug_assert!(self.len.saturating_sub(self.offset) >= len);
            // Without this check an oversized `len` silently reads past `src`.
            debug_assert!(len <= src.len());

            // SAFETY: `src` is valid for `len` bytes (asserted above); the caller
            // guarantees `self.ptr` is valid for writes of `self.len` bytes and
            // does not overlap `src`.
            unsafe {
                ptr::copy_nonoverlapping(src.as_ptr(), self.ptr.add(self.offset), len);
            }
        }
    }
}

pub mod xxh3 {
    //!XXH3 implementation
    //!
    //!Provides `Hasher` only for 64bit as 128bit variant would not be much different due to trait
    //!being limited to `u64` outputs.

    use core::{hash, mem, ptr, slice};

    use super::utils::{get_aligned_chunk_ref, get_unaligned_chunk, Buffer};
    use super::xxh32_common as xxh32;
    use super::xxh3_common::*;
    use super::xxh64_common as xxh64;

    // Code is as close to original C implementation as possible
    // It does make it look ugly, but it is fast and easy to update once xxhash gets new version.

    // Accumulator state: `ACC_NB` 64-bit lanes in a newtype whose alignment depends on
    // the enabled target features. The three cfg conditions are mutually exclusive and
    // exhaustive, so exactly one definition is compiled:
    //   - sse2 / neon / wasm simd128 without avx2 -> 16-byte alignment
    //   - avx2                                   -> 32-byte alignment
    //   - none of the above                      -> 8-byte alignment
    // The SIMD kernels below cast `acc.0.as_mut_ptr()` to vector pointers and dereference
    // them directly, which is why the alignment has to match the vector width.
    #[cfg(all(
        any(
            target_feature = "sse2",
            target_feature = "neon",
            all(target_family = "wasm", target_feature = "simd128")
        ),
        not(target_feature = "avx2")
    ))]
    #[repr(align(16))]
    #[derive(Clone)]
    struct Acc([u64; ACC_NB]);
    #[cfg(target_feature = "avx2")]
    #[repr(align(32))]
    #[derive(Clone)]
    struct Acc([u64; ACC_NB]);
    #[cfg(not(any(
        target_feature = "avx2",
        target_feature = "neon",
        all(target_family = "wasm", target_feature = "simd128"),
        target_feature = "sse2"
    )))]
    #[repr(align(8))]
    #[derive(Clone)]
    struct Acc([u64; ACC_NB]);

    // Starting value of the accumulator lanes, assembled from the 32-bit and 64-bit
    // xxHash prime constants (the 32-bit primes are zero-extended to `u64`).
    const INITIAL_ACC: Acc = Acc([
        xxh32::PRIME_3 as u64,
        xxh64::PRIME_1,
        xxh64::PRIME_2,
        xxh64::PRIME_3,
        xxh64::PRIME_4,
        xxh32::PRIME_2 as u64,
        xxh64::PRIME_5,
        xxh32::PRIME_1 as u64,
    ]);

    // Function-pointer signatures producing a 64-bit / 128-bit result from two byte
    // slices and a `u64`.
    // NOTE(review): presumably the arguments are (input, seed, secret) as in the
    // short-input helpers of this module; confirm against the users of these aliases.
    type LongHashFn = fn(&[u8], u64, &[u8]) -> u64;
    type LongHashFn128 = fn(&[u8], u64, &[u8]) -> u128;

    // `StripeLanes` views one `STRIPE_LEN`-byte stripe as an array of byte lanes, each
    // lane being as wide as the SIMD register type of the selected backend
    // (`v128`, `__m256i`, `__m128i` or `uint8x16_t`). No alias is defined when no SIMD
    // feature is enabled; the scalar kernels take `[[u8; 8]; ACC_NB]` directly.
    #[cfg(all(target_family = "wasm", target_feature = "simd128"))]
    type StripeLanes = [[u8; mem::size_of::<core::arch::wasm32::v128>()];
        STRIPE_LEN / mem::size_of::<core::arch::wasm32::v128>()];
    #[cfg(all(target_arch = "x86", target_feature = "avx2"))]
    type StripeLanes = [[u8; mem::size_of::<core::arch::x86::__m256i>()];
        STRIPE_LEN / mem::size_of::<core::arch::x86::__m256i>()];
    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
    type StripeLanes = [[u8; mem::size_of::<core::arch::x86_64::__m256i>()];
        STRIPE_LEN / mem::size_of::<core::arch::x86_64::__m256i>()];
    #[cfg(all(
        target_arch = "x86",
        target_feature = "sse2",
        not(target_feature = "avx2")
    ))]
    type StripeLanes = [[u8; mem::size_of::<core::arch::x86::__m128i>()];
        STRIPE_LEN / mem::size_of::<core::arch::x86::__m128i>()];
    #[cfg(all(
        target_arch = "x86_64",
        target_feature = "sse2",
        not(target_feature = "avx2")
    ))]
    type StripeLanes = [[u8; mem::size_of::<core::arch::x86_64::__m128i>()];
        STRIPE_LEN / mem::size_of::<core::arch::x86_64::__m128i>()];
    // NOTE(review): this alias is gated on the `neon` feature only but names
    // `core::arch::aarch64`, which looks like it would not resolve on 32-bit arm
    // targets with neon enabled -- confirm whether that configuration is supported.
    #[cfg(target_feature = "neon")]
    type StripeLanes = [[u8; mem::size_of::<core::arch::aarch64::uint8x16_t>()];
        STRIPE_LEN / mem::size_of::<core::arch::aarch64::uint8x16_t>()];

    /// Packs four 2-bit lane selectors into a shuffle immediate, with `z` in the
    /// highest bit pair and `w` in the lowest.
    #[cfg(any(target_feature = "sse2", target_feature = "avx2"))]
    #[inline]
    const fn _mm_shuffle(z: u32, y: u32, x: u32, w: u32) -> i32 {
        let packed = w | (x << 2) | (y << 4) | (z << 6);
        packed as i32
    }

    /// Multiplies two 32-bit values into a 64-bit product.
    #[inline(always)]
    const fn mult32_to64(left: u32, right: u32) -> u64 {
        let wide_left = left as u64;
        let wide_right = right as u64;
        wide_left.wrapping_mul(wide_right)
    }

    // Combines a low and a high 64-bit half into one `u128`.
    // `$lo` is evaluated before `$hi`.
    macro_rules! to_u128 {
        ($lo:expr, $hi:expr) => {{
            let low_half = ($lo) as u128;
            let high_half = ($hi) as u128;
            (high_half << 64) | low_half
        }};
    }

    // Yields a `*const u8` pointing `$offset` bytes past the start of `$slice`.
    // The offset is only checked against the slice length in debug builds.
    macro_rules! slice_offset_ptr {
        ($slice:expr, $offset:expr) => {{
            let bytes = $slice;
            let skip = $offset;
            debug_assert!(skip <= bytes.len());

            // Call sites may already be inside an `unsafe` block.
            #[allow(unused_unsafe)]
            unsafe {
                let base = bytes.as_ptr() as *const u8;
                base.add(skip)
            }
        }};
    }

    /// Reads four bytes of `data` at `offset` as a little-endian `u32`.
    #[inline(always)]
    fn read_32le_unaligned(data: &[u8], offset: usize) -> u32 {
        let bytes: &[u8; 4] = get_aligned_chunk_ref(data, offset);
        u32::from_le_bytes(*bytes)
    }

    /// Reads eight bytes of `data` at `offset` as a little-endian `u64`.
    #[inline(always)]
    fn read_64le_unaligned(data: &[u8], offset: usize) -> u64 {
        let bytes: &[u8; 8] = get_aligned_chunk_ref(data, offset);
        u64::from_le_bytes(*bytes)
    }

    /// Xors accumulator lanes `offset` and `offset + 1` with the two little-endian
    /// secret words and folds their 128-bit product into 64 bits.
    #[inline(always)]
    fn mix_two_accs(acc: &mut Acc, offset: usize, secret: &[[u8; 8]; 2]) -> u64 {
        let [key_lo, key_hi] = *secret;
        let keyed_lo = acc.0[offset] ^ u64::from_le_bytes(key_lo);
        let keyed_hi = acc.0[offset + 1] ^ u64::from_le_bytes(key_hi);
        mul128_fold64(keyed_lo, keyed_hi)
    }

    #[inline]
    fn merge_accs(acc: &mut Acc, secret: &[[[u8; 8]; 2]; 4], mut result: u64) -> u64 {
        macro_rules! mix_two_accs {
            ($idx:literal) => {
                result = result.wrapping_add(mix_two_accs(acc, $idx * 2, &secret[$idx]))
            };
        }

        mix_two_accs!(0);
        mix_two_accs!(1);
        mix_two_accs!(2);
        mix_two_accs!(3);

        avalanche(result)
    }

    /// Mixes 16 input bytes with 16 secret bytes and the seed.
    ///
    /// The low secret word has `seed` added, the high one has it subtracted; each is
    /// xored into the matching little-endian input word and the 128-bit product of
    /// the two results is folded into 64 bits.
    #[inline(always)]
    fn mix16_b(input: &[[u8; 8]; 2], secret: &[[u8; 8]; 2], seed: u64) -> u64 {
        let key_lo = u64::from_le_bytes(secret[0]).wrapping_add(seed);
        let key_hi = u64::from_le_bytes(secret[1]).wrapping_sub(seed);

        let keyed_lo = u64::from_le_bytes(input[0]) ^ key_lo;
        let keyed_hi = u64::from_le_bytes(input[1]) ^ key_hi;

        mul128_fold64(keyed_lo, keyed_hi)
    }

    #[inline(always)]
    // Each input is a pair of unaligned little-endian u64 words.
    // The secret is two such pairs, one per input.
    //
    // `lo` absorbs the mix of `input_1` and is then xored with the word sum of
    // `input_2`; `hi` does the same with the roles of the inputs swapped.
    fn mix32_b(
        lo: &mut u64,
        hi: &mut u64,
        input_1: &[[u8; 8]; 2],
        input_2: &[[u8; 8]; 2],
        secret: &[[[u8; 8]; 2]; 2],
        seed: u64,
    ) {
        let word_sum_1 = u64::from_le_bytes(input_1[0]).wrapping_add(u64::from_le_bytes(input_1[1]));
        let word_sum_2 = u64::from_le_bytes(input_2[0]).wrapping_add(u64::from_le_bytes(input_2[1]));

        *lo = lo.wrapping_add(mix16_b(input_1, &secret[0], seed)) ^ word_sum_2;
        *hi = hi.wrapping_add(mix16_b(input_2, &secret[1], seed)) ^ word_sum_1;
    }

    #[inline(always)]
    fn custom_default_secret(seed: u64) -> [u8; DEFAULT_SECRET_SIZE] {
        let mut result = mem::MaybeUninit::<[u8; DEFAULT_SECRET_SIZE]>::uninit();

        let nb_rounds = DEFAULT_SECRET_SIZE / 16;

        for idx in 0..nb_rounds {
            let low = get_unaligned_chunk::<u64>(&DEFAULT_SECRET, idx * 16)
                .to_le()
                .wrapping_add(seed);
            let hi = get_unaligned_chunk::<u64>(&DEFAULT_SECRET, idx * 16 + 8)
                .to_le()
                .wrapping_sub(seed);

            Buffer {
                ptr: result.as_mut_ptr() as *mut u8,
                len: DEFAULT_SECRET_SIZE,
                offset: idx * 16,
            }
            .copy_from_slice(&low.to_le_bytes());
            Buffer {
                ptr: result.as_mut_ptr() as *mut u8,
                len: DEFAULT_SECRET_SIZE,
                offset: idx * 16 + 8,
            }
            .copy_from_slice(&hi.to_le_bytes());
        }

        // Safety: result has been written to
        unsafe { result.assume_init() }
    }

    /// Adds one stripe of `input`, keyed with `secret`, into `acc` using wasm `simd128`.
    ///
    /// The accumulator is viewed as `v128` vectors and handled two vectors per loop
    /// iteration, so that the low and high 32-bit halves of both keyed vectors can be
    /// gathered with one shuffle each and multiplied with the widening `extmul`
    /// instructions.
    #[cfg(all(target_family = "wasm", target_feature = "simd128"))]
    fn accumulate_512_wasm(acc: &mut Acc, input: &StripeLanes, secret: &StripeLanes) {
        const LANES: usize = ACC_NB;

        use core::arch::wasm32::*;

        let mut idx = 0usize;
        let xacc = acc.0.as_mut_ptr() as *mut v128;

        unsafe {
            // `LANES / 2` is the number of `v128` vectors in the accumulator.
            while idx.wrapping_add(1) < LANES / 2 {
                let data_vec_1 = v128_load(input[idx].as_ptr() as _);
                let data_vec_2 = v128_load(input[idx.wrapping_add(1)].as_ptr() as _);

                let key_vec_1 = v128_load(secret[idx].as_ptr() as _);
                let key_vec_2 = v128_load(secret[idx.wrapping_add(1)].as_ptr() as _);

                // data_key = data ^ key
                let data_key_1 = v128_xor(data_vec_1, key_vec_1);
                let data_key_2 = v128_xor(data_vec_2, key_vec_2);

                // Swap the two 64-bit halves of the raw data.
                let data_swap_1 = i64x2_shuffle::<1, 0>(data_vec_1, data_vec_1);
                let data_swap_2 = i64x2_shuffle::<1, 0>(data_vec_2, data_vec_2);

                // Even 32-bit lanes are the low halves of each 64-bit lane, odd ones the high halves.
                let mixed_lo = i32x4_shuffle::<0, 2, 4, 6>(data_key_1, data_key_2);
                let mixed_hi = i32x4_shuffle::<1, 3, 5, 7>(data_key_1, data_key_2);

                // Widening low * high products: first two lanes belong to vector 1, last two to vector 2.
                let prod_1 = u64x2_extmul_low_u32x4(mixed_lo, mixed_hi);
                let prod_2 = u64x2_extmul_high_u32x4(mixed_lo, mixed_hi);

                let sum_1 = i64x2_add(prod_1, data_swap_1);
                let sum_2 = i64x2_add(prod_2, data_swap_2);

                xacc.add(idx).write(i64x2_add(sum_1, *xacc.add(idx)));
                xacc.add(idx.wrapping_add(1))
                    .write(i64x2_add(sum_2, *xacc.add(idx.wrapping_add(1))));

                idx = idx.wrapping_add(2);
            }
        }
    }

    // Loads 16 bytes from `$ptr` into a `uint8x16_t`.
    // On aarch64 this forwards to the `vld1q_u8` intrinsic.
    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
    macro_rules! vld1q_u8 {
        ($ptr:expr) => {
            core::arch::aarch64::vld1q_u8($ptr)
        };
    }

    // `vld1q_u8` is unstable on 32-bit arm, so it is emulated there with an
    // unaligned pointer read of a `uint8x16_t`.
    #[cfg(all(target_arch = "arm", target_feature = "neon"))]
    macro_rules! vld1q_u8 {
        ($ptr:expr) => {
            core::ptr::read_unaligned($ptr as *const core::arch::arm::uint8x16_t)
        };
    }

    /// Adds one stripe of `input`, keyed with `secret`, into `acc` using NEON.
    ///
    /// The accumulator is viewed as `uint64x2_t` vectors and handled two vectors per
    /// loop iteration so the low and high 32-bit halves of both keyed vectors can be
    /// separated with a single unzip.
    #[cfg(target_feature = "neon")]
    fn accumulate_512_neon(acc: &mut Acc, input: &StripeLanes, secret: &StripeLanes) {
        //Full Neon version from xxhash source
        const NEON_LANES: usize = ACC_NB;

        unsafe {
            #[cfg(target_arch = "aarch64")]
            use core::arch::aarch64::*;
            #[cfg(target_arch = "arm")]
            use core::arch::arm::*;

            let mut idx = 0usize;
            let xacc = acc.0.as_mut_ptr() as *mut uint64x2_t;

            // `NEON_LANES / 2` is the number of 128-bit vectors in the accumulator.
            while idx.wrapping_add(1) < NEON_LANES / 2 {
                /* data_vec = xinput[i]; */
                let data_vec_1 = vreinterpretq_u64_u8(vld1q_u8!(input[idx].as_ptr()));
                let data_vec_2 =
                    vreinterpretq_u64_u8(vld1q_u8!(input[idx.wrapping_add(1)].as_ptr()));
                /* key_vec  = xsecret[i];  */
                let key_vec_1 = vreinterpretq_u64_u8(vld1q_u8!(secret[idx].as_ptr()));
                let key_vec_2 =
                    vreinterpretq_u64_u8(vld1q_u8!(secret[idx.wrapping_add(1)].as_ptr()));
                /* data_swap = swap(data_vec) */
                let data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
                let data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
                /* data_key = data_vec ^ key_vec; */
                let data_key_1 = veorq_u64(data_vec_1, key_vec_1);
                let data_key_2 = veorq_u64(data_vec_2, key_vec_2);

                // De-interleave the 32-bit halves of both keyed vectors.
                let unzipped = vuzpq_u32(
                    vreinterpretq_u32_u64(data_key_1),
                    vreinterpretq_u32_u64(data_key_2),
                );
                /* data_key_lo = data_key & 0xFFFFFFFF */
                let data_key_lo = unzipped.0;
                /* data_key_hi = data_key >> 32 */
                let data_key_hi = unzipped.1;

                // The xxhash sources use inline assembly here; this port uses the
                // widening multiply-accumulate intrinsics instead.
                // sum = data_swap + lo * hi (first vector uses the low halves of the unzipped pair)
                let sum_1 = vmlal_u32(
                    data_swap_1,
                    vget_low_u32(data_key_lo),
                    vget_low_u32(data_key_hi),
                );
                // Second vector uses the high halves of the unzipped pair.
                #[cfg(target_arch = "aarch64")]
                let sum_2 = vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
                #[cfg(target_arch = "arm")]
                let sum_2 = vmlal_u32(
                    data_swap_2,
                    vget_high_u32(data_key_lo),
                    vget_high_u32(data_key_hi),
                );

                xacc.add(idx).write(vaddq_u64(*xacc.add(idx), sum_1));
                xacc.add(idx.wrapping_add(1))
                    .write(vaddq_u64(*xacc.add(idx.wrapping_add(1)), sum_2));

                idx = idx.wrapping_add(2);
            }
        }
    }

    /// Adds one stripe of `input`, keyed with `secret`, into `acc` using SSE2.
    ///
    /// Per 128-bit lane: the keyed data's low and high 32-bit halves are multiplied
    /// into 64-bit products, and the raw data with its 64-bit halves swapped is added
    /// on top of the accumulator.
    #[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
    fn accumulate_512_sse2(acc: &mut Acc, input: &StripeLanes, secret: &StripeLanes) {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        let acc_lanes = acc.0.as_mut_ptr() as *mut __m128i;

        for (lane, (stripe_lane, secret_lane)) in input.iter().zip(secret.iter()).enumerate() {
            // SAFETY: `Acc` is 16-byte aligned in this configuration and holds as many
            // 128-bit lanes as `StripeLanes`; the loads are unaligned loads from
            // 16-byte arrays.
            unsafe {
                let slot = acc_lanes.add(lane);

                let data = _mm_loadu_si128(stripe_lane.as_ptr() as _);
                let key = _mm_loadu_si128(secret_lane.as_ptr() as _);
                let keyed = _mm_xor_si128(data, key);

                // Bring the high 32 bits of each 64-bit element into the low position.
                let keyed_high = _mm_shuffle_epi32(keyed, _mm_shuffle(0, 3, 0, 1));
                let product = _mm_mul_epu32(keyed, keyed_high);

                // Swap the two 64-bit halves of the raw data.
                let swapped = _mm_shuffle_epi32(data, _mm_shuffle(1, 0, 3, 2));
                let partial = _mm_add_epi64(slot.read(), swapped);
                slot.write(_mm_add_epi64(product, partial));
            }
        }
    }

    /// Adds one stripe of `input`, keyed with `secret`, into `acc` using AVX2.
    ///
    /// Per 256-bit lane: the keyed data's low and high 32-bit halves are multiplied
    /// into 64-bit products, and the raw data with neighbouring 64-bit elements
    /// swapped is added on top of the accumulator.
    #[cfg(target_feature = "avx2")]
    fn accumulate_512_avx2(acc: &mut Acc, input: &StripeLanes, secret: &StripeLanes) {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        let acc_lanes = acc.0.as_mut_ptr() as *mut __m256i;

        for (lane, (stripe_lane, secret_lane)) in input.iter().zip(secret.iter()).enumerate() {
            // SAFETY: `Acc` is 32-byte aligned in this configuration and holds as many
            // 256-bit lanes as `StripeLanes`; the loads are unaligned loads from
            // 32-byte arrays.
            unsafe {
                let slot = acc_lanes.add(lane);

                let data = _mm256_loadu_si256(stripe_lane.as_ptr() as _);
                let key = _mm256_loadu_si256(secret_lane.as_ptr() as _);
                let keyed = _mm256_xor_si256(data, key);

                // Bring the high 32 bits of each 64-bit element into the low position.
                let keyed_high = _mm256_srli_epi64(keyed, 32);
                let product = _mm256_mul_epu32(keyed, keyed_high);

                // Swap neighbouring 64-bit elements of the raw data.
                let swapped = _mm256_shuffle_epi32(data, _mm_shuffle(1, 0, 3, 2));
                let partial = _mm256_add_epi64(slot.read(), swapped);
                slot.write(_mm256_add_epi64(product, partial));
            }
        }
    }

    /// Portable fallback: adds one stripe of `input`, keyed with `secret`, into `acc`.
    ///
    /// For every lane the raw data word is added to the neighbouring lane (`idx ^ 1`)
    /// and the product of the low and high 32-bit halves of `data ^ key` is added to
    /// the lane itself.
    #[cfg(not(any(
        target_feature = "avx2",
        target_feature = "sse2",
        target_feature = "neon",
        all(target_family = "wasm", target_feature = "simd128")
    )))]
    fn accumulate_512_scalar(acc: &mut Acc, input: &[[u8; 8]; ACC_NB], secret: &[[u8; 8]; ACC_NB]) {
        for (lane, (data_bytes, key_bytes)) in input.iter().zip(secret.iter()).enumerate() {
            let data = u64::from_le_bytes(*data_bytes);
            let keyed = data ^ u64::from_le_bytes(*key_bytes);

            let neighbour = lane ^ 1;
            acc.0[neighbour] = acc.0[neighbour].wrapping_add(data);

            // Truncation keeps the low 32 bits; the shift yields the high 32 bits.
            let product = mult32_to64(keyed as u32, (keyed >> 32) as u32);
            acc.0[lane] = acc.0[lane].wrapping_add(product);
        }
    }

    // Select the stripe-accumulation kernel for the enabled target features and expose
    // it under the common name `accumulate_512`. The cfg conditions mirror the ones on
    // the kernel definitions above.
    #[cfg(target_feature = "avx2")]
    use accumulate_512_avx2 as accumulate_512;
    #[cfg(target_feature = "neon")]
    use accumulate_512_neon as accumulate_512;
    #[cfg(not(any(
        target_feature = "avx2",
        target_feature = "sse2",
        target_feature = "neon",
        all(target_family = "wasm", target_feature = "simd128")
    )))]
    use accumulate_512_scalar as accumulate_512;
    #[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
    use accumulate_512_sse2 as accumulate_512;
    #[cfg(all(target_family = "wasm", target_feature = "simd128"))]
    use accumulate_512_wasm as accumulate_512;

    /// Scrambles every accumulator vector with `secret` using wasm `simd128`:
    /// `acc = ((acc ^ (acc >> 47)) ^ key) * PRIME_1` per 64-bit lane.
    #[cfg(all(target_family = "wasm", target_feature = "simd128"))]
    fn scramble_acc_wasm(acc: &mut Acc, secret: &StripeLanes) {
        use core::arch::wasm32::*;

        let xacc = acc.0.as_mut_ptr() as *mut v128;
        // 32-bit prime broadcast into both 64-bit lanes.
        let prime = u64x2_splat(xxh32::PRIME_1 as _);

        unsafe {
            for idx in 0..secret.len() {
                let acc_vec = v128_load(xacc.add(idx) as _);
                // acc ^ (acc >> 47)
                let shifted = u64x2_shr(acc_vec, 47);
                let data_vec = v128_xor(acc_vec, shifted);
                // ... ^ key
                let key_vec = v128_load(secret[idx].as_ptr() as _);
                let mixed = v128_xor(data_vec, key_vec);
                // ... * PRIME_1
                xacc.add(idx).write(i64x2_mul(mixed, prime));
            }
        }
    }

    /// Scrambles every accumulator vector with `secret` using NEON:
    /// `acc = ((acc ^ (acc >> 47)) ^ key) * PRIME_1` per 64-bit lane, where the 64-bit
    /// multiplication is assembled from 32-bit multiplies.
    #[cfg(target_feature = "neon")]
    fn scramble_acc_neon(acc: &mut Acc, secret: &StripeLanes) {
        //Full Neon version from xxhash source
        unsafe {
            #[cfg(target_arch = "aarch64")]
            use core::arch::aarch64::*;
            #[cfg(target_arch = "arm")]
            use core::arch::arm::*;

            let xacc = acc.0.as_mut_ptr() as *mut uint64x2_t;

            // `prime_low` holds the prime in 32-bit lanes; `prime_hi` holds it in the
            // upper 32 bits of each 64-bit lane (the lower 32 bits are zero).
            let prime_low = vdup_n_u32(xxh32::PRIME_1);
            let prime_hi = vreinterpretq_u32_u64(vdupq_n_u64((xxh32::PRIME_1 as u64) << 32));

            for idx in 0..secret.len() {
                /* xacc[i] ^= (xacc[i] >> 47); */
                let acc_vec = *xacc.add(idx);
                let shifted = vshrq_n_u64(acc_vec, 47);
                let data_vec = veorq_u64(acc_vec, shifted);

                /* xacc[i] ^= xsecret[i]; */
                // The xxhash sources note that an unaligned read is permitted here;
                // this port loads the key through the `vld1q_u8!` helper instead of
                // dereferencing the pointer directly.
                let key_vec = vreinterpretq_u64_u8(vld1q_u8!(secret[idx].as_ptr()));
                let data_key = veorq_u64(data_vec, key_vec);

                // High halves times the prime, landing in the upper 32 bits of each lane.
                let prod_hi = vmulq_u32(vreinterpretq_u32_u64(data_key), prime_hi);
                // Low halves times the prime (widening), accumulated onto `prod_hi`.
                let data_key_lo = vmovn_u64(data_key);
                xacc.add(idx).write(vmlal_u32(
                    vreinterpretq_u64_u32(prod_hi),
                    data_key_lo,
                    prime_low,
                ));
            }
        }
    }

    /// Scrambles every accumulator lane with `secret` using SSE2:
    /// `acc = ((acc ^ (acc >> 47)) ^ key) * PRIME_1` per 64-bit element, where the
    /// 64-bit multiplication is assembled from two 32-bit multiplies.
    #[allow(clippy::needless_range_loop)]
    #[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
    fn scramble_acc_sse2(acc: &mut Acc, secret: &StripeLanes) {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        let acc_lanes = acc.0.as_mut_ptr() as *mut __m128i;

        for (lane, secret_lane) in secret.iter().enumerate() {
            // SAFETY: `Acc` is 16-byte aligned in this configuration and holds as many
            // 128-bit lanes as `StripeLanes`; the key is read with an unaligned load.
            unsafe {
                let prime = _mm_set1_epi32(xxh32::PRIME_1 as i32);
                let slot = acc_lanes.add(lane);

                let current = slot.read();
                let folded = _mm_xor_si128(current, _mm_srli_epi64(current, 47));

                let key = _mm_loadu_si128(secret_lane.as_ptr() as _);
                let keyed = _mm_xor_si128(folded, key);

                // Low and high 32-bit halves are multiplied separately; the high
                // product is shifted back into the upper half before summing.
                let keyed_high = _mm_shuffle_epi32(keyed, _mm_shuffle(0, 3, 0, 1));
                let low_product = _mm_mul_epu32(keyed, prime);
                let high_product = _mm_mul_epu32(keyed_high, prime);
                slot.write(_mm_add_epi64(low_product, _mm_slli_epi64(high_product, 32)));
            }
        }
    }

    /// Scrambles every accumulator lane with `secret` using AVX2:
    /// `acc = ((acc ^ (acc >> 47)) ^ key) * PRIME_1` per 64-bit element, where the
    /// 64-bit multiplication is assembled from two 32-bit multiplies.
    #[cfg(target_feature = "avx2")]
    fn scramble_acc_avx2(acc: &mut Acc, secret: &StripeLanes) {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        let acc_lanes = acc.0.as_mut_ptr() as *mut __m256i;

        for (lane, secret_lane) in secret.iter().enumerate() {
            // SAFETY: `Acc` is 32-byte aligned in this configuration and holds as many
            // 256-bit lanes as `StripeLanes`; the key is read with an unaligned load.
            unsafe {
                let prime = _mm256_set1_epi32(xxh32::PRIME_1 as i32);
                let slot = acc_lanes.add(lane);

                let current = slot.read();
                let folded = _mm256_xor_si256(current, _mm256_srli_epi64(current, 47));

                let key = _mm256_loadu_si256(secret_lane.as_ptr() as _);
                let keyed = _mm256_xor_si256(folded, key);

                // Low and high 32-bit halves are multiplied separately; the high
                // product is shifted back into the upper half before summing.
                let keyed_high = _mm256_srli_epi64(keyed, 32);
                let low_product = _mm256_mul_epu32(keyed, prime);
                let high_product = _mm256_mul_epu32(keyed_high, prime);
                slot.write(_mm256_add_epi64(low_product, _mm256_slli_epi64(high_product, 32)));
            }
        }
    }

    /// Portable fallback: scrambles every accumulator lane with `secret`,
    /// `acc = ((acc ^ (acc >> 47)) ^ key) * PRIME_1` with wrapping multiplication.
    #[cfg(not(any(
        target_feature = "avx2",
        target_feature = "sse2",
        target_feature = "neon",
        all(target_family = "wasm", target_feature = "simd128")
    )))]
    fn scramble_acc_scalar(acc: &mut Acc, secret: &[[u8; 8]; ACC_NB]) {
        for (lane, key_bytes) in acc.0.iter_mut().zip(secret.iter()) {
            let key = u64::from_le_bytes(*key_bytes);
            let keyed = xorshift64(*lane, 47) ^ key;
            *lane = keyed.wrapping_mul(xxh32::PRIME_1 as u64);
        }
    }

    // Select the accumulator-scrambling kernel for the enabled target features and
    // expose it under the common name `scramble_acc`. The cfg conditions mirror the
    // ones on the kernel definitions above.
    #[cfg(all(target_family = "wasm", target_feature = "simd128"))]
    use scramble_acc_wasm as scramble_acc;

    #[cfg(target_feature = "neon")]
    use scramble_acc_neon as scramble_acc;

    #[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
    use scramble_acc_sse2 as scramble_acc;

    #[cfg(target_feature = "avx2")]
    use scramble_acc_avx2 as scramble_acc;

    #[cfg(not(any(
        target_feature = "avx2",
        target_feature = "sse2",
        target_feature = "neon",
        all(target_family = "wasm", target_feature = "simd128")
    )))]
    use scramble_acc_scalar as scramble_acc;

    /// Accumulates `nb_stripes` consecutive stripes starting at `input`.
    ///
    /// Stripe `i` is read from `input + i * STRIPE_LEN` and keyed with the secret
    /// window starting at `secret + i * SECRET_CONSUME_RATE`. Both pointers must be
    /// valid for every stripe that is read; nothing is checked here.
    #[inline(always)]
    fn accumulate_loop(acc: &mut Acc, input: *const u8, secret: *const u8, nb_stripes: usize) {
        for stripe_idx in 0..nb_stripes {
            // SAFETY: the caller guarantees that both pointers stay in bounds for
            // `nb_stripes` stripes.
            unsafe {
                let stripe = input.add(stripe_idx * STRIPE_LEN);
                let key_window = secret.add(stripe_idx * SECRET_CONSUME_RATE);

                // Prefetching (`_mm_prefetch(input as _, 320)`) is turned off for now
                // because Miri complains about it.

                accumulate_512(acc, &*(stripe as *const _), &*(key_window as *const _));
            }
        }
    }

    /// Consumes a long `input` into `acc`.
    ///
    /// The input is processed in blocks whose size is derived from the secret length;
    /// the accumulator is scrambled after every full block. The remaining stripes of
    /// the last, partial block follow, and finally the last `STRIPE_LEN` bytes of the
    /// input are accumulated with a dedicated secret window.
    #[inline]
    fn hash_long_internal_loop(acc: &mut Acc, input: &[u8], secret: &[u8]) {
        let stripes_per_block = (secret.len() - STRIPE_LEN) / SECRET_CONSUME_RATE;
        let block_len = STRIPE_LEN * stripes_per_block;
        let full_blocks = (input.len() - 1) / block_len;

        for block_idx in 0..full_blocks {
            let block_start = slice_offset_ptr!(input, block_idx * block_len);
            accumulate_loop(acc, block_start, secret.as_ptr(), stripes_per_block);

            // The scramble key is the last `STRIPE_LEN` bytes of the secret.
            scramble_acc(
                acc,
                get_aligned_chunk_ref(secret, secret.len() - STRIPE_LEN),
            );
        }

        //last partial block
        debug_assert!(input.len() > STRIPE_LEN);

        let remaining_stripes = ((input.len() - 1) - (block_len * full_blocks)) / STRIPE_LEN;
        debug_assert!(remaining_stripes <= (secret.len() / SECRET_CONSUME_RATE));
        let tail_start = slice_offset_ptr!(input, full_blocks * block_len);
        accumulate_loop(acc, tail_start, secret.as_ptr(), remaining_stripes);

        //last stripe
        let last_key_offset = secret.len() - STRIPE_LEN - SECRET_LASTACC_START;
        accumulate_512(
            acc,
            get_aligned_chunk_ref(input, input.len() - STRIPE_LEN),
            get_aligned_chunk_ref(secret, last_key_offset),
        );
    }

    /// Hashes an input of 1 to 3 bytes.
    ///
    /// The first, middle and last byte plus the length are packed into one 32-bit
    /// word, xored with a seed-adjusted value derived from the first eight secret
    /// bytes, and run through the xxh64 avalanche.
    ///
    /// The length precondition is checked in debug builds only, matching the
    /// sibling `xxh3_64_4to8` / `xxh3_64_9to16` helpers.
    #[inline(always)]
    fn xxh3_64_1to3(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
        debug_assert!(input.len() >= 1 && input.len() <= 3);

        let len = input.len();
        // SAFETY: `len` is at least 1 (asserted above in debug builds, guaranteed by
        // the caller otherwise), so indices 0, `len >> 1` and `len - 1` are in bounds.
        let (c1, c2, c3) = unsafe {
            (
                *input.get_unchecked(0),
                *input.get_unchecked(len >> 1),
                *input.get_unchecked(len - 1),
            )
        };

        let combo = (c1 as u32) << 16 | (c2 as u32) << 24 | (c3 as u32) << 0 | (len as u32) << 8;
        let flip = ((read_32le_unaligned(secret, 0) ^ read_32le_unaligned(secret, 4)) as u64)
            .wrapping_add(seed);
        xxh64::avalanche((combo as u64) ^ flip)
    }

    /// Hashes an input of 4 to 8 bytes.
    ///
    /// The first and last four bytes (which may overlap) are combined into one 64-bit
    /// word, keyed with secret bytes 8..24 and the seed, and strongly avalanched
    /// together with the input length.
    #[inline(always)]
    fn xxh3_64_4to8(input: &[u8], mut seed: u64, secret: &[u8]) -> u64 {
        debug_assert!(input.len() >= 4 && input.len() <= 8);

        let len = input.len();

        // Mix the byte-swapped low half of the seed into its high half.
        seed ^= ((seed as u32).swap_bytes() as u64) << 32;

        let head = read_32le_unaligned(input, 0);
        let tail = read_32le_unaligned(input, len - 4);
        let combined = ((head as u64) << 32).wrapping_add(tail as u64);

        let secret_mix = read_64le_unaligned(secret, 8) ^ read_64le_unaligned(secret, 16);
        let flip = secret_mix.wrapping_sub(seed);

        strong_avalanche(combined ^ flip, len as u64)
    }

    ///Hashes an input of 9 to 16 bytes into a 64bit value.
    ///
    ///The first and last 8 bytes of `input` (overlapping for lengths below 16) are each
    ///XORed with a seed-adjusted mask taken from secret bytes 24..56. The two keyed words
    ///are combined with the length and `mul128_fold64`, then passed to `avalanche`.
    #[inline(always)]
    fn xxh3_64_9to16(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
        debug_assert!(input.len() >= 9 && input.len() <= 16);

        let len = input.len();
        let mask_lo =
            (read_64le_unaligned(secret, 24) ^ read_64le_unaligned(secret, 32)).wrapping_add(seed);
        let mask_hi =
            (read_64le_unaligned(secret, 40) ^ read_64le_unaligned(secret, 48)).wrapping_sub(seed);

        let keyed_lo = read_64le_unaligned(input, 0) ^ mask_lo;
        let keyed_hi = read_64le_unaligned(input, len - 8) ^ mask_hi;

        let mut sum = (len as u64).wrapping_add(keyed_lo.swap_bytes());
        sum = sum.wrapping_add(keyed_hi);
        sum = sum.wrapping_add(mul128_fold64(keyed_lo, keyed_hi));

        avalanche(sum)
    }

    ///Dispatches short inputs to the matching 64bit routine.
    ///
    ///Lengths above 8 go to `xxh3_64_9to16`, 4..=8 to `xxh3_64_4to8` and 1..=3 to
    ///`xxh3_64_1to3`. An empty input is hashed from `seed` and secret bytes 56..72 only.
    #[inline(always)]
    fn xxh3_64_0to16(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
        match input.len() {
            0 => xxh64::avalanche(
                seed ^ (read_64le_unaligned(secret, 56) ^ read_64le_unaligned(secret, 64)),
            ),
            1..=3 => xxh3_64_1to3(input, seed, secret),
            4..=8 => xxh3_64_4to8(input, seed, secret),
            _ => xxh3_64_9to16(input, seed, secret),
        }
    }

    ///Hashes a medium input into a 64bit value.
    ///
    ///Despite the `7to128` name, `xxh3_64_internal` only calls this for lengths 17..=128.
    ///The input is consumed as 16-byte chunks taken symmetrically from the front and the
    ///back. Each chunk is paired with a 16-byte secret window and fed to `mix16_b`. The
    ///number of chunk pairs grows with the length (thresholds at 32, 64 and 96 bytes).
    ///The sum of all mixes, seeded with `len * PRIME_1`, goes through `avalanche`.
    #[inline(always)]
    fn xxh3_64_7to128(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
        let len = input.len();
        //One mixing step: 16 input bytes at `input_offset` with 16 secret bytes at `secret_offset`.
        let mix = |input_offset: usize, secret_offset: usize| -> u64 {
            mix16_b(
                get_aligned_chunk_ref(input, input_offset),
                get_aligned_chunk_ref(secret, secret_offset),
                seed,
            )
        };

        let mut acc = (len as u64).wrapping_mul(xxh64::PRIME_1);

        if len > 32 {
            if len > 64 {
                if len > 96 {
                    acc = acc.wrapping_add(mix(48, 96));
                    acc = acc.wrapping_add(mix(len - 64, 112));
                }

                acc = acc.wrapping_add(mix(32, 64));
                acc = acc.wrapping_add(mix(len - 48, 80));
            }

            acc = acc.wrapping_add(mix(16, 32));
            acc = acc.wrapping_add(mix(len - 32, 48));
        }

        //The outermost pair is always mixed.
        acc = acc.wrapping_add(mix(0, 0));
        acc = acc.wrapping_add(mix(len - 16, 16));

        avalanche(acc)
    }

    ///Hashes an input of 129 to `MID_SIZE_MAX` bytes into a 64bit value.
    ///
    ///The first 8 chunks of 16 bytes are mixed against secret offsets `16 * idx` and the
    ///running sum is avalanched. The remaining full chunks are mixed against secret offsets
    ///shifted by `START_OFFSET`. Finally the last 16 bytes of the input are mixed against
    ///the secret at `SECRET_SIZE_MIN - LAST_OFFSET` and the sum is avalanched again.
    #[inline(never)]
    fn xxh3_64_129to240(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
        const START_OFFSET: usize = 3;
        const LAST_OFFSET: usize = 17;
        //Number of 16-byte rounds done before the intermediate avalanche.
        const FIRST_PASS_ROUNDS: usize = 8;

        let len = input.len();
        let mut acc = (len as u64).wrapping_mul(xxh64::PRIME_1);
        let nb_rounds = len / 16;
        debug_assert!(nb_rounds >= 8);

        for round in 0..FIRST_PASS_ROUNDS {
            let offset = 16 * round;
            acc = acc.wrapping_add(mix16_b(
                get_aligned_chunk_ref(input, offset),
                get_aligned_chunk_ref(secret, offset),
                seed,
            ));
        }
        acc = avalanche(acc);

        for round in FIRST_PASS_ROUNDS..nb_rounds {
            let secret_offset = 16 * (round - FIRST_PASS_ROUNDS) + START_OFFSET;
            acc = acc.wrapping_add(mix16_b(
                get_aligned_chunk_ref(input, 16 * round),
                get_aligned_chunk_ref(secret, secret_offset),
                seed,
            ));
        }

        //Trailing 16 bytes, which may overlap the last full round.
        acc = acc.wrapping_add(mix16_b(
            get_aligned_chunk_ref(input, len - 16),
            get_aligned_chunk_ref(secret, SECRET_SIZE_MIN - LAST_OFFSET),
            seed,
        ));

        avalanche(acc)
    }

    ///Selects the 64bit hashing routine by input length.
    ///
    ///Lengths up to 16, up to 128 and up to `MID_SIZE_MAX` are handled by the dedicated
    ///short/medium routines. Anything longer is delegated to `long_hash_fn`.
    ///`secret` is only checked against `SECRET_SIZE_MIN` in debug builds.
    #[inline(always)]
    fn xxh3_64_internal(input: &[u8], seed: u64, secret: &[u8], long_hash_fn: LongHashFn) -> u64 {
        debug_assert!(secret.len() >= SECRET_SIZE_MIN);

        match input.len() {
            0..=16 => xxh3_64_0to16(input, seed, secret),
            17..=128 => xxh3_64_7to128(input, seed, secret),
            len if len <= MID_SIZE_MAX => xxh3_64_129to240(input, seed, secret),
            _ => long_hash_fn(input, seed, secret),
        }
    }

    ///Hashes a long input into a 64bit value using `secret`.
    ///
    ///Runs `hash_long_internal_loop` over the whole input starting from `INITIAL_ACC`,
    ///then merges the accumulators with the secret at `SECRET_MERGEACCS_START`, using
    ///`len * PRIME_1` as the starting value.
    #[inline(always)]
    fn xxh3_64_long_impl(input: &[u8], secret: &[u8]) -> u64 {
        let mut accumulators = INITIAL_ACC;
        hash_long_internal_loop(&mut accumulators, input, secret);

        let merge_start = (input.len() as u64).wrapping_mul(xxh64::PRIME_1);
        merge_accs(
            &mut accumulators,
            get_aligned_chunk_ref(secret, SECRET_MERGEACCS_START),
            merge_start,
        )
    }

    ///Long-input 64bit hash for the seeded API.
    ///
    ///A zero seed uses `DEFAULT_SECRET` directly. Any other seed first derives a secret
    ///through `custom_default_secret`. The `_secret` argument is ignored.
    #[inline(never)]
    fn xxh3_64_long_with_seed(input: &[u8], seed: u64, _secret: &[u8]) -> u64 {
        if seed == 0 {
            xxh3_64_long_impl(input, &DEFAULT_SECRET)
        } else {
            let derived_secret = custom_default_secret(seed);
            xxh3_64_long_impl(input, &derived_secret)
        }
    }

    #[inline(never)]
    //Long-input 64bit hash that always uses `DEFAULT_SECRET`.
    //`_seed` and `_secret` are ignored; they are accepted so the function can be passed
    //as the `long_hash_fn` argument of `xxh3_64_internal`.
    fn xxh3_64_long_default(input: &[u8], _seed: u64, _secret: &[u8]) -> u64 {
        xxh3_64_long_impl(input, &DEFAULT_SECRET)
    }

    #[inline(never)]
    //Long-input 64bit hash that uses the caller supplied `secret` as is.
    //`_seed` is ignored; it is accepted so the function can be passed
    //as the `long_hash_fn` argument of `xxh3_64_internal`.
    fn xxh3_64_long_with_secret(input: &[u8], _seed: u64, secret: &[u8]) -> u64 {
        xxh3_64_long_impl(input, secret)
    }

    #[inline]
    ///Returns 64bit hash for provided input.
    ///
    ///Uses seed `0` and the built-in `DEFAULT_SECRET`.
    pub fn xxh3_64(input: &[u8]) -> u64 {
        xxh3_64_internal(input, 0, &DEFAULT_SECRET, xxh3_64_long_default)
    }

    #[inline]
    ///Returns 64bit hash for provided input using seed.
    ///
    ///Short and medium inputs mix `seed` directly with `DEFAULT_SECRET`; long inputs
    ///derive a new secret from `seed` on every call (see `xxh3_64_long_with_seed`).
    ///
    ///Note: While the overhead of deriving a new secret from the provided seed is low,
    ///it would be more efficient to generate the secret at compile time using the special
    ///function `const_custom_default_secret` from `const_xxh3`
    pub fn xxh3_64_with_seed(input: &[u8], seed: u64) -> u64 {
        xxh3_64_internal(input, seed, &DEFAULT_SECRET, xxh3_64_long_with_seed)
    }

    #[inline]
    ///Returns 64bit hash for provided input using custom secret.
    ///
    ///The seed is fixed to `0`. `secret` is expected to be at least `SECRET_SIZE_MIN`
    ///bytes long; `xxh3_64_internal` verifies this with a `debug_assert!` only, so the
    ///length is not checked in release builds.
    pub fn xxh3_64_with_secret(input: &[u8], secret: &[u8]) -> u64 {
        xxh3_64_internal(input, 0, secret, xxh3_64_long_with_secret)
    }

    //Size in bytes of the internal buffer of the streaming hashers (`Xxh3`, `Xxh3Default`).
    const INTERNAL_BUFFER_SIZE: usize = 256;
    //Number of stripes that make up one block when the secret has the default size.
    //`xxh3_stateful_consume_stripes` scrambles the accumulator each time this many stripes
    //have been consumed.
    const STRIPES_PER_BLOCK: usize = (DEFAULT_SECRET_SIZE - STRIPE_LEN) / SECRET_CONSUME_RATE;

    #[derive(Clone)]
    #[repr(align(64))]
    //Newtype that forces 64-byte alignment of the wrapped value.
    //The streaming hashers use it for their input buffer and their secret.
    struct Aligned64<T>(T);

    #[inline]
    //Internal function shared between Xxh3 and Xxh3Default
    //
    //Accumulates `nb_stripes` stripes read from `input` into `acc` and returns the updated
    //count of stripes consumed within the current block.
    //
    //- `nb_stripes_acc`: stripes of the current block that were already consumed; it also
    //  selects the secret offset (`nb_stripes_acc * SECRET_CONSUME_RATE`) to continue from.
    //- `input`: raw pointer to the stripes; presumably it must be valid for reads of
    //  `nb_stripes * STRIPE_LEN` bytes (the pointer is handed to `accumulate_loop`,
    //  which is not visible here).
    //
    //At most one block boundary is handled per call, so callers are expected to keep
    //`nb_stripes_acc <= STRIPES_PER_BLOCK` and to pass no more than one block of stripes.
    fn xxh3_stateful_consume_stripes(
        acc: &mut Acc,
        nb_stripes: usize,
        nb_stripes_acc: usize,
        input: *const u8,
        secret: &[u8; DEFAULT_SECRET_SIZE],
    ) -> usize {
        if (STRIPES_PER_BLOCK - nb_stripes_acc) <= nb_stripes {
            //The current block ends within this call.
            let stripes_to_end = STRIPES_PER_BLOCK - nb_stripes_acc;
            let stripes_after_end = nb_stripes - stripes_to_end;

            //Finish the current block...
            accumulate_loop(
                acc,
                input,
                slice_offset_ptr!(secret, nb_stripes_acc * SECRET_CONSUME_RATE),
                stripes_to_end,
            );
            //...scramble using the last STRIPE_LEN bytes of the secret...
            scramble_acc(
                acc,
                get_aligned_chunk_ref(secret, DEFAULT_SECRET_SIZE - STRIPE_LEN),
            );
            //...and start the next block from the beginning of the secret.
            accumulate_loop(
                acc,
                unsafe { input.add(stripes_to_end * STRIPE_LEN) },
                secret.as_ptr(),
                stripes_after_end,
            );
            //The new block has consumed only the stripes that followed the boundary.
            stripes_after_end
        } else {
            //Everything fits in the current block: accumulate and advance the counter.
            accumulate_loop(
                acc,
                input,
                slice_offset_ptr!(secret, nb_stripes_acc * SECRET_CONSUME_RATE),
                nb_stripes,
            );
            nb_stripes_acc.wrapping_add(nb_stripes)
        }
    }

    //Internal function shared between Xxh3 and Xxh3Default
    //
    //Feeds `input` into the streaming state:
    //1. `total_len` is increased by the input length.
    //2. If the input fits into the free space of `buffer`, it is only appended there.
    //3. Otherwise a partially filled buffer is topped up from the input and consumed as
    //   `INTERNAL_BUFFER_STRIPES` stripes.
    //4. While more than `INTERNAL_BUFFER_SIZE` bytes remain, they are consumed straight from
    //   the input without copying.
    //5. The remaining bytes (always at least one, at most `INTERNAL_BUFFER_SIZE`) are stored
    //   at the start of `buffer`.
    //
    //`buffered_size` is the number of valid bytes at the start of `buffer`;
    //`nb_stripes_acc` is the stripe counter maintained by `xxh3_stateful_consume_stripes`.
    #[allow(clippy::assign_op_pattern)]
    fn xxh3_stateful_update(
        input: &[u8],
        total_len: &mut u64,
        acc: &mut Acc,
        buffer: &mut Aligned64<[mem::MaybeUninit<u8>; INTERNAL_BUFFER_SIZE]>,
        buffered_size: &mut u16,
        nb_stripes_acc: &mut usize,
        secret: &Aligned64<[u8; DEFAULT_SECRET_SIZE]>,
    ) {
        const INTERNAL_BUFFER_STRIPES: usize = INTERNAL_BUFFER_SIZE / STRIPE_LEN;

        let mut input_ptr = input.as_ptr();
        let mut input_len = input.len();
        *total_len = total_len.wrapping_add(input_len as u64);

        //Fast path: everything fits into the buffer, nothing is hashed yet.
        if (input_len + *buffered_size as usize) <= INTERNAL_BUFFER_SIZE {
            unsafe {
                ptr::copy_nonoverlapping(
                    input_ptr,
                    (buffer.0.as_mut_ptr() as *mut u8).offset(*buffered_size as isize),
                    input_len,
                )
            }
            //Cannot truncate: the sum was just checked to be <= INTERNAL_BUFFER_SIZE.
            *buffered_size += input_len as u16;
            return;
        }

        //Complete a partially filled buffer with the head of the input and consume it whole.
        if *buffered_size > 0 {
            let fill_len = INTERNAL_BUFFER_SIZE - *buffered_size as usize;

            unsafe {
                ptr::copy_nonoverlapping(
                    input_ptr,
                    (buffer.0.as_mut_ptr() as *mut u8).offset(*buffered_size as isize),
                    fill_len,
                );
                input_ptr = input_ptr.add(fill_len);
                input_len -= fill_len;
            }

            *nb_stripes_acc = xxh3_stateful_consume_stripes(
                acc,
                INTERNAL_BUFFER_STRIPES,
                *nb_stripes_acc,
                buffer.0.as_ptr() as *const u8,
                &secret.0,
            );

            *buffered_size = 0;
        }

        debug_assert_ne!(input_len, 0);
        //Large remainder: hash directly from the input, one buffer-sized chunk at a time,
        //always leaving between 1 and INTERNAL_BUFFER_SIZE bytes for the buffer.
        if input_len > INTERNAL_BUFFER_SIZE {
            loop {
                *nb_stripes_acc = xxh3_stateful_consume_stripes(
                    acc,
                    INTERNAL_BUFFER_STRIPES,
                    *nb_stripes_acc,
                    input_ptr,
                    &secret.0,
                );
                input_ptr = unsafe { input_ptr.add(INTERNAL_BUFFER_SIZE) };
                input_len = input_len - INTERNAL_BUFFER_SIZE;

                if input_len <= INTERNAL_BUFFER_SIZE {
                    break;
                }
            }

            //Keep the last STRIPE_LEN bytes that were consumed at the very end of the buffer:
            //`xxh3_stateful_digest_internal` reads them back when fewer than STRIPE_LEN
            //bytes are buffered at digest time.
            unsafe {
                ptr::copy_nonoverlapping(
                    input_ptr.offset(-(STRIPE_LEN as isize)),
                    (buffer.0.as_mut_ptr() as *mut u8).add(buffer.0.len() - STRIPE_LEN),
                    STRIPE_LEN,
                )
            }
        }

        //Store the unconsumed tail of the input at the start of the (now empty) buffer.
        debug_assert_ne!(input_len, 0);
        debug_assert_eq!(*buffered_size, 0);
        unsafe { ptr::copy_nonoverlapping(input_ptr, buffer.0.as_mut_ptr() as *mut u8, input_len) }
        *buffered_size = input_len as u16;
    }

    #[inline(always)]
    //Internal function shared between Xxh3 and Xxh3Default
    //
    //Folds the still-buffered bytes into `acc` so that the accumulators can be merged.
    //
    //- `acc`: accumulators to finalize; the visible callers pass a clone of the hasher state.
    //- `nb_stripes_acc`: stripes already consumed in the current block.
    //- `buffer`: the valid buffered bytes (`buffered_input()` in the visible callers);
    //  must not be empty.
    //- `old_buffer`: the rest of the internal buffer following `buffer`
    //  (`processed_buffer()` in the visible callers), used to recover previously
    //  consumed bytes.
    fn xxh3_stateful_digest_internal(
        acc: &mut Acc,
        nb_stripes_acc: usize,
        buffer: &[u8],
        old_buffer: &[mem::MaybeUninit<u8>],
        secret: &Aligned64<[u8; DEFAULT_SECRET_SIZE]>,
    ) {
        if buffer.len() >= STRIPE_LEN {
            //Consume every stripe that ends before the last byte; the returned counter
            //is dropped.
            let nb_stripes = (buffer.len() - 1) / STRIPE_LEN;
            xxh3_stateful_consume_stripes(
                acc,
                nb_stripes,
                nb_stripes_acc,
                buffer.as_ptr(),
                &secret.0,
            );

            //Last stripe: the final STRIPE_LEN bytes of the buffered data.
            accumulate_512(
                acc,
                get_aligned_chunk_ref(buffer, buffer.len() - STRIPE_LEN),
                get_aligned_chunk_ref(
                    &secret.0,
                    DEFAULT_SECRET_SIZE - STRIPE_LEN - SECRET_LASTACC_START,
                ),
            );
        } else {
            //Fewer than STRIPE_LEN bytes are buffered: rebuild the last stripe from
            //`catchup_size` earlier bytes followed by the buffered bytes.
            let mut last_stripe = mem::MaybeUninit::<[u8; STRIPE_LEN]>::uninit();
            let catchup_size = STRIPE_LEN - buffer.len();
            debug_assert!(buffer.len() > 0);

            let last_stripe = unsafe {
                //When `old_buffer` starts right after `buffer` inside the internal buffer,
                //this offset addresses the final `catchup_size` bytes of that internal buffer.
                ptr::copy_nonoverlapping(
                    (old_buffer.as_ptr() as *const u8)
                        .add(INTERNAL_BUFFER_SIZE - buffer.len() - catchup_size),
                    last_stripe.as_mut_ptr() as _,
                    catchup_size,
                );
                ptr::copy_nonoverlapping(
                    buffer.as_ptr(),
                    (last_stripe.as_mut_ptr() as *mut u8).add(catchup_size),
                    buffer.len(),
                );
                //Both copies together filled all STRIPE_LEN bytes.
                slice::from_raw_parts(
                    last_stripe.as_ptr() as *const u8,
                    buffer.len() + catchup_size,
                )
            };

            accumulate_512(
                acc,
                get_aligned_chunk_ref(&last_stripe, 0),
                get_aligned_chunk_ref(
                    &secret.0,
                    DEFAULT_SECRET_SIZE - STRIPE_LEN - SECRET_LASTACC_START,
                ),
            );
        }
    }

    #[derive(Clone)]
    ///Default XXH3 Streaming algorithm
    ///
    ///This is an optimized version of the Xxh3 struct that always uses the default
    ///seed/secret, so it does not need to store a secret of its own.
    ///
    ///Optimal for use in hash maps
    pub struct Xxh3Default {
        //Running accumulators, starting from `INITIAL_ACC`.
        acc: Acc,
        //Input bytes that were not consumed yet; only the first `buffered_size` bytes are
        //valid input.
        buffer: Aligned64<[mem::MaybeUninit<u8>; INTERNAL_BUFFER_SIZE]>,
        //Number of valid bytes at the start of `buffer`.
        buffered_size: u16,
        //Stripes consumed so far within the current block.
        nb_stripes_acc: usize,
        //Total number of bytes passed to `update` since creation or the last `reset`.
        total_len: u64,
    }

    impl Xxh3Default {
        //Default secret wrapped in `Aligned64`, the form expected by the stateful helpers.
        const DEFAULT_SECRET: Aligned64<[u8; DEFAULT_SECRET_SIZE]> = Aligned64(DEFAULT_SECRET);

        #[inline(always)]
        ///Creates new hasher with default settings
        ///
        ///The internal buffer is left uninitialized until data is written to it.
        pub const fn new() -> Self {
            Self {
                acc: INITIAL_ACC,
                buffer: Aligned64([mem::MaybeUninit::uninit(); INTERNAL_BUFFER_SIZE]),
                buffered_size: 0,
                nb_stripes_acc: 0,
                total_len: 0,
            }
        }

        #[inline(always)]
        ///Resets state
        ///
        ///The buffer content is not cleared; it is simply marked as empty.
        pub fn reset(&mut self) {
            self.acc = INITIAL_ACC;
            self.total_len = 0;
            self.buffered_size = 0;
            self.nb_stripes_acc = 0;
        }

        #[inline(always)]
        //Returns the valid (not yet consumed) bytes at the start of the buffer.
        fn buffered_input(&self) -> &[u8] {
            let ptr = self.buffer.0.as_ptr();
            //The first `buffered_size` bytes are the ones written by `xxh3_stateful_update`.
            unsafe { slice::from_raw_parts(ptr as *const u8, self.buffered_size as usize) }
        }

        #[inline(always)]
        //Returns the part of the buffer that follows the valid bytes.
        //It is exposed as `MaybeUninit` because it may never have been written.
        fn processed_buffer(&self) -> &[mem::MaybeUninit<u8>] {
            let ptr = self.buffer.0.as_ptr();
            unsafe {
                slice::from_raw_parts(
                    ptr.add(self.buffered_size as usize),
                    self.buffer.0.len() - self.buffered_size as usize,
                )
            }
        }

        #[inline(always)]
        ///Hashes provided chunk
        pub fn update(&mut self, input: &[u8]) {
            xxh3_stateful_update(
                input,
                &mut self.total_len,
                &mut self.acc,
                &mut self.buffer,
                &mut self.buffered_size,
                &mut self.nb_stripes_acc,
                &Self::DEFAULT_SECRET,
            );
        }

        #[inline(never)]
        //64bit digest for inputs longer than `MID_SIZE_MAX`.
        //Works on a copy of the accumulators, so the hasher can keep receiving data.
        fn digest_mid_sized(&self) -> u64 {
            let mut acc = self.acc.clone();
            xxh3_stateful_digest_internal(
                &mut acc,
                self.nb_stripes_acc,
                self.buffered_input(),
                self.processed_buffer(),
                &Self::DEFAULT_SECRET,
            );

            merge_accs(
                &mut acc,
                get_aligned_chunk_ref(&Self::DEFAULT_SECRET.0, SECRET_MERGEACCS_START),
                self.total_len.wrapping_mul(xxh64::PRIME_1),
            )
        }

        #[inline(never)]
        //128bit digest for inputs longer than `MID_SIZE_MAX`.
        //The low half is merged like the 64bit digest; the high half merges the same
        //accumulators with a secret window counted from the end of the secret.
        fn digest_mid_sized_128(&self) -> u128 {
            let mut acc = self.acc.clone();
            xxh3_stateful_digest_internal(
                &mut acc,
                self.nb_stripes_acc,
                self.buffered_input(),
                self.processed_buffer(),
                &Self::DEFAULT_SECRET,
            );

            let low = merge_accs(
                &mut acc,
                get_aligned_chunk_ref(&Self::DEFAULT_SECRET.0, SECRET_MERGEACCS_START),
                self.total_len.wrapping_mul(xxh64::PRIME_1),
            );
            let high = merge_accs(
                &mut acc,
                get_aligned_chunk_ref(
                    &Self::DEFAULT_SECRET.0,
                    DEFAULT_SECRET_SIZE - mem::size_of_val(&self.acc) - SECRET_MERGEACCS_START,
                ),
                !self.total_len.wrapping_mul(xxh64::PRIME_2),
            );
            ((high as u128) << 64) | (low as u128)
        }

        ///Computes hash.
        ///
        ///Does not modify the hasher, so more data can be added afterwards.
        pub fn digest(&self) -> u64 {
            //Separating digest mid sized allows us to inline this function, which benefits
            //code generation when hashing fixed size types and/or if the seed is known.
            if self.total_len > MID_SIZE_MAX as u64 {
                self.digest_mid_sized()
            } else {
                //Short total length: the one-shot routine is run over the buffered bytes
                //(`update` only buffers until more than INTERNAL_BUFFER_SIZE bytes arrive).
                xxh3_64_internal(
                    self.buffered_input(),
                    0,
                    &Self::DEFAULT_SECRET.0,
                    xxh3_64_long_default,
                )
            }
        }

        ///Computes hash as 128bit integer.
        ///
        ///Does not modify the hasher, so more data can be added afterwards.
        pub fn digest128(&self) -> u128 {
            //Separating digest mid sized allows us to inline this function, which benefits
            //code generation when hashing fixed size types and/or if the seed is known.
            if self.total_len > MID_SIZE_MAX as u64 {
                self.digest_mid_sized_128()
            } else {
                //Short total length: the one-shot routine is run over the buffered bytes.
                xxh3_128_internal(
                    self.buffered_input(),
                    0,
                    &Self::DEFAULT_SECRET.0,
                    xxh3_128_long_default,
                )
            }
        }
    }

    impl Default for Xxh3Default {
        #[inline(always)]
        //Same as `Xxh3Default::new()`.
        fn default() -> Self {
            Self::new()
        }
    }

    //Allows `Xxh3Default` to be used wherever a standard `Hasher` is expected.
    impl hash::Hasher for Xxh3Default {
        #[inline(always)]
        //Returns the 64bit digest of everything written so far; the state is left untouched.
        fn finish(&self) -> u64 {
            self.digest()
        }

        #[inline(always)]
        //Feeds `input` into the hasher.
        fn write(&mut self, input: &[u8]) {
            self.update(input)
        }
    }

    //Allows hashing data by writing it to the hasher through `std::io::Write`.
    impl std::io::Write for Xxh3Default {
        #[inline]
        //Hashes the whole of `buf`; always succeeds and reports every byte as written.
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            self.update(buf);
            Ok(buf.len())
        }

        #[inline]
        //Nothing to flush: `write` already fed the bytes into the hasher state.
        fn flush(&mut self) -> std::io::Result<()> {
            Ok(())
        }
    }

    #[derive(Clone)]
    ///XXH3 Streaming algorithm
    ///
    ///Internal state uses rather large buffers, therefore it might be beneficial
    ///to store hasher on heap rather than stack.
    ///Implementation makes no attempts at that, leaving choice entirely to user.
    ///
    ///Note that it is better to use [Xxh3Default](struct.Xxh3Default.html) in hash maps
    ///due to Rust hash interface which requires to create new instance of hasher every time.
    pub struct Xxh3 {
        //Running accumulators, starting from `INITIAL_ACC`.
        acc: Acc,
        //Secret used for streaming: the default one, a caller supplied one,
        //or one derived from `seed`.
        custom_secret: Aligned64<[u8; DEFAULT_SECRET_SIZE]>,
        //Input bytes that were not consumed yet; only the first `buffered_size` bytes are
        //valid input.
        buffer: Aligned64<[mem::MaybeUninit<u8>; INTERNAL_BUFFER_SIZE]>,
        //Number of valid bytes at the start of `buffer`.
        buffered_size: u16,
        //Stripes consumed so far within the current block.
        nb_stripes_acc: usize,
        //Total number of bytes passed to `update` since creation or the last `reset`.
        total_len: u64,
        //Seed given at construction; `0` unless created through `with_seed`.
        seed: u64,
    }

    impl Xxh3 {
        #[inline(always)]
        ///Creates new hasher with default settings
        ///
        ///Uses seed `0` and `DEFAULT_SECRET`.
        pub const fn new() -> Self {
            Self::with_custom_ops(0, DEFAULT_SECRET)
        }

        #[inline]
        ///Creates new hasher with all options.
        ///
        ///The internal buffer is left uninitialized until data is written to it.
        const fn with_custom_ops(seed: u64, secret: [u8; DEFAULT_SECRET_SIZE]) -> Self {
            Self {
                acc: INITIAL_ACC,
                custom_secret: Aligned64(secret),
                buffer: Aligned64([mem::MaybeUninit::uninit(); INTERNAL_BUFFER_SIZE]),
                buffered_size: 0,
                nb_stripes_acc: 0,
                total_len: 0,
                seed,
            }
        }

        #[inline(always)]
        ///Creates new hasher with custom secret.
        ///
        ///The seed is set to `0`.
        pub const fn with_secret(secret: [u8; DEFAULT_SECRET_SIZE]) -> Self {
            Self::with_custom_ops(0, secret)
        }

        #[inline(always)]
        ///Creates new hasher with custom seed.
        ///
        ///The streaming secret is derived from `seed` through `custom_default_secret`.
        pub fn with_seed(seed: u64) -> Self {
            Self::with_custom_ops(seed, custom_default_secret(seed))
        }

        #[inline(always)]
        ///Resets state
        ///
        ///Seed and secret are kept. The buffer content is not cleared; it is simply
        ///marked as empty.
        pub fn reset(&mut self) {
            self.acc = INITIAL_ACC;
            self.total_len = 0;
            self.buffered_size = 0;
            self.nb_stripes_acc = 0;
        }

        #[inline(always)]
        //Returns the valid (not yet consumed) bytes at the start of the buffer.
        fn buffered_input(&self) -> &[u8] {
            let ptr = self.buffer.0.as_ptr();
            //The first `buffered_size` bytes are the ones written by `xxh3_stateful_update`.
            unsafe { slice::from_raw_parts(ptr as *const u8, self.buffered_size as usize) }
        }

        #[inline(always)]
        //Returns the part of the buffer that follows the valid bytes.
        //It is exposed as `MaybeUninit` because it may never have been written.
        fn processed_buffer(&self) -> &[mem::MaybeUninit<u8>] {
            let ptr = self.buffer.0.as_ptr();
            unsafe {
                slice::from_raw_parts(
                    ptr.add(self.buffered_size as usize),
                    self.buffer.0.len() - self.buffered_size as usize,
                )
            }
        }

        #[inline]
        ///Hashes provided chunk
        pub fn update(&mut self, input: &[u8]) {
            xxh3_stateful_update(
                input,
                &mut self.total_len,
                &mut self.acc,
                &mut self.buffer,
                &mut self.buffered_size,
                &mut self.nb_stripes_acc,
                &self.custom_secret,
            );
        }

        #[inline(never)]
        //64bit digest for inputs longer than `MID_SIZE_MAX`.
        //Works on a copy of the accumulators, so the hasher can keep receiving data.
        fn digest_mid_sized(&self) -> u64 {
            let mut acc = self.acc.clone();
            xxh3_stateful_digest_internal(
                &mut acc,
                self.nb_stripes_acc,
                self.buffered_input(),
                self.processed_buffer(),
                &self.custom_secret,
            );

            merge_accs(
                &mut acc,
                get_aligned_chunk_ref(&self.custom_secret.0, SECRET_MERGEACCS_START),
                self.total_len.wrapping_mul(xxh64::PRIME_1),
            )
        }

        #[inline(never)]
        //128bit digest for inputs longer than `MID_SIZE_MAX`.
        //The low half is merged like the 64bit digest; the high half merges the same
        //accumulators with a secret window counted from the end of the secret.
        fn digest_mid_sized_128(&self) -> u128 {
            let mut acc = self.acc.clone();
            xxh3_stateful_digest_internal(
                &mut acc,
                self.nb_stripes_acc,
                self.buffered_input(),
                self.processed_buffer(),
                &self.custom_secret,
            );

            let low = merge_accs(
                &mut acc,
                get_aligned_chunk_ref(&self.custom_secret.0, SECRET_MERGEACCS_START),
                self.total_len.wrapping_mul(xxh64::PRIME_1),
            );
            let high = merge_accs(
                &mut acc,
                get_aligned_chunk_ref(
                    &self.custom_secret.0,
                    self.custom_secret.0.len()
                        - mem::size_of_val(&self.acc)
                        - SECRET_MERGEACCS_START,
                ),
                !self.total_len.wrapping_mul(xxh64::PRIME_2),
            );
            ((high as u128) << 64) | (low as u128)
        }

        ///Computes hash.
        ///
        ///Does not modify the hasher, so more data can be added afterwards.
        pub fn digest(&self) -> u64 {
            //Separating digest mid sized allows us to inline this function, which benefits
            //code generation when hashing fixed size types and/or if the seed is known.
            if self.total_len > MID_SIZE_MAX as u64 {
                self.digest_mid_sized()
            } else if self.seed > 0 {
                //Technically we should not need to use it.
                //But in all actuality original xxh3 implementation uses default secret for input with size less or equal to MID_SIZE_MAX
                //So a seeded hasher combines the seed with `DEFAULT_SECRET` here rather
                //than with the secret derived from the seed.
                xxh3_64_internal(
                    self.buffered_input(),
                    self.seed,
                    &DEFAULT_SECRET,
                    xxh3_64_long_with_seed,
                )
            } else {
                //Seed is 0: use whatever secret the hasher was constructed with.
                xxh3_64_internal(
                    self.buffered_input(),
                    self.seed,
                    &self.custom_secret.0,
                    xxh3_64_long_with_secret,
                )
            }
        }

        ///Computes hash as 128bit integer.
        ///
        ///Does not modify the hasher, so more data can be added afterwards.
        pub fn digest128(&self) -> u128 {
            //Separating digest mid sized allows us to inline this function, which benefits
            //code generation when hashing fixed size types and/or if the seed is known.
            if self.total_len > MID_SIZE_MAX as u64 {
                self.digest_mid_sized_128()
            } else if self.seed > 0 {
                //Technically we should not need to use it.
                //But in all actuality original xxh3 implementation uses default secret for input with size less or equal to MID_SIZE_MAX
                //So a seeded hasher combines the seed with `DEFAULT_SECRET` here rather
                //than with the secret derived from the seed.
                xxh3_128_internal(
                    self.buffered_input(),
                    self.seed,
                    &DEFAULT_SECRET,
                    xxh3_128_long_with_seed,
                )
            } else {
                //Seed is 0: use whatever secret the hasher was constructed with.
                xxh3_128_internal(
                    self.buffered_input(),
                    self.seed,
                    &self.custom_secret.0,
                    xxh3_128_long_with_secret,
                )
            }
        }
    }

    impl Default for Xxh3 {
        #[inline(always)]
        //Same as `Xxh3::new()`.
        fn default() -> Self {
            Self::new()
        }
    }

    //Allows `Xxh3` to be used wherever a standard `Hasher` is expected.
    impl core::hash::Hasher for Xxh3 {
        #[inline(always)]
        //Returns the 64bit digest of everything written so far; the state is left untouched.
        fn finish(&self) -> u64 {
            self.digest()
        }

        #[inline(always)]
        //Feeds `input` into the hasher.
        fn write(&mut self, input: &[u8]) {
            self.update(input)
        }
    }

    //Allows hashing data by writing it to the hasher through `std::io::Write`.
    impl std::io::Write for Xxh3 {
        #[inline]
        //Hashes the whole of `buf`; always succeeds and reports every byte as written.
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            self.update(buf);
            Ok(buf.len())
        }

        #[inline]
        //Nothing to flush: `write` already fed the bytes into the hasher state.
        fn flush(&mut self) -> std::io::Result<()> {
            Ok(())
        }
    }

    ///Hashes a long input into a 128bit value using `secret`.
    ///
    ///After `hash_long_internal_loop` has processed the whole input, the accumulators are
    ///merged twice. The low half uses the secret at `SECRET_MERGEACCS_START` with
    ///`len * PRIME_1`. The high half uses a window counted from the end of the secret with
    ///the bitwise complement of `len * PRIME_2`.
    #[inline]
    fn xxh3_128_long_impl(input: &[u8], secret: &[u8]) -> u128 {
        let mut accumulators = INITIAL_ACC;
        hash_long_internal_loop(&mut accumulators, input, secret);

        debug_assert!(secret.len() >= mem::size_of::<Acc>() + SECRET_MERGEACCS_START);
        let len = input.len() as u64;

        let low = merge_accs(
            &mut accumulators,
            get_aligned_chunk_ref(secret, SECRET_MERGEACCS_START),
            len.wrapping_mul(xxh64::PRIME_1),
        );

        let high_secret_offset = secret.len() - mem::size_of::<Acc>() - SECRET_MERGEACCS_START;
        let high = merge_accs(
            &mut accumulators,
            get_aligned_chunk_ref(secret, high_secret_offset),
            !len.wrapping_mul(xxh64::PRIME_2),
        );

        ((high as u128) << 64) | (low as u128)
    }

    ///Hashes an input of 9 to 16 bytes into a 128bit value.
    ///
    ///The first and last 8 bytes of `input` are combined with two seed-adjusted masks
    ///taken from secret bytes 32..64 through two 64x64->128 multiplications. Both halves
    ///of the result are passed through `avalanche`.
    #[inline(always)]
    fn xxh3_128_9to16(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
        let len = input.len();
        let mask_lo =
            (read_64le_unaligned(secret, 32) ^ read_64le_unaligned(secret, 40)).wrapping_sub(seed);
        let mask_hi =
            (read_64le_unaligned(secret, 48) ^ read_64le_unaligned(secret, 56)).wrapping_add(seed);
        let word_lo = read_64le_unaligned(input, 0);
        let word_hi = read_64le_unaligned(input, len - 8);

        //First multiplication: both input words keyed with the low mask.
        let (prod_lo, prod_hi) = mul64_to128(word_lo ^ word_hi ^ mask_lo, xxh64::PRIME_1);
        let prod_lo = prod_lo.wrapping_add((len as u64 - 1) << 54);

        let keyed_hi = word_hi ^ mask_hi;
        let prod_hi = prod_hi
            .wrapping_add(keyed_hi.wrapping_add(mult32_to64(keyed_hi as u32, xxh32::PRIME_2 - 1)));

        let prod_lo = prod_lo ^ prod_hi.swap_bytes();

        //Second multiplication folds the high product back in.
        let (out_lo, out_hi) = mul64_to128(prod_lo, xxh64::PRIME_2);
        let out_hi = out_hi.wrapping_add(prod_hi.wrapping_mul(xxh64::PRIME_2));

        to_u128!(avalanche(out_lo), avalanche(out_hi))
    }

    ///Hashes an input of 4 to 8 bytes into a 128bit value.
    ///
    ///The first and last 4 bytes of `input` form one 64bit word. It is keyed with a mask
    ///from secret bytes 16..32 and the byte-swap-mixed seed, then multiplied by a
    ///length-dependent constant. The two product halves are mixed further into the result.
    #[inline(always)]
    fn xxh3_128_4to8(input: &[u8], mut seed: u64, secret: &[u8]) -> u128 {
        //Fold the byte-swapped low half of the seed into its high half.
        seed ^= ((seed as u32).swap_bytes() as u64) << 32;

        let len = input.len();
        let head = read_32le_unaligned(input, 0);
        let tail = read_32le_unaligned(input, len - 4);
        let combined = (head as u64).wrapping_add((tail as u64) << 32);

        let secret_mix = read_64le_unaligned(secret, 16) ^ read_64le_unaligned(secret, 24);
        let mask = secret_mix.wrapping_add(seed);
        let multiplier = xxh64::PRIME_1.wrapping_add((len as u64) << 2);

        let (prod_lo, prod_hi) = mul64_to128(combined ^ mask, multiplier);

        let high = prod_hi.wrapping_add(prod_lo << 1);
        let low = prod_lo ^ (high >> 3);

        let low = xorshift64(low, 35).wrapping_mul(0x9FB21C651E98DF25);
        let low = xorshift64(low, 28);
        let high = avalanche(high);

        low as u128 | (high as u128) << 64
    }

    ///Hashes an input of 1 to 3 bytes into a 128bit value.
    ///
    ///The first, middle and last bytes plus the length are packed into a 32bit word. A
    ///byte-swapped and rotated copy of it serves as the high word. Each word is keyed with
    ///a seed-adjusted mask from the first 16 secret bytes and run through `xxh64::avalanche`.
    ///
    ///The caller must pass a non-empty `input`: bytes are read without bounds checks.
    #[inline(always)]
    fn xxh3_128_1to3(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
        let len = input.len();
        //Relies on the caller guaranteeing `len >= 1`, which keeps all three indices in bounds.
        let (first, middle, last) = unsafe {
            (
                *input.get_unchecked(0),
                *input.get_unchecked(len >> 1),
                *input.get_unchecked(len - 1),
            )
        };

        let packed_lo = (first as u32) << 16
            | (middle as u32) << 24
            | (last as u32) << 0
            | (len as u32) << 8;
        let packed_hi = packed_lo.swap_bytes().rotate_left(13);

        let secret_lo =
            read_32le_unaligned(secret, 0) as u64 ^ read_32le_unaligned(secret, 4) as u64;
        let secret_hi =
            read_32le_unaligned(secret, 8) as u64 ^ read_32le_unaligned(secret, 12) as u64;

        let low = xxh64::avalanche(packed_lo as u64 ^ secret_lo.wrapping_add(seed));
        let high = xxh64::avalanche(packed_hi as u64 ^ secret_hi.wrapping_sub(seed));

        low as u128 | (high as u128) << 64
    }

    ///Dispatches short inputs to the matching 128bit routine.
    ///
    ///Lengths above 8 go to `xxh3_128_9to16`, 4..=8 to `xxh3_128_4to8` and 1..=3 to
    ///`xxh3_128_1to3`. An empty input is hashed from `seed` and secret bytes 64..96 only.
    #[inline(always)]
    fn xxh3_128_0to16(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
        match input.len() {
            0 => {
                let mask_lo = read_64le_unaligned(secret, 64) ^ read_64le_unaligned(secret, 72);
                let mask_hi = read_64le_unaligned(secret, 80) ^ read_64le_unaligned(secret, 88);
                let low = xxh64::avalanche(seed ^ mask_lo);
                let high = xxh64::avalanche(seed ^ mask_hi);
                low as u128 | (high as u128) << 64
            }
            1..=3 => xxh3_128_1to3(input, seed, secret),
            4..=8 => xxh3_128_4to8(input, seed, secret),
            _ => xxh3_128_9to16(input, seed, secret),
        }
    }

    #[inline(always)]
    fn xxh3_128_7to128(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
        let mut lo = (input.len() as u64).wrapping_mul(xxh64::PRIME_1);
        let mut hi = 0;

        if input.len() > 32 {
            if input.len() > 64 {
                if input.len() > 96 {
                    mix32_b(
                        &mut lo,
                        &mut hi,
                        get_aligned_chunk_ref(input, 48),
                        get_aligned_chunk_ref(input, input.len() - 64),
                        get_aligned_chunk_ref(secret, 96),
                        seed,
                    );
                }

                mix32_b(
                    &mut lo,
                    &mut hi,
                    get_aligned_chunk_ref(input, 32),
                    get_aligned_chunk_ref(input, input.len() - 48),
                    get_aligned_chunk_ref(secret, 64),
                    seed,
                );
            }

            mix32_b(
                &mut lo,
                &mut hi,
                get_aligned_chunk_ref(input, 16),
                get_aligned_chunk_ref(input, input.len() - 32),
                get_aligned_chunk_ref(secret, 32),
                seed,
            );
        }

        mix32_b(
            &mut lo,
            &mut hi,
            get_aligned_chunk_ref(input, 0),
            get_aligned_chunk_ref(input, input.len() - 16),
            get_aligned_chunk_ref(secret, 0),
            seed,
        );

        to_u128!(
            avalanche(lo.wrapping_add(hi)),
            0u64.wrapping_sub(avalanche(
                lo.wrapping_mul(xxh64::PRIME_1)
                    .wrapping_add(hi.wrapping_mul(xxh64::PRIME_4))
                    .wrapping_add(
                        (input.len() as u64)
                            .wrapping_sub(seed)
                            .wrapping_mul(xxh64::PRIME_2)
                    )
            ))
        )
    }

    #[inline(never)]
    /// Hashes an input by consuming it in 32-byte rounds, then mixing in the
    /// last 32 bytes once more.
    ///
    /// `xxh3_128_internal` calls this for lengths above 128 and up to
    /// `MID_SIZE_MAX`. The first four rounds run unconditionally, so the
    /// input must hold at least 128 bytes (checked in debug builds only).
    fn xxh3_128_129to240(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
        const START_OFFSET: usize = 3;
        const LAST_OFFSET: usize = 17;
        const HEAD_ROUNDS: usize = 4;
        const ROUND_LEN: usize = 32;

        let len = input.len();
        let nb_rounds = len / ROUND_LEN;
        debug_assert!(nb_rounds >= 4);

        let mut lo = (len as u64).wrapping_mul(xxh64::PRIME_1);
        let mut hi = 0u64;

        // Head: the first four rounds read the secret at the same offset as the input.
        for round in 0..HEAD_ROUNDS {
            let at = ROUND_LEN * round;
            mix32_b(
                &mut lo,
                &mut hi,
                get_aligned_chunk_ref(input, at),
                get_aligned_chunk_ref(input, at + 16),
                get_aligned_chunk_ref(secret, at),
                seed,
            );
        }

        lo = avalanche(lo);
        hi = avalanche(hi);

        // Remaining full rounds restart the secret offset at START_OFFSET.
        for round in HEAD_ROUNDS..nb_rounds {
            let at = ROUND_LEN * round;
            let key_at = START_OFFSET.wrapping_add(ROUND_LEN * (round - HEAD_ROUNDS));
            mix32_b(
                &mut lo,
                &mut hi,
                get_aligned_chunk_ref(input, at),
                get_aligned_chunk_ref(input, at + 16),
                get_aligned_chunk_ref(secret, key_at),
                seed,
            );
        }

        // Tail: the last 32 bytes, passed with the final 16 bytes first and
        // mixed with the negated seed.
        mix32_b(
            &mut lo,
            &mut hi,
            get_aligned_chunk_ref(input, len - 16),
            get_aligned_chunk_ref(input, len - 32),
            get_aligned_chunk_ref(secret, SECRET_SIZE_MIN - LAST_OFFSET - 16),
            0u64.wrapping_sub(seed),
        );

        let low_half = avalanche(lo.wrapping_add(hi));

        // The high half additionally folds in the length and the seed.
        let len_term = (len as u64)
            .wrapping_sub(seed)
            .wrapping_mul(xxh64::PRIME_2);
        let high_input = lo
            .wrapping_mul(xxh64::PRIME_1)
            .wrapping_add(hi.wrapping_mul(xxh64::PRIME_4))
            .wrapping_add(len_term);

        to_u128!(low_half, 0u64.wrapping_sub(avalanche(high_input)))
    }

    #[inline(always)]
    /// Picks the 128-bit hashing routine that matches the length of `input`.
    ///
    /// Inputs up to 16 bytes, up to 128 bytes and up to `MID_SIZE_MAX` bytes
    /// each have a dedicated routine; anything longer is handed to
    /// `long_hash_fn`. `secret` must be at least `SECRET_SIZE_MIN` bytes
    /// long, which is verified in debug builds only.
    fn xxh3_128_internal(
        input: &[u8],
        seed: u64,
        secret: &[u8],
        long_hash_fn: LongHashFn128,
    ) -> u128 {
        debug_assert!(secret.len() >= SECRET_SIZE_MIN);

        match input.len() {
            0..=16 => xxh3_128_0to16(input, seed, secret),
            17..=128 => xxh3_128_7to128(input, seed, secret),
            len if len <= MID_SIZE_MAX => xxh3_128_129to240(input, seed, secret),
            _ => long_hash_fn(input, seed, secret),
        }
    }

    #[inline(never)]
    /// Long-input routine for the unseeded hash: always uses `DEFAULT_SECRET`.
    ///
    /// The seed and secret arguments are ignored; they are present so that
    /// this function can be passed as the long-input callback of
    /// `xxh3_128_internal`.
    fn xxh3_128_long_default(input: &[u8], _seed: u64, _secret: &[u8]) -> u128 {
        let secret: &[u8] = &DEFAULT_SECRET;
        xxh3_128_long_impl(input, secret)
    }

    #[inline(never)]
    /// Long-input routine for the seeded hash.
    ///
    /// A zero seed hashes with `DEFAULT_SECRET` as-is; any other seed hashes
    /// with the secret produced by `custom_default_secret(seed)`. The
    /// `_secret` argument is ignored.
    fn xxh3_128_long_with_seed(input: &[u8], seed: u64, _secret: &[u8]) -> u128 {
        if seed == 0 {
            xxh3_128_long_impl(input, &DEFAULT_SECRET)
        } else {
            xxh3_128_long_impl(input, &custom_default_secret(seed))
        }
    }

    #[inline(never)]
    /// Long-input routine for the custom-secret hash.
    ///
    /// Hashes `input` with the caller-provided `secret`; the `_seed`
    /// argument is ignored.
    fn xxh3_128_long_with_secret(input: &[u8], _seed: u64, secret: &[u8]) -> u128 {
        // The secret is forwarded untouched; no seed-derived secret is built here.
        xxh3_128_long_impl(input, secret)
    }

    #[inline]
    /// Returns the 128-bit hash of `input`, computed with a zero seed and
    /// `DEFAULT_SECRET`.
    pub fn xxh3_128(input: &[u8]) -> u128 {
        const NO_SEED: u64 = 0;
        xxh3_128_internal(input, NO_SEED, &DEFAULT_SECRET, xxh3_128_long_default)
    }

    #[inline]
    ///Returns 128 hash for provided input using seed.
    ///
    ///Note: While overhead of deriving new secret from provided seed is low,
    ///it would more efficient to generate secret at compile time using special function
    ///`const_custom_default_secret` from `const_xxh3`
    pub fn xxh3_128_with_seed(input: &[u8], seed: u64) -> u128 {
        xxh3_128_internal(input, seed, &DEFAULT_SECRET, xxh3_128_long_with_seed)
    }

    #[inline]
    /// Returns the 128-bit hash of `input`, computed with a caller-provided
    /// `secret` and a zero seed.
    ///
    /// `secret` is expected to hold at least `SECRET_SIZE_MIN` bytes; that
    /// requirement is only checked by a `debug_assert!` in
    /// `xxh3_128_internal`.
    pub fn xxh3_128_with_secret(input: &[u8], secret: &[u8]) -> u128 {
        const NO_SEED: u64 = 0;
        xxh3_128_internal(input, NO_SEED, secret, xxh3_128_long_with_secret)
    }
}

/// Stable hasher made to be very fast, with predictable output.
///
/// Written bytes are first collected in an inline buffer (`Small`). Once a
/// write would not fit into that buffer, the hasher switches to a streaming
/// XXH3 state (`Large`) and stays there.
#[allow(clippy::large_enum_variant)]
pub enum NoatunHasher {
    /// Buffered state: a 64-byte buffer plus the number of bytes stored in
    /// it so far. Only that prefix of the buffer is hashed by `finish`.
    Small([u8; 64], usize),
    /// Streaming state: all bytes written so far have been fed into this
    /// `Xxh3Default` instance.
    Large(Xxh3Default),
}

impl Default for NoatunHasher {
    #[inline]
    fn default() -> Self {
        NoatunHasher::Small([0; 64], 0)
    }
}

impl NoatunHasher {
    /// Creates an empty hasher; equivalent to `NoatunHasher::default()`.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Moves the hasher into the streaming (`Large`) state and feeds it
    /// `more_bytes`.
    ///
    /// In the `Small` state the bytes buffered so far are fed into a fresh
    /// `Xxh3Default` first, followed by `more_bytes`, so the stream sees the
    /// same byte sequence that was written to the hasher.
    ///
    /// If the hasher is already in the `Large` state, `more_bytes` is simply
    /// appended to the existing stream. This method is public, so it must
    /// not panic when called twice in a row.
    #[cold]
    pub fn transform(&mut self, more_bytes: &[u8]) {
        match self {
            NoatunHasher::Small(data, ptr) => {
                let mut d = Xxh3Default::default();
                // Replay whatever was buffered before switching over.
                if *ptr > 0 {
                    d.update(&data[0..*ptr]);
                }
                d.update(more_bytes);
                *self = NoatunHasher::Large(d);
            }
            // Already streaming: behave like a regular write instead of panicking.
            NoatunHasher::Large(d) => d.update(more_bytes),
        }
    }
}

impl Hasher for NoatunHasher {
    /// Returns the 64-bit hash of everything written so far.
    ///
    /// In the `Small` state the buffered prefix is hashed in one shot with
    /// `xxh3_64`; in the `Large` state the streaming digest is returned.
    #[inline]
    fn finish(&self) -> u64 {
        match self {
            Self::Large(stream) => stream.digest(),
            Self::Small(buf, used) => xxh3::xxh3_64(&buf[..*used]),
        }
    }

    /// Appends `a_bytes` to the hashed input.
    ///
    /// While the bytes still fit into the inline buffer they are only
    /// copied there. The first write that does not fit switches the hasher
    /// to the streaming state via `transform`.
    #[inline]
    fn write(&mut self, a_bytes: &[u8]) {
        let (buf, used) = match self {
            Self::Large(stream) => {
                stream.update(a_bytes);
                return;
            }
            Self::Small(buf, used) => (buf, used),
        };

        let end = *used + a_bytes.len();
        if end > buf.len() {
            // Does not fit: hand the buffered bytes and the new ones to the stream.
            self.transform(a_bytes);
            return;
        }

        buf[*used..end].copy_from_slice(a_bytes);
        *used = end;
    }
}

#[cfg(test)]
mod xxh3_tests {
    use crate::xxh3_vendored::xxh3::xxh3_64;
    use crate::xxh3_vendored::NoatunHasher;
    use std::hash::Hasher;

    /// Input that is much longer than the hasher's 64-byte inline buffer.
    const LONG_INPUT: &[u8] = b"5678 (Nyctereutes procyonoides) ar ett hunddjur som placeras som ensam art i slaktet Nyctereutes. Den har sitt ursprungliga utbredningsomrade. 5678 (Nyctereutes procyonoides) ar ett hunddjur som placeras som ensam art i slaktet Nyctereutes. Den har sitt ursprungliga utbredningsomrade. 5678 (Nyctereutes procyonoides) ar ett hunddjur som placeras som ensam art i slaktet Nyctereutes. Den har sitt ursprungliga utbredningsomrade";

    /// Pinned hash of `b"1234"`.
    const SHORT_HASH: u64 = 9777848219803310049;
    /// Pinned hash of `LONG_INPUT`.
    const LONG_HASH: u64 = 2773116291557897730;

    /// The hasher must produce the pinned values both while buffering and
    /// after switching to the streaming state.
    #[test]
    fn test_hasher() {
        let mut short = NoatunHasher::new();
        short.write(b"1234");
        assert_eq!(short.finish(), SHORT_HASH);

        // Write the long input in two pieces: the 4-byte prefix is buffered,
        // the remainder overflows the inline buffer.
        let (head, tail) = LONG_INPUT.split_at(4);
        let mut long = NoatunHasher::new();
        long.write(head);
        long.write(tail);
        assert_eq!(long.finish(), LONG_HASH);
    }

    /// The one-shot function must keep returning the pinned values.
    #[test]
    fn test_xxh3() {
        let cases: [(&[u8], u64); 3] = [
            (&b"1234"[..], SHORT_HASH),
            (&b"5678"[..], 14901654952795293208),
            (LONG_INPUT, LONG_HASH),
        ];

        for (input, expected) in cases {
            let result = xxh3_64(input);
            println!("{result:?}");
            assert_eq!(result, expected);
        }
    }
}