twox_hash/
xxhash3_128.rs

1//! The implementation of XXH3_128.
2
3#![deny(
4    clippy::missing_safety_doc,
5    clippy::undocumented_unsafe_blocks,
6    unsafe_op_in_unsafe_fn
7)]
8
9use crate::{
10    xxhash3::{primes::*, *},
11    IntoU128 as _, IntoU64 as _,
12};
13
14pub use crate::xxhash3::{
15    FixedBuffer, FixedMutBuffer, OneshotWithSecretError, SecretBuffer, SecretTooShortError,
16    SecretWithSeedError, DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH,
17};
18
19/// Calculates the 128-bit hash.
20#[derive(Clone)]
21/// TODO: does not implement hash.
22pub struct Hasher {
23    #[cfg(feature = "alloc")]
24    inner: AllocRawHasher,
25    _private: (),
26}
27
28impl Hasher {
29    /// Hash all data at once. If you can use this function, you may
30    /// see noticable speed gains for certain types of input.
31    #[must_use]
32    #[inline]
33    pub fn oneshot(input: &[u8]) -> u128 {
34        impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input)
35    }
36
37    /// Hash all data at once using the provided seed and a secret
38    /// derived from the seed. If you can use this function, you may
39    /// see noticable speed gains for certain types of input.
40    #[must_use]
41    #[inline]
42    pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u128 {
43        let mut secret = DEFAULT_SECRET_RAW;
44
45        // We know that the secret will only be used if we have more
46        // than 240 bytes, so don't waste time computing it otherwise.
47        if input.len() > CUTOFF {
48            derive_secret(seed, &mut secret);
49        }
50
51        let secret = Secret::new(&secret).expect("The default secret length is invalid");
52
53        impl_oneshot(secret, seed, input)
54    }
55
56    /// Hash all data at once using the provided secret and the
57    /// default seed. If you can use this function, you may see
58    /// noticable speed gains for certain types of input.
59    #[inline]
60    pub fn oneshot_with_secret(
61        secret: &[u8],
62        input: &[u8],
63    ) -> Result<u128, OneshotWithSecretError> {
64        let secret = Secret::new(secret).map_err(OneshotWithSecretError)?;
65        Ok(impl_oneshot(secret, DEFAULT_SEED, input))
66    }
67
68    /// Hash all data at once using the provided seed and secret. If
69    /// you can use this function, you may see noticable speed gains
70    /// for certain types of input.
71    #[inline]
72    pub fn oneshot_with_seed_and_secret(
73        seed: u64,
74        secret: &[u8],
75        input: &[u8],
76    ) -> Result<u128, OneshotWithSecretError> {
77        let secret = if input.len() > CUTOFF {
78            Secret::new(secret).map_err(OneshotWithSecretError)?
79        } else {
80            DEFAULT_SECRET
81        };
82
83        Ok(impl_oneshot(secret, seed, input))
84    }
85}
86#[cfg(feature = "alloc")]
87#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
88mod with_alloc {
89    use ::alloc::boxed::Box;
90
91    use super::*;
92
93    impl Hasher {
94        /// Constructs the hasher using the default seed and secret values.
95        pub fn new() -> Self {
96            Self {
97                inner: RawHasherCore::allocate_default(),
98                _private: (),
99            }
100        }
101
102        /// Constructs the hasher using the provided seed and a secret
103        /// derived from the seed.
104        pub fn with_seed(seed: u64) -> Self {
105            Self {
106                inner: RawHasherCore::allocate_with_seed(seed),
107                _private: (),
108            }
109        }
110
111        /// Constructs the hasher using the provided seed and secret.
112        pub fn with_seed_and_secret(
113            seed: u64,
114            secret: impl Into<Box<[u8]>>,
115        ) -> Result<Self, SecretTooShortError<Box<[u8]>>> {
116            Ok(Self {
117                inner: RawHasherCore::allocate_with_seed_and_secret(seed, secret)?,
118                _private: (),
119            })
120        }
121
122        /// Returns the secret.
123        pub fn into_secret(self) -> Box<[u8]> {
124            self.inner.into_secret()
125        }
126
127        /// Writes some data into this `Hasher`.
128        #[inline]
129        pub fn write(&mut self, input: &[u8]) {
130            self.inner.write(input);
131        }
132
133        /// Returns the hash value for the values written so
134        /// far. Unlike [`std::hash::Hasher::finish`][], this method
135        /// returns the complete 128-bit value calculated, not a
136        /// 64-bit value.
137        #[inline]
138        pub fn finish_128(&self) -> u128 {
139            self.inner.finish(Finalize128)
140        }
141    }
142
143    impl Default for Hasher {
144        fn default() -> Self {
145            Self::new()
146        }
147    }
148}
149
150#[derive(Clone)]
151/// A lower-level interface for computing a hash from streaming data.
152///
153/// The algorithm requires a secret which can be a reasonably large
154/// piece of data. [`Hasher`][] makes one concrete implementation
155/// decision that uses dynamic memory allocation, but specialized
156/// usages may desire more flexibility. This type, combined with
157/// [`SecretBuffer`][], offer that flexibility at the cost of a
158/// generic type.
159pub struct RawHasher<S>(RawHasherCore<S>);
160
161impl<S> RawHasher<S> {
162    /// Construct the hasher with the provided seed, secret, and
163    /// temporary buffer.
164    pub fn new(secret_buffer: SecretBuffer<S>) -> Self {
165        Self(RawHasherCore::new(secret_buffer))
166    }
167
168    /// Returns the secret.
169    pub fn into_secret(self) -> S {
170        self.0.into_secret()
171    }
172}
173
174impl<S> RawHasher<S>
175where
176    S: FixedBuffer,
177{
178    /// Writes some data into this `Hasher`.
179    #[inline]
180    pub fn write(&mut self, input: &[u8]) {
181        self.0.write(input);
182    }
183
184    /// Returns the hash value for the values written so
185    /// far. Unlike [`std::hash::Hasher::finish`][], this method
186    /// returns the complete 128-bit value calculated, not a
187    /// 64-bit value.
188    #[inline]
189    pub fn finish_128(&self) -> u128 {
190        self.0.finish(Finalize128)
191    }
192}
193
194struct Finalize128;
195
196impl Finalize for Finalize128 {
197    type Output = u128;
198
199    #[inline]
200    fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output {
201        impl_oneshot(secret, seed, input)
202    }
203
204    #[inline]
205    fn large(
206        &self,
207        vector: impl Vector,
208        acc: [u64; 8],
209        last_block: &[u8],
210        last_stripe: &[u8; 64],
211        secret: &Secret,
212        len: usize,
213    ) -> Self::Output {
214        Algorithm(vector).finalize_128(acc, last_block, last_stripe, secret, len)
215    }
216}
217
218#[inline(always)]
219fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
220    match input.len() {
221        241.. => impl_241_plus_bytes(secret, input),
222
223        129..=240 => impl_129_to_240_bytes(secret, seed, input),
224
225        17..=128 => impl_17_to_128_bytes(secret, seed, input),
226
227        9..=16 => impl_9_to_16_bytes(secret, seed, input),
228
229        4..=8 => impl_4_to_8_bytes(secret, seed, input),
230
231        1..=3 => impl_1_to_3_bytes(secret, seed, input),
232
233        0 => impl_0_bytes(secret, seed),
234    }
235}
236
237#[inline(always)]
238fn impl_0_bytes(secret: &Secret, seed: u64) -> u128 {
239    let secret_words = secret.for_128().words_for_0();
240
241    let low = avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]);
242    let high = avalanche_xxh64(seed ^ secret_words[2] ^ secret_words[3]);
243
244    X128 { low, high }.into()
245}
246
247#[inline(always)]
248fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
249    assert_input_range!(1..=3, input.len());
250
251    let combined = impl_1_to_3_bytes_combined(input);
252    let secret_words = secret.for_128().words_for_1_to_3();
253
254    let low = {
255        let secret = (secret_words[0] ^ secret_words[1]).into_u64();
256        secret.wrapping_add(seed) ^ combined.into_u64()
257    };
258    let high = {
259        let secret = (secret_words[2] ^ secret_words[3]).into_u64();
260        secret.wrapping_sub(seed) ^ combined.swap_bytes().rotate_left(13).into_u64()
261    };
262
263    let low = avalanche_xxh64(low);
264    let high = avalanche_xxh64(high);
265
266    X128 { low, high }.into()
267}
268
269#[inline(always)]
270fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
271    assert_input_range!(4..=8, input.len());
272    let input_first = input.first_u32().unwrap();
273    let input_last = input.last_u32().unwrap();
274
275    let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32);
276    let secret_words = secret.for_128().words_for_4_to_8();
277
278    let combined = input_first.into_u64() | (input_last.into_u64() << 32);
279    let lhs = {
280        let a = secret_words[0] ^ secret_words[1];
281        let b = a.wrapping_add(modified_seed);
282        b ^ combined
283    };
284    let rhs = PRIME64_1.wrapping_add(input.len().into_u64() << 2);
285    let mul_result = lhs.into_u128().wrapping_mul(rhs.into_u128());
286
287    let mut high = mul_result.upper_half();
288    let mut low = mul_result.lower_half();
289
290    high = high.wrapping_add(low << 1);
291
292    low ^= high >> 3;
293    low ^= low >> 35;
294    low = low.wrapping_mul(PRIME_MX2);
295    low ^= low >> 28;
296
297    high = avalanche(high);
298
299    X128 { low, high }.into()
300}
301
302#[inline(always)]
303fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
304    assert_input_range!(9..=16, input.len());
305    let input_first = input.first_u64().unwrap();
306    let input_last = input.last_u64().unwrap();
307
308    let secret_words = secret.for_128().words_for_9_to_16();
309    let val1 = ((secret_words[0] ^ secret_words[1]).wrapping_sub(seed)) ^ input_first ^ input_last;
310    let val2 = ((secret_words[2] ^ secret_words[3]).wrapping_add(seed)) ^ input_last;
311    let mul_result = val1.into_u128().wrapping_mul(PRIME64_1.into_u128());
312    let low = mul_result
313        .lower_half()
314        .wrapping_add((input.len() - 1).into_u64() << 54);
315
316    // Algorithm describes this in two ways
317    let high = mul_result
318        .upper_half()
319        .wrapping_add(val2.upper_half().into_u64() << 32)
320        .wrapping_add(val2.lower_half().into_u64().wrapping_mul(PRIME32_2));
321
322    let low = low ^ high.swap_bytes();
323
324    // Algorithm describes this multiplication in two ways.
325    let q = X128 { low, high }
326        .into_u128()
327        .wrapping_mul(PRIME64_2.into_u128());
328
329    let low = avalanche(q.lower_half());
330    let high = avalanche(q.upper_half());
331
332    X128 { low, high }.into()
333}
334
335#[inline]
336fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
337    assert_input_range!(17..=128, input.len());
338    let input_len = input.len().into_u64();
339    let mut acc = [input_len.wrapping_mul(PRIME64_1), 0];
340
341    impl_17_to_128_bytes_iter(secret, input, |fwd, bwd, secret| {
342        mix_two_chunks(&mut acc, fwd, bwd, secret, seed);
343    });
344
345    finalize_medium(acc, input_len, seed)
346}
347
348#[inline]
349fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
350    assert_input_range!(129..=240, input.len());
351    let input_len = input.len().into_u64();
352    let mut acc = [input_len.wrapping_mul(PRIME64_1), 0];
353
354    let head = pairs_of_u64_bytes(input);
355    let mut head = head.iter();
356
357    let ss = secret.for_128().words_for_127_to_240_part1();
358    for (input, secret) in head.by_ref().zip(ss).take(4) {
359        mix_two_chunks(&mut acc, &input[0], &input[1], secret, seed);
360    }
361
362    let mut acc = acc.map(avalanche);
363
364    let ss = secret.for_128().words_for_127_to_240_part2();
365    for (input, secret) in head.zip(ss) {
366        mix_two_chunks(&mut acc, &input[0], &input[1], secret, seed);
367    }
368
369    let (_, tail) = input.bp_as_rchunks::<16>();
370    let (_, tail) = tail.bp_as_rchunks::<2>();
371    let tail = tail.last().unwrap();
372    let ss = secret.for_128().words_for_127_to_240_part3();
373
374    // note that the half-chunk order and the seed is different here
375    mix_two_chunks(&mut acc, &tail[1], &tail[0], ss, seed.wrapping_neg());
376
377    finalize_medium(acc, input_len, seed)
378}
379
380#[inline]
381fn mix_two_chunks(
382    acc: &mut [u64; 2],
383    data1: &[u8; 16],
384    data2: &[u8; 16],
385    secret: &[[u8; 16]; 2],
386    seed: u64,
387) {
388    let data_words1 = to_u64s(data1);
389    let data_words2 = to_u64s(data2);
390
391    acc[0] = acc[0].wrapping_add(mix_step(data1, &secret[0], seed));
392    acc[1] = acc[1].wrapping_add(mix_step(data2, &secret[1], seed));
393    acc[0] ^= data_words2[0].wrapping_add(data_words2[1]);
394    acc[1] ^= data_words1[0].wrapping_add(data_words1[1]);
395}
396
397#[inline]
398fn finalize_medium(acc: [u64; 2], input_len: u64, seed: u64) -> u128 {
399    let low = acc[0].wrapping_add(acc[1]);
400    let high = acc[0]
401        .wrapping_mul(PRIME64_1)
402        .wrapping_add(acc[1].wrapping_mul(PRIME64_4))
403        .wrapping_add((input_len.wrapping_sub(seed)).wrapping_mul(PRIME64_2));
404
405    let low = avalanche(low);
406    let high = avalanche(high).wrapping_neg();
407
408    X128 { low, high }.into()
409}
410
411#[inline]
412fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u128 {
413    assert_input_range!(241.., input.len());
414    dispatch! {
415        fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u128
416        []
417    }
418}
419
420#[inline]
421fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u128 {
422    Algorithm(vector).oneshot(secret, input, Finalize128)
423}
424
425#[cfg(test)]
426mod test {
427    use crate::xxhash3::test::bytes;
428
429    use super::*;
430
431    const _: () = {
432        const fn is_clone<T: Clone>() {}
433        is_clone::<Hasher>();
434    };
435
436    const EMPTY_BYTES: [u8; 0] = [];
437
438    fn hash_byte_by_byte(input: &[u8]) -> u128 {
439        let mut hasher = Hasher::new();
440        for byte in input.chunks(1) {
441            hasher.write(byte)
442        }
443        hasher.finish_128()
444    }
445
446    #[test]
447    fn oneshot_empty() {
448        let hash = Hasher::oneshot(&EMPTY_BYTES);
449        assert_eq!(hash, 0x99aa_06d3_0147_98d8_6001_c324_468d_497f);
450    }
451
452    #[test]
453    fn streaming_empty() {
454        let hash = hash_byte_by_byte(&EMPTY_BYTES);
455        assert_eq!(hash, 0x99aa_06d3_0147_98d8_6001_c324_468d_497f);
456    }
457
458    #[test]
459    fn oneshot_1_to_3_bytes() {
460        test_1_to_3_bytes(Hasher::oneshot)
461    }
462
463    #[test]
464    fn streaming_1_to_3_bytes() {
465        test_1_to_3_bytes(hash_byte_by_byte)
466    }
467
468    #[track_caller]
469    fn test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u128) {
470        let inputs = bytes![1, 2, 3];
471
472        let expected = [
473            0xa6cd_5e93_9200_0f6a_c44b_dff4_074e_ecdb,
474            0x6a4a_5274_c1b0_d3ad_d664_5fc3_051a_9457,
475            0xe3b5_5f57_945a_17cf_5f42_99fc_161c_9cbb,
476        ];
477
478        for (input, expected) in inputs.iter().zip(expected) {
479            let hash = f(input);
480            assert_eq!(hash, expected, "input was {} bytes", input.len());
481        }
482    }
483
484    #[test]
485    fn oneshot_4_to_8_bytes() {
486        test_4_to_8_bytes(Hasher::oneshot)
487    }
488
489    #[test]
490    fn streaming_4_to_8_bytes() {
491        test_4_to_8_bytes(hash_byte_by_byte)
492    }
493
494    #[track_caller]
495    fn test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u128) {
496        let inputs = bytes![4, 5, 6, 7, 8];
497
498        let expected = [
499            0xeb70_bf5f_c779_e9e6_a611_1d53_e80a_3db5,
500            0x9434_5321_06a7_c141_c920_d234_7a85_929b,
501            0x545f_093d_32b1_68fe_a6b5_2f4d_ea38_96a3,
502            0x61ce_291b_c3a4_357d_dbb2_0782_1e6d_5efe,
503            0xe1e4_432a_6221_7fe4_cfd5_0c61_c8bb_98c1,
504        ];
505
506        for (input, expected) in inputs.iter().zip(expected) {
507            let hash = f(input);
508            assert_eq!(hash, expected, "input was {} bytes", input.len());
509        }
510    }
511
512    #[test]
513    fn oneshot_9_to_16_bytes() {
514        test_9_to_16_bytes(Hasher::oneshot)
515    }
516
517    #[test]
518    fn streaming_9_to_16_bytes() {
519        test_9_to_16_bytes(hash_byte_by_byte)
520    }
521
522    #[track_caller]
523    fn test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u128) {
524        let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16];
525
526        let expected = [
527            0x16c7_69d8_3e4a_ebce_9079_3197_9dca_3746,
528            0xbd93_0669_a87b_4b37_e67b_f1ad_8dcf_73a8,
529            0xacad_8071_8f47_d494_7d67_cfc1_730f_22a3,
530            0x38f9_2247_a7f7_3cc5_7780_eb31_198f_13ca,
531            0xae92_e123_e947_2408_bd79_5526_1902_66c0,
532            0x5f91_e6bf_7418_cfaa_55d6_5715_e2a5_7c31,
533            0x301a_9f75_4e8f_569a_0017_ea4b_e19b_c787,
534            0x7295_0631_8276_07e2_8428_12cc_870d_cae2,
535        ];
536
537        for (input, expected) in inputs.iter().zip(expected) {
538            let hash = f(input);
539            assert_eq!(hash, expected, "input was {} bytes", input.len());
540        }
541    }
542
543    #[test]
544    fn oneshot_17_to_128_bytes() {
545        test_17_to_128_bytes(Hasher::oneshot)
546    }
547
548    #[test]
549    fn streaming_17_to_128_bytes() {
550        test_17_to_128_bytes(hash_byte_by_byte)
551    }
552
553    #[track_caller]
554    fn test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u128) {
555        let lower_boundary = bytes![17, 18, 19];
556        let chunk_boundary = bytes![31, 32, 33];
557        let upper_boundary = bytes![126, 127, 128];
558
559        let inputs = lower_boundary
560            .iter()
561            .chain(chunk_boundary)
562            .chain(upper_boundary);
563
564        let expected = [
565            // lower_boundary
566            0x685b_c458_b37d_057f_c06e_233d_f772_9217,
567            0x87ce_996b_b557_6d8d_e3a3_c96b_b0af_2c23,
568            0x7619_bcef_2e31_1cd8_c47d_dc58_8737_93df,
569            // chunk_boundary
570            0x4ed3_946d_393b_687b_b54d_e399_3874_ed20,
571            0x25e7_c9b3_424c_eed2_457d_9566_b6fc_d697,
572            0x0217_5c3a_abb0_0637_e08d_8495_1339_de86,
573            // upper_boundary
574            0x0abc_2062_87ce_2afe_5181_0be2_9323_2106,
575            0xd5ad_d870_c9c9_e00f_060c_2e3d_df0f_2fb9,
576            0x1479_2fc3_af88_dc6c_0532_1a0b_64d6_7b41,
577        ];
578
579        for (input, expected) in inputs.zip(expected) {
580            let hash = f(input);
581            assert_eq!(hash, expected, "input was {} bytes", input.len());
582        }
583    }
584
585    #[test]
586    fn oneshot_129_to_240_bytes() {
587        test_129_to_240_bytes(Hasher::oneshot)
588    }
589
590    #[test]
591    fn streaming_129_to_240_bytes() {
592        test_129_to_240_bytes(hash_byte_by_byte)
593    }
594
595    #[track_caller]
596    fn test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u128) {
597        let lower_boundary = bytes![129, 130, 131];
598        let upper_boundary = bytes![238, 239, 240];
599
600        let inputs = lower_boundary.iter().chain(upper_boundary);
601
602        let expected = [
603            // lower_boundary
604            0xdd5e_74ac_6b45_f54e_bc30_b633_82b0_9a3b,
605            0x6cd2_e56a_10f1_e707_3ec5_f135_d0a7_d28f,
606            0x6da7_92f1_702d_4494_5609_cfc7_9dba_18fd,
607            // upper_boundary
608            0x73a9_e8f7_bd32_83c8_2a9b_ddd0_e5c4_014c,
609            0x9843_ab31_a06b_e0df_fe21_3746_28fc_c539,
610            0x65b5_be86_da55_40e7_c92b_68e1_6f83_bbb6,
611        ];
612
613        for (input, expected) in inputs.zip(expected) {
614            let hash = f(input);
615            assert_eq!(hash, expected, "input was {} bytes", input.len());
616        }
617    }
618
619    #[test]
620    fn oneshot_241_plus_bytes() {
621        test_241_plus_bytes(Hasher::oneshot)
622    }
623
624    #[test]
625    fn streaming_241_plus_bytes() {
626        test_241_plus_bytes(hash_byte_by_byte)
627    }
628
629    #[track_caller]
630    fn test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u128) {
631        let inputs = bytes![241, 242, 243, 244, 1024, 10240];
632
633        let expected = [
634            0x1da1_cb61_bcb8_a2a1_02e8_cd95_421c_6d02,
635            0x1623_84cb_44d1_d806_ddcb_33c4_9405_1832,
636            0xbd2e_9fcf_378c_35e9_8835_f952_9193_e3dc,
637            0x3ff4_93d7_a813_7ab6_bc17_c91e_c3cf_8d7f,
638            0xd0ac_1f7b_93bf_57b9_e5d7_8baf_a45b_2aa5,
639            0x4f63_75cc_a7ec_e1e1_bcd6_3266_df6e_2244,
640        ];
641
642        for (input, expected) in inputs.iter().zip(expected) {
643            let hash = f(input);
644            eprintln!("{hash:032x}\n{expected:032x}");
645            assert_eq!(hash, expected, "input was {} bytes", input.len());
646        }
647    }
648}