twox_hash/
xxhash3_64.rs

1//! The implementation of XXH3_64.
2
3#![deny(
4    clippy::missing_safety_doc,
5    clippy::undocumented_unsafe_blocks,
6    unsafe_op_in_unsafe_fn
7)]
8
9use core::hash;
10
11use crate::{
12    xxhash3::{primes::*, *},
13    IntoU128 as _, IntoU64 as _,
14};
15
16pub use crate::xxhash3::{
17    FixedBuffer, FixedMutBuffer, OneshotWithSecretError, SecretBuffer, SecretTooShortError,
18    SecretWithSeedError, DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH,
19};
20
21/// Calculates the 64-bit hash.
22#[derive(Clone)]
23pub struct Hasher {
24    #[cfg(feature = "alloc")]
25    inner: AllocRawHasher,
26    _private: (),
27}
28
29impl Hasher {
30    /// Hash all data at once. If you can use this function, you may
31    /// see noticable speed gains for certain types of input.
32    #[must_use]
33    #[inline]
34    pub fn oneshot(input: &[u8]) -> u64 {
35        impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input)
36    }
37
38    /// Hash all data at once using the provided seed and a secret
39    /// derived from the seed. If you can use this function, you may
40    /// see noticable speed gains for certain types of input.
41    #[must_use]
42    #[inline]
43    pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 {
44        let mut secret = DEFAULT_SECRET_RAW;
45
46        // We know that the secret will only be used if we have more
47        // than 240 bytes, so don't waste time computing it otherwise.
48        if input.len() > CUTOFF {
49            derive_secret(seed, &mut secret);
50        }
51
52        let secret = Secret::new(&secret).expect("The default secret length is invalid");
53
54        impl_oneshot(secret, seed, input)
55    }
56
57    /// Hash all data at once using the provided secret and the
58    /// default seed. If you can use this function, you may see
59    /// noticable speed gains for certain types of input.
60    #[inline]
61    pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result<u64, OneshotWithSecretError> {
62        let secret = Secret::new(secret).map_err(OneshotWithSecretError)?;
63        Ok(impl_oneshot(secret, DEFAULT_SEED, input))
64    }
65
66    /// Hash all data at once using the provided seed and secret. If
67    /// you can use this function, you may see noticable speed gains
68    /// for certain types of input.
69    #[inline]
70    pub fn oneshot_with_seed_and_secret(
71        seed: u64,
72        secret: &[u8],
73        input: &[u8],
74    ) -> Result<u64, OneshotWithSecretError> {
75        let secret = if input.len() > CUTOFF {
76            Secret::new(secret).map_err(OneshotWithSecretError)?
77        } else {
78            DEFAULT_SECRET
79        };
80
81        Ok(impl_oneshot(secret, seed, input))
82    }
83}
84
85#[cfg(feature = "alloc")]
86#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
87mod with_alloc {
88    use ::alloc::boxed::Box;
89
90    use super::*;
91
92    impl Hasher {
93        /// Constructs the hasher using the default seed and secret values.
94        pub fn new() -> Self {
95            Self {
96                inner: RawHasherCore::allocate_default(),
97                _private: (),
98            }
99        }
100
101        /// Constructs the hasher using the provided seed and a secret
102        /// derived from the seed.
103        pub fn with_seed(seed: u64) -> Self {
104            Self {
105                inner: RawHasherCore::allocate_with_seed(seed),
106                _private: (),
107            }
108        }
109
110        /// Constructs the hasher using the provided seed and secret.
111        pub fn with_seed_and_secret(
112            seed: u64,
113            secret: impl Into<Box<[u8]>>,
114        ) -> Result<Self, SecretTooShortError<Box<[u8]>>> {
115            Ok(Self {
116                inner: RawHasherCore::allocate_with_seed_and_secret(seed, secret)?,
117                _private: (),
118            })
119        }
120
121        /// Returns the secret.
122        pub fn into_secret(self) -> Box<[u8]> {
123            self.inner.into_secret()
124        }
125    }
126
127    impl Default for Hasher {
128        fn default() -> Self {
129            Self::new()
130        }
131    }
132
133    impl hash::Hasher for Hasher {
134        #[inline]
135        fn write(&mut self, input: &[u8]) {
136            self.inner.write(input)
137        }
138
139        #[inline]
140        fn finish(&self) -> u64 {
141            self.inner.finish(Finalize64)
142        }
143    }
144}
145
146#[derive(Clone)]
147/// A lower-level interface for computing a hash from streaming data.
148///
149/// The algorithm requires a secret which can be a reasonably large
150/// piece of data. [`Hasher`][] makes one concrete implementation
151/// decision that uses dynamic memory allocation, but specialized
152/// usages may desire more flexibility. This type, combined with
153/// [`SecretBuffer`][], offer that flexibility at the cost of a
154/// generic type.
155pub struct RawHasher<S>(RawHasherCore<S>);
156
157impl<S> RawHasher<S> {
158    /// Construct the hasher with the provided seed, secret, and
159    /// temporary buffer.
160    pub fn new(secret_buffer: SecretBuffer<S>) -> Self {
161        Self(RawHasherCore::new(secret_buffer))
162    }
163
164    /// Returns the secret.
165    pub fn into_secret(self) -> S {
166        self.0.into_secret()
167    }
168}
169
170impl<S> hash::Hasher for RawHasher<S>
171where
172    S: FixedBuffer,
173{
174    #[inline]
175    fn write(&mut self, input: &[u8]) {
176        self.0.write(input);
177    }
178
179    #[inline]
180    fn finish(&self) -> u64 {
181        self.0.finish(Finalize64)
182    }
183}
184
185struct Finalize64;
186
187impl Finalize for Finalize64 {
188    type Output = u64;
189
190    #[inline(always)]
191    fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output {
192        impl_oneshot(secret, seed, input)
193    }
194
195    #[inline(always)]
196    fn large(
197        &self,
198        vector: impl Vector,
199        acc: [u64; 8],
200        last_block: &[u8],
201        last_stripe: &[u8; 64],
202        secret: &Secret,
203        len: usize,
204    ) -> Self::Output {
205        Algorithm(vector).finalize_64(acc, last_block, last_stripe, secret, len)
206    }
207}
208
209#[inline(always)]
210fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
211    match input.len() {
212        241.. => impl_241_plus_bytes(secret, input),
213
214        129..=240 => impl_129_to_240_bytes(secret, seed, input),
215
216        17..=128 => impl_17_to_128_bytes(secret, seed, input),
217
218        9..=16 => impl_9_to_16_bytes(secret, seed, input),
219
220        4..=8 => impl_4_to_8_bytes(secret, seed, input),
221
222        1..=3 => impl_1_to_3_bytes(secret, seed, input),
223
224        0 => impl_0_bytes(secret, seed),
225    }
226}
227
228#[inline(always)]
229fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 {
230    let secret_words = secret.for_64().words_for_0();
231    avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1])
232}
233
234#[inline(always)]
235fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
236    assert_input_range!(1..=3, input.len());
237    let combined = impl_1_to_3_bytes_combined(input);
238
239    let secret_words = secret.for_64().words_for_1_to_3();
240
241    let value = {
242        let secret = (secret_words[0] ^ secret_words[1]).into_u64();
243        secret.wrapping_add(seed) ^ combined.into_u64()
244    };
245
246    // FUTURE: TEST: "Note that the XXH3-64 result is the lower half of XXH3-128 result."
247    avalanche_xxh64(value)
248}
249
250#[inline(always)]
251fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
252    assert_input_range!(4..=8, input.len());
253    let input_first = input.first_u32().unwrap();
254    let input_last = input.last_u32().unwrap();
255
256    let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32);
257    let secret_words = secret.for_64().words_for_4_to_8();
258
259    let combined = input_last.into_u64() | (input_first.into_u64() << 32);
260
261    let mut value = {
262        let a = secret_words[0] ^ secret_words[1];
263        let b = a.wrapping_sub(modified_seed);
264        b ^ combined
265    };
266    value ^= value.rotate_left(49) ^ value.rotate_left(24);
267    value = value.wrapping_mul(PRIME_MX2);
268    value ^= (value >> 35).wrapping_add(input.len().into_u64());
269    value = value.wrapping_mul(PRIME_MX2);
270    value ^= value >> 28;
271    value
272}
273
274#[inline(always)]
275fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
276    assert_input_range!(9..=16, input.len());
277    let input_first = input.first_u64().unwrap();
278    let input_last = input.last_u64().unwrap();
279
280    let secret_words = secret.for_64().words_for_9_to_16();
281    let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first;
282    let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last;
283    let mul_result = low.into_u128().wrapping_mul(high.into_u128());
284    let value = input
285        .len()
286        .into_u64()
287        .wrapping_add(low.swap_bytes())
288        .wrapping_add(high)
289        .wrapping_add(mul_result.lower_half() ^ mul_result.upper_half());
290
291    avalanche(value)
292}
293
294#[inline]
295fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
296    assert_input_range!(17..=128, input.len());
297    let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1);
298
299    impl_17_to_128_bytes_iter(secret, input, |fwd, bwd, secret| {
300        acc = acc.wrapping_add(mix_step(fwd, &secret[0], seed));
301        acc = acc.wrapping_add(mix_step(bwd, &secret[1], seed));
302    });
303
304    avalanche(acc)
305}
306
307#[inline]
308fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
309    assert_input_range!(129..=240, input.len());
310    let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1);
311
312    let (head, _) = input.bp_as_chunks();
313    let mut head = head.iter();
314
315    let ss = secret.for_64().words_for_127_to_240_part1();
316    for (chunk, secret) in head.by_ref().zip(ss).take(8) {
317        acc = acc.wrapping_add(mix_step(chunk, secret, seed));
318    }
319
320    acc = avalanche(acc);
321
322    let ss = secret.for_64().words_for_127_to_240_part2();
323    for (chunk, secret) in head.zip(ss) {
324        acc = acc.wrapping_add(mix_step(chunk, secret, seed));
325    }
326
327    let last_chunk = input.last_chunk().unwrap();
328    let ss = secret.for_64().words_for_127_to_240_part3();
329    acc = acc.wrapping_add(mix_step(last_chunk, ss, seed));
330
331    avalanche(acc)
332}
333
334#[inline]
335fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 {
336    assert_input_range!(241.., input.len());
337    dispatch! {
338        fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u64
339        []
340    }
341}
342
343#[inline]
344fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 {
345    Algorithm(vector).oneshot(secret, input, Finalize64)
346}
347
348#[cfg(test)]
349mod test {
350    use std::hash::Hasher as _;
351
352    use crate::xxhash3::test::bytes;
353
354    use super::*;
355
356    const _: () = {
357        const fn is_clone<T: Clone>() {}
358        is_clone::<Hasher>();
359    };
360
361    const EMPTY_BYTES: [u8; 0] = [];
362
363    fn hash_byte_by_byte(input: &[u8]) -> u64 {
364        let mut hasher = Hasher::new();
365        for byte in input.chunks(1) {
366            hasher.write(byte)
367        }
368        hasher.finish()
369    }
370
371    fn hash_byte_by_byte_with_seed(seed: u64, input: &[u8]) -> u64 {
372        let mut hasher = Hasher::with_seed(seed);
373        for byte in input.chunks(1) {
374            hasher.write(byte)
375        }
376        hasher.finish()
377    }
378
379    #[test]
380    fn oneshot_empty() {
381        let hash = Hasher::oneshot(&EMPTY_BYTES);
382        assert_eq!(hash, 0x2d06_8005_38d3_94c2);
383    }
384
385    #[test]
386    fn streaming_empty() {
387        let hash = hash_byte_by_byte(&EMPTY_BYTES);
388        assert_eq!(hash, 0x2d06_8005_38d3_94c2);
389    }
390
391    #[test]
392    fn oneshot_1_to_3_bytes() {
393        test_1_to_3_bytes(Hasher::oneshot)
394    }
395
396    #[test]
397    fn streaming_1_to_3_bytes() {
398        test_1_to_3_bytes(hash_byte_by_byte)
399    }
400
401    #[track_caller]
402    fn test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u64) {
403        let inputs = bytes![1, 2, 3];
404
405        let expected = [
406            0xc44b_dff4_074e_ecdb,
407            0xd664_5fc3_051a_9457,
408            0x5f42_99fc_161c_9cbb,
409        ];
410
411        for (input, expected) in inputs.iter().zip(expected) {
412            let hash = f(input);
413            assert_eq!(hash, expected, "input was {} bytes", input.len());
414        }
415    }
416
417    #[test]
418    fn oneshot_4_to_8_bytes() {
419        test_4_to_8_bytes(Hasher::oneshot)
420    }
421
422    #[test]
423    fn streaming_4_to_8_bytes() {
424        test_4_to_8_bytes(hash_byte_by_byte)
425    }
426
427    #[track_caller]
428    fn test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u64) {
429        let inputs = bytes![4, 5, 6, 7, 8];
430
431        let expected = [
432            0x60da_b036_a582_11f2,
433            0xb075_753a_84ca_0fbe,
434            0xa658_4d1d_9a6a_e704,
435            0x0cd2_084a_6240_6b69,
436            0x3a1c_2d7c_85af_88f8,
437        ];
438
439        for (input, expected) in inputs.iter().zip(expected) {
440            let hash = f(input);
441            assert_eq!(hash, expected, "input was {} bytes", input.len());
442        }
443    }
444
445    #[test]
446    fn oneshot_9_to_16_bytes() {
447        test_9_to_16_bytes(Hasher::oneshot)
448    }
449
450    #[test]
451    fn streaming_9_to_16_bytes() {
452        test_9_to_16_bytes(hash_byte_by_byte)
453    }
454
455    #[track_caller]
456    fn test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u64) {
457        let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16];
458
459        let expected = [
460            0xe961_2598_145b_b9dc,
461            0xab69_a08e_f83d_8f77,
462            0x1cf3_96aa_4de6_198d,
463            0x5ace_6a51_1c10_894b,
464            0xb7a5_d8a8_309a_2cb9,
465            0x4cf4_5c94_4a9a_2237,
466            0x55ec_edc2_b87b_b042,
467            0x8355_e3a6_f617_70db,
468        ];
469
470        for (input, expected) in inputs.iter().zip(expected) {
471            let hash = f(input);
472            assert_eq!(hash, expected, "input was {} bytes", input.len());
473        }
474    }
475
476    #[test]
477    fn oneshot_17_to_128_bytes() {
478        test_17_to_128_bytes(Hasher::oneshot)
479    }
480
481    #[test]
482    fn streaming_17_to_128_bytes() {
483        test_17_to_128_bytes(hash_byte_by_byte)
484    }
485
486    #[track_caller]
487    fn test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u64) {
488        let lower_boundary = bytes![17, 18, 19];
489        let chunk_boundary = bytes![31, 32, 33];
490        let upper_boundary = bytes![126, 127, 128];
491
492        let inputs = lower_boundary
493            .iter()
494            .chain(chunk_boundary)
495            .chain(upper_boundary);
496
497        let expected = [
498            // lower_boundary
499            0x9ef3_41a9_9de3_7328,
500            0xf691_2490_d4c0_eed5,
501            0x60e7_2614_3cf5_0312,
502            // chunk_boundary
503            0x4f36_db8e_4df3_78fd,
504            0x3523_581f_e96e_4c05,
505            0xe68c_56ba_8899_1e58,
506            // upper_boundary
507            0x6c2a_9eb7_459c_dc61,
508            0x120b_9787_f842_5f2f,
509            0x85c6_174c_7ff4_c46b,
510        ];
511
512        for (input, expected) in inputs.zip(expected) {
513            let hash = f(input);
514            assert_eq!(hash, expected, "input was {} bytes", input.len());
515        }
516    }
517
518    #[test]
519    fn oneshot_129_to_240_bytes() {
520        test_129_to_240_bytes(Hasher::oneshot)
521    }
522
523    #[test]
524    fn streaming_129_to_240_bytes() {
525        test_129_to_240_bytes(hash_byte_by_byte)
526    }
527
528    #[track_caller]
529    fn test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u64) {
530        let lower_boundary = bytes![129, 130, 131];
531        let upper_boundary = bytes![238, 239, 240];
532
533        let inputs = lower_boundary.iter().chain(upper_boundary);
534
535        let expected = [
536            // lower_boundary
537            0xec76_42b4_31ba_3e5a,
538            0x4d32_24b1_0090_8a87,
539            0xe57f_7ea6_741f_e3a0,
540            // upper_boundary
541            0x3044_9a0b_4899_dee9,
542            0x972b_14e3_c46f_214b,
543            0x375a_384d_957f_e865,
544        ];
545
546        for (input, expected) in inputs.zip(expected) {
547            let hash = f(input);
548            assert_eq!(hash, expected, "input was {} bytes", input.len());
549        }
550    }
551
552    #[test]
553    fn oneshot_241_plus_bytes() {
554        test_241_plus_bytes(Hasher::oneshot)
555    }
556
557    #[test]
558    fn streaming_241_plus_bytes() {
559        test_241_plus_bytes(hash_byte_by_byte)
560    }
561
562    #[track_caller]
563    fn test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u64) {
564        let inputs = bytes![241, 242, 243, 244, 1024, 10240];
565
566        let expected = [
567            0x02e8_cd95_421c_6d02,
568            0xddcb_33c4_9405_1832,
569            0x8835_f952_9193_e3dc,
570            0xbc17_c91e_c3cf_8d7f,
571            0xe5d7_8baf_a45b_2aa5,
572            0xbcd6_3266_df6e_2244,
573        ];
574
575        for (input, expected) in inputs.iter().zip(expected) {
576            let hash = f(input);
577            assert_eq!(hash, expected, "input was {} bytes", input.len());
578        }
579    }
580
581    #[test]
582    fn oneshot_with_seed() {
583        test_with_seed(Hasher::oneshot_with_seed)
584    }
585
586    #[test]
587    fn streaming_with_seed() {
588        test_with_seed(hash_byte_by_byte_with_seed)
589    }
590
591    #[track_caller]
592    fn test_with_seed(mut f: impl FnMut(u64, &[u8]) -> u64) {
593        let inputs = bytes![0, 1, 4, 9, 17, 129, 241, 1024];
594
595        let expected = [
596            0x4aed_e683_89c0_e311,
597            0x78fc_079a_75aa_f3c0,
598            0x1b73_06b8_9f25_4507,
599            0x7df7_627f_d1f9_39b6,
600            0x49ca_0fff_0950_1622,
601            0x2bfd_caec_30ff_3000,
602            0xf984_56bc_25be_0901,
603            0x2483_9f0f_cdf4_d078,
604        ];
605
606        for (input, expected) in inputs.iter().zip(expected) {
607            let hash = f(0xdead_cafe, input);
608            assert_eq!(hash, expected, "input was {} bytes", input.len());
609        }
610    }
611}