fx_hash/
lib.rs

1//! A speedy, non-cryptographic hashing algorithm used by `rustc`.
2//!
3//! Fork of [rustc-hash](https://github.com/rust-lang/rustc-hash).
4//! This is for personal usage only, please use `rustc-hash`.
5//!
6//! # Example
7//!
8//! ```rust
9//! # #[cfg(feature = "std")]
10//! # fn main() {
11//! use fx_hash::{FxHashMap, FxHashMapExt};
12//!
13//! let mut map: FxHashMap<u32, u32> = FxHashMap::new();
14//! map.insert(22, 44);
15//! # }
16//! # #[cfg(not(feature = "std"))]
17//! # fn main() { }
18//! ```
19
20#![no_std]
21#![cfg_attr(feature = "nightly", feature(hasher_prefixfree_extras))]
22
23#[cfg(feature = "std")]
24extern crate std;
25
26mod seeded_state;
27
28use core::default::Default;
29use core::hash::{BuildHasher, Hasher};
30#[cfg(feature = "std")]
31use std::collections::{HashMap, HashSet};
32
33/// Type alias for a hash map that uses the Fx hashing algorithm.
34#[cfg(feature = "std")]
35pub type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>;
36
37/// Type alias for a hash set that uses the Fx hashing algorithm.
38#[cfg(feature = "std")]
39pub type FxHashSet<V> = HashSet<V, FxBuildHasher>;
40
41pub use seeded_state::FxSeededState;
42#[cfg(feature = "std")]
43pub use seeded_state::{FxHashMapSeed, FxHashSetSeed};
44
45/// A trait to add a `new` constructor for `FxHashMap`.
46///
47/// # Example
48/// ```
49/// use fx_hash::{FxHashMap, FxHashMapExt};
50///
51/// // Create a new FxHashMap
52/// let map: FxHashMap<String, i32> = FxHashMap::new();
53/// ```
54pub trait FxHashMapExt<K, V> {
55    /// Creates a new instance with the default `FxBuildHasher`.
56    fn new() -> Self;
57}
58
59impl<K, V> FxHashMapExt<K, V> for FxHashMap<K, V> {
60    fn new() -> Self {
61        HashMap::with_hasher(FxBuildHasher)
62    }
63}
64
65/// A trait to add a `new` constructor for `FxHashSet`.
66///
67/// # Example
68/// ```
69/// use fx_hash::{FxHashSet, FxHashSetExt};
70///
71/// // Create a new FxHashSet
72/// let set: FxHashSet<String> = FxHashSet::new();
73/// ```
74pub trait FxHashSetExt<V> {
75    /// Creates a new instance with the default `FxBuildHasher`.
76    fn new() -> Self;
77}
78
79impl<V> FxHashSetExt<V> for FxHashSet<V> {
80    fn new() -> Self {
81        HashSet::with_hasher(FxBuildHasher)
82    }
83}
84
85/// A speedy hash algorithm for use within rustc. The hashmap in liballoc
86/// by default uses SipHash which isn't quite as speedy as we want. In the
87/// compiler we're not really worried about DOS attempts, so we use a fast
88/// non-cryptographic hash.
89///
90/// The current implementation is a fast polynomial hash with a single
91/// bit rotation as a finishing step designed by Orson Peters.
92#[derive(Clone)]
93pub struct FxHasher {
94    hash: usize,
95}
96
97// One might view a polynomial hash
98//    m[0] * k    + m[1] * k^2  + m[2] * k^3  + ...
99// as a multilinear hash with keystream k[..]
100//    m[0] * k[0] + m[1] * k[1] + m[2] * k[2] + ...
101// where keystream k just happens to be generated using a multiplicative
102// congruential pseudorandom number generator (MCG). For that reason we chose a
103// constant that was found to be good for a MCG in:
104//     "Computationally Easy, Spectrally Good Multipliers for Congruential
105//     Pseudorandom Number Generators" by Guy Steele and Sebastiano Vigna.
106#[cfg(target_pointer_width = "64")]
107const K: usize = 0xf1357aea2e62a9c5;
108#[cfg(target_pointer_width = "32")]
109const K: usize = 0x93d765dd;
110
111impl FxHasher {
112    /// Creates a `fx` hasher with a given seed.
113    pub const fn with_seed(seed: usize) -> FxHasher {
114        FxHasher { hash: seed }
115    }
116
117    /// Creates a default `fx` hasher.
118    pub const fn default() -> FxHasher {
119        FxHasher { hash: 0 }
120    }
121
122    #[inline]
123    fn add_to_hash(&mut self, i: usize) {
124        self.hash = self.hash.wrapping_add(i).wrapping_mul(K);
125    }
126}
127
128impl Default for FxHasher {
129    #[inline]
130    fn default() -> FxHasher {
131        Self::default()
132    }
133}
134
135impl Hasher for FxHasher {
136    #[inline]
137    fn write(&mut self, bytes: &[u8]) {
138        // Compress the byte string to a single u64 and add to our hash.
139        self.write_u64(hash_bytes(bytes));
140    }
141
142    #[inline]
143    fn write_u8(&mut self, i: u8) {
144        self.add_to_hash(i as usize);
145    }
146
147    #[inline]
148    fn write_u16(&mut self, i: u16) {
149        self.add_to_hash(i as usize);
150    }
151
152    #[inline]
153    fn write_u32(&mut self, i: u32) {
154        self.add_to_hash(i as usize);
155    }
156
157    #[inline]
158    fn write_u64(&mut self, i: u64) {
159        self.add_to_hash(i as usize);
160        #[cfg(target_pointer_width = "32")]
161        self.add_to_hash((i >> 32) as usize);
162    }
163
164    #[inline]
165    fn write_u128(&mut self, i: u128) {
166        self.add_to_hash(i as usize);
167        #[cfg(target_pointer_width = "32")]
168        self.add_to_hash((i >> 32) as usize);
169        self.add_to_hash((i >> 64) as usize);
170        #[cfg(target_pointer_width = "32")]
171        self.add_to_hash((i >> 96) as usize);
172    }
173
174    #[inline]
175    fn write_usize(&mut self, i: usize) {
176        self.add_to_hash(i);
177    }
178
179    #[cfg(feature = "nightly")]
180    #[inline]
181    fn write_length_prefix(&mut self, _len: usize) {
182        // Most cases will specialize hash_slice to call write(), which encodes
183        // the length already in a more efficient manner than we could here. For
184        // HashDoS-resistance you would still need to include this for the
185        // non-slice collection hashes, but for the purposes of rustc we do not
186        // care and do not wish to pay the performance penalty of mixing in len
187        // for those collections.
188    }
189
190    #[cfg(feature = "nightly")]
191    #[inline]
192    fn write_str(&mut self, s: &str) {
193        // Similarly here, write already encodes the length, so nothing special
194        // is needed.
195        self.write(s.as_bytes())
196    }
197
198    #[inline]
199    fn finish(&self) -> u64 {
200        // Since we used a multiplicative hash our top bits have the most
201        // entropy (with the top bit having the most, decreasing as you go).
202        // As most hash table implementations (including hashbrown) compute
203        // the bucket index from the bottom bits we want to move bits from the
204        // top to the bottom. Ideally we'd rotate left by exactly the hash table
205        // size, but as we don't know this we'll choose 20 bits, giving decent
206        // entropy up until 2^20 table sizes. On 32-bit hosts we'll dial it
207        // back down a bit to 15 bits.
208
209        #[cfg(target_pointer_width = "64")]
210        const ROTATE: u32 = 20;
211        #[cfg(target_pointer_width = "32")]
212        const ROTATE: u32 = 15;
213
214        self.hash.rotate_left(ROTATE) as u64
215
216        // A bit reversal would be even better, except hashbrown also expects
217        // good entropy in the top 7 bits and a bit reverse would fill those
218        // bits with low entropy. More importantly, bit reversals are very slow
219        // on x86-64. A byte reversal is relatively fast, but still has a 2
220        // cycle latency on x86-64 compared to the 1 cycle latency of a rotate.
221        // It also suffers from the hashbrown-top-7-bit-issue.
222    }
223}
224
225// Nothing special, digits of pi.
226const SEED1: u64 = 0x243f6a8885a308d3;
227const SEED2: u64 = 0x13198a2e03707344;
228const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa4093822299f31d0;
229
230#[inline]
231fn multiply_mix(x: u64, y: u64) -> u64 {
232    #[cfg(target_pointer_width = "64")]
233    {
234        // We compute the full u64 x u64 -> u128 product, this is a single mul
235        // instruction on x86-64, one mul plus one mulhi on ARM64.
236        let full = (x as u128) * (y as u128);
237        let lo = full as u64;
238        let hi = (full >> 64) as u64;
239
240        // The middle bits of the full product fluctuate the most with small
241        // changes in the input. This is the top bits of lo and the bottom bits
242        // of hi. We can thus make the entire output fluctuate with small
243        // changes to the input by XOR'ing these two halves.
244        lo ^ hi
245
246        // Unfortunately both 2^64 + 1 and 2^64 - 1 have small prime factors,
247        // otherwise combining with + or - could result in a really strong hash, as:
248        //     x * y = 2^64 * hi + lo = (-1) * hi + lo = lo - hi,   (mod 2^64 + 1)
249        //     x * y = 2^64 * hi + lo =    1 * hi + lo = lo + hi,   (mod 2^64 - 1)
250        // Multiplicative hashing is universal in a field (like mod p).
251    }
252
253    #[cfg(target_pointer_width = "32")]
254    {
255        // u64 x u64 -> u128 product is prohibitively expensive on 32-bit.
256        // Decompose into 32-bit parts.
257        let lx = x as u32;
258        let ly = y as u32;
259        let hx = (x >> 32) as u32;
260        let hy = (y >> 32) as u32;
261
262        // u32 x u32 -> u64 the low bits of one with the high bits of the other.
263        let afull = (lx as u64) * (hy as u64);
264        let bfull = (hx as u64) * (ly as u64);
265
266        // Combine, swapping low/high of one of them so the upper bits of the
267        // product of one combine with the lower bits of the other.
268        afull ^ bfull.rotate_right(32)
269    }
270}
271
272/// A wyhash-inspired non-collision-resistant hash for strings/slices designed
273/// by Orson Peters, with a focus on small strings and small codesize.
274///
275/// The 64-bit version of this hash passes the SMHasher3 test suite on the full
276/// 64-bit output, that is, f(hash_bytes(b) ^ f(seed)) for some good avalanching
277/// permutation f() passed all tests with zero failures. When using the 32-bit
278/// version of multiply_mix this hash has a few non-catastrophic failures where
279/// there are a handful more collisions than an optimal hash would give.
280///
281/// We don't bother avalanching here as we'll feed this hash into a
282/// multiplication after which we take the high bits, which avalanches for us.
283#[inline]
284fn hash_bytes(bytes: &[u8]) -> u64 {
285    let len = bytes.len();
286    let mut s0 = SEED1;
287    let mut s1 = SEED2;
288
289    if len <= 16 {
290        // XOR the input into s0, s1.
291        if len >= 8 {
292            s0 ^= u64::from_le_bytes(bytes[0..8].try_into().unwrap());
293            s1 ^= u64::from_le_bytes(bytes[len - 8..].try_into().unwrap());
294        } else if len >= 4 {
295            s0 ^= u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as u64;
296            s1 ^= u32::from_le_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
297        } else if len > 0 {
298            let lo = bytes[0];
299            let mid = bytes[len / 2];
300            let hi = bytes[len - 1];
301            s0 ^= lo as u64;
302            s1 ^= ((hi as u64) << 8) | mid as u64;
303        }
304    } else {
305        // Handle bulk (can partially overlap with suffix).
306        let mut off = 0;
307        while off < len - 16 {
308            let x = u64::from_le_bytes(bytes[off..off + 8].try_into().unwrap());
309            let y = u64::from_le_bytes(bytes[off + 8..off + 16].try_into().unwrap());
310
311            // Replace s1 with a mix of s0, x, and y, and s0 with s1.
312            // This ensures the compiler can unroll this loop into two
313            // independent streams, one operating on s0, the other on s1.
314            //
315            // Since zeroes are a common input we prevent an immediate trivial
316            // collapse of the hash function by XOR'ing a constant with y.
317            let t = multiply_mix(s0 ^ x, PREVENT_TRIVIAL_ZERO_COLLAPSE ^ y);
318            s0 = s1;
319            s1 = t;
320            off += 16;
321        }
322
323        let suffix = &bytes[len - 16..];
324        s0 ^= u64::from_le_bytes(suffix[0..8].try_into().unwrap());
325        s1 ^= u64::from_le_bytes(suffix[8..16].try_into().unwrap());
326    }
327
328    multiply_mix(s0, s1) ^ (len as u64)
329}
330
331/// An implementation of [`BuildHasher`] that produces [`FxHasher`]s.
332///
333/// ```
334/// use std::hash::BuildHasher;
335/// use fx_hash::FxBuildHasher;
336/// assert_ne!(FxBuildHasher.hash_one(1), FxBuildHasher.hash_one(2));
337/// ```
338#[derive(Copy, Clone, Default)]
339pub struct FxBuildHasher;
340
341impl BuildHasher for FxBuildHasher {
342    type Hasher = FxHasher;
343    fn build_hasher(&self) -> FxHasher {
344        FxHasher::default()
345    }
346}
347
348#[cfg(test)]
349mod tests {
350    #[cfg(not(any(target_pointer_width = "64", target_pointer_width = "32")))]
351    compile_error!("The test suite only supports 64 bit and 32 bit usize");
352
353    use crate::{FxBuildHasher, FxHasher};
354    use core::hash::{BuildHasher, Hash, Hasher};
355
356    macro_rules! test_hash {
357        (
358            $(
359                hash($value:expr) == $result:expr,
360            )*
361        ) => {
362            $(
363                assert_eq!(FxBuildHasher.hash_one($value), $result);
364            )*
365        };
366    }
367
368    const B32: bool = cfg!(target_pointer_width = "32");
369
370    #[test]
371    fn unsigned() {
372        test_hash! {
373            hash(0_u8) == 0,
374            hash(1_u8) == if B32 { 3001993707 } else { 12583873379513078615 },
375            hash(100_u8) == if B32 { 3844759569 } else { 4008740938959785536 },
376            hash(u8::MAX) == if B32 { 999399879 } else { 17600987023830959190 },
377
378            hash(0_u16) == 0,
379            hash(1_u16) == if B32 { 3001993707 } else { 12583873379513078615 },
380            hash(100_u16) == if B32 { 3844759569 } else { 4008740938959785536 },
381            hash(u16::MAX) == if B32 { 3440503042 } else { 4001367065645062987 },
382
383            hash(0_u32) == 0,
384            hash(1_u32) == if B32 { 3001993707 } else { 12583873379513078615 },
385            hash(100_u32) == if B32 { 3844759569 } else { 4008740938959785536 },
386            hash(u32::MAX) == if B32 { 1293006356 } else { 17126373362251322066 },
387
388            hash(0_u64) == 0,
389            hash(1_u64) == if B32 { 275023839 } else { 12583873379513078615 },
390            hash(100_u64) == if B32 { 1732383522 } else { 4008740938959785536 },
391            hash(u64::MAX) == if B32 { 1017982517 } else { 5862870694197521576 },
392
393            hash(0_u128) == 0,
394            hash(1_u128) == if B32 { 1860738631 } else { 12885773367358079611 },
395            hash(100_u128) == if B32 { 1389515751 } else { 15751995649841559633 },
396            hash(u128::MAX) == if B32 { 2156022013 } else { 11423841400550042156 },
397
398            hash(0_usize) == 0,
399            hash(1_usize) == if B32 { 3001993707 } else { 12583873379513078615 },
400            hash(100_usize) == if B32 { 3844759569 } else { 4008740938959785536 },
401            hash(usize::MAX) == if B32 { 1293006356 } else { 5862870694197521576 },
402        }
403    }
404
405    #[test]
406    fn signed() {
407        test_hash! {
408            hash(i8::MIN) == if B32 { 2000713177 } else { 5869058164817243095 },
409            hash(0_i8) == 0,
410            hash(1_i8) == if B32 { 3001993707 } else { 12583873379513078615 },
411            hash(100_i8) == if B32 { 3844759569 } else { 4008740938959785536 },
412            hash(i8::MAX) == if B32 { 3293686765 } else { 11731928859014764671 },
413
414            hash(i16::MIN) == if B32 { 1073764727 } else { 8292620222579070801 },
415            hash(0_i16) == 0,
416            hash(1_i16) == if B32 { 3001993707 } else { 12583873379513078615 },
417            hash(100_i16) == if B32 { 3844759569 } else { 4008740938959785536 },
418            hash(i16::MAX) == if B32 { 2366738315 } else { 14155490916776592377 },
419
420            hash(i32::MIN) == if B32 { 16384 } else { 5631751334026900245 },
421            hash(0_i32) == 0,
422            hash(1_i32) == if B32 { 3001993707 } else { 12583873379513078615 },
423            hash(100_i32) == if B32 { 3844759569 } else { 4008740938959785536 },
424            hash(i32::MAX) == if B32 { 1293022740 } else { 11494622028224421821 },
425
426            hash(i64::MIN) == if B32 { 16384 } else { 524288 },
427            hash(0_i64) == 0,
428            hash(1_i64) == if B32 { 275023839 } else { 12583873379513078615 },
429            hash(100_i64) == if B32 { 1732383522 } else { 4008740938959785536 },
430            hash(i64::MAX) == if B32 { 1017998901 } else { 5862870694198045864 },
431
432            hash(i128::MIN) == if B32 { 16384 } else { 524288 },
433            hash(0_i128) == 0,
434            hash(1_i128) == if B32 { 1860738631 } else { 12885773367358079611 },
435            hash(100_i128) == if B32 { 1389515751 } else { 15751995649841559633 },
436            hash(i128::MAX) == if B32 { 2156005629 } else { 11423841400549517868 },
437
438            hash(isize::MIN) == if B32 { 16384 } else { 524288 },
439            hash(0_isize) == 0,
440            hash(1_isize) == if B32 { 3001993707 } else { 12583873379513078615 },
441            hash(100_isize) == if B32 { 3844759569 } else { 4008740938959785536 },
442            hash(isize::MAX) == if B32 { 1293022740 } else { 5862870694198045864 },
443        }
444    }
445
446    // Avoid relying on any `Hash` implementations in the standard library.
447    struct HashBytes(&'static [u8]);
448    impl Hash for HashBytes {
449        fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
450            state.write(self.0);
451        }
452    }
453
454    #[test]
455    fn bytes() {
456        test_hash! {
457            hash(HashBytes(&[])) == if B32 { 2673204745 } else { 5175017818631658678 },
458            hash(HashBytes(&[0])) == if B32 { 2948228584 } else { 11037888512829180254 },
459            hash(HashBytes(&[0, 0, 0, 0, 0, 0])) == if B32 { 3223252423 } else { 6891281800865632452 },
460            hash(HashBytes(&[1])) == if B32 { 2943445104 } else { 4127763515449136980 },
461            hash(HashBytes(&[2])) == if B32 { 1055423297 } else { 11322700005987241762 },
462            hash(HashBytes(b"uwu")) == if B32 { 2699662140 } else { 2129615206728903013 },
463            hash(HashBytes(b"These are some bytes for testing fx_hash.")) == if B32 { 2303640537 } else { 5180056157752790530 },
464        }
465    }
466
467    #[test]
468    fn with_seed_actually_different() {
469        let seeds = [[1, 2], [42, 17], [124436707, 99237], [
470            usize::MIN,
471            usize::MAX,
472        ]];
473
474        for [a_seed, b_seed] in seeds {
475            let a = || FxHasher::with_seed(a_seed);
476            let b = || FxHasher::with_seed(b_seed);
477
478            for x in u8::MIN..=u8::MAX {
479                let mut a = a();
480                let mut b = b();
481
482                x.hash(&mut a);
483                x.hash(&mut b);
484
485                assert_ne!(a.finish(), b.finish())
486            }
487        }
488    }
489}