// rustc_hash/lib.rs

1//! A speedy, non-cryptographic hashing algorithm used by `rustc`.
2//!
3//! # Example
4//!
5//! ```rust
6//! # #[cfg(feature = "std")]
7//! # fn main() {
8//! use rustc_hash::FxHashMap;
9//!
10//! let mut map: FxHashMap<u32, u32> = FxHashMap::default();
11//! map.insert(22, 44);
12//! # }
13//! # #[cfg(not(feature = "std"))]
14//! # fn main() { }
15//! ```
16
17#![no_std]
18#![cfg_attr(feature = "nightly", feature(const_default))]
19#![cfg_attr(feature = "nightly", feature(const_trait_impl))]
20#![cfg_attr(feature = "nightly", feature(derive_const))]
21#![cfg_attr(feature = "nightly", feature(hasher_prefixfree_extras))]
22#![allow(rustc::default_hash_types)]
23
24#[cfg(feature = "std")]
25extern crate std;
26
27#[cfg(feature = "rand")]
28extern crate rand;
29
30#[cfg(feature = "rand")]
31mod random_state;
32
33mod seeded_state;
34
35use core::default::Default;
36use core::hash::{BuildHasher, Hasher};
37#[cfg(feature = "std")]
38use std::collections::{HashMap, HashSet};
39
/// Type alias for a hash map that uses the Fx hashing algorithm.
///
/// Only available with the `std` feature, since `HashMap` lives in `std`.
#[cfg(feature = "std")]
pub type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>;

/// Type alias for a hash set that uses the Fx hashing algorithm.
///
/// Only available with the `std` feature, since `HashSet` lives in `std`.
#[cfg(feature = "std")]
pub type FxHashSet<V> = HashSet<V, FxBuildHasher>;
47
48#[cfg(feature = "rand")]
49pub use random_state::{FxHashMapRand, FxHashSetRand, FxRandomState};
50
51pub use seeded_state::FxSeededState;
52#[cfg(feature = "std")]
53pub use seeded_state::{FxHashMapSeed, FxHashSetSeed};
54
/// A speedy hash algorithm for use within rustc. The hashmap in liballoc
/// by default uses SipHash which isn't quite as speedy as we want. In the
/// compiler we're not really worried about DOS attempts, so we use a fast
/// non-cryptographic hash.
///
/// The current implementation is a fast polynomial hash with a single
/// bit rotation as a finishing step designed by Orson Peters.
#[derive(Clone)]
pub struct FxHasher {
    // Running polynomial-hash state: updated word-by-word in `add_to_hash`
    // and rotated into its final form in `Hasher::finish`.
    hash: usize,
}
66
// One might view a polynomial hash
//    m[0] * k    + m[1] * k^2  + m[2] * k^3  + ...
// as a multilinear hash with keystream k[..]
//    m[0] * k[0] + m[1] * k[1] + m[2] * k[2] + ...
// where keystream k just happens to be generated using a multiplicative
// congruential pseudorandom number generator (MCG). For that reason we chose a
// constant that was found to be good for a MCG in:
//     "Computationally Easy, Spectrally Good Multipliers for Congruential
//     Pseudorandom Number Generators" by Guy Steele and Sebastiano Vigna.
//
// `K` is sized to the target's pointer width so `add_to_hash` runs on
// native `usize` arithmetic; a separate constant is provided per width.
#[cfg(target_pointer_width = "64")]
const K: usize = 0xf1357aea2e62a9c5;
#[cfg(target_pointer_width = "32")]
const K: usize = 0x93d765dd;
80
81impl FxHasher {
82    /// Creates a `fx` hasher with a given seed.
83    pub const fn with_seed(seed: usize) -> FxHasher {
84        FxHasher { hash: seed }
85    }
86
87    /// Creates a default `fx` hasher.
88    pub const fn default() -> FxHasher {
89        FxHasher { hash: 0 }
90    }
91}
92
// On stable we emit a plain `impl Default`; the macro indirection exists only
// so the nightly `impl const` variant below can be swapped in without the
// stable compiler ever having to parse it.
#[cfg(not(feature = "nightly"))]
macro_rules! default_impl {
    () => {
        impl Default for FxHasher {
            #[inline]
            fn default() -> FxHasher {
                // Delegates to the inherent `const fn default` above.
                Self::default()
            }
        }
    };
}

// In order to use the nightly-only `const` syntax, we gate the definition behind a macro so that
// parsing still succeeds on stable.
#[cfg(feature = "nightly")]
macro_rules! default_impl {
    () => {
        impl const Default for FxHasher {
            #[inline]
            fn default() -> FxHasher {
                // Delegates to the inherent `const fn default` above.
                Self::default()
            }
        }
    };
}

// Expand whichever variant the active feature set selected.
default_impl!();
120
121impl FxHasher {
122    #[inline]
123    fn add_to_hash(&mut self, i: usize) {
124        self.hash = self.hash.wrapping_add(i).wrapping_mul(K);
125    }
126}
127
impl Hasher for FxHasher {
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        // Compress the byte string to a single u64 and add to our hash.
        self.write_u64(hash_bytes(bytes));
    }

    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.add_to_hash(i as usize);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.add_to_hash(i as usize);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.add_to_hash(i as usize);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.add_to_hash(i as usize);
        // `as usize` truncates on 32-bit targets, so mix in the high half too.
        #[cfg(target_pointer_width = "32")]
        self.add_to_hash((i >> 32) as usize);
    }

    #[inline]
    fn write_u128(&mut self, i: u128) {
        // Feed the value in pointer-width pieces: two on 64-bit targets,
        // four on 32-bit targets (the cfg'd lines fill in the extra halves).
        self.add_to_hash(i as usize);
        #[cfg(target_pointer_width = "32")]
        self.add_to_hash((i >> 32) as usize);
        self.add_to_hash((i >> 64) as usize);
        #[cfg(target_pointer_width = "32")]
        self.add_to_hash((i >> 96) as usize);
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.add_to_hash(i);
    }

    #[cfg(feature = "nightly")]
    #[inline]
    fn write_length_prefix(&mut self, _len: usize) {
        // Most cases will specialize hash_slice to call write(), which encodes
        // the length already in a more efficient manner than we could here. For
        // HashDoS-resistance you would still need to include this for the
        // non-slice collection hashes, but for the purposes of rustc we do not
        // care and do not wish to pay the performance penalty of mixing in len
        // for those collections.
    }

    #[cfg(feature = "nightly")]
    #[inline]
    fn write_str(&mut self, s: &str) {
        // Similarly here, write already encodes the length, so nothing special
        // is needed.
        self.write(s.as_bytes())
    }

    #[inline]
    fn finish(&self) -> u64 {
        // Since we used a multiplicative hash our top bits have the most
        // entropy (with the top bit having the most, decreasing as you go).
        // As most hash table implementations (including hashbrown) compute
        // the bucket index from the bottom bits we want to move bits from the
        // top to the bottom. Ideally we'd rotate left by exactly the hash table
        // size, but as we don't know this we'll choose 26 bits, giving decent
        // entropy up until 2^26 table sizes. On 32-bit hosts we'll dial it
        // back down a bit to 15 bits.

        #[cfg(target_pointer_width = "64")]
        const ROTATE: u32 = 26;
        #[cfg(target_pointer_width = "32")]
        const ROTATE: u32 = 15;

        self.hash.rotate_left(ROTATE) as u64

        // A bit reversal would be even better, except hashbrown also expects
        // good entropy in the top 7 bits and a bit reverse would fill those
        // bits with low entropy. More importantly, bit reversals are very slow
        // on x86-64. A byte reversal is relatively fast, but still has a 2
        // cycle latency on x86-64 compared to the 1 cycle latency of a rotate.
        // It also suffers from the hashbrown-top-7-bit-issue.
    }
}
217
// Nothing special, digits of pi.
//
// SEED1/SEED2 initialize the two mixing lanes (`s0`/`s1`) in `hash_bytes`;
// PREVENT_TRIVIAL_ZERO_COLLAPSE is XOR'd into one multiplicand there so an
// all-zero input cannot immediately zero out the state.
const SEED1: u64 = 0x243f6a8885a308d3;
const SEED2: u64 = 0x13198a2e03707344;
const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa4093822299f31d0;
222
/// Multiplies `x` and `y` and mixes the halves of the (conceptual) 128-bit
/// product into a single `u64`.
#[inline]
fn multiply_mix(x: u64, y: u64) -> u64 {
    // The following code path is only fast if 64-bit to 128-bit widening
    // multiplication is supported by the architecture. Most 64-bit
    // architectures except SPARC64 and Wasm64 support it. However, the target
    // pointer width doesn't always indicate that we are dealing with a 64-bit
    // architecture, as there are ABIs that reduce the pointer width, especially
    // on AArch64 and x86-64. WebAssembly (regardless of pointer width) supports
    // 64-bit to 128-bit widening multiplication with the `wide-arithmetic`
    // proposal.
    if cfg!(any(
        all(
            target_pointer_width = "64",
            not(any(target_arch = "sparc64", target_arch = "wasm64")),
        ),
        target_arch = "aarch64",
        target_arch = "x86_64",
        all(target_family = "wasm", target_feature = "wide-arithmetic"),
    )) {
        // Full u64 x u64 -> u128 widening product: a single mul instruction
        // on x86-64, one mul plus one mulhi on ARM64.
        let wide = (x as u128).wrapping_mul(y as u128);

        // The middle bits of the full product fluctuate the most with small
        // changes in the input — that is the top bits of the low half and the
        // bottom bits of the high half — so XOR-folding the halves spreads
        // that fluctuation across the whole output.
        ((wide >> 64) as u64) ^ (wide as u64)

        // Unfortunately both 2^64 + 1 and 2^64 - 1 have small prime factors,
        // otherwise combining with + or - could result in a really strong hash, as:
        //     x * y = 2^64 * hi + lo = (-1) * hi + lo = lo - hi,   (mod 2^64 + 1)
        //     x * y = 2^64 * hi + lo =    1 * hi + lo = lo + hi,   (mod 2^64 - 1)
        // Multiplicative hashing is universal in a field (like mod p).
    } else {
        // u64 x u64 -> u128 product is prohibitively expensive on 32-bit.
        // Decompose each operand into 32-bit halves (kept in u64 so the
        // 32x32 -> 64-bit products below cannot overflow).
        let x_lo = x & 0xffff_ffff;
        let x_hi = x >> 32;
        let y_lo = y & 0xffff_ffff;
        let y_hi = y >> 32;

        // Cross-multiply the low half of one operand with the high half of
        // the other.
        let cross_a = x_lo.wrapping_mul(y_hi);
        let cross_b = x_hi.wrapping_mul(y_lo);

        // Combine, rotating one product so its upper bits line up with the
        // lower bits of the other.
        cross_a ^ cross_b.rotate_right(32)
    }
}
276
/// A wyhash-inspired non-collision-resistant hash for strings/slices designed
/// by Orson Peters, with a focus on small strings and small codesize.
///
/// The 64-bit version of this hash passes the SMHasher3 test suite on the full
/// 64-bit output, that is, f(hash_bytes(b) ^ f(seed)) for some good avalanching
/// permutation f() passed all tests with zero failures. When using the 32-bit
/// version of multiply_mix this hash has a few non-catastrophic failures where
/// there are a handful more collisions than an optimal hash would give.
///
/// We don't bother avalanching here as we'll feed this hash into a
/// multiplication after which we take the high bits, which avalanches for us.
#[inline]
fn hash_bytes(bytes: &[u8]) -> u64 {
    let len = bytes.len();
    let mut s0 = SEED1;
    let mut s1 = SEED2;

    if len <= 16 {
        // XOR the input into s0, s1.
        if len >= 8 {
            // Two (possibly overlapping) 8-byte reads cover the whole input.
            s0 ^= u64::from_le_bytes(bytes[0..8].try_into().unwrap());
            s1 ^= u64::from_le_bytes(bytes[len - 8..].try_into().unwrap());
        } else if len >= 4 {
            // Likewise with two (possibly overlapping) 4-byte reads.
            s0 ^= u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as u64;
            s1 ^= u32::from_le_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
        } else if len > 0 {
            // 1..=3 bytes: first, middle and last byte (these may coincide).
            let lo = bytes[0];
            let mid = bytes[len / 2];
            let hi = bytes[len - 1];
            s0 ^= lo as u64;
            s1 ^= ((hi as u64) << 8) | mid as u64;
        }
        // len == 0 leaves the seeds untouched; the final `^ len` below still
        // distinguishes the empty input from other short inputs.
    } else {
        // Handle bulk (can partially overlap with suffix).
        //
        // Excluding the last byte (`len - 1`) means that when `len` is an
        // exact multiple of 16 the final chunk is NOT consumed by the loop;
        // the overlapping suffix reads below cover those bytes instead.
        let mut bulk = &bytes[..(len - 1)];
        while let Some((chunk, rest)) = bulk.split_first_chunk::<16>() {
            let x = u64::from_le_bytes((&chunk[..8]).try_into().unwrap());
            let y = u64::from_le_bytes((&chunk[8..]).try_into().unwrap());

            // Replace s1 with a mix of s0, x, and y, and s0 with s1.
            // This ensures the compiler can unroll this loop into two
            // independent streams, one operating on s0, the other on s1.
            //
            // Since zeroes are a common input we prevent an immediate trivial
            // collapse of the hash function by XOR'ing a constant with y.
            let t = multiply_mix(s0 ^ x, PREVENT_TRIVIAL_ZERO_COLLAPSE ^ y);
            s0 = s1;
            s1 = t;
            bulk = rest;
        }

        // Always mix in the final 16 bytes; these may overlap bytes already
        // processed by the loop, which is harmless for a non-cryptographic hash.
        let suffix = &bytes[len - 16..];
        s0 ^= u64::from_le_bytes(suffix[0..8].try_into().unwrap());
        s1 ^= u64::from_le_bytes(suffix[8..16].try_into().unwrap());
    }

    // Fold the lanes together and XOR in the length so inputs that differ
    // only in length (e.g. via overlapping reads) still hash differently.
    multiply_mix(s0, s1) ^ (len as u64)
}
335
/// An implementation of [`BuildHasher`] that produces [`FxHasher`]s.
///
/// A zero-sized unit struct: building a hasher carries no per-map state.
/// `Default` is derived normally on stable and via `derive_const` on nightly.
///
/// ```
/// use std::hash::BuildHasher;
/// use rustc_hash::FxBuildHasher;
/// assert_ne!(FxBuildHasher.hash_one(1), FxBuildHasher.hash_one(2));
/// ```
#[derive(Copy, Clone)]
#[cfg_attr(not(feature = "nightly"), derive(Default))]
#[cfg_attr(feature = "nightly", derive_const(Default))]
pub struct FxBuildHasher;
347
348impl BuildHasher for FxBuildHasher {
349    type Hasher = FxHasher;
350    fn build_hasher(&self) -> FxHasher {
351        FxHasher::default()
352    }
353}
354
#[cfg(test)]
mod tests {
    #[cfg(not(any(target_pointer_width = "64", target_pointer_width = "32")))]
    compile_error!("The test suite only supports 64 bit and 32 bit usize");

    use crate::{FxBuildHasher, FxHasher};
    use core::hash::{BuildHasher, Hash, Hasher};

    // Asserts that hashing each `$value` with the default builder yields the
    // pinned `$result`, catching accidental changes to the hash function.
    macro_rules! test_hash {
        (
            $(
                hash($value:expr) == $result:expr,
            )*
        ) => {
            $(
                assert_eq!(FxBuildHasher.hash_one($value), $result);
            )*
        };
    }

    // True on 32-bit targets; expected hashes differ by pointer width.
    const B32: bool = cfg!(target_pointer_width = "32");

    #[test]
    fn unsigned() {
        test_hash! {
            hash(0_u8) == 0,
            hash(1_u8) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_u8) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(u8::MAX) == if B32 { 999399879 } else { 1211781028898739645 },

            hash(0_u16) == 0,
            hash(1_u16) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_u16) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(u16::MAX) == if B32 { 3440503042 } else { 16279819243059860173 },

            hash(0_u32) == 0,
            hash(1_u32) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_u32) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(u32::MAX) == if B32 { 1293006356 } else { 7729994835221066939 },

            hash(0_u64) == 0,
            hash(1_u64) == if B32 { 275023839 } else { 12157901119326311915 },
            hash(100_u64) == if B32 { 1732383522 } else { 16751747135202103309 },
            hash(u64::MAX) == if B32 { 1017982517 } else { 6288842954450348564 },

            hash(0_u128) == 0,
            hash(1_u128) == if B32 { 1860738631 } else { 13032756267696824044 },
            hash(100_u128) == if B32 { 1389515751 } else { 12003541609544029302 },
            hash(u128::MAX) == if B32 { 2156022013 } else { 11702830760530184999 },

            hash(0_usize) == 0,
            hash(1_usize) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_usize) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(usize::MAX) == if B32 { 1293006356 } else { 6288842954450348564 },
        }
    }

    #[test]
    fn signed() {
        test_hash! {
            hash(i8::MIN) == if B32 { 2000713177 } else { 6684841074112525780 },
            hash(0_i8) == 0,
            hash(1_i8) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_i8) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(i8::MAX) == if B32 { 3293686765 } else { 12973684028562874344 },

            hash(i16::MIN) == if B32 { 1073764727 } else { 14218860181193086044 },
            hash(0_i16) == 0,
            hash(1_i16) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_i16) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(i16::MAX) == if B32 { 2366738315 } else { 2060959061933882993 },

            hash(i32::MIN) == if B32 { 16384 } else { 9943947977240134995 },
            hash(0_i32) == 0,
            hash(1_i32) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_i32) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(i32::MAX) == if B32 { 1293022740 } else { 16232790931690483559 },

            hash(i64::MIN) == if B32 { 16384 } else { 33554432 },
            hash(0_i64) == 0,
            hash(1_i64) == if B32 { 275023839 } else { 12157901119326311915 },
            hash(100_i64) == if B32 { 1732383522 } else { 16751747135202103309 },
            hash(i64::MAX) == if B32 { 1017998901 } else { 6288842954483902996 },

            hash(i128::MIN) == if B32 { 16384 } else { 33554432 },
            hash(0_i128) == 0,
            hash(1_i128) == if B32 { 1860738631 } else { 13032756267696824044 },
            hash(100_i128) == if B32 { 1389515751 } else { 12003541609544029302 },
            hash(i128::MAX) == if B32 { 2156005629 } else { 11702830760496630567 },

            hash(isize::MIN) == if B32 { 16384 } else { 33554432 },
            hash(0_isize) == 0,
            hash(1_isize) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_isize) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(isize::MAX) == if B32 { 1293022740 } else { 6288842954483902996 },
        }
    }

    // Avoid relying on any `Hash` implementations in the standard library.
    // This feeds raw bytes straight into `Hasher::write`, so the expected
    // values below pin `hash_bytes` itself rather than std's slice hashing.
    struct HashBytes(&'static [u8]);
    impl Hash for HashBytes {
        fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
            state.write(self.0);
        }
    }

    #[test]
    fn bytes() {
        test_hash! {
            hash(HashBytes(&[])) == if B32 { 2673204745 } else { 17606491139363777937 },
            hash(HashBytes(&[0])) == if B32 { 2948228584 } else { 5448590020104574886 },
            hash(HashBytes(&[0, 0, 0, 0, 0, 0])) == if B32 { 3223252423 } else { 16766921560080789783 },
            hash(HashBytes(&[1])) == if B32 { 2943445104 } else { 5922447956811044110 },
            hash(HashBytes(&[2])) == if B32 { 1055423297 } else { 5229781508510959783 },
            hash(HashBytes(b"uwu")) == if B32 { 2699662140 } else { 7168164714682931527 },
            hash(HashBytes(b"These are some bytes for testing rustc_hash.")) == if B32 { 2303640537 } else { 2349210501944688211 },
        }
    }

    // Checks that different seeds actually produce different hash streams for
    // every single-byte input, i.e. the seed is not lost by the mixing.
    #[test]
    fn with_seed_actually_different() {
        let seeds = [
            [1, 2],
            [42, 17],
            [124436707, 99237],
            [usize::MIN, usize::MAX],
        ];

        for [a_seed, b_seed] in seeds {
            let a = || FxHasher::with_seed(a_seed);
            let b = || FxHasher::with_seed(b_seed);

            for x in u8::MIN..=u8::MAX {
                let mut a = a();
                let mut b = b();

                x.hash(&mut a);
                x.hash(&mut b);

                assert_ne!(a.finish(), b.finish())
            }
        }
    }
}
498}