Skip to main content

kevy_hash/
lib.rs

1//! kevy-hash — fast, well-distributed hashing for kevy's single-trust-domain
2//! keyspace. Zero dependencies.
3//!
4//! std's `HashMap` is a hashbrown Swiss table (excellent — kevy keeps it) keyed
5//! by `SipHash-1-3` (DoS-resistant, but a tax a single-threaded-per-shard
6//! keyspace facing no adversarial cross-trust key collisions does not need).
7//! This crate supplies the hasher that table should use instead: an FxHash-style
8//! word-at-a-time absorb plus a murmur3 [`fmix64`] avalanche finalizer.
9//!
10//! Measured (via `kevy-store/examples/bench_keyspace.rs`): ~4× faster
11//! hashing, ~1.2–2.8× faster GET-hit, ~1.1–1.7× faster GET-miss than
12//! SipHash, with no clustering.
13//! The finalizer is **essential** — the bare Fx absorb (no `fmix64`) clusters
14//! 30–50× on low-entropy sequential keys like `"key:0".."key:99999"`.
15//!
16//! **Not DoS-resistant.** There is no random seed, so an attacker who can choose
17//! keys *and* observe timing could force collisions. kevy's keyspace lives
18//! inside one trust domain per shard, so this is the right trade; do not reuse
19//! this hasher for maps fed untrusted, adversarially-chosen keys across a trust
20//! boundary.
21//!
22//! ```
23//! use kevy_hash::FxHashMap;
24//!
25//! let mut m: FxHashMap<Vec<u8>, u64> = FxHashMap::default();
26//! m.insert(b"key".to_vec(), 1);
27//! assert_eq!(m.get(b"key".as_slice()), Some(&1));
28//! ```
29#![forbid(unsafe_code)]
30#![warn(missing_docs)]
31
32use std::collections::{HashMap, HashSet};
33use std::hash::{BuildHasherDefault, Hasher};
34
35/// FxHash mixing constant (rustc's `rustc-hash` seed).
36const SEED: u64 = 0x517c_c1b7_2722_0a95;
37const ROTATE: u32 = 5;
38
39/// murmur3 `fmix64` avalanche — spreads every input bit across all 64 output
40/// bits. ~6 ALU ops, applied once on [`Hasher::finish`]. This is what the bare
41/// Fx absorb lacks, and why it clusters without it.
42#[inline]
43pub fn fmix64(mut h: u64) -> u64 {
44    h ^= h >> 33;
45    h = h.wrapping_mul(0xff51_afd7_ed55_8ccd);
46    h ^= h >> 33;
47    h = h.wrapping_mul(0xc4ce_b9fe_1a85_ec53);
48    h ^= h >> 33;
49    h
50}
51
52#[inline]
53fn mix(state: u64, word: u64) -> u64 {
54    (state.rotate_left(ROTATE) ^ word).wrapping_mul(SEED)
55}
56
57/// Two-stream pipelined hash_bytes inspired by rustc-hash 2.x's design
58/// (`rustc-hash/src/lib.rs#hash_bytes`). The key trick is keeping two
59/// independent state words `s0` / `s1` updated via 64×64→128 widening
60/// multiplication (one `mul`+`mulhi` on aarch64, one `mul` on x86_64),
61/// XORing the two halves of the product to mix top with bottom. The two
62/// streams are independent of each other in the bulk loop, so LLVM can
63/// schedule them on two ALU ports per cycle.
64///
65/// Lengths ≤ 16: XOR-only absorb of two reads (start + end), then a
66/// single `multiply_mix` of the two streams. The XOR-only absorb is fast
67/// because there's no ALU dependency between the two reads.
68///
69/// Lengths > 16: per-16-byte iteration, `s1 <- multiply_mix(s0 ^ x,
70/// CONST ^ y); s0 <- s1`. The `CONST` (digits of pi) prevents the
71/// all-zeros input from collapsing.
72///
73/// Final mix: `multiply_mix(s0, s1) ^ len` — folds length in so that
74/// `"abc"` and `"ab\0c"` hash differently (the XOR-only short path
75/// doesn't distinguish length-by-position without this).
76///
77/// Then `fmix64` to give us the anti-clustering avalanche we need (the
78/// rustc-hash design assumes its consumer mixes again; we don't, so we
79/// avalanche ourselves — same property as the legacy [`FxHasher`] path).
80#[inline]
81fn hash_bytes_pipelined(bytes: &[u8]) -> u64 {
82    // Constants — digits of pi (matches rustc-hash 2.x for cross-bench
83    // sanity; the actual choice doesn't matter beyond "non-zero, not
84    // sharing structure with input distributions").
85    const S1: u64 = 0x243f_6a88_85a3_08d3;
86    const S2: u64 = 0x1319_8a2e_0370_7344;
87    const ANTI_ZERO: u64 = 0xa409_3822_299f_31d0;
88    let len = bytes.len();
89    let mut s0 = S1;
90    let mut s1 = S2;
91
92    if len <= 16 {
93        if len >= 8 {
94            // Read first 8 and last 8 (may overlap when 8 ≤ len ≤ 15).
95            s0 ^= u64::from_le_bytes(bytes[0..8].try_into().unwrap());
96            s1 ^= u64::from_le_bytes(bytes[len - 8..].try_into().unwrap());
97        } else if len >= 4 {
98            s0 ^= u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as u64;
99            s1 ^= u32::from_le_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
100        } else if len > 0 {
101            // 1-3 byte tail: form a 3-byte key (lo, mid, hi) that
102            // distinguishes "ab" from "ba" etc.
103            let lo = bytes[0];
104            let mid = bytes[len / 2];
105            let hi = bytes[len - 1];
106            s0 ^= lo as u64;
107            s1 ^= ((hi as u64) << 8) | mid as u64;
108        }
109        // len == 0 falls through with s0 == S1, s1 == S2 unchanged.
110    } else {
111        // Bulk: drop the very last byte from the bulk slice so the suffix
112        // 16 bytes can partially overlap with bulk's tail (this is what
113        // rustc-hash 2.x does; it makes the suffix path uniform).
114        let mut bulk = &bytes[..len - 1];
115        while let Some((chunk, rest)) = bulk.split_first_chunk::<16>() {
116            let x = u64::from_le_bytes(chunk[..8].try_into().unwrap());
117            let y = u64::from_le_bytes(chunk[8..].try_into().unwrap());
118            let t = multiply_mix(s0 ^ x, ANTI_ZERO ^ y);
119            s0 = s1;
120            s1 = t;
121            bulk = rest;
122        }
123        // Suffix 16 bytes (may overlap with last bulk iter).
124        let suffix = &bytes[len - 16..];
125        s0 ^= u64::from_le_bytes(suffix[0..8].try_into().unwrap());
126        s1 ^= u64::from_le_bytes(suffix[8..16].try_into().unwrap());
127    }
128
129    let folded = multiply_mix(s0, s1) ^ (len as u64);
130    fmix64(folded)
131}
132
133/// 64×64→128 widening multiply, XOR'ing the two halves of the product.
134/// Single `mul` on x86_64, one `mul`+one `mulhi` on aarch64. Mixes top and
135/// bottom of the product so the entire output fluctuates with small
136/// changes in the input.
137#[inline]
138fn multiply_mix(x: u64, y: u64) -> u64 {
139    let full = (x as u128).wrapping_mul(y as u128);
140    let lo = full as u64;
141    let hi = (full >> 64) as u64;
142    lo ^ hi
143}
144
145/// Fast, well-distributed [`Hasher`] for kevy's keyspace. Word-at-a-time absorb
146/// (FxHash-style) finished with [`fmix64`]. See the crate docs for the security
147/// trade-off.
148#[derive(Default)]
149pub struct FxHasher(u64);
150
151impl Hasher for FxHasher {
152    #[inline]
153    fn finish(&self) -> u64 {
154        fmix64(self.0)
155    }
156
157    #[inline]
158    fn write(&mut self, mut bytes: &[u8]) {
159        let mut state = self.0;
160        while bytes.len() >= 8 {
161            let word = u64::from_le_bytes(bytes[..8].try_into().unwrap());
162            state = mix(state, word);
163            bytes = &bytes[8..];
164        }
165        if bytes.len() >= 4 {
166            let word = u32::from_le_bytes(bytes[..4].try_into().unwrap()) as u64;
167            state = mix(state, word);
168            bytes = &bytes[4..];
169        }
170        for &b in bytes {
171            state = mix(state, b as u64);
172        }
173        self.0 = state;
174    }
175
176    // Fixed-width integer keys (e.g. connection-id maps) skip the byte loop.
177    #[inline]
178    fn write_u64(&mut self, i: u64) {
179        self.0 = mix(self.0, i);
180    }
181    #[inline]
182    fn write_usize(&mut self, i: usize) {
183        self.0 = mix(self.0, i as u64);
184    }
185}
186
187/// [`BuildHasher`](std::hash::BuildHasher) for [`FxHasher`]. Seedless, so equal
188/// keys hash equally across instances and process runs.
189pub type FxBuildHasher = BuildHasherDefault<FxHasher>;
190
191/// Single-call hashing for kevy's per-command hot path.
192///
193/// `std::hash::Hasher` is a state-machine API — every hash is `Hasher::default()`
194/// → `write_*` → `finish`, with `BuildHasher` indirection on top. For
195/// `kevy-map`'s open-addressing table the keyspace is a small handful of
196/// well-known leaf types (`[u8]`, `u32`, `u64`, `i32`); we get a faster, inline-
197/// friendly hash by exposing one method on each that produces the final mixed
198/// 64-bit value in one go.
199///
200/// All impls must agree with feeding the value through [`FxHasher`] then
201/// calling `finish` — this lets us cut the trait dispatch without changing the
202/// hash function. `kevy-map` consumes both the full hash (for bucket index)
203/// and its top 7 bits (for the metadata byte).
204pub trait KevyHash {
205    /// Compute the final mixed 64-bit hash of `self` in one call.
206    fn kevy_hash(&self) -> u64;
207}
208
209impl KevyHash for [u8] {
210    /// Byte-slice hash. Uses the **two-stream pipelined** path internally
211    /// for ILP on the bench's 8-64 byte keyspace, closing the prior 1 ns
212    /// gap vs rustc-hash 2.x's `hash_bytes`. The final `fmix64` retains
213    /// the anti-clustering guarantee that the
214    /// `no_catastrophic_clustering_on_low_entropy_keys` test enforces.
215    ///
216    /// Note: the result diverges from the legacy [`FxHasher`] absorb path —
217    /// callers using `FxHashMap<Vec<u8>, _>` route through std's
218    /// `Hash::hash → Hasher::write → finish` (the legacy single-stream
219    /// path), which intentionally stays put for cross-instance hash
220    /// stability with anything that depended on the v0.polish bit pattern.
221    /// The `KevyHash for [u8]` impl is for one-call hot paths like
222    /// `kevy-map::find_by_borrow`, which is the only one we measure.
223    #[inline]
224    fn kevy_hash(&self) -> u64 {
225        hash_bytes_pipelined(self)
226    }
227}
228
229impl KevyHash for Vec<u8> {
230    #[inline]
231    fn kevy_hash(&self) -> u64 {
232        self.as_slice().kevy_hash()
233    }
234}
235
236impl KevyHash for u64 {
237    #[inline]
238    fn kevy_hash(&self) -> u64 {
239        fmix64(mix(0, *self))
240    }
241}
242
243impl KevyHash for u32 {
244    #[inline]
245    fn kevy_hash(&self) -> u64 {
246        fmix64(mix(0, *self as u64))
247    }
248}
249
250impl KevyHash for i32 {
251    #[inline]
252    fn kevy_hash(&self) -> u64 {
253        // Sign-extend to u64 so equal i32 values hash the same as if widened
254        // through the integer path; negatives' top bits still fmix64 away.
255        fmix64(mix(0, *self as i64 as u64))
256    }
257}
258
259impl KevyHash for usize {
260    #[inline]
261    fn kevy_hash(&self) -> u64 {
262        fmix64(mix(0, *self as u64))
263    }
264}
265
266/// A [`HashMap`] using [`FxHasher`] instead of SipHash.
267pub type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>;
268
269/// A [`HashSet`] using [`FxHasher`] instead of SipHash.
270pub type FxHashSet<T> = HashSet<T, FxBuildHasher>;
271
272#[cfg(test)]
273mod tests {
274    use super::*;
275    use std::hash::BuildHasher;
276
277    fn h(bytes: &[u8]) -> u64 {
278        FxBuildHasher::default().hash_one(bytes)
279    }
280
281    #[test]
282    fn deterministic_across_instances() {
283        assert_eq!(h(b"hello"), h(b"hello"));
284        assert_ne!(h(b"hello"), h(b"hellp"));
285        assert_ne!(h(b""), h(b"\0"));
286    }
287
288    #[test]
289    fn map_roundtrip() {
290        let mut m: FxHashMap<Vec<u8>, u64> = FxHashMap::default();
291        for i in 0..10_000u64 {
292            m.insert(format!("key:{i}").into_bytes(), i);
293        }
294        assert_eq!(m.len(), 10_000);
295        for i in 0..10_000u64 {
296            assert_eq!(m.get(format!("key:{i}").into_bytes().as_slice()), Some(&i));
297        }
298    }
299
300    #[test]
301    fn kevy_hash_bytes_is_deterministic_and_distinct() {
302        // KevyHash for [u8] uses the two-stream pipelined hash_bytes_pipelined
303        // path (the rustc-hash 2.x trick + our fmix64 finalize). It diverges
304        // from the legacy FxHasher::write byte absorb path — see the impl
305        // doc-comment.
306        let key = b"hello-world".as_slice();
307        // Deterministic across calls (no random seed).
308        assert_eq!(key.kevy_hash(), key.kevy_hash());
309        // Distinct from a single-bit-flipped key.
310        assert_ne!(key.kevy_hash(), b"hello-worle".as_slice().kevy_hash());
311        // Length matters (XOR-only short path otherwise wouldn't distinguish).
312        assert_ne!(b"abc".as_slice().kevy_hash(), b"abcd".as_slice().kevy_hash());
313        // The legacy FxHasher path is still available via std Hasher trait
314        // (FxHashMap users); the two no longer have to agree.
315        let mut staged = FxHasher::default();
316        staged.write(key);
317        let _fx_legacy = staged.finish();
318        // Intentionally no assert_eq! here — divergence is the point.
319    }
320
321    #[test]
322    fn kevy_hash_integer_paths_differ_per_value() {
323        let a: u64 = 1;
324        let b: u64 = 2;
325        assert_ne!(a.kevy_hash(), b.kevy_hash());
326        let i: i32 = -1;
327        let j: i32 = 1;
328        assert_ne!(i.kevy_hash(), j.kevy_hash());
329    }
330
331    #[test]
332    fn kevy_hash_top7_bits_distribute() {
333        // Same low-entropy clustering guard, but driven through `kevy_hash`
334        // on byte slices — the path kevy-map's metadata byte will use.
335        let mut top = [0u32; 128];
336        for i in 0..4096u64 {
337            let mut k = format!("key:{i}").into_bytes();
338            k.resize(12, b'x');
339            let hash = k.as_slice().kevy_hash();
340            top[(hash >> 57) as usize] += 1;
341        }
342        let max = *top.iter().max().unwrap();
343        assert!(max < 128, "top-7-bit skew {max} (mean 32) — avalanche failing");
344    }
345
346    #[test]
347    fn integer_keys_roundtrip() {
348        let mut m: FxHashMap<u64, u64> = FxHashMap::default();
349        for i in 0..1_000u64 {
350            m.insert(i, i * 2);
351        }
352        assert_eq!(m.get(&500), Some(&1_000));
353        assert_eq!(m.get(&999), Some(&1_998));
354    }
355
356    /// Guards against the raw-Fx failure mode: low-entropy sequential keys
357    /// (`"key:0xxxxx".."key:99999x"`) must spread across buckets, not pile up.
358    /// `fmix64` is what makes this pass; removing it would fail loudly.
359    #[test]
360    fn no_catastrophic_clustering_on_low_entropy_keys() {
361        let keys: Vec<Vec<u8>> = (0..4096u64)
362            .map(|i| {
363                let mut k = format!("key:{i}").into_bytes();
364                k.resize(12, b'x');
365                k
366            })
367            .collect();
368
369        // Low bits drive the bucket index; 4096 keys / 256 → mean 16/bucket.
370        let mut low = [0u32; 256];
371        // Top 7 bits drive hashbrown's SIMD control byte; / 128 → mean 32.
372        let mut top = [0u32; 128];
373        for k in &keys {
374            let hash = h(k);
375            low[(hash & 0xff) as usize] += 1;
376            top[(hash >> 57) as usize] += 1;
377        }
378        let max_low = *low.iter().max().unwrap();
379        let max_top = *top.iter().max().unwrap();
380        // Well-avalanched ⇒ no bucket exceeds ~4× the mean.
381        assert!(max_low < 64, "low-bit skew {max_low} (mean 16) — avalanche failing");
382        assert!(max_top < 128, "top-bit skew {max_top} (mean 32) — avalanche failing");
383    }
384
385    // ---- KevyHash impls for delegating types (cov for u32 / usize / Vec<u8>) -
386
387    #[test]
388    fn kevy_hash_vec_u8_agrees_with_slice() {
389        let v: Vec<u8> = b"hello-world".to_vec();
390        assert_eq!(v.kevy_hash(), v.as_slice().kevy_hash());
391    }
392
393    #[test]
394    fn kevy_hash_u32_agrees_with_widened_u64() {
395        // u32 widens through u64 → same hash as the u64 form of the same value.
396        let n: u32 = 0xCAFE_BABE;
397        assert_eq!(n.kevy_hash(), (n as u64).kevy_hash());
398        // Distinct values produce distinct hashes.
399        let m: u32 = n.wrapping_add(1);
400        assert_ne!(n.kevy_hash(), m.kevy_hash());
401    }
402
403    #[test]
404    fn kevy_hash_usize_agrees_with_u64() {
405        // usize sign-free widens through u64. Equal-valued usize ↔ u64
406        // must hash the same so a map keyed by either reads back equivalently.
407        let n: usize = 42;
408        assert_eq!(n.kevy_hash(), (n as u64).kevy_hash());
409        let m: usize = 43;
410        assert_ne!(n.kevy_hash(), m.kevy_hash());
411    }
412}