kevy_hash/
lib.rs

1//! kevy-hash — fast, well-distributed hashing for kevy's single-trust-domain
2//! keyspace. Zero dependencies.
3//!
4//! std's `HashMap` is a hashbrown Swiss table (excellent — kevy keeps it) keyed
5//! by `SipHash-1-3` (DoS-resistant, but a tax a single-threaded-per-shard
6//! keyspace facing no adversarial cross-trust key collisions does not need).
7//! This crate supplies the hasher that table should use instead: an FxHash-style
8//! word-at-a-time absorb plus a murmur3 [`fmix64`] avalanche finalizer.
9//!
10//! Measured (via `kevy-store/examples/bench_keyspace.rs`): ~4× faster
11//! hashing, ~1.2–2.8× faster GET-hit, ~1.1–1.7× faster GET-miss than
12//! SipHash, with no clustering.
13//! The finalizer is **essential** — the bare Fx absorb (no `fmix64`) clusters
14//! 30–50× on low-entropy sequential keys like `"key:0".."key:99999"`.
15//!
16//! **Not DoS-resistant.** There is no random seed, so an attacker who can choose
17//! keys *and* observe timing could force collisions. kevy's keyspace lives
18//! inside one trust domain per shard, so this is the right trade; do not reuse
19//! this hasher for maps fed untrusted, adversarially-chosen keys across a trust
20//! boundary.
21//!
22//! ```
23//! use kevy_hash::FxHashMap;
24//!
25//! let mut m: FxHashMap<Vec<u8>, u64> = FxHashMap::default();
26//! m.insert(b"key".to_vec(), 1);
27//! assert_eq!(m.get(b"key".as_slice()), Some(&1));
28//! ```
29#![forbid(unsafe_code)]
30#![warn(missing_docs)]
31
32use std::collections::{HashMap, HashSet};
33use std::hash::{BuildHasherDefault, Hasher};
34
35mod crc16;
36pub use crc16::{crc16, key_hash_slot};
37
38/// FxHash mixing constant (rustc's `rustc-hash` seed).
39const SEED: u64 = 0x517c_c1b7_2722_0a95;
40const ROTATE: u32 = 5;
41
42/// murmur3 `fmix64` avalanche — spreads every input bit across all 64 output
43/// bits. ~6 ALU ops, applied once on [`Hasher::finish`]. This is what the bare
44/// Fx absorb lacks, and why it clusters without it.
45#[inline]
46pub fn fmix64(mut h: u64) -> u64 {
47    h ^= h >> 33;
48    h = h.wrapping_mul(0xff51_afd7_ed55_8ccd);
49    h ^= h >> 33;
50    h = h.wrapping_mul(0xc4ce_b9fe_1a85_ec53);
51    h ^= h >> 33;
52    h
53}
54
55#[inline]
56fn mix(state: u64, word: u64) -> u64 {
57    (state.rotate_left(ROTATE) ^ word).wrapping_mul(SEED)
58}
59
60/// Two-stream pipelined hash_bytes inspired by rustc-hash 2.x's design
61/// (`rustc-hash/src/lib.rs#hash_bytes`). The key trick is keeping two
62/// independent state words `s0` / `s1` updated via 64×64→128 widening
63/// multiplication (one `mul`+`mulhi` on aarch64, one `mul` on x86_64),
64/// XORing the two halves of the product to mix top with bottom. The two
65/// streams are independent of each other in the bulk loop, so LLVM can
66/// schedule them on two ALU ports per cycle.
67///
68/// Lengths ≤ 16: XOR-only absorb of two reads (start + end), then a
69/// single `multiply_mix` of the two streams. The XOR-only absorb is fast
70/// because there's no ALU dependency between the two reads.
71///
72/// Lengths > 16: per-16-byte iteration, `s1 <- multiply_mix(s0 ^ x,
73/// CONST ^ y); s0 <- s1`. The `CONST` (digits of pi) prevents the
74/// all-zeros input from collapsing.
75///
76/// Final mix: `multiply_mix(s0, s1) ^ len` — folds length in so that
77/// `"abc"` and `"ab\0c"` hash differently (the XOR-only short path
78/// doesn't distinguish length-by-position without this).
79///
80/// Then `fmix64` to give us the anti-clustering avalanche we need (the
81/// rustc-hash design assumes its consumer mixes again; we don't, so we
82/// avalanche ourselves — same property as the legacy [`FxHasher`] path).
83#[inline]
84fn hash_bytes_pipelined(bytes: &[u8]) -> u64 {
85    // Constants — digits of pi (matches rustc-hash 2.x for cross-bench
86    // sanity; the actual choice doesn't matter beyond "non-zero, not
87    // sharing structure with input distributions").
88    const S1: u64 = 0x243f_6a88_85a3_08d3;
89    const S2: u64 = 0x1319_8a2e_0370_7344;
90    const ANTI_ZERO: u64 = 0xa409_3822_299f_31d0;
91    let len = bytes.len();
92    let mut s0 = S1;
93    let mut s1 = S2;
94
95    if len <= 16 {
96        if len >= 8 {
97            // Read first 8 and last 8 (may overlap when 8 ≤ len ≤ 15).
98            s0 ^= u64::from_le_bytes(bytes[0..8].try_into().unwrap());
99            s1 ^= u64::from_le_bytes(bytes[len - 8..].try_into().unwrap());
100        } else if len >= 4 {
101            s0 ^= u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as u64;
102            s1 ^= u32::from_le_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
103        } else if len > 0 {
104            // 1-3 byte tail: form a 3-byte key (lo, mid, hi) that
105            // distinguishes "ab" from "ba" etc.
106            let lo = bytes[0];
107            let mid = bytes[len / 2];
108            let hi = bytes[len - 1];
109            s0 ^= lo as u64;
110            s1 ^= ((hi as u64) << 8) | mid as u64;
111        }
112        // len == 0 falls through with s0 == S1, s1 == S2 unchanged.
113    } else {
114        // Bulk: drop the very last byte from the bulk slice so the suffix
115        // 16 bytes can partially overlap with bulk's tail (this is what
116        // rustc-hash 2.x does; it makes the suffix path uniform).
117        let mut bulk = &bytes[..len - 1];
118        while let Some((chunk, rest)) = bulk.split_first_chunk::<16>() {
119            let x = u64::from_le_bytes(chunk[..8].try_into().unwrap());
120            let y = u64::from_le_bytes(chunk[8..].try_into().unwrap());
121            let t = multiply_mix(s0 ^ x, ANTI_ZERO ^ y);
122            s0 = s1;
123            s1 = t;
124            bulk = rest;
125        }
126        // Suffix 16 bytes (may overlap with last bulk iter).
127        let suffix = &bytes[len - 16..];
128        s0 ^= u64::from_le_bytes(suffix[0..8].try_into().unwrap());
129        s1 ^= u64::from_le_bytes(suffix[8..16].try_into().unwrap());
130    }
131
132    let folded = multiply_mix(s0, s1) ^ (len as u64);
133    fmix64(folded)
134}
135
136/// 64×64→128 widening multiply, XOR'ing the two halves of the product.
137/// Single `mul` on x86_64, one `mul`+one `mulhi` on aarch64. Mixes top and
138/// bottom of the product so the entire output fluctuates with small
139/// changes in the input.
140#[inline]
141fn multiply_mix(x: u64, y: u64) -> u64 {
142    let full = (x as u128).wrapping_mul(y as u128);
143    let lo = full as u64;
144    let hi = (full >> 64) as u64;
145    lo ^ hi
146}
147
148/// Fast, well-distributed [`Hasher`] for kevy's keyspace. Word-at-a-time absorb
149/// (FxHash-style) finished with [`fmix64`]. See the crate docs for the security
150/// trade-off.
151#[derive(Default)]
152pub struct FxHasher(u64);
153
154impl Hasher for FxHasher {
155    #[inline]
156    fn finish(&self) -> u64 {
157        fmix64(self.0)
158    }
159
160    #[inline]
161    fn write(&mut self, mut bytes: &[u8]) {
162        let mut state = self.0;
163        while bytes.len() >= 8 {
164            let word = u64::from_le_bytes(bytes[..8].try_into().unwrap());
165            state = mix(state, word);
166            bytes = &bytes[8..];
167        }
168        if bytes.len() >= 4 {
169            let word = u32::from_le_bytes(bytes[..4].try_into().unwrap()) as u64;
170            state = mix(state, word);
171            bytes = &bytes[4..];
172        }
173        for &b in bytes {
174            state = mix(state, b as u64);
175        }
176        self.0 = state;
177    }
178
179    // Fixed-width integer keys (e.g. connection-id maps) skip the byte loop.
180    #[inline]
181    fn write_u64(&mut self, i: u64) {
182        self.0 = mix(self.0, i);
183    }
184    #[inline]
185    fn write_usize(&mut self, i: usize) {
186        self.0 = mix(self.0, i as u64);
187    }
188}
189
190/// [`BuildHasher`](std::hash::BuildHasher) for [`FxHasher`]. Seedless, so equal
191/// keys hash equally across instances and process runs.
192pub type FxBuildHasher = BuildHasherDefault<FxHasher>;
193
194/// Single-call hashing for kevy's per-command hot path.
195///
196/// `std::hash::Hasher` is a state-machine API — every hash is `Hasher::default()`
197/// → `write_*` → `finish`, with `BuildHasher` indirection on top. For
198/// `kevy-map`'s open-addressing table the keyspace is a small handful of
199/// well-known leaf types (`[u8]`, `u32`, `u64`, `i32`); we get a faster, inline-
200/// friendly hash by exposing one method on each that produces the final mixed
201/// 64-bit value in one go.
202///
203/// All impls must agree with feeding the value through [`FxHasher`] then
204/// calling `finish` — this lets us cut the trait dispatch without changing the
205/// hash function. `kevy-map` consumes both the full hash (for bucket index)
206/// and its top 7 bits (for the metadata byte).
207pub trait KevyHash {
208    /// Compute the final mixed 64-bit hash of `self` in one call.
209    fn kevy_hash(&self) -> u64;
210}
211
212impl KevyHash for [u8] {
213    /// Byte-slice hash. Uses the **two-stream pipelined** path internally
214    /// for ILP on the bench's 8-64 byte keyspace, closing the prior 1 ns
215    /// gap vs rustc-hash 2.x's `hash_bytes`. The final `fmix64` retains
216    /// the anti-clustering guarantee that the
217    /// `no_catastrophic_clustering_on_low_entropy_keys` test enforces.
218    ///
219    /// Note: the result diverges from the legacy [`FxHasher`] absorb path —
220    /// callers using `FxHashMap<Vec<u8>, _>` route through std's
221    /// `Hash::hash → Hasher::write → finish` (the legacy single-stream
222    /// path), which intentionally stays put for cross-instance hash
223    /// stability with anything that depended on the v0.polish bit pattern.
224    /// The `KevyHash for [u8]` impl is for one-call hot paths like
225    /// `kevy-map::find_by_borrow`, which is the only one we measure.
226    #[inline]
227    fn kevy_hash(&self) -> u64 {
228        hash_bytes_pipelined(self)
229    }
230}
231
232impl KevyHash for Vec<u8> {
233    #[inline]
234    fn kevy_hash(&self) -> u64 {
235        self.as_slice().kevy_hash()
236    }
237}
238
239impl KevyHash for u64 {
240    #[inline]
241    fn kevy_hash(&self) -> u64 {
242        fmix64(mix(0, *self))
243    }
244}
245
246impl KevyHash for u32 {
247    #[inline]
248    fn kevy_hash(&self) -> u64 {
249        fmix64(mix(0, *self as u64))
250    }
251}
252
253impl KevyHash for i32 {
254    #[inline]
255    fn kevy_hash(&self) -> u64 {
256        // Sign-extend to u64 so equal i32 values hash the same as if widened
257        // through the integer path; negatives' top bits still fmix64 away.
258        fmix64(mix(0, *self as i64 as u64))
259    }
260}
261
262impl KevyHash for usize {
263    #[inline]
264    fn kevy_hash(&self) -> u64 {
265        fmix64(mix(0, *self as u64))
266    }
267}
268
269/// A [`HashMap`] using [`FxHasher`] instead of SipHash.
270pub type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>;
271
272/// A [`HashSet`] using [`FxHasher`] instead of SipHash.
273pub type FxHashSet<T> = HashSet<T, FxBuildHasher>;
274
275#[cfg(test)]
276mod tests {
277    use super::*;
278    use std::hash::BuildHasher;
279
280    fn h(bytes: &[u8]) -> u64 {
281        FxBuildHasher::default().hash_one(bytes)
282    }
283
284    #[test]
285    fn deterministic_across_instances() {
286        assert_eq!(h(b"hello"), h(b"hello"));
287        assert_ne!(h(b"hello"), h(b"hellp"));
288        assert_ne!(h(b""), h(b"\0"));
289    }
290
291    #[test]
292    fn map_roundtrip() {
293        let mut m: FxHashMap<Vec<u8>, u64> = FxHashMap::default();
294        for i in 0..10_000u64 {
295            m.insert(format!("key:{i}").into_bytes(), i);
296        }
297        assert_eq!(m.len(), 10_000);
298        for i in 0..10_000u64 {
299            assert_eq!(m.get(format!("key:{i}").into_bytes().as_slice()), Some(&i));
300        }
301    }
302
303    #[test]
304    fn kevy_hash_bytes_is_deterministic_and_distinct() {
305        // KevyHash for [u8] uses the two-stream pipelined hash_bytes_pipelined
306        // path (the rustc-hash 2.x trick + our fmix64 finalize). It diverges
307        // from the legacy FxHasher::write byte absorb path — see the impl
308        // doc-comment.
309        let key = b"hello-world".as_slice();
310        // Deterministic across calls (no random seed).
311        assert_eq!(key.kevy_hash(), key.kevy_hash());
312        // Distinct from a single-bit-flipped key.
313        assert_ne!(key.kevy_hash(), b"hello-worle".as_slice().kevy_hash());
314        // Length matters (XOR-only short path otherwise wouldn't distinguish).
315        assert_ne!(b"abc".as_slice().kevy_hash(), b"abcd".as_slice().kevy_hash());
316        // The legacy FxHasher path is still available via std Hasher trait
317        // (FxHashMap users); the two no longer have to agree.
318        let mut staged = FxHasher::default();
319        staged.write(key);
320        let _fx_legacy = staged.finish();
321        // Intentionally no assert_eq! here — divergence is the point.
322    }
323
324    #[test]
325    fn kevy_hash_integer_paths_differ_per_value() {
326        let a: u64 = 1;
327        let b: u64 = 2;
328        assert_ne!(a.kevy_hash(), b.kevy_hash());
329        let i: i32 = -1;
330        let j: i32 = 1;
331        assert_ne!(i.kevy_hash(), j.kevy_hash());
332    }
333
334    #[test]
335    fn kevy_hash_top7_bits_distribute() {
336        // Same low-entropy clustering guard, but driven through `kevy_hash`
337        // on byte slices — the path kevy-map's metadata byte will use.
338        let mut top = [0u32; 128];
339        for i in 0..4096u64 {
340            let mut k = format!("key:{i}").into_bytes();
341            k.resize(12, b'x');
342            let hash = k.as_slice().kevy_hash();
343            top[(hash >> 57) as usize] += 1;
344        }
345        let max = *top.iter().max().unwrap();
346        assert!(max < 128, "top-7-bit skew {max} (mean 32) — avalanche failing");
347    }
348
349    #[test]
350    fn integer_keys_roundtrip() {
351        let mut m: FxHashMap<u64, u64> = FxHashMap::default();
352        for i in 0..1_000u64 {
353            m.insert(i, i * 2);
354        }
355        assert_eq!(m.get(&500), Some(&1_000));
356        assert_eq!(m.get(&999), Some(&1_998));
357    }
358
359    /// Guards against the raw-Fx failure mode: low-entropy sequential keys
360    /// (`"key:0xxxxx".."key:99999x"`) must spread across buckets, not pile up.
361    /// `fmix64` is what makes this pass; removing it would fail loudly.
362    #[test]
363    fn no_catastrophic_clustering_on_low_entropy_keys() {
364        let keys: Vec<Vec<u8>> = (0..4096u64)
365            .map(|i| {
366                let mut k = format!("key:{i}").into_bytes();
367                k.resize(12, b'x');
368                k
369            })
370            .collect();
371
372        // Low bits drive the bucket index; 4096 keys / 256 → mean 16/bucket.
373        let mut low = [0u32; 256];
374        // Top 7 bits drive hashbrown's SIMD control byte; / 128 → mean 32.
375        let mut top = [0u32; 128];
376        for k in &keys {
377            let hash = h(k);
378            low[(hash & 0xff) as usize] += 1;
379            top[(hash >> 57) as usize] += 1;
380        }
381        let max_low = *low.iter().max().unwrap();
382        let max_top = *top.iter().max().unwrap();
383        // Well-avalanched ⇒ no bucket exceeds ~4× the mean.
384        assert!(max_low < 64, "low-bit skew {max_low} (mean 16) — avalanche failing");
385        assert!(max_top < 128, "top-bit skew {max_top} (mean 32) — avalanche failing");
386    }
387
388    // ---- KevyHash impls for delegating types (cov for u32 / usize / Vec<u8>) -
389
390    #[test]
391    fn kevy_hash_vec_u8_agrees_with_slice() {
392        let v: Vec<u8> = b"hello-world".to_vec();
393        assert_eq!(v.kevy_hash(), v.as_slice().kevy_hash());
394    }
395
396    #[test]
397    fn kevy_hash_u32_agrees_with_widened_u64() {
398        // u32 widens through u64 → same hash as the u64 form of the same value.
399        let n: u32 = 0xCAFE_BABE;
400        assert_eq!(n.kevy_hash(), (n as u64).kevy_hash());
401        // Distinct values produce distinct hashes.
402        let m: u32 = n.wrapping_add(1);
403        assert_ne!(n.kevy_hash(), m.kevy_hash());
404    }
405
406    #[test]
407    fn kevy_hash_usize_agrees_with_u64() {
408        // usize sign-free widens through u64. Equal-valued usize ↔ u64
409        // must hash the same so a map keyed by either reads back equivalently.
410        let n: usize = 42;
411        assert_eq!(n.kevy_hash(), (n as u64).kevy_hash());
412        let m: usize = 43;
413        assert_ne!(n.kevy_hash(), m.kevy_hash());
414    }
415}
kevy_hash/lib.rs

kevy_hash/
lib.rs