kevy_hash/lib.rs
1//! kevy-hash — fast, well-distributed hashing for kevy's single-trust-domain
2//! keyspace. Zero dependencies.
3//!
4//! std's `HashMap` is a hashbrown Swiss table (excellent — kevy keeps it) keyed
5//! by `SipHash-1-3` (DoS-resistant, but a tax a single-threaded-per-shard
6//! keyspace facing no adversarial cross-trust key collisions does not need).
7//! This crate supplies the hasher that table should use instead: an FxHash-style
8//! word-at-a-time absorb plus a murmur3 [`fmix64`] avalanche finalizer.
9//!
10//! Measured (via `kevy-store/examples/bench_keyspace.rs`): ~4× faster
11//! hashing, ~1.2–2.8× faster GET-hit, ~1.1–1.7× faster GET-miss than
12//! SipHash, with no clustering.
13//! The finalizer is **essential** — the bare Fx absorb (no `fmix64`) clusters
14//! 30–50× on low-entropy sequential keys like `"key:0".."key:99999"`.
15//!
16//! **Not DoS-resistant.** There is no random seed, so an attacker who can choose
17//! keys *and* observe timing could force collisions. kevy's keyspace lives
18//! inside one trust domain per shard, so this is the right trade; do not reuse
19//! this hasher for maps fed untrusted, adversarially-chosen keys across a trust
20//! boundary.
21//!
22//! ```
23//! use kevy_hash::FxHashMap;
24//!
25//! let mut m: FxHashMap<Vec<u8>, u64> = FxHashMap::default();
26//! m.insert(b"key".to_vec(), 1);
27//! assert_eq!(m.get(b"key".as_slice()), Some(&1));
28//! ```
29#![forbid(unsafe_code)]
30#![warn(missing_docs)]
31
32use std::collections::{HashMap, HashSet};
33use std::hash::{BuildHasherDefault, Hasher};
34
35/// FxHash mixing constant (rustc's `rustc-hash` seed).
36const SEED: u64 = 0x517c_c1b7_2722_0a95;
37const ROTATE: u32 = 5;
38
39/// murmur3 `fmix64` avalanche — spreads every input bit across all 64 output
40/// bits. ~6 ALU ops, applied once on [`Hasher::finish`]. This is what the bare
41/// Fx absorb lacks, and why it clusters without it.
42#[inline]
43pub fn fmix64(mut h: u64) -> u64 {
44 h ^= h >> 33;
45 h = h.wrapping_mul(0xff51_afd7_ed55_8ccd);
46 h ^= h >> 33;
47 h = h.wrapping_mul(0xc4ce_b9fe_1a85_ec53);
48 h ^= h >> 33;
49 h
50}
51
52#[inline]
53fn mix(state: u64, word: u64) -> u64 {
54 (state.rotate_left(ROTATE) ^ word).wrapping_mul(SEED)
55}
56
57/// Two-stream pipelined hash_bytes inspired by rustc-hash 2.x's design
58/// (`rustc-hash/src/lib.rs#hash_bytes`). The key trick is keeping two
59/// independent state words `s0` / `s1` updated via 64×64→128 widening
60/// multiplication (one `mul`+`mulhi` on aarch64, one `mul` on x86_64),
61/// XORing the two halves of the product to mix top with bottom. The two
62/// streams are independent of each other in the bulk loop, so LLVM can
63/// schedule them on two ALU ports per cycle.
64///
65/// Lengths ≤ 16: XOR-only absorb of two reads (start + end), then a
66/// single `multiply_mix` of the two streams. The XOR-only absorb is fast
67/// because there's no ALU dependency between the two reads.
68///
69/// Lengths > 16: per-16-byte iteration, `s1 <- multiply_mix(s0 ^ x,
70/// CONST ^ y); s0 <- s1`. The `CONST` (digits of pi) prevents the
71/// all-zeros input from collapsing.
72///
73/// Final mix: `multiply_mix(s0, s1) ^ len` — folds length in so that
74/// `"abc"` and `"ab\0c"` hash differently (the XOR-only short path
75/// doesn't distinguish length-by-position without this).
76///
77/// Then `fmix64` to give us the anti-clustering avalanche we need (the
78/// rustc-hash design assumes its consumer mixes again; we don't, so we
79/// avalanche ourselves — same property as the legacy [`FxHasher`] path).
80#[inline]
81fn hash_bytes_pipelined(bytes: &[u8]) -> u64 {
82 // Constants — digits of pi (matches rustc-hash 2.x for cross-bench
83 // sanity; the actual choice doesn't matter beyond "non-zero, not
84 // sharing structure with input distributions").
85 const S1: u64 = 0x243f_6a88_85a3_08d3;
86 const S2: u64 = 0x1319_8a2e_0370_7344;
87 const ANTI_ZERO: u64 = 0xa409_3822_299f_31d0;
88 let len = bytes.len();
89 let mut s0 = S1;
90 let mut s1 = S2;
91
92 if len <= 16 {
93 if len >= 8 {
94 // Read first 8 and last 8 (may overlap when 8 ≤ len ≤ 15).
95 s0 ^= u64::from_le_bytes(bytes[0..8].try_into().unwrap());
96 s1 ^= u64::from_le_bytes(bytes[len - 8..].try_into().unwrap());
97 } else if len >= 4 {
98 s0 ^= u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as u64;
99 s1 ^= u32::from_le_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
100 } else if len > 0 {
101 // 1-3 byte tail: form a 3-byte key (lo, mid, hi) that
102 // distinguishes "ab" from "ba" etc.
103 let lo = bytes[0];
104 let mid = bytes[len / 2];
105 let hi = bytes[len - 1];
106 s0 ^= lo as u64;
107 s1 ^= ((hi as u64) << 8) | mid as u64;
108 }
109 // len == 0 falls through with s0 == S1, s1 == S2 unchanged.
110 } else {
111 // Bulk: drop the very last byte from the bulk slice so the suffix
112 // 16 bytes can partially overlap with bulk's tail (this is what
113 // rustc-hash 2.x does; it makes the suffix path uniform).
114 let mut bulk = &bytes[..len - 1];
115 while let Some((chunk, rest)) = bulk.split_first_chunk::<16>() {
116 let x = u64::from_le_bytes(chunk[..8].try_into().unwrap());
117 let y = u64::from_le_bytes(chunk[8..].try_into().unwrap());
118 let t = multiply_mix(s0 ^ x, ANTI_ZERO ^ y);
119 s0 = s1;
120 s1 = t;
121 bulk = rest;
122 }
123 // Suffix 16 bytes (may overlap with last bulk iter).
124 let suffix = &bytes[len - 16..];
125 s0 ^= u64::from_le_bytes(suffix[0..8].try_into().unwrap());
126 s1 ^= u64::from_le_bytes(suffix[8..16].try_into().unwrap());
127 }
128
129 let folded = multiply_mix(s0, s1) ^ (len as u64);
130 fmix64(folded)
131}
132
133/// 64×64→128 widening multiply, XOR'ing the two halves of the product.
134/// Single `mul` on x86_64, one `mul`+one `mulhi` on aarch64. Mixes top and
135/// bottom of the product so the entire output fluctuates with small
136/// changes in the input.
137#[inline]
138fn multiply_mix(x: u64, y: u64) -> u64 {
139 let full = (x as u128).wrapping_mul(y as u128);
140 let lo = full as u64;
141 let hi = (full >> 64) as u64;
142 lo ^ hi
143}
144
145/// Fast, well-distributed [`Hasher`] for kevy's keyspace. Word-at-a-time absorb
146/// (FxHash-style) finished with [`fmix64`]. See the crate docs for the security
147/// trade-off.
148#[derive(Default)]
149pub struct FxHasher(u64);
150
151impl Hasher for FxHasher {
152 #[inline]
153 fn finish(&self) -> u64 {
154 fmix64(self.0)
155 }
156
157 #[inline]
158 fn write(&mut self, mut bytes: &[u8]) {
159 let mut state = self.0;
160 while bytes.len() >= 8 {
161 let word = u64::from_le_bytes(bytes[..8].try_into().unwrap());
162 state = mix(state, word);
163 bytes = &bytes[8..];
164 }
165 if bytes.len() >= 4 {
166 let word = u32::from_le_bytes(bytes[..4].try_into().unwrap()) as u64;
167 state = mix(state, word);
168 bytes = &bytes[4..];
169 }
170 for &b in bytes {
171 state = mix(state, b as u64);
172 }
173 self.0 = state;
174 }
175
176 // Fixed-width integer keys (e.g. connection-id maps) skip the byte loop.
177 #[inline]
178 fn write_u64(&mut self, i: u64) {
179 self.0 = mix(self.0, i);
180 }
181 #[inline]
182 fn write_usize(&mut self, i: usize) {
183 self.0 = mix(self.0, i as u64);
184 }
185}
186
187/// [`BuildHasher`](std::hash::BuildHasher) for [`FxHasher`]. Seedless, so equal
188/// keys hash equally across instances and process runs.
189pub type FxBuildHasher = BuildHasherDefault<FxHasher>;
190
191/// Single-call hashing for kevy's per-command hot path.
192///
193/// `std::hash::Hasher` is a state-machine API — every hash is `Hasher::default()`
194/// → `write_*` → `finish`, with `BuildHasher` indirection on top. For
195/// `kevy-map`'s open-addressing table the keyspace is a small handful of
196/// well-known leaf types (`[u8]`, `u32`, `u64`, `i32`); we get a faster, inline-
197/// friendly hash by exposing one method on each that produces the final mixed
198/// 64-bit value in one go.
199///
200/// All impls must agree with feeding the value through [`FxHasher`] then
201/// calling `finish` — this lets us cut the trait dispatch without changing the
202/// hash function. `kevy-map` consumes both the full hash (for bucket index)
203/// and its top 7 bits (for the metadata byte).
204pub trait KevyHash {
205 /// Compute the final mixed 64-bit hash of `self` in one call.
206 fn kevy_hash(&self) -> u64;
207}
208
209impl KevyHash for [u8] {
210 /// Byte-slice hash. Uses the **two-stream pipelined** path internally
211 /// for ILP on the bench's 8-64 byte keyspace, closing the prior 1 ns
212 /// gap vs rustc-hash 2.x's `hash_bytes`. The final `fmix64` retains
213 /// the anti-clustering guarantee that the
214 /// `no_catastrophic_clustering_on_low_entropy_keys` test enforces.
215 ///
216 /// Note: the result diverges from the legacy [`FxHasher`] absorb path —
217 /// callers using `FxHashMap<Vec<u8>, _>` route through std's
218 /// `Hash::hash → Hasher::write → finish` (the legacy single-stream
219 /// path), which intentionally stays put for cross-instance hash
220 /// stability with anything that depended on the v0.polish bit pattern.
221 /// The `KevyHash for [u8]` impl is for one-call hot paths like
222 /// `kevy-map::find_by_borrow`, which is the only one we measure.
223 #[inline]
224 fn kevy_hash(&self) -> u64 {
225 hash_bytes_pipelined(self)
226 }
227}
228
229impl KevyHash for Vec<u8> {
230 #[inline]
231 fn kevy_hash(&self) -> u64 {
232 self.as_slice().kevy_hash()
233 }
234}
235
236impl KevyHash for u64 {
237 #[inline]
238 fn kevy_hash(&self) -> u64 {
239 fmix64(mix(0, *self))
240 }
241}
242
243impl KevyHash for u32 {
244 #[inline]
245 fn kevy_hash(&self) -> u64 {
246 fmix64(mix(0, *self as u64))
247 }
248}
249
250impl KevyHash for i32 {
251 #[inline]
252 fn kevy_hash(&self) -> u64 {
253 // Sign-extend to u64 so equal i32 values hash the same as if widened
254 // through the integer path; negatives' top bits still fmix64 away.
255 fmix64(mix(0, *self as i64 as u64))
256 }
257}
258
259impl KevyHash for usize {
260 #[inline]
261 fn kevy_hash(&self) -> u64 {
262 fmix64(mix(0, *self as u64))
263 }
264}
265
266/// A [`HashMap`] using [`FxHasher`] instead of SipHash.
267pub type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>;
268
269/// A [`HashSet`] using [`FxHasher`] instead of SipHash.
270pub type FxHashSet<T> = HashSet<T, FxBuildHasher>;
271
272#[cfg(test)]
273mod tests {
274 use super::*;
275 use std::hash::BuildHasher;
276
277 fn h(bytes: &[u8]) -> u64 {
278 FxBuildHasher::default().hash_one(bytes)
279 }
280
281 #[test]
282 fn deterministic_across_instances() {
283 assert_eq!(h(b"hello"), h(b"hello"));
284 assert_ne!(h(b"hello"), h(b"hellp"));
285 assert_ne!(h(b""), h(b"\0"));
286 }
287
288 #[test]
289 fn map_roundtrip() {
290 let mut m: FxHashMap<Vec<u8>, u64> = FxHashMap::default();
291 for i in 0..10_000u64 {
292 m.insert(format!("key:{i}").into_bytes(), i);
293 }
294 assert_eq!(m.len(), 10_000);
295 for i in 0..10_000u64 {
296 assert_eq!(m.get(format!("key:{i}").into_bytes().as_slice()), Some(&i));
297 }
298 }
299
300 #[test]
301 fn kevy_hash_bytes_is_deterministic_and_distinct() {
302 // KevyHash for [u8] uses the two-stream pipelined hash_bytes_pipelined
303 // path (the rustc-hash 2.x trick + our fmix64 finalize). It diverges
304 // from the legacy FxHasher::write byte absorb path — see the impl
305 // doc-comment.
306 let key = b"hello-world".as_slice();
307 // Deterministic across calls (no random seed).
308 assert_eq!(key.kevy_hash(), key.kevy_hash());
309 // Distinct from a single-bit-flipped key.
310 assert_ne!(key.kevy_hash(), b"hello-worle".as_slice().kevy_hash());
311 // Length matters (XOR-only short path otherwise wouldn't distinguish).
312 assert_ne!(b"abc".as_slice().kevy_hash(), b"abcd".as_slice().kevy_hash());
313 // The legacy FxHasher path is still available via std Hasher trait
314 // (FxHashMap users); the two no longer have to agree.
315 let mut staged = FxHasher::default();
316 staged.write(key);
317 let _fx_legacy = staged.finish();
318 // Intentionally no assert_eq! here — divergence is the point.
319 }
320
321 #[test]
322 fn kevy_hash_integer_paths_differ_per_value() {
323 let a: u64 = 1;
324 let b: u64 = 2;
325 assert_ne!(a.kevy_hash(), b.kevy_hash());
326 let i: i32 = -1;
327 let j: i32 = 1;
328 assert_ne!(i.kevy_hash(), j.kevy_hash());
329 }
330
331 #[test]
332 fn kevy_hash_top7_bits_distribute() {
333 // Same low-entropy clustering guard, but driven through `kevy_hash`
334 // on byte slices — the path kevy-map's metadata byte will use.
335 let mut top = [0u32; 128];
336 for i in 0..4096u64 {
337 let mut k = format!("key:{i}").into_bytes();
338 k.resize(12, b'x');
339 let hash = k.as_slice().kevy_hash();
340 top[(hash >> 57) as usize] += 1;
341 }
342 let max = *top.iter().max().unwrap();
343 assert!(max < 128, "top-7-bit skew {max} (mean 32) — avalanche failing");
344 }
345
346 #[test]
347 fn integer_keys_roundtrip() {
348 let mut m: FxHashMap<u64, u64> = FxHashMap::default();
349 for i in 0..1_000u64 {
350 m.insert(i, i * 2);
351 }
352 assert_eq!(m.get(&500), Some(&1_000));
353 assert_eq!(m.get(&999), Some(&1_998));
354 }
355
356 /// Guards against the raw-Fx failure mode: low-entropy sequential keys
357 /// (`"key:0xxxxx".."key:99999x"`) must spread across buckets, not pile up.
358 /// `fmix64` is what makes this pass; removing it would fail loudly.
359 #[test]
360 fn no_catastrophic_clustering_on_low_entropy_keys() {
361 let keys: Vec<Vec<u8>> = (0..4096u64)
362 .map(|i| {
363 let mut k = format!("key:{i}").into_bytes();
364 k.resize(12, b'x');
365 k
366 })
367 .collect();
368
369 // Low bits drive the bucket index; 4096 keys / 256 → mean 16/bucket.
370 let mut low = [0u32; 256];
371 // Top 7 bits drive hashbrown's SIMD control byte; / 128 → mean 32.
372 let mut top = [0u32; 128];
373 for k in &keys {
374 let hash = h(k);
375 low[(hash & 0xff) as usize] += 1;
376 top[(hash >> 57) as usize] += 1;
377 }
378 let max_low = *low.iter().max().unwrap();
379 let max_top = *top.iter().max().unwrap();
380 // Well-avalanched ⇒ no bucket exceeds ~4× the mean.
381 assert!(max_low < 64, "low-bit skew {max_low} (mean 16) — avalanche failing");
382 assert!(max_top < 128, "top-bit skew {max_top} (mean 32) — avalanche failing");
383 }
384
385 // ---- KevyHash impls for delegating types (cov for u32 / usize / Vec<u8>) -
386
387 #[test]
388 fn kevy_hash_vec_u8_agrees_with_slice() {
389 let v: Vec<u8> = b"hello-world".to_vec();
390 assert_eq!(v.kevy_hash(), v.as_slice().kevy_hash());
391 }
392
393 #[test]
394 fn kevy_hash_u32_agrees_with_widened_u64() {
395 // u32 widens through u64 → same hash as the u64 form of the same value.
396 let n: u32 = 0xCAFE_BABE;
397 assert_eq!(n.kevy_hash(), (n as u64).kevy_hash());
398 // Distinct values produce distinct hashes.
399 let m: u32 = n.wrapping_add(1);
400 assert_ne!(n.kevy_hash(), m.kevy_hash());
401 }
402
403 #[test]
404 fn kevy_hash_usize_agrees_with_u64() {
405 // usize sign-free widens through u64. Equal-valued usize ↔ u64
406 // must hash the same so a map keyed by either reads back equivalently.
407 let n: usize = 42;
408 assert_eq!(n.kevy_hash(), (n as u64).kevy_hash());
409 let m: usize = 43;
410 assert_ne!(n.kevy_hash(), m.kevy_hash());
411 }
412}