dynomite/hashkit/mod.rs
1//! Hashing primitives, token arithmetic, and ring distributions.
2//!
3//! The [`HashType`] enum lists every hashing algorithm offered by the
4//! engine. [`hash`] dispatches a key to the chosen algorithm and yields a
5//! [`token::DynToken`] that can be compared and used as a key in a
6//! continuum.
7//!
8//! # Examples
9//!
10//! ```
11//! use dynomite::hashkit::{hash, HashType};
12//!
13//! let token = hash(HashType::Murmur3, b"dynomite");
14//! assert_eq!(token.len(), 4);
15//! ```
16
17// Hashing primitives reinterpret bytes across signed/unsigned
18// boundaries and truncate by design; the casts are part of the
19// algorithmic contract and isolated to this subtree.
20#![allow(
21 clippy::cast_possible_truncation,
22 clippy::cast_possible_wrap,
23 clippy::cast_sign_loss
24)]
25
26mod crc16;
27mod crc32;
28mod fnv;
29mod hsieh;
30mod jenkins;
31mod md5;
32mod murmur;
33mod murmur3;
34mod one_at_a_time;
35mod random;
36
37pub mod ketama;
38pub mod modula;
39pub mod random_slicing;
40pub mod token;
41
42pub use crate::hashkit::murmur3::murmur3_x64_64;
43pub use crate::hashkit::random::PseudoRng;
44pub use crate::hashkit::random_slicing::{RandomSlices, RandomSlicesError};
45pub use crate::hashkit::token::DynToken;
46
47/// All hashing algorithms supported by the engine.
48///
49/// Variant order mirrors the on-disk `hash_type_t` integer codec:
50/// configuration files and the `dyn-hash-tool` CLI rely on the integer
51/// ordering of these variants when they round-trip through other
52/// systems.
53///
54/// # Examples
55///
56/// ```
57/// use dynomite::hashkit::HashType;
58/// assert_eq!(HashType::all().len(), 14);
59/// assert_eq!(HashType::Murmur3.as_str(), "murmur3");
60/// ```
61#[allow(non_camel_case_types)]
62#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
63pub enum HashType {
64 /// Jenkins one-at-a-time.
65 OneAtATime,
66 /// MD5; truncates the digest to its low 32 bits.
67 Md5,
68 /// CRC-16 (CCITT polynomial table).
69 Crc16,
70 /// libmemcached-compatible CRC-32.
71 Crc32,
72 /// Standards-compliant CRC-32.
73 Crc32a,
74 /// FNV-1 over 64-bit accumulator, truncated to 32 bits.
75 Fnv1_64,
76 /// FNV-1a operating in 32-bit space using the 64-bit prime.
77 Fnv1a_64,
78 /// FNV-1 in 32 bits.
79 Fnv1_32,
80 /// FNV-1a in 32 bits.
81 Fnv1a_32,
82 /// Paul Hsieh's "SuperFastHash".
83 Hsieh,
84 /// MurmurHash 2.
85 Murmur,
86 /// Bob Jenkins lookup3.
87 Jenkins,
88 /// MurmurHash3 x86 128-bit.
89 Murmur3,
90 /// MurmurHash3 truncated to 64 bits (used by the
91 /// random-slicing distribution lookup).
92 #[allow(non_camel_case_types)]
93 Murmur3X64_64,
94}
95
96impl HashType {
97 /// Lower-case algorithm name as it appears in YAML configurations.
98 ///
99 /// # Examples
100 ///
101 /// ```
102 /// use dynomite::hashkit::HashType;
103 /// assert_eq!(HashType::Murmur3.as_str(), "murmur3");
104 /// ```
105 #[must_use]
106 pub const fn as_str(self) -> &'static str {
107 match self {
108 HashType::OneAtATime => "one_at_a_time",
109 HashType::Md5 => "md5",
110 HashType::Crc16 => "crc16",
111 HashType::Crc32 => "crc32",
112 HashType::Crc32a => "crc32a",
113 HashType::Fnv1_64 => "fnv1_64",
114 HashType::Fnv1a_64 => "fnv1a_64",
115 HashType::Fnv1_32 => "fnv1_32",
116 HashType::Fnv1a_32 => "fnv1a_32",
117 HashType::Hsieh => "hsieh",
118 HashType::Murmur => "murmur",
119 HashType::Jenkins => "jenkins",
120 HashType::Murmur3 => "murmur3",
121 HashType::Murmur3X64_64 => "murmur3_x64_64",
122 }
123 }
124
125 /// All variants, in declaration order.
126 ///
127 /// # Examples
128 ///
129 /// ```
130 /// use dynomite::hashkit::HashType;
131 /// assert!(HashType::all().contains(&HashType::Murmur3));
132 /// ```
133 #[must_use]
134 pub const fn all() -> &'static [HashType] {
135 &[
136 HashType::OneAtATime,
137 HashType::Md5,
138 HashType::Crc16,
139 HashType::Crc32,
140 HashType::Crc32a,
141 HashType::Fnv1_64,
142 HashType::Fnv1a_64,
143 HashType::Fnv1_32,
144 HashType::Fnv1a_32,
145 HashType::Hsieh,
146 HashType::Murmur,
147 HashType::Jenkins,
148 HashType::Murmur3,
149 HashType::Murmur3X64_64,
150 ]
151 }
152
153 /// Parse a YAML-style algorithm name. Unknown names yield `None`.
154 ///
155 /// # Examples
156 ///
157 /// ```
158 /// use dynomite::hashkit::HashType;
159 /// assert_eq!(HashType::from_name("murmur3"), Some(HashType::Murmur3));
160 /// assert_eq!(HashType::from_name("nope"), None);
161 /// ```
162 #[must_use]
163 pub fn from_name(name: &str) -> Option<HashType> {
164 Self::all().iter().copied().find(|h| h.as_str() == name)
165 }
166}
167
168/// Hash `key` with the requested algorithm and return the resulting token.
169///
170/// The returned token has `len == 1` for every 32-bit algorithm and
171/// `len == 4` for `Murmur3`. The contract is identical to the C
172/// `hash_func_t` it replaces: dispatch is total over [`HashType::all`].
173///
174/// # Examples
175///
176/// ```
177/// use dynomite::hashkit::{hash, HashType};
178///
179/// let t1 = hash(HashType::Crc32a, b"abc");
180/// let t2 = hash(HashType::Crc32a, b"abc");
181/// assert_eq!(t1, t2);
182/// ```
183#[must_use]
184pub fn hash(ty: HashType, key: &[u8]) -> DynToken {
185 match ty {
186 HashType::OneAtATime => one_at_a_time::hash(key),
187 HashType::Md5 => md5::hash(key),
188 HashType::Crc16 => crc16::hash(key),
189 HashType::Crc32 => crc32::hash_libmemcached(key),
190 HashType::Crc32a => crc32::hash_standard(key),
191 HashType::Fnv1_64 => fnv::hash_fnv1_64(key),
192 HashType::Fnv1a_64 => fnv::hash_fnv1a_64(key),
193 HashType::Fnv1_32 => fnv::hash_fnv1_32(key),
194 HashType::Fnv1a_32 => fnv::hash_fnv1a_32(key),
195 HashType::Hsieh => hsieh::hash(key),
196 HashType::Murmur => murmur::hash(key),
197 HashType::Jenkins => jenkins::hash(key),
198 HashType::Murmur3 => murmur3::hash(key),
199 HashType::Murmur3X64_64 => {
200 // Pack the 64-bit fingerprint into the low two
201 // words of a `DynToken` so callers that compare
202 // against the existing 32-bit ring math observe the
203 // canonical low-half ordering. The random-slicing
204 // dispatcher does not consume tokens directly; it
205 // calls [`hash64`] for its u64 ring lookup.
206 let h = murmur3::murmur3_x64_64(0xc0a1_e5ce, key);
207 let mut token = DynToken::default();
208 token.size(2).expect("len 2 fits");
209 let mag = token.mag_mut();
210 mag[0] = h as u32;
211 mag[1] = (h >> 32) as u32;
212 token
213 }
214 }
215}
216
217/// Hash `key` and return a 64-bit value used by the
218/// random-slicing distribution table.
219///
220/// Every algorithm has a deterministic 64-bit projection: the
221/// 64-bit and 128-bit families return their natural width
222/// (truncated to 64 bits where applicable), and the legacy
223/// 32-bit families lift their result into the low half of the
224/// returned `u64`. The function is total over
225/// [`HashType::all`].
226///
227/// # Examples
228///
229/// ```
230/// use dynomite::hashkit::{hash64, HashType};
231///
232/// let a = hash64(HashType::Murmur3X64_64, b"alice");
233/// let b = hash64(HashType::Murmur3X64_64, b"alice");
234/// assert_eq!(a, b);
235/// ```
236#[must_use]
237pub fn hash64(ty: HashType, key: &[u8]) -> u64 {
238 if matches!(ty, HashType::Murmur3X64_64) {
239 return murmur3::murmur3_x64_64(0xc0a1_e5ce, key);
240 }
241 let token = hash(ty, key);
242 let mag = token.mag();
243 let lo = u64::from(mag[0]);
244 let hi = if mag.len() > 1 { u64::from(mag[1]) } else { 0 };
245 (hi << 32) | lo
246}
247
248/// Compute the raw 128-byte MD5 digest of `key`.
249///
250/// Exposed so the `ketama` continuum can build per-server points using the
251/// same digest layout as the original implementation.
252///
253/// # Examples
254///
255/// ```
256/// use dynomite::hashkit::md5_signature;
257/// // The empty string has a known MD5 digest.
258/// let d = md5_signature(b"");
259/// assert_eq!(d[0], 0xd4);
260/// assert_eq!(d.len(), 16);
261/// ```
262#[must_use]
263pub fn md5_signature(key: &[u8]) -> [u8; 16] {
264 md5::digest(key)
265}
266
267/// CRC-32 over a buffer, lower-cased before mixing.
268///
269/// Each byte is forced to lower case before being fed to the table; the
270/// helper is used by the entropy reconciliation path.
271///
272/// # Examples
273///
274/// ```
275/// use dynomite::hashkit::crc32_sz;
276/// // Lower-casing means "ABC" and "abc" produce the same digest.
277/// assert_eq!(crc32_sz(b"ABC", 0), crc32_sz(b"abc", 0));
278/// ```
279#[must_use]
280pub fn crc32_sz(buf: &[u8], in_crc32: u32) -> u32 {
281 crc32::crc32_sz(buf, in_crc32)
282}
283
284#[cfg(test)]
285mod tests {
286 use super::*;
287
288 #[test]
289 fn names_round_trip() {
290 for ty in HashType::all().iter().copied() {
291 assert_eq!(HashType::from_name(ty.as_str()), Some(ty));
292 }
293 }
294
295 #[test]
296 fn dispatch_is_deterministic() {
297 let key = b"the quick brown fox";
298 for ty in HashType::all().iter().copied() {
299 let a = hash(ty, key);
300 let b = hash(ty, key);
301 assert_eq!(a, b, "algorithm {ty:?} not deterministic");
302 }
303 }
304
305 #[test]
306 fn dispatch_lengths_match_c_codec() {
307 for ty in HashType::all().iter().copied() {
308 let token = hash(ty, b"k");
309 let expected = match ty {
310 HashType::Murmur3 => 4,
311 HashType::Murmur3X64_64 => 2,
312 _ => 1,
313 };
314 assert_eq!(
315 token.len(),
316 expected,
317 "wrong token length for {ty:?}: got {}",
318 token.len()
319 );
320 }
321 }
322}