Skip to main content

dynomite/hashkit/
mod.rs

1//! Hashing primitives, token arithmetic, and ring distributions.
2//!
3//! The [`HashType`] enum lists every hashing algorithm offered by the
4//! engine. [`hash`] dispatches a key to the chosen algorithm and yields a
5//! [`token::DynToken`] that can be compared and used as a key in a
6//! continuum.
7//!
8//! # Examples
9//!
10//! ```
11//! use dynomite::hashkit::{hash, HashType};
12//!
13//! let token = hash(HashType::Murmur3, b"dynomite");
14//! assert_eq!(token.len(), 4);
15//! ```
16
17// Hashing primitives reinterpret bytes across signed/unsigned
18// boundaries and truncate by design; the casts are part of the
19// algorithmic contract and isolated to this subtree.
20#![allow(
21    clippy::cast_possible_truncation,
22    clippy::cast_possible_wrap,
23    clippy::cast_sign_loss
24)]
25
26mod crc16;
27mod crc32;
28mod fnv;
29mod hsieh;
30mod jenkins;
31mod md5;
32mod murmur;
33mod murmur3;
34mod one_at_a_time;
35mod random;
36
37pub mod ketama;
38pub mod modula;
39pub mod random_slicing;
40pub mod token;
41
42pub use crate::hashkit::murmur3::murmur3_x64_64;
43pub use crate::hashkit::random::PseudoRng;
44pub use crate::hashkit::random_slicing::{RandomSlices, RandomSlicesError};
45pub use crate::hashkit::token::DynToken;
46
47/// All hashing algorithms supported by the engine.
48///
49/// Variant order mirrors the on-disk `hash_type_t` integer codec:
50/// configuration files and the `dyn-hash-tool` CLI rely on the integer
51/// ordering of these variants when they round-trip through other
52/// systems.
53///
54/// # Examples
55///
56/// ```
57/// use dynomite::hashkit::HashType;
58/// assert_eq!(HashType::all().len(), 14);
59/// assert_eq!(HashType::Murmur3.as_str(), "murmur3");
60/// ```
61#[allow(non_camel_case_types)]
62#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
63pub enum HashType {
64    /// Jenkins one-at-a-time.
65    OneAtATime,
66    /// MD5; truncates the digest to its low 32 bits.
67    Md5,
68    /// CRC-16 (CCITT polynomial table).
69    Crc16,
70    /// libmemcached-compatible CRC-32.
71    Crc32,
72    /// Standards-compliant CRC-32.
73    Crc32a,
74    /// FNV-1 over 64-bit accumulator, truncated to 32 bits.
75    Fnv1_64,
76    /// FNV-1a operating in 32-bit space using the 64-bit prime.
77    Fnv1a_64,
78    /// FNV-1 in 32 bits.
79    Fnv1_32,
80    /// FNV-1a in 32 bits.
81    Fnv1a_32,
82    /// Paul Hsieh's "SuperFastHash".
83    Hsieh,
84    /// MurmurHash 2.
85    Murmur,
86    /// Bob Jenkins lookup3.
87    Jenkins,
88    /// MurmurHash3 x86 128-bit.
89    Murmur3,
90    /// MurmurHash3 truncated to 64 bits (used by the
91    /// random-slicing distribution lookup).
92    #[allow(non_camel_case_types)]
93    Murmur3X64_64,
94}
95
96impl HashType {
97    /// Lower-case algorithm name as it appears in YAML configurations.
98    ///
99    /// # Examples
100    ///
101    /// ```
102    /// use dynomite::hashkit::HashType;
103    /// assert_eq!(HashType::Murmur3.as_str(), "murmur3");
104    /// ```
105    #[must_use]
106    pub const fn as_str(self) -> &'static str {
107        match self {
108            HashType::OneAtATime => "one_at_a_time",
109            HashType::Md5 => "md5",
110            HashType::Crc16 => "crc16",
111            HashType::Crc32 => "crc32",
112            HashType::Crc32a => "crc32a",
113            HashType::Fnv1_64 => "fnv1_64",
114            HashType::Fnv1a_64 => "fnv1a_64",
115            HashType::Fnv1_32 => "fnv1_32",
116            HashType::Fnv1a_32 => "fnv1a_32",
117            HashType::Hsieh => "hsieh",
118            HashType::Murmur => "murmur",
119            HashType::Jenkins => "jenkins",
120            HashType::Murmur3 => "murmur3",
121            HashType::Murmur3X64_64 => "murmur3_x64_64",
122        }
123    }
124
125    /// All variants, in declaration order.
126    ///
127    /// # Examples
128    ///
129    /// ```
130    /// use dynomite::hashkit::HashType;
131    /// assert!(HashType::all().contains(&HashType::Murmur3));
132    /// ```
133    #[must_use]
134    pub const fn all() -> &'static [HashType] {
135        &[
136            HashType::OneAtATime,
137            HashType::Md5,
138            HashType::Crc16,
139            HashType::Crc32,
140            HashType::Crc32a,
141            HashType::Fnv1_64,
142            HashType::Fnv1a_64,
143            HashType::Fnv1_32,
144            HashType::Fnv1a_32,
145            HashType::Hsieh,
146            HashType::Murmur,
147            HashType::Jenkins,
148            HashType::Murmur3,
149            HashType::Murmur3X64_64,
150        ]
151    }
152
153    /// Parse a YAML-style algorithm name. Unknown names yield `None`.
154    ///
155    /// # Examples
156    ///
157    /// ```
158    /// use dynomite::hashkit::HashType;
159    /// assert_eq!(HashType::from_name("murmur3"), Some(HashType::Murmur3));
160    /// assert_eq!(HashType::from_name("nope"), None);
161    /// ```
162    #[must_use]
163    pub fn from_name(name: &str) -> Option<HashType> {
164        Self::all().iter().copied().find(|h| h.as_str() == name)
165    }
166}
167
168/// Hash `key` with the requested algorithm and return the resulting token.
169///
170/// The returned token has `len == 1` for every 32-bit algorithm and
171/// `len == 4` for `Murmur3`. The contract is identical to the C
172/// `hash_func_t` it replaces: dispatch is total over [`HashType::all`].
173///
174/// # Examples
175///
176/// ```
177/// use dynomite::hashkit::{hash, HashType};
178///
179/// let t1 = hash(HashType::Crc32a, b"abc");
180/// let t2 = hash(HashType::Crc32a, b"abc");
181/// assert_eq!(t1, t2);
182/// ```
183#[must_use]
184pub fn hash(ty: HashType, key: &[u8]) -> DynToken {
185    match ty {
186        HashType::OneAtATime => one_at_a_time::hash(key),
187        HashType::Md5 => md5::hash(key),
188        HashType::Crc16 => crc16::hash(key),
189        HashType::Crc32 => crc32::hash_libmemcached(key),
190        HashType::Crc32a => crc32::hash_standard(key),
191        HashType::Fnv1_64 => fnv::hash_fnv1_64(key),
192        HashType::Fnv1a_64 => fnv::hash_fnv1a_64(key),
193        HashType::Fnv1_32 => fnv::hash_fnv1_32(key),
194        HashType::Fnv1a_32 => fnv::hash_fnv1a_32(key),
195        HashType::Hsieh => hsieh::hash(key),
196        HashType::Murmur => murmur::hash(key),
197        HashType::Jenkins => jenkins::hash(key),
198        HashType::Murmur3 => murmur3::hash(key),
199        HashType::Murmur3X64_64 => {
200            // Pack the 64-bit fingerprint into the low two
201            // words of a `DynToken` so callers that compare
202            // against the existing 32-bit ring math observe the
203            // canonical low-half ordering. The random-slicing
204            // dispatcher does not consume tokens directly; it
205            // calls [`hash64`] for its u64 ring lookup.
206            let h = murmur3::murmur3_x64_64(0xc0a1_e5ce, key);
207            let mut token = DynToken::default();
208            token.size(2).expect("len 2 fits");
209            let mag = token.mag_mut();
210            mag[0] = h as u32;
211            mag[1] = (h >> 32) as u32;
212            token
213        }
214    }
215}
216
217/// Hash `key` and return a 64-bit value used by the
218/// random-slicing distribution table.
219///
220/// Every algorithm has a deterministic 64-bit projection: the
221/// 64-bit and 128-bit families return their natural width
222/// (truncated to 64 bits where applicable), and the legacy
223/// 32-bit families lift their result into the low half of the
224/// returned `u64`. The function is total over
225/// [`HashType::all`].
226///
227/// # Examples
228///
229/// ```
230/// use dynomite::hashkit::{hash64, HashType};
231///
232/// let a = hash64(HashType::Murmur3X64_64, b"alice");
233/// let b = hash64(HashType::Murmur3X64_64, b"alice");
234/// assert_eq!(a, b);
235/// ```
236#[must_use]
237pub fn hash64(ty: HashType, key: &[u8]) -> u64 {
238    if matches!(ty, HashType::Murmur3X64_64) {
239        return murmur3::murmur3_x64_64(0xc0a1_e5ce, key);
240    }
241    let token = hash(ty, key);
242    let mag = token.mag();
243    let lo = u64::from(mag[0]);
244    let hi = if mag.len() > 1 { u64::from(mag[1]) } else { 0 };
245    (hi << 32) | lo
246}
247
248/// Compute the raw 128-byte MD5 digest of `key`.
249///
250/// Exposed so the `ketama` continuum can build per-server points using the
251/// same digest layout as the original implementation.
252///
253/// # Examples
254///
255/// ```
256/// use dynomite::hashkit::md5_signature;
257/// // The empty string has a known MD5 digest.
258/// let d = md5_signature(b"");
259/// assert_eq!(d[0], 0xd4);
260/// assert_eq!(d.len(), 16);
261/// ```
262#[must_use]
263pub fn md5_signature(key: &[u8]) -> [u8; 16] {
264    md5::digest(key)
265}
266
267/// CRC-32 over a buffer, lower-cased before mixing.
268///
269/// Each byte is forced to lower case before being fed to the table; the
270/// helper is used by the entropy reconciliation path.
271///
272/// # Examples
273///
274/// ```
275/// use dynomite::hashkit::crc32_sz;
276/// // Lower-casing means "ABC" and "abc" produce the same digest.
277/// assert_eq!(crc32_sz(b"ABC", 0), crc32_sz(b"abc", 0));
278/// ```
279#[must_use]
280pub fn crc32_sz(buf: &[u8], in_crc32: u32) -> u32 {
281    crc32::crc32_sz(buf, in_crc32)
282}
283
284#[cfg(test)]
285mod tests {
286    use super::*;
287
288    #[test]
289    fn names_round_trip() {
290        for ty in HashType::all().iter().copied() {
291            assert_eq!(HashType::from_name(ty.as_str()), Some(ty));
292        }
293    }
294
295    #[test]
296    fn dispatch_is_deterministic() {
297        let key = b"the quick brown fox";
298        for ty in HashType::all().iter().copied() {
299            let a = hash(ty, key);
300            let b = hash(ty, key);
301            assert_eq!(a, b, "algorithm {ty:?} not deterministic");
302        }
303    }
304
305    #[test]
306    fn dispatch_lengths_match_c_codec() {
307        for ty in HashType::all().iter().copied() {
308            let token = hash(ty, b"k");
309            let expected = match ty {
310                HashType::Murmur3 => 4,
311                HashType::Murmur3X64_64 => 2,
312                _ => 1,
313            };
314            assert_eq!(
315                token.len(),
316                expected,
317                "wrong token length for {ty:?}: got {}",
318                token.len()
319            );
320        }
321    }
322}