Skip to main content

commonware_storage/
translator.rs

1//! Primitive implementations of [Translator].
2
3use core::hash::{BuildHasher, Hash, Hasher};
4
5/// Translate keys into a new representation (often a smaller one).
6///
7/// # Warning
8///
9/// The output of [Translator::transform] is often used as a key in a hash table. If the output is
10/// not uniformly distributed, the performance of said hash table will degrade substantially.
11pub trait Translator: Clone + BuildHasher + Send + Sync + 'static {
12    /// The type of the internal representation of keys.
13    ///
14    /// Although [Translator] is a [BuildHasher], the `Key` type must still implement [Hash] for
15    /// compatibility with any hash table that wraps [Translator]. We also require [Ord] for
16    /// compatibility with ordered collections. [Send] and [Sync] are required for thread-safe
17    /// concurrent access.
18    type Key: Ord + Hash + Copy + Send + Sync;
19
20    /// Transform a key into its new representation.
21    fn transform(&self, key: &[u8]) -> Self::Key;
22}
23
24/// A “do-nothing” hasher for `uint`.
25///
26/// Most users typically store keys that are **already hashed** (shortened by the [Translator]).
27/// Re-hashing them with SipHash (by [std::collections::HashMap]) would waste CPU, so we give
28/// [std::collections::HashMap] this identity hasher instead:
29///
30/// * [Hasher::write_u8], [Hasher::write_u16], [Hasher::write_u32], [Hasher::write_u64] copies the
31///   input into an internal field;
32/// * [Hasher::finish] returns that value unchanged.
33///
34/// # Warning
35///
36/// This hasher is not suitable for general use. If the hasher is called on a byte slice longer
37/// than `size_of::<u64>()`, it will panic.
38#[derive(Default, Clone)]
39pub struct UintIdentity {
40    value: u64,
41}
42
43impl Hasher for UintIdentity {
44    #[inline]
45    fn write(&mut self, bytes: &[u8]) {
46        assert!(bytes.len() <= 8, "UintIdenty hasher cannot handle >8 bytes");
47        // Treat the input array as a little-endian int to ensure low-order bits don't end up mostly
48        // 0s, given that we right-pad.
49        self.value = u64::from_le_bytes(cap::<8>(bytes));
50    }
51
52    #[inline]
53    fn write_u8(&mut self, i: u8) {
54        self.value = i as u64;
55    }
56
57    #[inline]
58    fn write_u16(&mut self, i: u16) {
59        self.value = i as u64;
60    }
61
62    #[inline]
63    fn write_u32(&mut self, i: u32) {
64        self.value = i as u64;
65    }
66
67    #[inline]
68    fn write_u64(&mut self, i: u64) {
69        self.value = i;
70    }
71
72    #[inline]
73    fn finish(&self) -> u64 {
74        self.value
75    }
76}
77
78/// Cap the key to a fixed length.
79///
80/// # Behavior
81///
82/// - If input is shorter than `N`, the output is zero-padded.
83/// - If input is longer than `N`, the output is truncated.
84/// - If input is exactly `N`, the output is identical.
85fn cap<const N: usize>(key: &[u8]) -> [u8; N] {
86    let mut capped = [0; N];
87    let len = key.len().min(N);
88    capped[..len].copy_from_slice(&key[..len]);
89    capped
90}
91
92macro_rules! define_cap_translator {
93    ($name:ident, $size:expr, $int:ty) => {
94        #[doc = concat!("Translator that caps the key to ", stringify!($size), " byte(s) and returns it packed in a ", stringify!($int), ".")]
95        #[derive(Clone, Default)]
96        pub struct $name;
97
98        impl Translator for $name {
99            // Minimal uint size for the key.
100            type Key = $int;
101
102            #[inline]
103            fn transform(&self, key: &[u8]) -> Self::Key {
104                let capped = cap::<$size>(key);
105                <$int>::from_be_bytes(capped)
106            }
107        }
108
109        // Implement the `BuildHasher` trait for `IdentityHasher`.
110        impl BuildHasher for $name {
111            type Hasher = UintIdentity;
112
113            #[inline]
114            fn build_hasher(&self) -> Self::Hasher {
115                UintIdentity::default()
116            }
117        }
118    };
119}
120
121// Define order-preserving translators for different sizes.
122define_cap_translator!(OneCap, 1, u8);
123define_cap_translator!(TwoCap, 2, u16);
124define_cap_translator!(FourCap, 4, u32);
125define_cap_translator!(EightCap, 8, u64);
126
127/// Define a special array type for which we'll implement our own identity hasher. This avoids the
128/// overhead of the default Array hasher which unnecessarily (for our use case) includes a length
129/// prefix.
130#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
131pub struct UnhashedArray<const N: usize> {
132    pub inner: [u8; N],
133}
134
135impl<const N: usize> Hash for UnhashedArray<N> {
136    #[inline]
137    fn hash<H: Hasher>(&self, state: &mut H) {
138        state.write(&self.inner);
139    }
140}
141
142impl<const N: usize> PartialEq<[u8; N]> for UnhashedArray<N> {
143    fn eq(&self, other: &[u8; N]) -> bool {
144        &self.inner == other
145    }
146}
147
148/// Translators for keys that are not the length of a standard integer.
149#[derive(Clone, Copy)]
150pub struct Cap<const N: usize>;
151
152impl<const N: usize> Cap<N> {
153    pub const fn new() -> Self {
154        const {
155            assert!(N <= 8 && N > 0, "Cap must be between 1 and 8");
156        };
157        Self
158    }
159}
160
161// Manually implement Default for Cap<N> so it calls new() which validates N.
162impl<const N: usize> Default for Cap<N> {
163    fn default() -> Self {
164        Self::new()
165    }
166}
167
168impl<const N: usize> Translator for Cap<N> {
169    type Key = UnhashedArray<N>;
170
171    #[inline]
172    fn transform(&self, key: &[u8]) -> Self::Key {
173        const {
174            assert!(N <= 8 && N > 0, "Cap must be between 1 and 8");
175        };
176        UnhashedArray {
177            inner: cap::<N>(key),
178        }
179    }
180}
181
182impl<const N: usize> BuildHasher for Cap<N> {
183    type Hasher = UintIdentity;
184
185    #[inline]
186    fn build_hasher(&self) -> Self::Hasher {
187        UintIdentity::default()
188    }
189}
190
191/// Collision-resistant wrapper for any [Translator].
192///
193/// Hashes the full key with a per-instance secret seed before delegating to the inner translator.
194/// This makes translated-key collisions unpredictable to an adversary who
195/// does not know the seed, similar to how [std::collections::HashMap] uses
196/// [std::collections::hash_map::RandomState] to prevent HashDoS attacks. It can also be used to
197/// ensure uniform distribution of skewed keyspaces when used by non-hashing structures such as
198/// [crate::index].
199///
200/// # No Ordering
201///
202/// Hashing destroys lexicographic key ordering. Do not use [Hashed] with ordered indices when
203/// callers rely on translated-key adjacency matching original-key adjacency (e.g., exclusion proofs
204/// in the ordered QMDB). [Hashed] is safe for unordered indices and partitioned unordered indices.
205///
206/// # `no_std` Support
207///
208/// [Hashed::new] and [Default] are available only with this crate's `std` feature. They use
209/// [ahash::RandomState::new], which requires OS-provided randomness (the `runtime-rng` feature of
210/// `ahash`, enabled by `std` here). In `no_std` environments, use [Hashed::from_seed] with an
211/// externally-sourced random seed instead.
212///
213/// # No Stability Guarantees
214///
215/// [ahash::RandomState] is used as the underlying hasher. While `ahash` is robust, its exact
216/// algorithm might change across versions. As a result, transformed outputs are not stable across
217/// Rust versions or platforms. Treat this translator as an in-memory collision-hardening mechanism,
218/// not as a stable/persisted encoding.
219///
220/// # Examples
221///
222/// ```
223/// use commonware_storage::translator::{Hashed, TwoCap, Translator};
224///
225/// # #[cfg(feature = "std")]
226/// # {
227/// // Random seed (production use, requires `std`):
228/// let t = Hashed::new(TwoCap);
229/// let _k = t.transform(b"hello");
230/// # }
231///
232/// // Deterministic seed (testing within the same toolchain/runtime):
233/// let t = Hashed::from_seed(42, TwoCap);
234/// assert_eq!(t.transform(b"hello"), t.transform(b"hello"));
235/// ```
236#[derive(Clone)]
237pub struct Hashed<T: Translator> {
238    random_state: ahash::RandomState,
239    inner: T,
240}
241
242#[cfg(feature = "std")]
243impl<T: Translator + Default> Default for Hashed<T> {
244    fn default() -> Self {
245        Self::new(T::default())
246    }
247}
248
249impl<T: Translator> Hashed<T> {
250    /// Create a new [Hashed] translator with a random seed.
251    #[cfg(feature = "std")]
252    pub fn new(inner: T) -> Self {
253        Self {
254            random_state: ahash::RandomState::new(),
255            inner,
256        }
257    }
258
259    /// Create a new [Hashed] translator with a specific seed.
260    pub const fn from_seed(seed: u64, inner: T) -> Self {
261        Self {
262            random_state: ahash::RandomState::with_seeds(seed, 0, 0, 0),
263            inner,
264        }
265    }
266}
267
268impl<T: Translator> Translator for Hashed<T> {
269    type Key = T::Key;
270
271    #[inline]
272    fn transform(&self, key: &[u8]) -> T::Key {
273        let hash_val = self.random_state.hash_one(key);
274        self.inner.transform(&hash_val.to_le_bytes())
275    }
276}
277
278impl<T: Translator> BuildHasher for Hashed<T> {
279    type Hasher = <T as BuildHasher>::Hasher;
280
281    #[inline]
282    fn build_hasher(&self) -> Self::Hasher {
283        self.inner.build_hasher()
284    }
285}
286
287#[cfg(test)]
288mod tests {
289    use super::*;
290    use core::hash::Hasher;
291
292    #[test]
293    fn test_one_cap() {
294        let t = OneCap;
295        assert_eq!(t.transform(b""), 0);
296        assert_eq!(t.transform(b"a"), b'a');
297        assert_eq!(t.transform(b"ab"), b'a');
298        assert_eq!(t.transform(b"abc"), b'a');
299    }
300
301    #[test]
302    fn test_two_cap() {
303        let t = TwoCap;
304        assert_eq!(t.transform(b""), 0);
305        assert_eq!(t.transform(b"abc"), t.transform(b"ab"));
306        assert!(t.transform(b"") < t.transform(b"a"));
307        assert!(t.transform(b"a") < t.transform(b"b"));
308        assert!(t.transform(b"ab") < t.transform(b"ac"));
309        assert!(t.transform(b"z") < t.transform(b"zz"));
310        assert_eq!(t.transform(b"zz"), t.transform(b"zzabc"));
311    }
312
313    #[test]
314    fn test_four_cap() {
315        let t = FourCap;
316        let t1 = t.transform(b"");
317        let t2 = t.transform(b"a");
318        let t3 = t.transform(b"abcd");
319        let t4 = t.transform(b"abcdef");
320        let t5 = t.transform(b"b");
321
322        assert_eq!(t1, 0);
323        assert!(t1 < t2);
324        assert!(t2 < t3);
325        assert_eq!(t3, t4);
326        assert!(t3 < t5);
327        assert!(t4 < t5);
328    }
329
330    #[test]
331    fn test_cap_3() {
332        let t = Cap::<3>::new();
333        assert_eq!(t.transform(b""), [0; 3]);
334        assert_eq!(t.transform(b"abc"), *b"abc");
335        assert_eq!(t.transform(b"abcdef"), *b"abc");
336        assert_eq!(t.transform(b"ab"), [b'a', b'b', 0]);
337    }
338
339    #[test]
340    fn test_cap_6() {
341        let t = Cap::<6>::new();
342        assert_eq!(t.transform(b""), [0; 6]);
343        assert_eq!(t.transform(b"abcdef"), *b"abcdef");
344        assert_eq!(t.transform(b"abcdefghi"), *b"abcdef");
345        assert_eq!(t.transform(b"abc"), [b'a', b'b', b'c', 0, 0, 0]);
346    }
347
348    #[test]
349    fn test_eight_cap() {
350        let t = EightCap;
351        let t1 = t.transform(b"");
352        let t2 = t.transform(b"a");
353        let t3 = t.transform(b"abcdefghaaaaaaa");
354        let t4 = t.transform(b"abcdefghijkzzzzzzzzzzzzzzzzzz");
355        let t5 = t.transform(b"b");
356
357        assert_eq!(t1, 0);
358        assert!(t1 < t2);
359        assert!(t2 < t3);
360        assert_eq!(t3, t4);
361        assert!(t3 < t5);
362        assert!(t4 < t5);
363    }
364
365    #[test]
366    fn identity_hasher_works_on_small_slice() {
367        let mut h = UintIdentity::default();
368        h.write(b"abc");
369        assert_eq!(h.finish(), u64::from_le_bytes(cap::<8>(b"abc")));
370    }
371
372    #[test]
373    #[should_panic]
374    fn identity_hasher_panics_on_large_write_slice() {
375        let mut h = UintIdentity::default();
376        h.write(b"too big for an int");
377    }
378
379    #[test]
380    fn test_hashed_consistency() {
381        let t = Hashed::from_seed(42, TwoCap);
382        assert_eq!(t.transform(b"hello"), t.transform(b"hello"));
383        assert_eq!(t.transform(b""), t.transform(b""));
384        assert_eq!(t.transform(b"abcdef"), t.transform(b"abcdef"));
385    }
386
387    #[test]
388    fn test_hashed_seed_determinism() {
389        let t1 = Hashed::from_seed(42, TwoCap);
390        let t2 = Hashed::from_seed(42, TwoCap);
391        assert_eq!(t1.transform(b"hello"), t2.transform(b"hello"));
392        assert_eq!(t1.transform(b"world"), t2.transform(b"world"));
393    }
394
395    #[test]
396    fn test_hashed_seed_independence() {
397        let t1 = Hashed::from_seed(1, EightCap);
398        let t2 = Hashed::from_seed(2, EightCap);
399        // Different seeds should (with overwhelming probability) produce different outputs.
400        assert_ne!(t1.transform(b"hello"), t2.transform(b"hello"));
401    }
402
403    #[test]
404    fn test_hashed_prefix_collisions_avoided() {
405        // Without hashing, keys sharing a prefix collide. With hashing, they should not.
406        let t = Hashed::from_seed(99, TwoCap);
407        let k1 = t.transform(b"abXXX");
408        let k2 = t.transform(b"abYYY");
409        // The unhashed TwoCap would map both to the same value (first 2 bytes "ab").
410        assert_ne!(k1, k2);
411    }
412
413    #[test]
414    fn test_hashed_all_cap_sizes() {
415        let t1 = Hashed::from_seed(7, OneCap);
416        assert_eq!(t1.transform(b"test"), t1.transform(b"test"));
417
418        let t4 = Hashed::from_seed(7, FourCap);
419        assert_eq!(t4.transform(b"test"), t4.transform(b"test"));
420
421        let t8 = Hashed::from_seed(7, EightCap);
422        assert_eq!(t8.transform(b"test"), t8.transform(b"test"));
423
424        let tc = Hashed::from_seed(7, Cap::<3>::new());
425        assert_eq!(tc.transform(b"test"), tc.transform(b"test"));
426    }
427
428    #[test]
429    fn test_hashed_random_seed() {
430        // Two instances with random seeds should (with overwhelming probability)
431        // produce different outputs.
432        let t1 = Hashed::new(EightCap);
433        let t2 = Hashed::new(EightCap);
434        assert_ne!(t1.transform(b"hello"), t2.transform(b"hello"));
435    }
436}