commonware_storage/
translator.rs

1//! Primitive implementations of [Translator].
2
3use std::hash::{BuildHasher, Hash, Hasher};
4
5/// Translate keys into a new representation (often a smaller one).
6///
7/// # Warning
8///
9/// The output of [Translator::transform] is often used as a key in a hash table. If the output is
10/// not uniformly distributed, the performance of said hash table will degrade substantially.
11pub trait Translator: Clone + BuildHasher + Send + Sync + 'static {
12    /// The type of the internal representation of keys.
13    ///
14    /// Although [Translator] is a [BuildHasher], the `Key` type must still implement [Hash] for
15    /// compatibility with any hash table that wraps [Translator]. We also require [Ord] for
16    /// compatibility with ordered collections. [Send] and [Sync] are required for thread-safe
17    /// concurrent access.
18    type Key: Ord + Hash + Copy + Send + Sync;
19
20    /// Transform a key into its new representation.
21    fn transform(&self, key: &[u8]) -> Self::Key;
22}
23
24/// A “do-nothing” hasher for `uint`.
25///
26/// Most users typically store keys that are **already hashed** (shortened by the [Translator]).
27/// Re-hashing them with SipHash (by [std::collections::HashMap]) would waste CPU, so we give
28/// [std::collections::HashMap] this identity hasher instead:
29///
30/// * [Hasher::write_u8], [Hasher::write_u16], [Hasher::write_u32], [Hasher::write_u64] copies the
31///   input into an internal field;
32/// * [Hasher::finish] returns that value unchanged.
33///
34/// # Warning
35///
36/// This hasher is not suitable for general use. If the hasher is called on a byte slice longer
37/// than `size_of::<u64>()`, it will panic.
38#[derive(Default, Clone)]
39pub struct UintIdentity {
40    value: u64,
41}
42
43impl Hasher for UintIdentity {
44    #[inline]
45    fn write(&mut self, bytes: &[u8]) {
46        assert!(bytes.len() <= 8, "UintIdenty hasher cannot handle >8 bytes");
47        // Treat the input array as a little-endian int to ensure low-order bits don't end up mostly
48        // 0s, given that we right-pad.
49        self.value = u64::from_le_bytes(cap::<8>(bytes));
50    }
51
52    #[inline]
53    fn write_u8(&mut self, i: u8) {
54        self.value = i as u64;
55    }
56
57    #[inline]
58    fn write_u16(&mut self, i: u16) {
59        self.value = i as u64;
60    }
61
62    #[inline]
63    fn write_u32(&mut self, i: u32) {
64        self.value = i as u64;
65    }
66
67    #[inline]
68    fn write_u64(&mut self, i: u64) {
69        self.value = i;
70    }
71
72    #[inline]
73    fn finish(&self) -> u64 {
74        self.value
75    }
76}
77
78/// Cap the key to a fixed length.
79///
80/// # Behavior
81///
82/// - If input is shorter than `N`, the output is zero-padded.
83/// - If input is longer than `N`, the output is truncated.
84/// - If input is exactly `N`, the output is identical.
85fn cap<const N: usize>(key: &[u8]) -> [u8; N] {
86    let mut capped = [0; N];
87    let len = key.len().min(N);
88    capped[..len].copy_from_slice(&key[..len]);
89    capped
90}
91
92macro_rules! define_cap_translator {
93    ($name:ident, $size:expr, $int:ty) => {
94        #[doc = concat!("Translator that caps the key to ", stringify!($size), " byte(s) and returns it packed in a ", stringify!($int), ".")]
95        #[derive(Clone, Default)]
96        pub struct $name;
97
98        impl Translator for $name {
99            // Minimal uint size for the key.
100            type Key = $int;
101
102            #[inline]
103            fn transform(&self, key: &[u8]) -> Self::Key {
104                let capped = cap::<$size>(key);
105                <$int>::from_be_bytes(capped)
106            }
107        }
108
109        // Implement the `BuildHasher` trait for `IdentityHasher`.
110        impl BuildHasher for $name {
111            type Hasher = UintIdentity;
112
113            #[inline]
114            fn build_hasher(&self) -> Self::Hasher {
115                UintIdentity::default()
116            }
117        }
118    };
119}
120
121// Define order-preserving translators for different sizes.
122define_cap_translator!(OneCap, 1, u8);
123define_cap_translator!(TwoCap, 2, u16);
124define_cap_translator!(FourCap, 4, u32);
125define_cap_translator!(EightCap, 8, u64);
126
127/// Define a special array type for which we'll implement our own identity hasher. This avoids the
128/// overhead of the default Array hasher which unnecessarily (for our use case) includes a length
129/// prefix.
130#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
131pub struct UnhashedArray<const N: usize> {
132    pub inner: [u8; N],
133}
134
135impl<const N: usize> Hash for UnhashedArray<N> {
136    #[inline]
137    fn hash<H: Hasher>(&self, state: &mut H) {
138        state.write(&self.inner);
139    }
140}
141
142impl<const N: usize> PartialEq<[u8; N]> for UnhashedArray<N> {
143    fn eq(&self, other: &[u8; N]) -> bool {
144        &self.inner == other
145    }
146}
147
148/// Translators for keys that are not the length of a standard integer.
149#[derive(Clone, Copy)]
150pub struct Cap<const N: usize>;
151
152impl<const N: usize> Cap<N> {
153    pub const fn new() -> Self {
154        const {
155            assert!(N <= 8 && N > 0, "Cap must be between 1 and 8");
156        };
157        Self
158    }
159}
160
161// Manually implement Default for Cap<N> so it calls new() which validates N.
162impl<const N: usize> Default for Cap<N> {
163    fn default() -> Self {
164        Self::new()
165    }
166}
167
168impl<const N: usize> Translator for Cap<N> {
169    type Key = UnhashedArray<N>;
170
171    #[inline]
172    fn transform(&self, key: &[u8]) -> Self::Key {
173        const {
174            assert!(N <= 8 && N > 0, "Cap must be between 1 and 8");
175        };
176        UnhashedArray {
177            inner: cap::<N>(key),
178        }
179    }
180}
181
182impl<const N: usize> BuildHasher for Cap<N> {
183    type Hasher = UintIdentity;
184
185    #[inline]
186    fn build_hasher(&self) -> Self::Hasher {
187        UintIdentity::default()
188    }
189}
190
191#[cfg(test)]
192mod tests {
193    use super::*;
194    use std::hash::Hasher;
195
196    #[test]
197    fn test_one_cap() {
198        let t = OneCap;
199        assert_eq!(t.transform(b""), 0);
200        assert_eq!(t.transform(b"a"), b'a');
201        assert_eq!(t.transform(b"ab"), b'a');
202        assert_eq!(t.transform(b"abc"), b'a');
203    }
204
205    #[test]
206    fn test_two_cap() {
207        let t = TwoCap;
208        assert_eq!(t.transform(b""), 0);
209        assert_eq!(t.transform(b"abc"), t.transform(b"ab"));
210        assert!(t.transform(b"") < t.transform(b"a"));
211        assert!(t.transform(b"a") < t.transform(b"b"));
212        assert!(t.transform(b"ab") < t.transform(b"ac"));
213        assert!(t.transform(b"z") < t.transform(b"zz"));
214        assert_eq!(t.transform(b"zz"), t.transform(b"zzabc"));
215    }
216
217    #[test]
218    fn test_four_cap() {
219        let t = FourCap;
220        let t1 = t.transform(b"");
221        let t2 = t.transform(b"a");
222        let t3 = t.transform(b"abcd");
223        let t4 = t.transform(b"abcdef");
224        let t5 = t.transform(b"b");
225
226        assert_eq!(t1, 0);
227        assert!(t1 < t2);
228        assert!(t2 < t3);
229        assert_eq!(t3, t4);
230        assert!(t3 < t5);
231        assert!(t4 < t5);
232    }
233
234    #[test]
235    fn test_cap_3() {
236        let t = Cap::<3>::new();
237        assert_eq!(t.transform(b""), [0; 3]);
238        assert_eq!(t.transform(b"abc"), *b"abc");
239        assert_eq!(t.transform(b"abcdef"), *b"abc");
240        assert_eq!(t.transform(b"ab"), [b'a', b'b', 0]);
241    }
242
243    #[test]
244    fn test_cap_6() {
245        let t = Cap::<6>::new();
246        assert_eq!(t.transform(b""), [0; 6]);
247        assert_eq!(t.transform(b"abcdef"), *b"abcdef");
248        assert_eq!(t.transform(b"abcdefghi"), *b"abcdef");
249        assert_eq!(t.transform(b"abc"), [b'a', b'b', b'c', 0, 0, 0]);
250    }
251
252    #[test]
253    fn test_eight_cap() {
254        let t = EightCap;
255        let t1 = t.transform(b"");
256        let t2 = t.transform(b"a");
257        let t3 = t.transform(b"abcdefghaaaaaaa");
258        let t4 = t.transform(b"abcdefghijkzzzzzzzzzzzzzzzzzz");
259        let t5 = t.transform(b"b");
260
261        assert_eq!(t1, 0);
262        assert!(t1 < t2);
263        assert!(t2 < t3);
264        assert_eq!(t3, t4);
265        assert!(t3 < t5);
266        assert!(t4 < t5);
267    }
268
269    #[test]
270    fn identity_hasher_works_on_small_slice() {
271        let mut h = UintIdentity::default();
272        h.write(b"abc");
273        assert_eq!(h.finish(), u64::from_le_bytes(cap::<8>(b"abc")));
274    }
275
276    #[test]
277    #[should_panic]
278    fn identity_hasher_panics_on_large_write_slice() {
279        let mut h = UintIdentity::default();
280        h.write(b"too big for an int");
281    }
282}