commonware_storage/
translator.rs

1//! Primitive implementations of [Translator].
2
3use std::hash::{BuildHasher, Hash, Hasher};
4
5/// Translate keys into a new representation (often a smaller one).
6///
7/// # Warning
8///
9/// The output of [Translator::transform] is often used as a key in a hash table. If the output is
10/// not uniformly distributed, the performance of said hash table will degrade substantially.
11pub trait Translator: Clone + BuildHasher {
12    /// The type of the internal representation of keys.
13    ///
14    /// Although [Translator] is a [BuildHasher], the `Key` type must still implement [Hash] for
15    /// compatibility with any hash table that wraps [Translator]. We also require [Ord] for
16    /// compatibility with ordered collections.
17    type Key: Ord + Hash + Copy;
18
19    /// Transform a key into its new representation.
20    fn transform(&self, key: &[u8]) -> Self::Key;
21}
22
23/// A “do-nothing” hasher for `uint`.
24///
25/// Most users typically store keys that are **already hashed** (shortened by the [Translator]).
26/// Re-hashing them with SipHash (by [std::collections::HashMap]) would waste CPU, so we give
27/// [std::collections::HashMap] this identity hasher instead:
28///
29/// * [Hasher::write_u8], [Hasher::write_u16], [Hasher::write_u32], [Hasher::write_u64] copies the
30///   input into an internal field;
31/// * [Hasher::finish] returns that value unchanged.
32///
33/// # Warning
34///
35/// This hasher is not suitable for general use. If the hasher is called over some type that is not
36/// [u8], [u16], [u32] or [u64], it will panic.
37#[derive(Default, Clone)]
38pub struct UintIdentity {
39    value: u64,
40}
41
42impl Hasher for UintIdentity {
43    #[inline]
44    fn write(&mut self, _: &[u8]) {
45        unimplemented!("we should only ever call type-specific write methods");
46    }
47
48    #[inline]
49    fn write_u8(&mut self, i: u8) {
50        self.value = i as u64;
51    }
52
53    #[inline]
54    fn write_u16(&mut self, i: u16) {
55        self.value = i as u64;
56    }
57
58    #[inline]
59    fn write_u32(&mut self, i: u32) {
60        self.value = i as u64;
61    }
62
63    #[inline]
64    fn write_u64(&mut self, i: u64) {
65        self.value = i;
66    }
67
68    #[inline]
69    fn finish(&self) -> u64 {
70        self.value
71    }
72}
73
74/// Cap the key to a fixed length.
75///
76/// # Behavior
77///
78/// - If input is shorter than `N`, the output is zero-padded.
79/// - If input is longer than `N`, the output is truncated.
80/// - If input is exactly `N`, the output is identical.
81fn cap<const N: usize>(key: &[u8]) -> [u8; N] {
82    let mut capped = [0; N];
83    let len = key.len().min(N);
84    capped[..len].copy_from_slice(&key[..len]);
85    capped
86}
87
88macro_rules! define_cap_translator {
89    ($name:ident, $size:expr, $int:ty) => {
90        #[doc = concat!("Translator that caps the key to ", stringify!($size), " byte(s) and returns it packed in a ", stringify!($int), ".")]
91        #[derive(Clone, Default)]
92        pub struct $name;
93
94        impl Translator for $name {
95            // Minimal uint size for the key.
96            type Key = $int;
97
98            #[inline]
99            fn transform(&self, key: &[u8]) -> Self::Key {
100                let capped = cap::<$size>(key);
101                <$int>::from_be_bytes(capped)
102            }
103        }
104
105        // Implement the `BuildHasher` trait for `IdentityHasher`.
106        impl BuildHasher for $name {
107            type Hasher = UintIdentity;
108
109            #[inline]
110            fn build_hasher(&self) -> Self::Hasher {
111                UintIdentity::default()
112            }
113        }
114    };
115}
116
117// Define order-preserving translators for different sizes.
118define_cap_translator!(OneCap, 1, u8);
119define_cap_translator!(TwoCap, 2, u16);
120define_cap_translator!(FourCap, 4, u32);
121define_cap_translator!(EightCap, 8, u64);
122
123#[cfg(test)]
124mod tests {
125    use super::*;
126    use std::hash::Hasher;
127
128    #[test]
129    fn test_one_cap() {
130        let t = OneCap;
131        assert_eq!(t.transform(b""), 0);
132        assert_eq!(t.transform(b"a"), b'a');
133        assert_eq!(t.transform(b"ab"), b'a');
134        assert_eq!(t.transform(b"abc"), b'a');
135    }
136
137    #[test]
138    fn test_two_cap() {
139        let t = TwoCap;
140        assert_eq!(t.transform(b""), 0);
141        assert_eq!(t.transform(b"abc"), t.transform(b"ab"));
142        assert!(t.transform(b"") < t.transform(b"a"));
143        assert!(t.transform(b"a") < t.transform(b"b"));
144        assert!(t.transform(b"ab") < t.transform(b"ac"));
145        assert!(t.transform(b"z") < t.transform(b"zz"));
146        assert_eq!(t.transform(b"zz"), t.transform(b"zzabc"));
147    }
148
149    #[test]
150    fn test_four_cap() {
151        let t = FourCap;
152        let t1 = t.transform(b"");
153        let t2 = t.transform(b"a");
154        let t3 = t.transform(b"abcd");
155        let t4 = t.transform(b"abcdef");
156        let t5 = t.transform(b"b");
157
158        assert_eq!(t1, 0);
159        assert!(t1 < t2);
160        assert!(t2 < t3);
161        assert_eq!(t3, t4);
162        assert!(t3 < t5);
163        assert!(t4 < t5);
164    }
165
166    #[test]
167    fn test_eight_cap() {
168        let t = EightCap;
169        let t1 = t.transform(b"");
170        let t2 = t.transform(b"a");
171        let t3 = t.transform(b"abcdefghaaaaaaa");
172        let t4 = t.transform(b"abcdefghijkzzzzzzzzzzzzzzzzzz");
173        let t5 = t.transform(b"b");
174
175        assert_eq!(t1, 0);
176        assert!(t1 < t2);
177        assert!(t2 < t3);
178        assert_eq!(t3, t4);
179        assert!(t3 < t5);
180        assert!(t4 < t5);
181    }
182
183    #[test]
184    #[should_panic(expected = "we should only ever call type-specific write methods")]
185    fn identity_hasher_panics_on_write_slice() {
186        let mut h = UintIdentity::default();
187        h.write(b"not an int");
188    }
189}