commonware_storage/
translator.rs

1//! Primitive implementations of [Translator].
2
3use std::hash::{BuildHasher, Hash, Hasher};
4
5/// Translate keys into a new representation (often a smaller one).
6///
7/// # Warning
8///
9/// The output of [Translator::transform] is often used as a key in a hash table. If the output is not
10/// uniformly distributed, the performance of said hash table will degrade substantially.
11pub trait Translator: Clone + BuildHasher {
12    /// The type of the internal representation of keys.
13    ///
14    /// Although [Translator] is a [BuildHasher], the `Key` type must still implement [Hash] for compatibility
15    /// with any hash table that wraps [Translator].
16    type Key: Eq + Hash + Copy;
17
18    /// Transform a key into its new representation.
19    fn transform(&self, key: &[u8]) -> Self::Key;
20}
21
22/// A “do-nothing” hasher for `uint`.
23///
24/// Most users typically store keys that are **already hashed** (shortened by the [Translator]).
25/// Re-hashing them with SipHash (by [std::collections::HashMap]) would waste CPU, so we give
26/// [std::collections::HashMap] this identity hasher instead:
27///
28/// * [Hasher::write_u8], [Hasher::write_u16], [Hasher::write_u32], [Hasher::write_u64] copies the input into an internal field;
29/// * [Hasher::finish] returns that value unchanged.
30///
31/// # Warning
32///
33/// This hasher is not suitable for general use. If the hasher is called over some type that is not
34/// [u8], [u16], [u32] or [u64], it will panic.
35#[derive(Default, Clone)]
36pub struct UintIdentity {
37    value: u64,
38}
39
40impl Hasher for UintIdentity {
41    #[inline]
42    fn write(&mut self, _: &[u8]) {
43        unimplemented!("we should only ever call type-specific write methods");
44    }
45
46    #[inline]
47    fn write_u8(&mut self, i: u8) {
48        self.value = i as u64;
49    }
50
51    #[inline]
52    fn write_u16(&mut self, i: u16) {
53        self.value = i as u64;
54    }
55
56    #[inline]
57    fn write_u32(&mut self, i: u32) {
58        self.value = i as u64;
59    }
60
61    #[inline]
62    fn write_u64(&mut self, i: u64) {
63        self.value = i;
64    }
65
66    #[inline]
67    fn finish(&self) -> u64 {
68        self.value
69    }
70}
71
72/// Cap the key to a fixed length.
73///
74/// # Behavior
75///
76/// - If input is shorter than `N`, the output is zero-padded.
77/// - If input is longer than `N`, the output is truncated.
78/// - If input is exactly `N`, the output is identical.
79fn cap<const N: usize>(key: &[u8]) -> [u8; N] {
80    let mut capped = [0; N];
81    let len = key.len().min(N);
82    capped[..len].copy_from_slice(&key[..len]);
83    capped
84}
85
86macro_rules! define_cap_translator {
87    ($name:ident, $size:expr, $int:ty) => {
88        #[doc = concat!("Translator that caps the key to ", stringify!($size), " byte(s) and returns it packed in a ", stringify!($int), ".")]
89        #[derive(Clone, Default)]
90        pub struct $name;
91
92        impl Translator for $name {
93            // Minimal uint size for the key.
94            type Key = $int;
95
96            #[inline]
97            fn transform(&self, key: &[u8]) -> Self::Key {
98                let capped = cap::<$size>(key);
99                <$int>::from_le_bytes(capped)
100            }
101        }
102
103        // Implement the `BuildHasher` trait for `IdentityHasher`.
104        impl BuildHasher for $name {
105            type Hasher = UintIdentity;
106
107            #[inline]
108            fn build_hasher(&self) -> Self::Hasher {
109                UintIdentity::default()
110            }
111        }
112    };
113}
114
115// Define translators for different sizes.
116define_cap_translator!(OneCap, 1, u8);
117define_cap_translator!(TwoCap, 2, u16);
118define_cap_translator!(FourCap, 4, u32);
119define_cap_translator!(EightCap, 8, u64);
120
121#[cfg(test)]
122mod tests {
123    use super::*;
124    use std::hash::Hasher;
125
126    #[test]
127    fn test_one_cap() {
128        let t = OneCap;
129        assert_eq!(t.transform(b"").to_le_bytes(), [0]);
130        assert_eq!(t.transform(b"a").to_le_bytes(), [b'a']);
131        assert_eq!(t.transform(b"ab").to_le_bytes(), [b'a']);
132        assert_eq!(t.transform(b"abc").to_le_bytes(), [b'a']);
133    }
134
135    #[test]
136    fn test_two_cap() {
137        let t = TwoCap;
138        assert_eq!(t.transform(b"").to_le_bytes(), [0, 0]);
139        assert_eq!(t.transform(b"a").to_le_bytes(), [b'a', 0]);
140        assert_eq!(t.transform(b"ab").to_le_bytes(), [b'a', b'b']);
141        assert_eq!(t.transform(b"abc").to_le_bytes(), [b'a', b'b']);
142    }
143
144    #[test]
145    fn test_four_cap() {
146        let t = FourCap;
147        assert_eq!(t.transform(b"").to_le_bytes(), [0, 0, 0, 0]);
148        assert_eq!(t.transform(b"a").to_le_bytes(), [b'a', 0, 0, 0]);
149        assert_eq!(t.transform(b"abcd").to_le_bytes(), [b'a', b'b', b'c', b'd']);
150        assert_eq!(
151            t.transform(b"abcdef").to_le_bytes(),
152            [b'a', b'b', b'c', b'd']
153        );
154    }
155
156    #[test]
157    fn test_eight_cap() {
158        let t = EightCap;
159        assert_eq!(t.transform(b"").to_le_bytes(), [0, 0, 0, 0, 0, 0, 0, 0]);
160        assert_eq!(t.transform(b"a").to_le_bytes(), [b'a', 0, 0, 0, 0, 0, 0, 0]);
161        assert_eq!(
162            t.transform(b"abcdefgh").to_le_bytes(),
163            [b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h']
164        );
165        assert_eq!(
166            t.transform(b"abcdefghijk").to_le_bytes(),
167            [b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h']
168        );
169    }
170
171    #[test]
172    #[should_panic(expected = "we should only ever call type-specific write methods")]
173    fn identity_hasher_panics_on_write_slice() {
174        let mut h = UintIdentity::default();
175        h.write(b"not an int");
176    }
177}