Skip to main content

jj_lib/
content_hash.rs

1//! Portable, stable hashing suitable for identifying values
2
3use blake2::Blake2b512;
4// Re-export DigestUpdate so that the ContentHash proc macro can be used in
5// external crates without directly depending on the digest crate.
6pub use digest::Update as DigestUpdate;
7use itertools::Itertools as _;
8pub use jj_lib_proc_macros::ContentHash;
9
10/// Portable, stable hashing suitable for identifying values
11///
12/// Variable-length sequences should hash a 64-bit little-endian representation
13/// of their length, then their elements in order. Unordered containers should
14/// order their elements according to their `Ord` implementation. Enums should
15/// hash a 32-bit little-endian encoding of the ordinal number of the enum
16/// variant, then the variant's fields in lexical order.
17///
18/// Structs can implement `ContentHash` by using `#[derive(ContentHash)]`.
19pub trait ContentHash {
20    /// Update the hasher state with this object's content
21    fn hash(&self, state: &mut impl DigestUpdate);
22}
23
24/// The 512-bit BLAKE2b content hash
25pub fn blake2b_hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
26    use digest::Digest as _;
27    let mut hasher = Blake2b512::default();
28    x.hash(&mut hasher);
29    hasher.finalize()
30}
31
32impl ContentHash for () {
33    fn hash(&self, _: &mut impl DigestUpdate) {}
34}
35
36macro_rules! tuple_impls {
37    ($( ( $($n:tt $T:ident),+ ) )+) => {
38        $(
39            impl<$($T: ContentHash,)+> ContentHash for ($($T,)+) {
40                fn hash(&self, state: &mut impl DigestUpdate) {
41                    $(self.$n.hash(state);)+
42                }
43            }
44        )+
45    }
46}
47
48tuple_impls! {
49    (0 T0)
50    (0 T0, 1 T1)
51    (0 T0, 1 T1, 2 T2)
52    (0 T0, 1 T1, 2 T2, 3 T3)
53}
54
55impl ContentHash for bool {
56    fn hash(&self, state: &mut impl DigestUpdate) {
57        u8::from(*self).hash(state);
58    }
59}
60
61impl ContentHash for u8 {
62    fn hash(&self, state: &mut impl DigestUpdate) {
63        state.update(&[*self]);
64    }
65}
66
67impl ContentHash for u32 {
68    fn hash(&self, state: &mut impl DigestUpdate) {
69        state.update(&self.to_le_bytes());
70    }
71}
72
73impl ContentHash for i32 {
74    fn hash(&self, state: &mut impl DigestUpdate) {
75        state.update(&self.to_le_bytes());
76    }
77}
78
79impl ContentHash for u64 {
80    fn hash(&self, state: &mut impl DigestUpdate) {
81        state.update(&self.to_le_bytes());
82    }
83}
84
85impl ContentHash for i64 {
86    fn hash(&self, state: &mut impl DigestUpdate) {
87        state.update(&self.to_le_bytes());
88    }
89}
90
91// TODO: Specialize for [u8] once specialization exists
92impl<T: ContentHash> ContentHash for [T] {
93    fn hash(&self, state: &mut impl DigestUpdate) {
94        state.update(&(self.len() as u64).to_le_bytes());
95        for x in self {
96            x.hash(state);
97        }
98    }
99}
100
101impl<T: ContentHash> ContentHash for Vec<T> {
102    fn hash(&self, state: &mut impl DigestUpdate) {
103        self.as_slice().hash(state);
104    }
105}
106
107impl ContentHash for str {
108    fn hash(&self, state: &mut impl DigestUpdate) {
109        self.as_bytes().hash(state);
110    }
111}
112
113impl ContentHash for String {
114    fn hash(&self, state: &mut impl DigestUpdate) {
115        self.as_str().hash(state);
116    }
117}
118
119impl<T: ContentHash> ContentHash for Option<T> {
120    fn hash(&self, state: &mut impl DigestUpdate) {
121        match self {
122            None => state.update(&0u32.to_le_bytes()),
123            Some(x) => {
124                state.update(&1u32.to_le_bytes());
125                x.hash(state);
126            }
127        }
128    }
129}
130
131impl<K, V> ContentHash for std::collections::HashMap<K, V>
132where
133    K: ContentHash + Ord,
134    V: ContentHash,
135{
136    fn hash(&self, state: &mut impl DigestUpdate) {
137        state.update(&(self.len() as u64).to_le_bytes());
138        let mut kv = self.iter().collect_vec();
139        kv.sort_unstable_by_key(|&(k, _)| k);
140        for (k, v) in kv {
141            k.hash(state);
142            v.hash(state);
143        }
144    }
145}
146
147impl<K> ContentHash for std::collections::HashSet<K>
148where
149    K: ContentHash + Ord,
150{
151    fn hash(&self, state: &mut impl DigestUpdate) {
152        state.update(&(self.len() as u64).to_le_bytes());
153        for k in self.iter().sorted() {
154            k.hash(state);
155        }
156    }
157}
158
159impl<K, V> ContentHash for std::collections::BTreeMap<K, V>
160where
161    K: ContentHash,
162    V: ContentHash,
163{
164    fn hash(&self, state: &mut impl DigestUpdate) {
165        state.update(&(self.len() as u64).to_le_bytes());
166        for (k, v) in self {
167            k.hash(state);
168            v.hash(state);
169        }
170    }
171}
172
173#[cfg(test)]
174mod tests {
175    use std::collections::BTreeMap;
176    use std::collections::HashMap;
177
178    use super::*;
179    use crate::hex_util;
180
181    #[test]
182    fn test_string_sanity() {
183        let a = "a".to_string();
184        let b = "b".to_string();
185        assert_eq!(hash(&a), hash(&a.clone()));
186        assert_ne!(hash(&a), hash(&b));
187        assert_ne!(hash(&"a".to_string()), hash(&"a\0".to_string()));
188    }
189
190    #[test]
191    fn test_hash_map_key_value_distinction() {
192        let a = [("ab".to_string(), "cd".to_string())]
193            .into_iter()
194            .collect::<HashMap<_, _>>();
195        let b = [("a".to_string(), "bcd".to_string())]
196            .into_iter()
197            .collect::<HashMap<_, _>>();
198
199        assert_ne!(hash(&a), hash(&b));
200    }
201
202    #[test]
203    fn test_btree_map_key_value_distinction() {
204        let a = [("ab".to_string(), "cd".to_string())]
205            .into_iter()
206            .collect::<BTreeMap<_, _>>();
207        let b = [("a".to_string(), "bcd".to_string())]
208            .into_iter()
209            .collect::<BTreeMap<_, _>>();
210
211        assert_ne!(hash(&a), hash(&b));
212    }
213
214    #[test]
215    fn test_tuple_sanity() {
216        #[derive(ContentHash)]
217        struct T1(i32);
218        #[derive(ContentHash)]
219        struct T2(i32, i32);
220        #[derive(ContentHash)]
221        struct T3(i32, i32, i32);
222        #[derive(ContentHash)]
223        struct T4(i32, i32, i32, i32);
224        assert_eq!(hash(&T1(0)), hash(&(0,)));
225        assert_eq!(hash(&T2(0, 1)), hash(&(0, 1)));
226        assert_eq!(hash(&T3(0, 1, 2)), hash(&(0, 1, 2)));
227        assert_eq!(hash(&T4(0, 1, 2, 3)), hash(&(0, 1, 2, 3)));
228    }
229
230    #[test]
231    fn test_struct_sanity() {
232        #[derive(ContentHash)]
233        struct Foo {
234            x: i32,
235        }
236        assert_ne!(hash(&Foo { x: 42 }), hash(&Foo { x: 12 }));
237    }
238
239    #[test]
240    fn test_option_sanity() {
241        assert_ne!(hash(&Some(42)), hash(&42));
242        assert_ne!(hash(&None::<i32>), hash(&42i32));
243    }
244
245    #[test]
246    fn test_slice_sanity() {
247        assert_ne!(hash(&[42i32][..]), hash(&[12i32][..]));
248        assert_ne!(hash(&([] as [i32; 0])[..]), hash(&[42i32][..]));
249        assert_ne!(hash(&([] as [i32; 0])[..]), hash(&()));
250        assert_ne!(hash(&42i32), hash(&[42i32][..]));
251    }
252
253    #[test]
254    fn test_consistent_hashing() {
255        #[derive(ContentHash)]
256        struct Foo {
257            x: Vec<Option<i32>>,
258            y: i64,
259        }
260        let foo_hash = hex_util::encode_hex(&hash(&Foo {
261            x: vec![None, Some(42)],
262            y: 17,
263        }));
264        insta::assert_snapshot!(
265            foo_hash,
266            @"e33c423b4b774b1353c414e0f9ef108822fde2fd5113fcd53bf7bd9e74e3206690b96af96373f268ed95dd020c7cbe171c7b7a6947fcaf5703ff6c8e208cefd4"
267        );
268
269        // Try again with an equivalent generic struct deriving ContentHash.
270        #[derive(ContentHash)]
271        struct GenericFoo<X, Y> {
272            x: X,
273            y: Y,
274        }
275        assert_eq!(
276            hex_util::encode_hex(&hash(&GenericFoo {
277                x: vec![None, Some(42)],
278                y: 17i64
279            })),
280            foo_hash
281        );
282    }
283
284    // Test that the derived version of `ContentHash` matches the that's
285    // manually implemented for `std::Option`.
286    #[test]
287    fn derive_for_enum() {
288        #[derive(ContentHash)]
289        enum MyOption<T> {
290            None,
291            Some(T),
292        }
293        assert_eq!(hash(&Option::<i32>::None), hash(&MyOption::<i32>::None));
294        assert_eq!(hash(&Some(1)), hash(&MyOption::Some(1)));
295    }
296
297    fn hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
298        blake2b_hash(x)
299    }
300}