1use blake2::Blake2b512;
4pub use digest::Update as DigestUpdate;
7use itertools::Itertools as _;
8pub use jj_lib_proc_macros::ContentHash;
9
10pub trait ContentHash {
20 fn hash(&self, state: &mut impl DigestUpdate);
22}
23
24pub fn blake2b_hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
26 use digest::Digest as _;
27 let mut hasher = Blake2b512::default();
28 x.hash(&mut hasher);
29 hasher.finalize()
30}
31
32impl ContentHash for () {
33 fn hash(&self, _: &mut impl DigestUpdate) {}
34}
35
36macro_rules! tuple_impls {
37 ($( ( $($n:tt $T:ident),+ ) )+) => {
38 $(
39 impl<$($T: ContentHash,)+> ContentHash for ($($T,)+) {
40 fn hash(&self, state: &mut impl DigestUpdate) {
41 $(self.$n.hash(state);)+
42 }
43 }
44 )+
45 }
46}
47
48tuple_impls! {
49 (0 T0)
50 (0 T0, 1 T1)
51 (0 T0, 1 T1, 2 T2)
52 (0 T0, 1 T1, 2 T2, 3 T3)
53}
54
55impl ContentHash for bool {
56 fn hash(&self, state: &mut impl DigestUpdate) {
57 u8::from(*self).hash(state);
58 }
59}
60
61impl ContentHash for u8 {
62 fn hash(&self, state: &mut impl DigestUpdate) {
63 state.update(&[*self]);
64 }
65}
66
67impl ContentHash for u32 {
68 fn hash(&self, state: &mut impl DigestUpdate) {
69 state.update(&self.to_le_bytes());
70 }
71}
72
73impl ContentHash for i32 {
74 fn hash(&self, state: &mut impl DigestUpdate) {
75 state.update(&self.to_le_bytes());
76 }
77}
78
79impl ContentHash for u64 {
80 fn hash(&self, state: &mut impl DigestUpdate) {
81 state.update(&self.to_le_bytes());
82 }
83}
84
85impl ContentHash for i64 {
86 fn hash(&self, state: &mut impl DigestUpdate) {
87 state.update(&self.to_le_bytes());
88 }
89}
90
91impl<T: ContentHash> ContentHash for [T] {
93 fn hash(&self, state: &mut impl DigestUpdate) {
94 state.update(&(self.len() as u64).to_le_bytes());
95 for x in self {
96 x.hash(state);
97 }
98 }
99}
100
101impl<T: ContentHash> ContentHash for Vec<T> {
102 fn hash(&self, state: &mut impl DigestUpdate) {
103 self.as_slice().hash(state);
104 }
105}
106
107impl ContentHash for str {
108 fn hash(&self, state: &mut impl DigestUpdate) {
109 self.as_bytes().hash(state);
110 }
111}
112
113impl ContentHash for String {
114 fn hash(&self, state: &mut impl DigestUpdate) {
115 self.as_str().hash(state);
116 }
117}
118
119impl<T: ContentHash> ContentHash for Option<T> {
120 fn hash(&self, state: &mut impl DigestUpdate) {
121 match self {
122 None => state.update(&0u32.to_le_bytes()),
123 Some(x) => {
124 state.update(&1u32.to_le_bytes());
125 x.hash(state);
126 }
127 }
128 }
129}
130
131impl<K, V> ContentHash for std::collections::HashMap<K, V>
132where
133 K: ContentHash + Ord,
134 V: ContentHash,
135{
136 fn hash(&self, state: &mut impl DigestUpdate) {
137 state.update(&(self.len() as u64).to_le_bytes());
138 let mut kv = self.iter().collect_vec();
139 kv.sort_unstable_by_key(|&(k, _)| k);
140 for (k, v) in kv {
141 k.hash(state);
142 v.hash(state);
143 }
144 }
145}
146
147impl<K> ContentHash for std::collections::HashSet<K>
148where
149 K: ContentHash + Ord,
150{
151 fn hash(&self, state: &mut impl DigestUpdate) {
152 state.update(&(self.len() as u64).to_le_bytes());
153 for k in self.iter().sorted() {
154 k.hash(state);
155 }
156 }
157}
158
159impl<K, V> ContentHash for std::collections::BTreeMap<K, V>
160where
161 K: ContentHash,
162 V: ContentHash,
163{
164 fn hash(&self, state: &mut impl DigestUpdate) {
165 state.update(&(self.len() as u64).to_le_bytes());
166 for (k, v) in self {
167 k.hash(state);
168 v.hash(state);
169 }
170 }
171}
172
173#[cfg(test)]
174mod tests {
175 use std::collections::BTreeMap;
176 use std::collections::HashMap;
177
178 use super::*;
179 use crate::hex_util;
180
181 #[test]
182 fn test_string_sanity() {
183 let a = "a".to_string();
184 let b = "b".to_string();
185 assert_eq!(hash(&a), hash(&a.clone()));
186 assert_ne!(hash(&a), hash(&b));
187 assert_ne!(hash(&"a".to_string()), hash(&"a\0".to_string()));
188 }
189
190 #[test]
191 fn test_hash_map_key_value_distinction() {
192 let a = [("ab".to_string(), "cd".to_string())]
193 .into_iter()
194 .collect::<HashMap<_, _>>();
195 let b = [("a".to_string(), "bcd".to_string())]
196 .into_iter()
197 .collect::<HashMap<_, _>>();
198
199 assert_ne!(hash(&a), hash(&b));
200 }
201
202 #[test]
203 fn test_btree_map_key_value_distinction() {
204 let a = [("ab".to_string(), "cd".to_string())]
205 .into_iter()
206 .collect::<BTreeMap<_, _>>();
207 let b = [("a".to_string(), "bcd".to_string())]
208 .into_iter()
209 .collect::<BTreeMap<_, _>>();
210
211 assert_ne!(hash(&a), hash(&b));
212 }
213
214 #[test]
215 fn test_tuple_sanity() {
216 #[derive(ContentHash)]
217 struct T1(i32);
218 #[derive(ContentHash)]
219 struct T2(i32, i32);
220 #[derive(ContentHash)]
221 struct T3(i32, i32, i32);
222 #[derive(ContentHash)]
223 struct T4(i32, i32, i32, i32);
224 assert_eq!(hash(&T1(0)), hash(&(0,)));
225 assert_eq!(hash(&T2(0, 1)), hash(&(0, 1)));
226 assert_eq!(hash(&T3(0, 1, 2)), hash(&(0, 1, 2)));
227 assert_eq!(hash(&T4(0, 1, 2, 3)), hash(&(0, 1, 2, 3)));
228 }
229
230 #[test]
231 fn test_struct_sanity() {
232 #[derive(ContentHash)]
233 struct Foo {
234 x: i32,
235 }
236 assert_ne!(hash(&Foo { x: 42 }), hash(&Foo { x: 12 }));
237 }
238
239 #[test]
240 fn test_option_sanity() {
241 assert_ne!(hash(&Some(42)), hash(&42));
242 assert_ne!(hash(&None::<i32>), hash(&42i32));
243 }
244
245 #[test]
246 fn test_slice_sanity() {
247 assert_ne!(hash(&[42i32][..]), hash(&[12i32][..]));
248 assert_ne!(hash(&([] as [i32; 0])[..]), hash(&[42i32][..]));
249 assert_ne!(hash(&([] as [i32; 0])[..]), hash(&()));
250 assert_ne!(hash(&42i32), hash(&[42i32][..]));
251 }
252
253 #[test]
254 fn test_consistent_hashing() {
255 #[derive(ContentHash)]
256 struct Foo {
257 x: Vec<Option<i32>>,
258 y: i64,
259 }
260 let foo_hash = hex_util::encode_hex(&hash(&Foo {
261 x: vec![None, Some(42)],
262 y: 17,
263 }));
264 insta::assert_snapshot!(
265 foo_hash,
266 @"e33c423b4b774b1353c414e0f9ef108822fde2fd5113fcd53bf7bd9e74e3206690b96af96373f268ed95dd020c7cbe171c7b7a6947fcaf5703ff6c8e208cefd4"
267 );
268
269 #[derive(ContentHash)]
271 struct GenericFoo<X, Y> {
272 x: X,
273 y: Y,
274 }
275 assert_eq!(
276 hex_util::encode_hex(&hash(&GenericFoo {
277 x: vec![None, Some(42)],
278 y: 17i64
279 })),
280 foo_hash
281 );
282 }
283
284 #[test]
287 fn derive_for_enum() {
288 #[derive(ContentHash)]
289 enum MyOption<T> {
290 None,
291 Some(T),
292 }
293 assert_eq!(hash(&Option::<i32>::None), hash(&MyOption::<i32>::None));
294 assert_eq!(hash(&Some(1)), hash(&MyOption::Some(1)));
295 }
296
297 fn hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
298 blake2b_hash(x)
299 }
300}