multiset_hash/lib.rs
1use curve25519_dalek::{ristretto::RistrettoPoint, scalar::Scalar};
2use digest::{
3 consts::{U32, U64},
4 generic_array::GenericArray,
5 Digest, FixedOutput, Reset, Update,
6};
7
8/// RistrettoHash represents a hash function for multi-sets.
9///
10/// A multi-set is a collection of objects, where any given object may appear
11/// multiple times. This hash function will accept the objects in different
12/// orders, and with different groupings, but always return the same result
13/// for the same multi-set.
14///
15/// This allows incrementally calculating such a multi-set hash without keeping
16/// the entire set in memory.
17///
18/// This struct is parameterized by a hash function with 512 bits of output.
19/// For example, SHA-512.
20///
21/// This is called `RistrettoHash`, because this function internally uses
22/// the Ristretto group to implement its commutative hashing.
23///
24/// # Examples
25///
26/// In the usual case, you add in different byte slices, with multiplicity For example,
27/// to hash the set `{cat, cat, dog, dog}`, you could do:
28///
29/// ```
30/// # use multiset_hash::RistrettoHash;
31/// use digest::Digest;
32/// use sha2::Sha512;
33///
34/// let mut hash = RistrettoHash::<Sha512>::default();
35/// hash.add(b"cat", 2);
36/// hash.add(b"dog", 2);
37/// ```
38///
39/// This is equivalent to hashing the same number of objects, with a different
40/// grouping and order:
41///
42/// ```
43/// # use multiset_hash::RistrettoHash;
44/// # use sha2::Sha512;
45/// use digest::Digest;
46///
47/// let mut hash = RistrettoHash::<Sha512>::default();
48/// hash.add(b"dog", 1);
49/// hash.add(b"cat", 1);
50/// hash.add(b"cat", 1);
51/// hash.add(b"dog", 1);
52/// ```
53///
54/// You can also hash objects in multiple pieces using the `update` method:
55///
56/// ```
57/// # use multiset_hash::RistrettoHash;
58/// # use sha2::Sha512;
59/// use digest::Digest;
60///
61/// let mut hash = RistrettoHash::<Sha512>::default();
62/// hash.update(b"d");
63/// hash.update(b"og");
64/// hash.end_update(2);
65/// hash.add(b"cat", 2);
66/// ```
67///
68/// If you use `update`, you must call `end_update` before adding another object,
69/// or calling `finalize` to get the output of the hash function.
70#[derive(Clone, Default)]
71pub struct RistrettoHash<H> {
72 hash: H,
73 // This flag will get set after we call update, and indicates that
74 // we need to finish that update with an explicit call to `end_update`.
75 updating: bool,
76 acc: RistrettoPoint,
77}
78
79impl<H: Digest<OutputSize = U64> + Default> RistrettoHash<H> {
80 /// This function adds a complete object to the hash.
81 ///
82 /// This function takes a multiplicity, which is equivalent to calling the function
83 /// multiple times, but is much more efficient.
84 pub fn add(&mut self, data: impl AsRef<[u8]>, multiplicity: u64) {
85 if self.updating {
86 panic!("add called before end_update");
87 }
88 self.hash.update(data);
89 self.end_update(multiplicity);
90 }
91
92 /// This function should be called to mark the end of an object provided with `update`.
93 ///
94 /// This must always be called after calls to `update`, otherwise panics will happen
95 /// when finalizing or adding new objects.
96 ///
97 /// If called without any prior calls to `update`, this function is equivalent
98 /// to calling `add` with an empty slice.
99 pub fn end_update(&mut self, multiplicity: u64) {
100 self.updating = false;
101
102 let old = std::mem::replace(&mut self.hash, H::default());
103 let h_point = RistrettoPoint::from_hash(old);
104 self.acc += Scalar::from(multiplicity) * h_point;
105 }
106}
107
108impl<H: Reset> FixedOutput for RistrettoHash<H> {
109 type OutputSize = U32;
110
111 fn finalize_into(self, out: &mut GenericArray<u8, Self::OutputSize>) {
112 if self.updating {
113 panic!("end_update not called before finalizing");
114 }
115 out.copy_from_slice(&self.acc.compress().as_bytes()[..]);
116 }
117
118 fn finalize_into_reset(&mut self, out: &mut GenericArray<u8, Self::OutputSize>) {
119 if self.updating {
120 panic!("end_update not called before finalizing");
121 }
122 out.copy_from_slice(&self.acc.compress().as_bytes()[..]);
123 self.reset();
124 }
125}
126
127impl<H: Reset> Reset for RistrettoHash<H> {
128 fn reset(&mut self) {
129 self.hash.reset();
130 self.updating = false;
131 self.acc = RistrettoPoint::default();
132 }
133}
134
135impl<H: Update> Update for RistrettoHash<H> {
136 /// update hashes in part of an object.
137 ///
138 /// This method is used to hash an object in multiple different parts.
139 /// These updates must be finished by calling `end_update` to mark the end
140 /// of the object, and its multiplicity.
141 ///
142 /// Failing to call `end_update` before adding a new object with `add` or
143 /// finalizing the hash will panic.
144 fn update(&mut self, data: impl AsRef<[u8]>) {
145 self.updating = true;
146 self.hash.update(data);
147 }
148}
149
150#[cfg(test)]
151mod test {
152 use sha2::Sha512;
153
154 use super::RistrettoHash;
155 use digest::Digest;
156
157 #[test]
158 fn test_add_with_multiplicity() {
159 let data = b"test data";
160
161 let mut hash1 = RistrettoHash::<Sha512>::default();
162 let mut hash2 = hash1.clone();
163
164 hash1.add(data, 3);
165 hash2.add(data, 1);
166 hash2.add(data, 1);
167 hash2.add(data, 1);
168
169 let output1 = hash1.finalize();
170 let output2 = hash2.finalize();
171 assert_eq!(output1, output2)
172 }
173
174 #[test]
175 fn test_hash_commutative() {
176 let data_a = b"test data A";
177 let data_b = b"test data B";
178
179 let mut hash1 = RistrettoHash::<Sha512>::default();
180 let mut hash2 = hash1.clone();
181
182 hash1.add(data_a, 1);
183 hash1.add(data_b, 1);
184
185 hash2.add(data_b, 1);
186 hash2.add(data_a, 1);
187
188 let output1 = hash1.finalize();
189 let output2 = hash2.finalize();
190 assert_eq!(output1, output2)
191 }
192
193 #[test]
194 fn test_partial_updates() {
195 let mut hash1 = RistrettoHash::<Sha512>::default();
196 let mut hash2 = hash1.clone();
197
198 hash1.add("the full data", 3);
199 hash2.update("the");
200 hash2.update(" full");
201 hash2.update(" data");
202 hash2.end_update(3);
203
204 let output1 = hash1.finalize();
205 let output2 = hash2.finalize();
206 assert_eq!(output1, output2)
207 }
208
209 #[test]
210 #[should_panic]
211 fn test_add_before_end_update_panics() {
212 let mut hash = RistrettoHash::<Sha512>::default();
213 hash.update("some data");
214 hash.add("more data", 1);
215 }
216
217 #[test]
218 #[should_panic]
219 fn test_finalize_before_end_update_panics() {
220 let mut hash = RistrettoHash::<Sha512>::default();
221 hash.update("some data");
222 hash.finalize();
223 }
224}