multiset_hash/
lib.rs

1use curve25519_dalek::{ristretto::RistrettoPoint, scalar::Scalar};
2use digest::{
3    consts::{U32, U64},
4    generic_array::GenericArray,
5    Digest, FixedOutput, Reset, Update,
6};
7
8/// RistrettoHash represents a hash function for multi-sets.
9///
10/// A multi-set is a collection of objects, where any given object may appear
11/// multiple times. This hash function will accept the objects in different
12/// orders, and with different groupings, but always return the same result
13/// for the same multi-set.
14///
15/// This allows incrementally calculating such a multi-set hash without keeping
16/// the entire set in memory.
17///
18/// This struct is parameterized by a hash function with 512 bits of output.
19/// For example, SHA-512.
20///
21/// This is called `RistrettoHash`, because this function internally uses
22/// the Ristretto group to implement its commutative hashing.
23///
24/// # Examples
25///
26/// In the usual case, you add in different byte slices, with multiplicity For example,
27/// to hash the set `{cat, cat, dog, dog}`, you could do:
28///
29/// ```
30/// # use multiset_hash::RistrettoHash;
31/// use digest::Digest;
32/// use sha2::Sha512;
33///
34/// let mut hash = RistrettoHash::<Sha512>::default();
35/// hash.add(b"cat", 2);
36/// hash.add(b"dog", 2);
37/// ```
38///
39/// This is equivalent to hashing the same number of objects, with a different
40/// grouping and order:
41///
42/// ```
43/// # use multiset_hash::RistrettoHash;
44/// # use sha2::Sha512;
45/// use digest::Digest;
46///
47/// let mut hash = RistrettoHash::<Sha512>::default();
48/// hash.add(b"dog", 1);
49/// hash.add(b"cat", 1);
50/// hash.add(b"cat", 1);
51/// hash.add(b"dog", 1);
52/// ```
53///
54/// You can also hash objects in multiple pieces using the `update` method:
55///
56/// ```
57/// # use multiset_hash::RistrettoHash;
58/// # use sha2::Sha512;
59/// use digest::Digest;
60///
61/// let mut hash = RistrettoHash::<Sha512>::default();
62/// hash.update(b"d");
63/// hash.update(b"og");
64/// hash.end_update(2);
65/// hash.add(b"cat", 2);
66/// ```
67///
68/// If you use `update`, you must call `end_update` before adding another object,
69/// or calling `finalize` to get the output of the hash function.
70#[derive(Clone, Default)]
71pub struct RistrettoHash<H> {
72    hash: H,
73    // This flag will get set after we call update, and indicates that
74    // we need to finish that update with an explicit call to `end_update`.
75    updating: bool,
76    acc: RistrettoPoint,
77}
78
79impl<H: Digest<OutputSize = U64> + Default> RistrettoHash<H> {
80    /// This function adds a complete object to the hash.
81    ///
82    /// This function takes a multiplicity, which is equivalent to calling the function
83    /// multiple times, but is much more efficient.
84    pub fn add(&mut self, data: impl AsRef<[u8]>, multiplicity: u64) {
85        if self.updating {
86            panic!("add called before end_update");
87        }
88        self.hash.update(data);
89        self.end_update(multiplicity);
90    }
91
92    /// This function should be called to mark the end of an object provided with `update`.
93    ///
94    /// This must always be called after calls to `update`, otherwise panics will happen
95    /// when finalizing or adding new objects.
96    ///
97    /// If called without any prior calls to `update`, this function is equivalent
98    /// to calling `add` with an empty slice.
99    pub fn end_update(&mut self, multiplicity: u64) {
100        self.updating = false;
101
102        let old = std::mem::replace(&mut self.hash, H::default());
103        let h_point = RistrettoPoint::from_hash(old);
104        self.acc += Scalar::from(multiplicity) * h_point;
105    }
106}
107
108impl<H: Reset> FixedOutput for RistrettoHash<H> {
109    type OutputSize = U32;
110
111    fn finalize_into(self, out: &mut GenericArray<u8, Self::OutputSize>) {
112        if self.updating {
113            panic!("end_update not called before finalizing");
114        }
115        out.copy_from_slice(&self.acc.compress().as_bytes()[..]);
116    }
117
118    fn finalize_into_reset(&mut self, out: &mut GenericArray<u8, Self::OutputSize>) {
119        if self.updating {
120            panic!("end_update not called before finalizing");
121        }
122        out.copy_from_slice(&self.acc.compress().as_bytes()[..]);
123        self.reset();
124    }
125}
126
127impl<H: Reset> Reset for RistrettoHash<H> {
128    fn reset(&mut self) {
129        self.hash.reset();
130        self.updating = false;
131        self.acc = RistrettoPoint::default();
132    }
133}
134
135impl<H: Update> Update for RistrettoHash<H> {
136    /// update hashes in part of an object.
137    ///
138    /// This method is used to hash an object in multiple different parts.
139    /// These updates must be finished by calling `end_update` to mark the end
140    /// of the object, and its multiplicity.
141    ///
142    /// Failing to call `end_update` before adding a new object with `add` or
143    /// finalizing the hash will panic.
144    fn update(&mut self, data: impl AsRef<[u8]>) {
145        self.updating = true;
146        self.hash.update(data);
147    }
148}
149
150#[cfg(test)]
151mod test {
152    use sha2::Sha512;
153
154    use super::RistrettoHash;
155    use digest::Digest;
156
157    #[test]
158    fn test_add_with_multiplicity() {
159        let data = b"test data";
160
161        let mut hash1 = RistrettoHash::<Sha512>::default();
162        let mut hash2 = hash1.clone();
163
164        hash1.add(data, 3);
165        hash2.add(data, 1);
166        hash2.add(data, 1);
167        hash2.add(data, 1);
168
169        let output1 = hash1.finalize();
170        let output2 = hash2.finalize();
171        assert_eq!(output1, output2)
172    }
173
174    #[test]
175    fn test_hash_commutative() {
176        let data_a = b"test data A";
177        let data_b = b"test data B";
178
179        let mut hash1 = RistrettoHash::<Sha512>::default();
180        let mut hash2 = hash1.clone();
181
182        hash1.add(data_a, 1);
183        hash1.add(data_b, 1);
184
185        hash2.add(data_b, 1);
186        hash2.add(data_a, 1);
187
188        let output1 = hash1.finalize();
189        let output2 = hash2.finalize();
190        assert_eq!(output1, output2)
191    }
192
193    #[test]
194    fn test_partial_updates() {
195        let mut hash1 = RistrettoHash::<Sha512>::default();
196        let mut hash2 = hash1.clone();
197
198        hash1.add("the full data", 3);
199        hash2.update("the");
200        hash2.update(" full");
201        hash2.update(" data");
202        hash2.end_update(3);
203
204        let output1 = hash1.finalize();
205        let output2 = hash2.finalize();
206        assert_eq!(output1, output2)
207    }
208
209    #[test]
210    #[should_panic]
211    fn test_add_before_end_update_panics() {
212        let mut hash = RistrettoHash::<Sha512>::default();
213        hash.update("some data");
214        hash.add("more data", 1);
215    }
216
217    #[test]
218    #[should_panic]
219    fn test_finalize_before_end_update_panics() {
220        let mut hash = RistrettoHash::<Sha512>::default();
221        hash.update("some data");
222        hash.finalize();
223    }
224}