kaspa_muhash/
lib.rs

1// Make u3072 public if we're fuzzing
2#[cfg(fuzzing)]
3pub mod u3072;
4#[cfg(not(fuzzing))]
5mod u3072;
6
7use crate::u3072::U3072;
8use kaspa_hashes::{Hash, Hasher, HasherBase, MuHashElementHash, MuHashFinalizeHash};
9use kaspa_math::Uint3072;
10use rand_chacha::rand_core::{RngCore, SeedableRng};
11use rand_chacha::ChaCha20Rng;
12use serde::{Deserialize, Serialize};
13use std::error::Error;
14use std::fmt::Display;
15
16pub const HASH_SIZE: usize = 32;
17pub const SERIALIZED_MUHASH_SIZE: usize = ELEMENT_BYTE_SIZE;
18// The hash of `NewMuHash().Finalize()`
19pub const EMPTY_MUHASH: Hash = Hash::from_bytes([
20    0x54, 0x4e, 0xb3, 0x14, 0x2c, 0x0, 0xf, 0xa, 0xd2, 0xc7, 0x6a, 0xc4, 0x1f, 0x42, 0x22, 0xab, 0xba, 0xba, 0xbe, 0xd8, 0x30, 0xee,
21    0xaf, 0xee, 0x4b, 0x6d, 0xc5, 0x6b, 0x52, 0xd5, 0xca, 0xc0,
22]);
23
24pub(crate) const ELEMENT_BIT_SIZE: usize = 3072;
25pub(crate) const ELEMENT_BYTE_SIZE: usize = ELEMENT_BIT_SIZE / 8;
26
27/// MuHash is a type used to create a Multiplicative Hash
28/// which is a rolling(homomorphic) hash that you can add and remove elements from
29/// and receive the same resulting hash as-if you never hashed them.
30/// Because of that the order of adding and removing elements doesn't matter.
31#[derive(Clone, Debug, Serialize, Deserialize)]
32pub struct MuHash {
33    numerator: U3072,
34    denominator: U3072,
35}
36
37#[derive(Debug, PartialEq, Eq)]
38pub struct OverflowError;
39
40impl Display for OverflowError {
41    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42        write!(f, "Overflow in the MuHash field")
43    }
44}
45
46impl Error for OverflowError {}
47
48impl MuHash {
49    #[inline]
50    /// return an empty initialized set.
51    /// when finalized it should be equal to a finalized set with all elements removed.
52    pub fn new() -> Self {
53        Self { numerator: U3072::one(), denominator: U3072::one() }
54    }
55
56    #[inline]
57    // hashes the data and adds it to the muhash.
58    // Supports arbitrary length data (subject to the underlying hash function(Blake2b) limits)
59    pub fn add_element(&mut self, data: &[u8]) {
60        let element = data_to_element(data);
61        self.numerator *= element;
62    }
63
64    #[inline]
65    // hashes the data and removes it from the muhash.
66    // Supports arbitrary length data (subject to the underlying hash function(Blake2b) limits)
67    pub fn remove_element(&mut self, data: &[u8]) {
68        let element = data_to_element(data);
69        self.denominator *= element;
70    }
71
72    #[inline]
73    // returns a hasher for hashing data which on `finalize` adds the finalized hash to the muhash.
74    pub fn add_element_builder(&mut self) -> MuHashElementBuilder {
75        MuHashElementBuilder::new(&mut self.numerator)
76    }
77
78    #[inline]
79    // returns a hasher for hashing data which on `finalize` removes the finalized hash from the muhash.
80    pub fn remove_element_builder(&mut self) -> MuHashElementBuilder {
81        MuHashElementBuilder::new(&mut self.denominator)
82    }
83
84    #[inline]
85    // will add the MuHash together. Equivalent to manually adding all the data elements
86    // from one set to the other.
87    pub fn combine(&mut self, other: &Self) {
88        self.numerator *= other.numerator;
89        self.denominator *= other.denominator;
90    }
91
92    #[inline]
93    pub fn finalize(&mut self) -> Hash {
94        let serialized = self.serialize();
95        MuHashFinalizeHash::hash(serialized)
96    }
97
98    #[inline]
99    fn normalize(&mut self) {
100        self.numerator /= self.denominator;
101        self.denominator = U3072::one();
102    }
103
104    #[inline]
105    pub fn serialize(&mut self) -> [u8; SERIALIZED_MUHASH_SIZE] {
106        self.normalize();
107        self.numerator.to_le_bytes()
108    }
109
110    #[inline]
111    pub fn deserialize(data: [u8; SERIALIZED_MUHASH_SIZE]) -> Result<Self, OverflowError> {
112        let numerator = U3072::from_le_bytes(data);
113        if numerator.is_overflow() {
114            Err(OverflowError)
115        } else {
116            Ok(Self { numerator, denominator: U3072::one() })
117        }
118    }
119}
120
121#[derive(Debug)]
122pub enum MuHashError {
123    NonNormalizedValue,
124}
125
126impl TryFrom<MuHash> for Uint3072 {
127    type Error = MuHashError;
128
129    fn try_from(value: MuHash) -> Result<Self, Self::Error> {
130        if value.denominator == U3072::one() {
131            Ok(value.numerator.into())
132        } else {
133            Err(MuHashError::NonNormalizedValue)
134        }
135    }
136}
137
138impl From<Uint3072> for MuHash {
139    fn from(u: Uint3072) -> Self {
140        MuHash { numerator: u.into(), denominator: U3072::one() }
141    }
142}
143
144pub struct MuHashElementBuilder<'a> {
145    muhash_field: &'a mut U3072,
146    element_hasher: MuHashElementHash,
147}
148
149impl<'a> HasherBase for MuHashElementBuilder<'a> {
150    fn update<A: AsRef<[u8]>>(&mut self, data: A) -> &mut Self {
151        self.element_hasher.write(data);
152        self
153    }
154}
155
156impl<'a> MuHashElementBuilder<'a> {
157    pub fn new(muhash_field: &'a mut U3072) -> Self {
158        Self { muhash_field, element_hasher: MuHashElementHash::new() }
159    }
160
161    pub fn finalize(self) {
162        let hash = self.element_hasher.finalize();
163        let mut stream = ChaCha20Rng::from_seed(hash.as_bytes());
164        let mut bytes = [0u8; ELEMENT_BYTE_SIZE];
165        stream.fill_bytes(&mut bytes);
166        *self.muhash_field *= U3072::from_le_bytes(bytes);
167    }
168}
169
170#[inline]
171fn data_to_element(data: &[u8]) -> U3072 {
172    let hash = MuHashElementHash::hash(data);
173    let mut stream = ChaCha20Rng::from_seed(hash.as_bytes());
174    let mut bytes = [0u8; ELEMENT_BYTE_SIZE];
175    stream.fill_bytes(&mut bytes);
176    U3072::from_le_bytes(bytes)
177}
178
179impl Default for MuHash {
180    #[inline]
181    fn default() -> Self {
182        Self::new()
183    }
184}
185
186#[cfg(test)]
187mod tests {
188    use crate::OverflowError;
189    use crate::{MuHash, EMPTY_MUHASH, U3072};
190    use kaspa_hashes::Hash;
191    use rand::{Rng, SeedableRng};
192    use rand_chacha::ChaCha8Rng;
193
194    struct TestVector {
195        data: &'static [u8],
196        multiset_hash: Hash,
197        cumulative_hash: Hash,
198    }
199
200    const TEST_VECTORS: [TestVector; 3] = [
201        TestVector {
202            data: &[
203                152, 32, 81, 253, 30, 75, 167, 68, 187, 190, 104, 14, 31, 238, 20, 103, 123, 161, 163, 195, 84, 11, 247, 177, 205,
204                182, 6, 232, 87, 35, 62, 14, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 242, 5, 42, 1, 0, 0, 0, 67, 65, 4, 150, 181, 56, 232, 83,
205                81, 156, 114, 106, 44, 145, 230, 30, 193, 22, 0, 174, 19, 144, 129, 58, 98, 124, 102, 251, 139, 231, 148, 123, 230,
206                60, 82, 218, 117, 137, 55, 149, 21, 212, 224, 166, 4, 248, 20, 23, 129, 230, 34, 148, 114, 17, 102, 191, 98, 30, 115,
207                168, 44, 191, 35, 66, 200, 88, 238, 172,
208            ],
209            multiset_hash: Hash::from_bytes([
210                44, 55, 150, 32, 253, 244, 236, 10, 194, 83, 203, 228, 186, 130, 194, 187, 220, 15, 237, 172, 127, 224, 228, 82, 149,
211                125, 147, 117, 123, 191, 245, 193,
212            ]),
213            cumulative_hash: Hash::from_bytes([
214                44, 55, 150, 32, 253, 244, 236, 10, 194, 83, 203, 228, 186, 130, 194, 187, 220, 15, 237, 172, 127, 224, 228, 82, 149,
215                125, 147, 117, 123, 191, 245, 193,
216            ]),
217        },
218        TestVector {
219            data: &[
220                213, 253, 204, 84, 30, 37, 222, 28, 122, 90, 221, 237, 242, 72, 88, 184, 187, 102, 92, 159, 54, 239, 116, 78, 228, 44,
221                49, 96, 34, 201, 15, 155, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 242, 5, 42, 1, 0, 0, 0, 67, 65, 4, 114, 17, 168, 36, 245, 91,
222                80, 82, 40, 228, 195, 213, 25, 76, 31, 207, 170, 21, 164, 86, 171, 223, 55, 249, 185, 217, 122, 64, 64, 175, 192, 115,
223                222, 230, 200, 144, 100, 152, 79, 3, 56, 82, 55, 217, 33, 103, 193, 62, 35, 100, 70, 180, 23, 171, 121, 160, 252, 174,
224                65, 42, 227, 49, 107, 119, 172,
225            ],
226            multiset_hash: Hash::from_bytes([
227                102, 139, 178, 146, 239, 21, 44, 84, 219, 15, 87, 20, 191, 69, 255, 141, 167, 177, 212, 28, 12, 80, 38, 173, 101, 91,
228                47, 158, 27, 230, 126, 33,
229            ]),
230            cumulative_hash: Hash::from_bytes([
231                177, 91, 209, 18, 74, 107, 82, 230, 78, 218, 60, 48, 35, 197, 135, 228, 85, 167, 158, 116, 140, 140, 149, 77, 215, 65,
232                29, 13, 189, 151, 56, 99,
233            ]),
234        },
235        TestVector {
236            data: &[
237                68, 246, 114, 34, 96, 144, 216, 93, 185, 169, 242, 251, 254, 95, 15, 150, 9, 179, 135, 175, 123, 229, 183, 251, 183,
238                161, 118, 124, 131, 28, 158, 153, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 242, 5, 42, 1, 0, 0, 0, 67, 65, 4, 148, 185, 211, 231,
239                108, 91, 22, 41, 236, 249, 127, 255, 149, 215, 164, 187, 218, 200, 124, 194, 96, 153, 173, 162, 128, 102, 198, 255,
240                30, 185, 25, 18, 35, 205, 137, 113, 148, 160, 141, 12, 39, 38, 197, 116, 127, 29, 180, 158, 140, 249, 14, 117, 220,
241                62, 53, 80, 174, 155, 48, 8, 111, 60, 213, 170, 172,
242            ],
243            multiset_hash: Hash::from_bytes([
244                244, 11, 32, 189, 196, 62, 242, 240, 26, 23, 59, 118, 124, 185, 198, 184, 219, 86, 2, 235, 83, 95, 203, 152, 39, 56,
245                95, 155, 14, 58, 250, 244,
246            ]),
247            cumulative_hash: Hash::from_bytes([
248                230, 156, 110, 5, 4, 16, 118, 22, 72, 206, 98, 118, 168, 28, 128, 68, 185, 239, 177, 113, 94, 166, 246, 251, 159, 140,
249                247, 168, 193, 232, 3, 150,
250            ]),
251        },
252    ];
253
254    fn element_from_byte(b: u8) -> [u8; 32] {
255        let mut out = [0u8; 32];
256        out[0] = b;
257        out
258    }
259
260    #[test]
261    fn test_random_muhash_arithmetic() {
262        let mut rng = ChaCha8Rng::seed_from_u64(1);
263        for _ in 0..10 {
264            let mut res = Hash::default();
265            let mut table = [0u8; 4];
266            rng.fill(&mut table[..]);
267
268            for order in 0..4 {
269                let mut acc = MuHash::new();
270                for i in 0..4 {
271                    let t = table[i ^ order];
272                    if (t & 4) != 0 {
273                        acc.remove_element(&element_from_byte(t & 3));
274                    } else {
275                        acc.add_element(&element_from_byte(t & 3));
276                    }
277                }
278                let out = acc.finalize();
279                if order == 0 {
280                    res = out;
281                } else {
282                    assert_eq!(res, out);
283                }
284            }
285            let x = element_from_byte(rng.gen()); // x=X
286            let y = element_from_byte(rng.gen()); // x=X, y=Y
287            let mut z = MuHash::new(); // x=X, y=X, z=1
288            let mut yx = MuHash::new(); // x=X, y=Y, z=1 yx=1
289            yx.add_element(&y); // x=X, y=X, z=1, yx=Y
290            yx.add_element(&x); // x=X, y=X, z=1, yx=Y*X
291            yx.normalize();
292            z.add_element(&x); // x=X, y=Y, z=X, yx=Y*X
293            z.add_element(&y); // x=X, y=Y, z=X*Y, yx = Y*X
294            z.denominator *= yx.numerator; // x=X, y=Y, z=1, yx=Y*X
295            assert_eq!(z.finalize(), EMPTY_MUHASH);
296        }
297    }
298
299    #[test]
300    fn test_empty_hash() {
301        let mut empty = MuHash::new();
302        assert_eq!(empty.finalize(), EMPTY_MUHASH);
303    }
304
305    #[test]
306    fn test_new_pre_computed() {
307        let expected = "b557f7cfc13cf9abc31374832715e7bff2cf5859897523337a0ead9dde012974";
308        let mut acc = MuHash::new();
309        acc.add_element(&element_from_byte(0));
310        acc.add_element(&element_from_byte(1));
311        acc.remove_element(&element_from_byte(2));
312        assert_eq!(acc.finalize().to_string(), expected);
313    }
314
315    #[test]
316    fn test_serialize() {
317        let expected = [
318            50, 5, 73, 166, 198, 210, 31, 202, 37, 64, 219, 222, 57, 158, 121, 89, 67, 188, 211, 73, 217, 251, 250, 178, 135, 196, 39,
319            250, 122, 202, 56, 228, 146, 233, 249, 16, 68, 9, 255, 158, 152, 84, 168, 146, 121, 81, 181, 60, 96, 141, 114, 26, 127,
320            140, 164, 90, 87, 187, 24, 4, 187, 151, 135, 91, 9, 249, 103, 124, 91, 55, 72, 202, 43, 241, 196, 243, 201, 237, 141, 158,
321            166, 125, 185, 26, 201, 232, 80, 72, 3, 7, 248, 152, 116, 148, 44, 250, 108, 167, 175, 61, 128, 159, 48, 148, 28, 247, 22,
322            158, 40, 130, 41, 154, 93, 184, 199, 177, 0, 170, 212, 159, 61, 233, 131, 243, 16, 17, 246, 132, 114, 31, 155, 37, 25, 97,
323            107, 11, 100, 17, 23, 61, 12, 218, 176, 129, 173, 148, 221, 6, 152, 157, 112, 106, 90, 5, 215, 0, 133, 133, 41, 241, 217,
324            237, 6, 202, 106, 252, 196, 244, 209, 141, 220, 236, 40, 221, 219, 122, 222, 96, 27, 189, 60, 69, 150, 124, 29, 78, 206,
325            249, 146, 179, 191, 11, 187, 178, 48, 114, 127, 155, 74, 137, 140, 109, 182, 88, 192, 120, 71, 141, 197, 93, 178, 179,
326            254, 252, 167, 251, 245, 77, 112, 186, 216, 30, 239, 147, 168, 67, 89, 96, 14, 102, 165, 187, 163, 232, 51, 77, 117, 134,
327            160, 254, 89, 201, 57, 113, 76, 137, 99, 101, 233, 35, 46, 213, 124, 38, 247, 12, 125, 203, 220, 54, 114, 68, 242, 192,
328            107, 216, 226, 140, 66, 78, 65, 166, 255, 4, 2, 89, 247, 184, 204, 145, 54, 105, 210, 209, 195, 248, 63, 207, 199, 218,
329            253, 92, 150, 190, 212, 216, 23, 121, 18, 14, 27, 35, 191, 203, 50, 238, 10, 190, 192, 47, 210, 100, 58, 38, 201, 103,
330            199, 59, 32, 72, 37, 221, 104, 87, 120, 222, 61, 144, 107, 107, 114, 27, 152, 88, 232, 113, 97, 184, 69, 116, 17, 59, 245,
331            151, 99, 140, 167, 85, 47, 28, 51, 198, 140, 233, 21, 92, 211, 79, 1, 68, 217, 131, 37, 19, 5, 107, 51, 219, 141, 109,
332            155, 196, 183, 148, 16, 113, 227, 141, 202, 215, 191, 50, 241, 244,
333        ];
334
335        let mut check = MuHash::new();
336        check.add_element(&element_from_byte(1));
337        check.add_element(&element_from_byte(2));
338        let ser = check.serialize();
339        assert_eq!(ser, expected);
340
341        let mut deserialized = MuHash::deserialize(ser).unwrap();
342        assert_eq!(deserialized.finalize(), check.finalize());
343        let overflow = [255; 384];
344        assert_eq!(MuHash::deserialize(overflow).unwrap_err(), OverflowError);
345
346        let mut zeroed = MuHash::new();
347        zeroed.numerator *= U3072::zero();
348        assert_eq!(zeroed.serialize(), [0u8; 384]);
349
350        let mut deserialized = MuHash::deserialize(zeroed.serialize()).unwrap();
351        zeroed.normalize();
352        deserialized.normalize();
353        assert_eq!(zeroed.numerator, deserialized.numerator);
354    }
355
356    #[test]
357    fn test_vectors_hash() {
358        for test in TEST_VECTORS {
359            let mut m = MuHash::new();
360            m.add_element(test.data);
361            assert_eq!(m.finalize(), test.multiset_hash);
362        }
363    }
364    #[test]
365    fn test_vectors_add_remove() {
366        let mut m = MuHash::new();
367
368        for test in TEST_VECTORS {
369            m.add_element(test.data);
370            assert_eq!(m.finalize(), test.cumulative_hash);
371        }
372
373        for (i, test) in TEST_VECTORS.iter().enumerate().rev() {
374            m.remove_element(test.data);
375            if i != 0 {
376                assert_eq!(m.finalize(), TEST_VECTORS[i - 1].cumulative_hash);
377            }
378        }
379        assert_eq!(m.finalize(), EMPTY_MUHASH);
380    }
381
382    #[test]
383    fn test_vectors_combine_subtract() {
384        let mut m1 = MuHash::new();
385        let mut m2 = MuHash::new();
386        for test in TEST_VECTORS {
387            m1.add_element(test.data);
388            m2.remove_element(test.data);
389        }
390        let m1_orig = m1.clone();
391        m1.combine(&m2);
392        m2.combine(&m1_orig);
393        assert_eq!(m1.finalize(), m2.finalize());
394        assert_eq!(m1.finalize(), EMPTY_MUHASH);
395    }
396
397    #[test]
398    fn test_vectors_commutativity() {
399        // Here we first remove an element from an empty multiset, and then add some other
400        // elements, and then we create a new empty multiset, then we add the same elements
401        // we added to the previous multiset, and then we remove the same element we remove
402        // the same element we removed from the previous multiset. According to commutativity
403        // laws, the result should be the same.
404        for (remove_index, _) in TEST_VECTORS.iter().enumerate() {
405            let remove_data = TEST_VECTORS[remove_index].data;
406            let mut m1 = MuHash::new();
407            let mut m2 = MuHash::new();
408            m1.remove_element(remove_data);
409            for (i, test) in TEST_VECTORS.iter().enumerate() {
410                if i != remove_index {
411                    m1.add_element(test.data);
412                    m2.add_element(test.data);
413                }
414            }
415            m2.remove_element(remove_data);
416            assert_eq!(m1.finalize(), m2.finalize());
417        }
418    }
419
420    #[test]
421    fn test_parse_muhash_fail() {
422        let mut serialized = [255; 384];
423        serialized[0..3].copy_from_slice(&[155, 40, 239]);
424
425        assert_eq!(MuHash::deserialize(serialized).unwrap_err(), OverflowError);
426
427        serialized[0] = 0;
428        let _ = MuHash::deserialize(serialized).unwrap();
429    }
430
431    #[test]
432    fn test_muhash_add_remove() {
433        const LOOPS: usize = 1024;
434        let mut rng = ChaCha8Rng::seed_from_u64(42);
435        let mut set = MuHash::new();
436        let list: Vec<_> = (0..LOOPS)
437            .map(|_| {
438                let mut data = [0u8; 100];
439                rng.fill(&mut data[..]);
440                set.add_element(&data);
441                data
442            })
443            .collect();
444
445        assert_ne!(set.finalize(), EMPTY_MUHASH);
446
447        for elem in list.iter() {
448            set.remove_element(elem);
449        }
450
451        assert_eq!(set.finalize(), EMPTY_MUHASH);
452    }
453}