vrf_wasm/
hash.rs

1// Copyright (c) 2022, Mysten Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4//! This module contains a selection of cryptographic hash functions implementing a common [HashFunction] trait.
5//!
6//! # Example
7//! ```
8//! # use vrf_wasm::*;
9//! let digest1 = Sha256::digest(b"Hello, world!");
10//!
11//! let mut hash_function = Sha256::default();
12//! hash_function.update(b"Hello, ");
13//! hash_function.update(b"world!");
14//! let digest2 = hash_function.finalize();
15//!
16//! assert_eq!(digest1, digest2);
17//! ```
18
19use core::fmt::Debug;
20use digest::OutputSizeUser;
21use generic_array::GenericArray;
22use serde::{Deserialize, Serialize};
23use serde_with::serde_as;
24use std::fmt;
25
26use crate::encoding::{Base64, Encoding};
27use crate::groups::ristretto255::RistrettoPoint;
28use crate::groups::HashToGroupElement;
29
30/// Represents a digest of `DIGEST_LEN` bytes.
31#[serde_as]
32#[derive(Hash, PartialEq, Eq, Clone, Serialize, Deserialize, Ord, PartialOrd, Copy)]
33pub struct Digest<const DIGEST_LEN: usize> {
34    #[serde_as(as = "[_; DIGEST_LEN]")]
35    pub digest: [u8; DIGEST_LEN],
36}
37
38impl<const DIGEST_LEN: usize> Digest<DIGEST_LEN> {
39    /// Create a new digest containing the given bytes
40    pub fn new(digest: [u8; DIGEST_LEN]) -> Self {
41        Digest { digest }
42    }
43
44    /// Copy the digest into a new vector.
45    pub fn to_vec(&self) -> Vec<u8> {
46        self.digest.to_vec()
47    }
48
49    /// The size of this digest in bytes.
50    pub fn size(&self) -> usize {
51        DIGEST_LEN
52    }
53}
54
55impl<const DIGEST_LEN: usize> fmt::Debug for Digest<DIGEST_LEN> {
56    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
57        write!(f, "{}", Base64::encode(self.digest))
58    }
59}
60
61impl<const DIGEST_LEN: usize> fmt::Display for Digest<DIGEST_LEN> {
62    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
63        write!(f, "{}", Base64::encode(self.digest))
64    }
65}
66
67impl<const DIGEST_LEN: usize> AsRef<[u8]> for Digest<DIGEST_LEN> {
68    fn as_ref(&self) -> &[u8] {
69        self.digest.as_ref()
70    }
71}
72
73impl<const DIGEST_LEN: usize> From<Digest<DIGEST_LEN>> for [u8; DIGEST_LEN] {
74    fn from(digest: Digest<DIGEST_LEN>) -> Self {
75        digest.digest
76    }
77}
78
79/// Trait implemented by hash functions providing a output of fixed length
80pub trait HashFunction<const DIGEST_LENGTH: usize>: Default {
81    /// The length of this hash functions digests in bytes.
82    const OUTPUT_SIZE: usize = DIGEST_LENGTH;
83
84    /// Create a new hash function of the given type
85    fn new() -> Self {
86        Self::default()
87    }
88
89    /// Process the given data, and update the internal of the hash function.
90    fn update<Data: AsRef<[u8]>>(&mut self, data: Data);
91
92    /// Retrieve result and consume hash function.
93    fn finalize(self) -> Digest<DIGEST_LENGTH>;
94
95    /// Compute the digest of the given data and consume the hash function.
96    fn digest<Data: AsRef<[u8]>>(data: Data) -> Digest<DIGEST_LENGTH> {
97        let mut h = Self::default();
98        h.update(data);
99        h.finalize()
100    }
101
102    /// Compute a single digest from all slices in the iterator in order and consume the hash function.
103    fn digest_iterator<K: AsRef<[u8]>, I: Iterator<Item = K>>(iter: I) -> Digest<DIGEST_LENGTH> {
104        let mut h = Self::default();
105        iter.for_each(|item| h.update(item));
106        h.finalize()
107    }
108}
109
110/// This trait is implemented by all messages that can be hashed.
111pub trait Hash<const DIGEST_LEN: usize> {
112    /// The type of the digest when this is hashed.
113    type TypedDigest: Into<Digest<DIGEST_LEN>> + Eq + std::hash::Hash + Copy + Debug;
114
115    fn digest(&self) -> Self::TypedDigest;
116}
117
118/// This wraps a [digest::Digest] as a [HashFunction].
119#[derive(Default)]
120pub struct HashFunctionWrapper<Variant, const DIGEST_LEN: usize>(Variant);
121
122/// This trait allows using a [HashFunctionWrapper] where a [digest::Digest] was expected.
123pub trait ReverseWrapper {
124    type Variant: digest::core_api::CoreProxy + OutputSizeUser;
125}
126
127impl<Variant: digest::core_api::CoreProxy + OutputSizeUser, const DIGEST_LEN: usize> ReverseWrapper
128    for HashFunctionWrapper<Variant, DIGEST_LEN>
129{
130    type Variant = Variant;
131}
132
133impl<Variant: digest::Digest + Default, const DIGEST_LEN: usize> HashFunction<DIGEST_LEN>
134    for HashFunctionWrapper<Variant, DIGEST_LEN>
135{
136    fn update<Data: AsRef<[u8]>>(&mut self, data: Data) {
137        self.0.update(data);
138    }
139
140    fn finalize(self) -> Digest<DIGEST_LEN> {
141        let mut digest = [0u8; DIGEST_LEN];
142        self.0
143            .finalize_into(GenericArray::from_mut_slice(&mut digest));
144        Digest { digest }
145    }
146}
147
148// Impl std::io::Write for HashFunctionWrapper. Needed for compatibility in Sui.
149impl<Variant: digest::Digest + Default, const DIGEST_LEN: usize> std::io::Write
150    for HashFunctionWrapper<Variant, DIGEST_LEN>
151{
152    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
153        self.update(buf);
154        Ok(buf.len())
155    }
156
157    fn flush(&mut self) -> std::io::Result<()> {
158        Ok(())
159    }
160}
161
162/// The [SHA-2](https://en.wikipedia.org/wiki/SHA-2) hash function with 256 bit digests.
163pub type Sha256 = HashFunctionWrapper<sha2::Sha256, 32>;
164
165/// The [SHA-3](https://en.wikipedia.org/wiki/SHA-3) hash function with 256 bit digests.
166pub type Sha3_256 = HashFunctionWrapper<sha3::Sha3_256, 32>;
167
168/// The [SHA-512](https://en.wikipedia.org/wiki/SHA-2) hash function with 512 bit digests.
169pub type Sha512 = HashFunctionWrapper<sha2::Sha512, 64>;
170
171/// The [SHA-3](https://en.wikipedia.org/wiki/SHA-3) hash function with 512 bit digests.
172pub type Sha3_512 = HashFunctionWrapper<sha3::Sha3_512, 64>;
173
174/// The [KECCAK](https://keccak.team/files/Keccak-reference-3.0.pdf) hash function with 256 bit digests.
175pub type Keccak256 = HashFunctionWrapper<sha3::Keccak256, 32>;
176
177// /// The [BLAKE2-256](https://en.wikipedia.org/wiki/BLAKE_(hash_function)#BLAKE2) hash function with 256 bit digests.
178// pub type Blake2b256 = HashFunctionWrapper<blake2::Blake2b<typenum::U32>, 32>;
179
180/// A Multiset Hash is a homomorphic hash function, which hashes arbitrary multisets of objects such
181/// that the hash of the union of two multisets is easy to compute from the hashes of the two multisets.
182///
183/// The hash may be computed incrementally, adding items one at a time, and the order does not affect the
184/// result. The hash of two multisets can be compared by using the Eq trait impl'd for the given hash function,
185/// and the hash function should be collision resistant. Items may also be removed again.
186///
187/// See ["Incremental Multiset Hash Functions and Their Application to Memory Integrity Checking" by D. Clarke
188/// et al.](https://link.springer.com/chapter/10.1007/978-3-540-40061-5_12) for a discussion of this type of hash
189/// functions.
190///
191/// # Example
192/// ```
193/// use vrf_wasm::{EllipticCurveMultisetHash, MultisetHash};
194///
195/// let mut hash1 = EllipticCurveMultisetHash::default();
196/// hash1.insert(b"Hello");
197/// hash1.insert(b"World");
198///
199/// let mut hash2 = EllipticCurveMultisetHash::default();
200/// hash2.insert(b"World");
201/// hash2.insert(b"Hello");
202///
203/// assert_eq!(hash1, hash2);
204/// assert_eq!(hash1.digest(), hash2.digest());
205/// ```
206pub trait MultisetHash<const DIGEST_LENGTH: usize>: Eq {
207    /// Insert an item into this hash function.
208    fn insert<Data: AsRef<[u8]>>(&mut self, item: Data);
209
210    /// Insert multiple items into this hash function.
211    fn insert_all<It, Data>(&mut self, items: It)
212    where
213        It: IntoIterator<Item = Data>,
214        Data: AsRef<[u8]>;
215
216    /// Add all the elements of another hash function into this hash function.
217    fn union(&mut self, other: &Self);
218
219    // Note that the "remove" operation is safe even if an item has been removed
220    // more times than it has been inserted. To see why, consider the following
221    // example: Suppose an adversary has performed two sets of "insert(x)" and
222    // "remove(x)" operations resulting in the same hash, i.e., the sum of each set
223    // is \sum_x m_x H(x), where m_x is the difference between the number of times
224    // "x" was inserted and removed.
225    // Then, one can create two new sets with the same hash by taking the original
226    // sets and subtracting m_x H(x) from both sets for every item "x" such that m_x
227    // was negative in any of the original sets. Since we "subtract" (or actually
228    // insert) the same elements from both sets, the resulting hash will remain the
229    // same. Moreover, since none of the values of m_x in the new sets are negative,
230    // we can conclude that no item was removed more times than it was inserted in
231    // the new sets.
232    /// Remove an element from this hash function.
233    fn remove<Data: AsRef<[u8]>>(&mut self, item: Data);
234
235    /// Remove multiple items from this hash function.
236    fn remove_all<It, Data>(&mut self, items: It)
237    where
238        It: IntoIterator<Item = Data>,
239        Data: AsRef<[u8]>;
240
241    /// Generate a digest of the current state of this hash function.
242    fn digest(&self) -> Digest<DIGEST_LENGTH>;
243}
244
245/// `EllipticCurveMultisetHash` (ECMH) is a homomorphic multiset hash function. Concretely, each element is mapped
246/// to a point on an elliptic curve on which the DL problem is hard (the Ristretto group in Curve25519),
247/// and the hash is the sum of all such points.
248///
249/// For more information about the construction of ECMH and its security, see ["Elliptic Curve Multiset Hash" by J.
250/// Maitin-Shepard et al.](https://arxiv.org/abs/1601.06502).
251///
252/// Under the hood, it uses an Ristretto-flavoured Elligator 2 map to map a Sha512 hash of the provided
253/// data into points in the Ristretto group, and Sha256 to construct a digest from a serialization of
254/// the resulting RistrettoPoint, so digests are 32 bytes long.
255#[derive(Default, Clone, Serialize, Deserialize)]
256pub struct EllipticCurveMultisetHash {
257    accumulator: RistrettoPoint,
258}
259
260impl PartialEq for EllipticCurveMultisetHash {
261    fn eq(&self, other: &Self) -> bool {
262        self.accumulator == other.accumulator
263    }
264}
265
266impl Eq for EllipticCurveMultisetHash {}
267
268impl MultisetHash<32> for EllipticCurveMultisetHash {
269    fn insert<Data: AsRef<[u8]>>(&mut self, item: Data) {
270        self.accumulator += Self::hash_to_point(item);
271    }
272
273    fn insert_all<It, Data>(&mut self, items: It)
274    where
275        It: IntoIterator<Item = Data>,
276        Data: AsRef<[u8]>,
277    {
278        for i in items {
279            self.insert(i);
280        }
281    }
282
283    fn union(&mut self, other: &Self) {
284        self.accumulator += other.accumulator;
285    }
286
287    fn remove<Data: AsRef<[u8]>>(&mut self, item: Data) {
288        self.accumulator -= Self::hash_to_point(item);
289    }
290
291    fn remove_all<It, Data>(&mut self, items: It)
292    where
293        It: IntoIterator<Item = Data>,
294        Data: AsRef<[u8]>,
295    {
296        for i in items {
297            self.remove(i);
298        }
299    }
300
301    fn digest(&self) -> Digest<32> {
302        let serialized = &bincode::serialize(&self.accumulator).unwrap();
303        Sha256::digest(serialized)
304    }
305}
306
307impl EllipticCurveMultisetHash {
308    /// Hash the given item into a RistrettoPoint to be used by the insert and remove methods.
309    fn hash_to_point<Data: AsRef<[u8]>>(item: Data) -> RistrettoPoint {
310        RistrettoPoint::hash_to_group_element(item.as_ref())
311    }
312}
313
314impl Debug for EllipticCurveMultisetHash {
315    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
316        f.debug_struct("Accumulator").finish()
317    }
318}