vrf_wasm/hash.rs
1// Copyright (c) 2022, Mysten Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4//! This module contains a selection of cryptographic hash functions implementing a common [HashFunction] trait.
5//!
6//! # Example
7//! ```
8//! # use vrf_wasm::*;
9//! let digest1 = Sha256::digest(b"Hello, world!");
10//!
11//! let mut hash_function = Sha256::default();
12//! hash_function.update(b"Hello, ");
13//! hash_function.update(b"world!");
14//! let digest2 = hash_function.finalize();
15//!
16//! assert_eq!(digest1, digest2);
17//! ```
18
19use core::fmt::Debug;
20use digest::OutputSizeUser;
21use generic_array::GenericArray;
22use serde::{Deserialize, Serialize};
23use serde_with::serde_as;
24use std::fmt;
25
26use crate::encoding::{Base64, Encoding};
27use crate::groups::ristretto255::RistrettoPoint;
28use crate::groups::HashToGroupElement;
29
30/// Represents a digest of `DIGEST_LEN` bytes.
31#[serde_as]
32#[derive(Hash, PartialEq, Eq, Clone, Serialize, Deserialize, Ord, PartialOrd, Copy)]
33pub struct Digest<const DIGEST_LEN: usize> {
34 #[serde_as(as = "[_; DIGEST_LEN]")]
35 pub digest: [u8; DIGEST_LEN],
36}
37
38impl<const DIGEST_LEN: usize> Digest<DIGEST_LEN> {
39 /// Create a new digest containing the given bytes
40 pub fn new(digest: [u8; DIGEST_LEN]) -> Self {
41 Digest { digest }
42 }
43
44 /// Copy the digest into a new vector.
45 pub fn to_vec(&self) -> Vec<u8> {
46 self.digest.to_vec()
47 }
48
49 /// The size of this digest in bytes.
50 pub fn size(&self) -> usize {
51 DIGEST_LEN
52 }
53}
54
55impl<const DIGEST_LEN: usize> fmt::Debug for Digest<DIGEST_LEN> {
56 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
57 write!(f, "{}", Base64::encode(self.digest))
58 }
59}
60
61impl<const DIGEST_LEN: usize> fmt::Display for Digest<DIGEST_LEN> {
62 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
63 write!(f, "{}", Base64::encode(self.digest))
64 }
65}
66
67impl<const DIGEST_LEN: usize> AsRef<[u8]> for Digest<DIGEST_LEN> {
68 fn as_ref(&self) -> &[u8] {
69 self.digest.as_ref()
70 }
71}
72
73impl<const DIGEST_LEN: usize> From<Digest<DIGEST_LEN>> for [u8; DIGEST_LEN] {
74 fn from(digest: Digest<DIGEST_LEN>) -> Self {
75 digest.digest
76 }
77}
78
79/// Trait implemented by hash functions providing a output of fixed length
80pub trait HashFunction<const DIGEST_LENGTH: usize>: Default {
81 /// The length of this hash functions digests in bytes.
82 const OUTPUT_SIZE: usize = DIGEST_LENGTH;
83
84 /// Create a new hash function of the given type
85 fn new() -> Self {
86 Self::default()
87 }
88
89 /// Process the given data, and update the internal of the hash function.
90 fn update<Data: AsRef<[u8]>>(&mut self, data: Data);
91
92 /// Retrieve result and consume hash function.
93 fn finalize(self) -> Digest<DIGEST_LENGTH>;
94
95 /// Compute the digest of the given data and consume the hash function.
96 fn digest<Data: AsRef<[u8]>>(data: Data) -> Digest<DIGEST_LENGTH> {
97 let mut h = Self::default();
98 h.update(data);
99 h.finalize()
100 }
101
102 /// Compute a single digest from all slices in the iterator in order and consume the hash function.
103 fn digest_iterator<K: AsRef<[u8]>, I: Iterator<Item = K>>(iter: I) -> Digest<DIGEST_LENGTH> {
104 let mut h = Self::default();
105 iter.for_each(|item| h.update(item));
106 h.finalize()
107 }
108}
109
110/// This trait is implemented by all messages that can be hashed.
111pub trait Hash<const DIGEST_LEN: usize> {
112 /// The type of the digest when this is hashed.
113 type TypedDigest: Into<Digest<DIGEST_LEN>> + Eq + std::hash::Hash + Copy + Debug;
114
115 fn digest(&self) -> Self::TypedDigest;
116}
117
118/// This wraps a [digest::Digest] as a [HashFunction].
119#[derive(Default)]
120pub struct HashFunctionWrapper<Variant, const DIGEST_LEN: usize>(Variant);
121
122/// This trait allows using a [HashFunctionWrapper] where a [digest::Digest] was expected.
123pub trait ReverseWrapper {
124 type Variant: digest::core_api::CoreProxy + OutputSizeUser;
125}
126
127impl<Variant: digest::core_api::CoreProxy + OutputSizeUser, const DIGEST_LEN: usize> ReverseWrapper
128 for HashFunctionWrapper<Variant, DIGEST_LEN>
129{
130 type Variant = Variant;
131}
132
133impl<Variant: digest::Digest + Default, const DIGEST_LEN: usize> HashFunction<DIGEST_LEN>
134 for HashFunctionWrapper<Variant, DIGEST_LEN>
135{
136 fn update<Data: AsRef<[u8]>>(&mut self, data: Data) {
137 self.0.update(data);
138 }
139
140 fn finalize(self) -> Digest<DIGEST_LEN> {
141 let mut digest = [0u8; DIGEST_LEN];
142 self.0
143 .finalize_into(GenericArray::from_mut_slice(&mut digest));
144 Digest { digest }
145 }
146}
147
148// Impl std::io::Write for HashFunctionWrapper. Needed for compatibility in Sui.
149impl<Variant: digest::Digest + Default, const DIGEST_LEN: usize> std::io::Write
150 for HashFunctionWrapper<Variant, DIGEST_LEN>
151{
152 fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
153 self.update(buf);
154 Ok(buf.len())
155 }
156
157 fn flush(&mut self) -> std::io::Result<()> {
158 Ok(())
159 }
160}
161
162/// The [SHA-2](https://en.wikipedia.org/wiki/SHA-2) hash function with 256 bit digests.
163pub type Sha256 = HashFunctionWrapper<sha2::Sha256, 32>;
164
165/// The [SHA-3](https://en.wikipedia.org/wiki/SHA-3) hash function with 256 bit digests.
166pub type Sha3_256 = HashFunctionWrapper<sha3::Sha3_256, 32>;
167
168/// The [SHA-512](https://en.wikipedia.org/wiki/SHA-2) hash function with 512 bit digests.
169pub type Sha512 = HashFunctionWrapper<sha2::Sha512, 64>;
170
171/// The [SHA-3](https://en.wikipedia.org/wiki/SHA-3) hash function with 512 bit digests.
172pub type Sha3_512 = HashFunctionWrapper<sha3::Sha3_512, 64>;
173
174/// The [KECCAK](https://keccak.team/files/Keccak-reference-3.0.pdf) hash function with 256 bit digests.
175pub type Keccak256 = HashFunctionWrapper<sha3::Keccak256, 32>;
176
177// /// The [BLAKE2-256](https://en.wikipedia.org/wiki/BLAKE_(hash_function)#BLAKE2) hash function with 256 bit digests.
178// pub type Blake2b256 = HashFunctionWrapper<blake2::Blake2b<typenum::U32>, 32>;
179
180/// A Multiset Hash is a homomorphic hash function, which hashes arbitrary multisets of objects such
181/// that the hash of the union of two multisets is easy to compute from the hashes of the two multisets.
182///
183/// The hash may be computed incrementally, adding items one at a time, and the order does not affect the
184/// result. The hash of two multisets can be compared by using the Eq trait impl'd for the given hash function,
185/// and the hash function should be collision resistant. Items may also be removed again.
186///
187/// See ["Incremental Multiset Hash Functions and Their Application to Memory Integrity Checking" by D. Clarke
188/// et al.](https://link.springer.com/chapter/10.1007/978-3-540-40061-5_12) for a discussion of this type of hash
189/// functions.
190///
191/// # Example
192/// ```
193/// use vrf_wasm::{EllipticCurveMultisetHash, MultisetHash};
194///
195/// let mut hash1 = EllipticCurveMultisetHash::default();
196/// hash1.insert(b"Hello");
197/// hash1.insert(b"World");
198///
199/// let mut hash2 = EllipticCurveMultisetHash::default();
200/// hash2.insert(b"World");
201/// hash2.insert(b"Hello");
202///
203/// assert_eq!(hash1, hash2);
204/// assert_eq!(hash1.digest(), hash2.digest());
205/// ```
206pub trait MultisetHash<const DIGEST_LENGTH: usize>: Eq {
207 /// Insert an item into this hash function.
208 fn insert<Data: AsRef<[u8]>>(&mut self, item: Data);
209
210 /// Insert multiple items into this hash function.
211 fn insert_all<It, Data>(&mut self, items: It)
212 where
213 It: IntoIterator<Item = Data>,
214 Data: AsRef<[u8]>;
215
216 /// Add all the elements of another hash function into this hash function.
217 fn union(&mut self, other: &Self);
218
219 // Note that the "remove" operation is safe even if an item has been removed
220 // more times than it has been inserted. To see why, consider the following
221 // example: Suppose an adversary has performed two sets of "insert(x)" and
222 // "remove(x)" operations resulting in the same hash, i.e., the sum of each set
223 // is \sum_x m_x H(x), where m_x is the difference between the number of times
224 // "x" was inserted and removed.
225 // Then, one can create two new sets with the same hash by taking the original
226 // sets and subtracting m_x H(x) from both sets for every item "x" such that m_x
227 // was negative in any of the original sets. Since we "subtract" (or actually
228 // insert) the same elements from both sets, the resulting hash will remain the
229 // same. Moreover, since none of the values of m_x in the new sets are negative,
230 // we can conclude that no item was removed more times than it was inserted in
231 // the new sets.
232 /// Remove an element from this hash function.
233 fn remove<Data: AsRef<[u8]>>(&mut self, item: Data);
234
235 /// Remove multiple items from this hash function.
236 fn remove_all<It, Data>(&mut self, items: It)
237 where
238 It: IntoIterator<Item = Data>,
239 Data: AsRef<[u8]>;
240
241 /// Generate a digest of the current state of this hash function.
242 fn digest(&self) -> Digest<DIGEST_LENGTH>;
243}
244
245/// `EllipticCurveMultisetHash` (ECMH) is a homomorphic multiset hash function. Concretely, each element is mapped
246/// to a point on an elliptic curve on which the DL problem is hard (the Ristretto group in Curve25519),
247/// and the hash is the sum of all such points.
248///
249/// For more information about the construction of ECMH and its security, see ["Elliptic Curve Multiset Hash" by J.
250/// Maitin-Shepard et al.](https://arxiv.org/abs/1601.06502).
251///
252/// Under the hood, it uses an Ristretto-flavoured Elligator 2 map to map a Sha512 hash of the provided
253/// data into points in the Ristretto group, and Sha256 to construct a digest from a serialization of
254/// the resulting RistrettoPoint, so digests are 32 bytes long.
255#[derive(Default, Clone, Serialize, Deserialize)]
256pub struct EllipticCurveMultisetHash {
257 accumulator: RistrettoPoint,
258}
259
260impl PartialEq for EllipticCurveMultisetHash {
261 fn eq(&self, other: &Self) -> bool {
262 self.accumulator == other.accumulator
263 }
264}
265
266impl Eq for EllipticCurveMultisetHash {}
267
268impl MultisetHash<32> for EllipticCurveMultisetHash {
269 fn insert<Data: AsRef<[u8]>>(&mut self, item: Data) {
270 self.accumulator += Self::hash_to_point(item);
271 }
272
273 fn insert_all<It, Data>(&mut self, items: It)
274 where
275 It: IntoIterator<Item = Data>,
276 Data: AsRef<[u8]>,
277 {
278 for i in items {
279 self.insert(i);
280 }
281 }
282
283 fn union(&mut self, other: &Self) {
284 self.accumulator += other.accumulator;
285 }
286
287 fn remove<Data: AsRef<[u8]>>(&mut self, item: Data) {
288 self.accumulator -= Self::hash_to_point(item);
289 }
290
291 fn remove_all<It, Data>(&mut self, items: It)
292 where
293 It: IntoIterator<Item = Data>,
294 Data: AsRef<[u8]>,
295 {
296 for i in items {
297 self.remove(i);
298 }
299 }
300
301 fn digest(&self) -> Digest<32> {
302 let serialized = &bincode::serialize(&self.accumulator).unwrap();
303 Sha256::digest(serialized)
304 }
305}
306
307impl EllipticCurveMultisetHash {
308 /// Hash the given item into a RistrettoPoint to be used by the insert and remove methods.
309 fn hash_to_point<Data: AsRef<[u8]>>(item: Data) -> RistrettoPoint {
310 RistrettoPoint::hash_to_group_element(item.as_ref())
311 }
312}
313
314impl Debug for EllipticCurveMultisetHash {
315 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
316 f.debug_struct("Accumulator").finish()
317 }
318}