stable_hash/
lib.rs

1//! This crate provides a stable, structured hash with backward compatibility features.
2//! What does that mean?
3//!  * Stable: The value of the hash will not change across minor versions of this library,
4//!    even when the compiler, process, architecture, or std lib does change.
5//!  * Structured: Hashes structs, rather than streams
6//!  * Backward compatibility: It is possible to make limited changes to a struct's schema
7//!    without changing the value of the hash. One change is that you can add new fields.
8//!    This is accomplished by skipping default values when hashing. For example, the values
9//!    None, 0, false, and vec![] do not contribute to the hash. Therefore structs Old { a: 1 }
10//!    and New { a: 1, b: None } hash to the same value. Another feature enabling backward compatibility
11//!    is that the size of an integer can be increased without changing the value. For example,
12//!    Old { a: 1u16 } and New { a: 1u32 } hash to the same value. Note that even though two structs
13//!    with different schemas are allowed to collide, two structs with the same schema never collide
14//!    (where collide is defined as contribution to the hash is injective in respect to the encoding. It is
15//!    still possible to find collisions in the final output, especially for the non-cryptographic version)
16
17pub mod crypto;
18pub mod fast;
19mod impls;
20mod macros;
21pub mod prelude;
22pub mod utils;
23mod verification;
24use prelude::*;
25
26/// Like Hasher, but consistent across:
27/// * builds (independent of rustc version or std implementation details)
28/// * platforms (eg: 32 bit & 64 bit, x68 and ARM)
29/// * processes (multiple runs of the same program)
30pub trait StableHasher {
31    /// The type of value returned when finishing
32    type Out;
33
34    /// The type used when identifying where a value is located in a struct
35    type Addr: FieldAddress;
36
37    /// Create an empty hasher
38    fn new() -> Self;
39
40    /// Add a single field to the hash
41    fn write(&mut self, field_address: Self::Addr, bytes: &[u8]);
42
43    /// Adds all fields from another hasher
44    fn mixin(&mut self, other: &Self);
45
46    /// Removes all fields from another hasher
47    fn unmix(&mut self, _other: &Self) {
48        unimplemented!()
49    }
50
51    /// Finalize the digest
52    fn finish(&self) -> Self::Out;
53
54    /// Used when serializing
55    type Bytes: AsRef<[u8]>;
56
57    /// Serialize
58    fn to_bytes(&self) -> Self::Bytes;
59
60    /// Deserialize
61    fn from_bytes(bytes: Self::Bytes) -> Self;
62}
63
64/// Like Hash, but consistent across:
65/// * builds (independent of rustc version or std implementation details)
66/// * platforms (eg: 32 bit & 64 bit, x68 and ARM)
67/// * processes (multiple runs of the same program)
68///
69/// For examples of best practices when implementing:
70/// See also d3ba3adc-6e9b-4586-a7e7-6b542df39462
71pub trait StableHash {
72    fn stable_hash<H: StableHasher>(&self, field_address: H::Addr, state: &mut H);
73}
74
75/// Tracks the path from the root of a struct to a member value. For example,
76/// within the value vec![ { num: 0, string: "Alice" }, { num: 1, string: "Bob" } ],
77/// the value Alice exists at the path:
78///
79///    FieldAddress::root()  // Vec
80///       .child(0)          // [0]
81///       .child(1)          // .string
82///
83/// Because default values do not contribute to the final hash in order to support
84/// backward compatibility, this concept is necessary to disambiguate in cases where default
85/// values exist in a struct with multiple fields. For example given the struct:
86///
87/// Struct {
88///    a: 0,
89///    b: 1,
90/// }
91///
92/// and
93///
94/// Struct {
95///    a: 1,
96///    b: 0,
97/// }
98///
99/// if we naively write out the struct as a series of fields without addresses each would produce
100/// the following encoded stream (default values omitted):
101///
102/// {1}
103/// {1}
104///
105/// But, with field addresses you get this encoding instead:
106///
107/// {(1, 0)}
108/// {(0, 1)}
109///
110/// which fixes the collision.
111pub trait FieldAddress: Sized {
112    /// The starting value
113    fn root() -> Self;
114    /// A nesting along the path. When calling this function,
115    /// a consistent number should be supplied to identify a given field.
116    /// To maintain backward compatibility, this number should remain consistent
117    /// even as fields are added, removed, or re-ordered.
118    fn child(&self, number: u64) -> Self;
119    /// This one is tricky, as it involves hashing a set online.
120    /// In this case, each member of the set has the same FieldAddress, but work must
121    /// be done to relate multiple field addresses within the same set. The first return
122    /// value is for a member of the set, and the second for relating members within the set.
123    /// This is confusing. Consider the following example _without_ having two values returned
124    /// here:
125    ///
126    /// { ("a", 1), ("b", 2) }
127    /// { ("b", 1), ("a", 2) }
128    ///
129    /// See that these collide unless we do something to "relate" children within different
130    /// members of the same set even while the members must have the same address to keep the
131    /// set unordered.
132    ///
133    /// Understand ye, and despair. The other option was to sort the set. But, this has two
134    /// drawbacks. First, it would require `StableHash: Ord`. (Ironically, unordered sets like
135    /// HashMap and HashSet where this is relevant do not implement Ord... so this is a no-go but
136    /// even if they did we still would prefer to not have this constraint). The second problem
137    /// with sorting is that it is less online. Within the graph-node Proof of Indexing (the flagship
138    /// use-case which this crate was implemented for) we need to be able to add and remove items
139    /// from very large unordered sets without re-sorting and re-calculating everything up to the root.
140    /// This ability becomes necessary when indexing off-chain data sources with unreliable availability.
141    ///
142    /// Please avoid implementing or calling this function unless you know what you are doing.
143    /// See also a817fb02-7c77-41d6-98e4-dee123884287
144    fn unordered(&self) -> (Self, Self);
145}
146
147pub fn fast_stable_hash<T: StableHash>(value: &T) -> u128 {
148    profile_fn!(fast_stable_hash);
149    generic_stable_hash::<T, crate::fast::FastStableHasher>(value)
150}
151
152pub fn crypto_stable_hash<T: StableHash>(value: &T) -> [u8; 32] {
153    profile_fn!(crypto_stable_hash);
154    generic_stable_hash::<T, crate::crypto::CryptoStableHasher>(value)
155}
156
157#[cfg(test)]
158mod tests {
159    use std::fmt::Debug;
160
161    use rand::thread_rng as rng;
162    use rand::Rng as _;
163
164    use crate::crypto::CryptoStableHasher;
165    use crate::fast::FastStableHasher;
166    use crate::StableHasher;
167
168    #[test]
169    fn unmix_fast() {
170        unmix_fuzz(1000, FastStableHasher::rand);
171    }
172
173    #[test]
174    fn unmix_crypto() {
175        unmix_fuzz(30, CryptoStableHasher::rand);
176    }
177
178    fn unmix_fuzz<T, F>(count: u32, f: F)
179    where
180        F: Fn() -> T,
181        T: StableHasher + Eq + Debug + Clone,
182    {
183        let rand_vec = || {
184            let mut v = Vec::new();
185            for _ in 0..rng().gen_range(0..15) {
186                v.push(f());
187            }
188            v
189        };
190        let take_rand = |v: &mut Vec<T>| {
191            if v.len() == 0 {
192                return None;
193            }
194            let i = rng().gen_range(0..v.len());
195            Some(v.swap_remove(i))
196        };
197
198        for _ in 0..count {
199            let mut mixins = rand_vec();
200            let mut mixouts = Vec::<T>::new();
201
202            let mut mixin_only = T::new();
203            let mut complete = T::new();
204
205            while mixins.len() + mixouts.len() > 0 {
206                if rng().gen() {
207                    if let Some(mixin) = take_rand(&mut mixins) {
208                        // Include duplicates sometimes to demonstrate this is a multiset.
209                        if rng().gen_range(0..5) == 0 {
210                            mixins.push(mixin.clone());
211                        }
212                        complete.mixin(&mixin);
213                        if rng().gen() {
214                            mixin_only.mixin(&mixin);
215                        } else {
216                            mixouts.push(mixin);
217                        }
218                    }
219                } else {
220                    if let Some(mixout) = take_rand(&mut mixouts) {
221                        complete.unmix(&mixout);
222                    }
223                }
224            }
225
226            assert_eq!(complete, mixin_only);
227        }
228    }
229}