stable_hash/lib.rs
1//! This crate provides a stable, structured hash with backward compatibility features.
2//! What does that mean?
3//! * Stable: The value of the hash will not change across minor versions of this library,
4//! even when the compiler, process, architecture, or std lib does change.
5//! * Structured: Hashes structs, rather than streams
6//! * Backward compatibility: It is possible to make limited changes to a struct's schema
7//! without changing the value of the hash. One change is that you can add new fields.
8//! This is accomplished by skipping default values when hashing. For example, the values
9//! None, 0, false, and vec![] do not contribute to the hash. Therefore structs Old { a: 1 }
10//! and New { a: 1, b: None } hash to the same value. Another feature enabling backward compatibility
11//! is that the size of an integer can be increased without changing the value. For example,
12//! Old { a: 1u16 } and New { a: 1u32 } hash to the same value. Note that even though two structs
13//! with different schemas are allowed to collide, two structs with the same schema never collide
14//! (where collide is defined as contribution to the hash is injective in respect to the encoding. It is
15//! still possible to find collisions in the final output, especially for the non-cryptographic version)
16
17pub mod crypto;
18pub mod fast;
19mod impls;
20mod macros;
21pub mod prelude;
22pub mod utils;
23mod verification;
24use prelude::*;
25
26/// Like Hasher, but consistent across:
27/// * builds (independent of rustc version or std implementation details)
28/// * platforms (eg: 32 bit & 64 bit, x68 and ARM)
29/// * processes (multiple runs of the same program)
30pub trait StableHasher {
31 /// The type of value returned when finishing
32 type Out;
33
34 /// The type used when identifying where a value is located in a struct
35 type Addr: FieldAddress;
36
37 /// Create an empty hasher
38 fn new() -> Self;
39
40 /// Add a single field to the hash
41 fn write(&mut self, field_address: Self::Addr, bytes: &[u8]);
42
43 /// Adds all fields from another hasher
44 fn mixin(&mut self, other: &Self);
45
46 /// Removes all fields from another hasher
47 fn unmix(&mut self, _other: &Self) {
48 unimplemented!()
49 }
50
51 /// Finalize the digest
52 fn finish(&self) -> Self::Out;
53
54 /// Used when serializing
55 type Bytes: AsRef<[u8]>;
56
57 /// Serialize
58 fn to_bytes(&self) -> Self::Bytes;
59
60 /// Deserialize
61 fn from_bytes(bytes: Self::Bytes) -> Self;
62}
63
64/// Like Hash, but consistent across:
65/// * builds (independent of rustc version or std implementation details)
66/// * platforms (eg: 32 bit & 64 bit, x68 and ARM)
67/// * processes (multiple runs of the same program)
68///
69/// For examples of best practices when implementing:
70/// See also d3ba3adc-6e9b-4586-a7e7-6b542df39462
71pub trait StableHash {
72 fn stable_hash<H: StableHasher>(&self, field_address: H::Addr, state: &mut H);
73}
74
75/// Tracks the path from the root of a struct to a member value. For example,
76/// within the value vec![ { num: 0, string: "Alice" }, { num: 1, string: "Bob" } ],
77/// the value Alice exists at the path:
78///
79/// FieldAddress::root() // Vec
80/// .child(0) // [0]
81/// .child(1) // .string
82///
83/// Because default values do not contribute to the final hash in order to support
84/// backward compatibility, this concept is necessary to disambiguate in cases where default
85/// values exist in a struct with multiple fields. For example given the struct:
86///
87/// Struct {
88/// a: 0,
89/// b: 1,
90/// }
91///
92/// and
93///
94/// Struct {
95/// a: 1,
96/// b: 0,
97/// }
98///
99/// if we naively write out the struct as a series of fields without addresses each would produce
100/// the following encoded stream (default values omitted):
101///
102/// {1}
103/// {1}
104///
105/// But, with field addresses you get this encoding instead:
106///
107/// {(1, 0)}
108/// {(0, 1)}
109///
110/// which fixes the collision.
111pub trait FieldAddress: Sized {
112 /// The starting value
113 fn root() -> Self;
114 /// A nesting along the path. When calling this function,
115 /// a consistent number should be supplied to identify a given field.
116 /// To maintain backward compatibility, this number should remain consistent
117 /// even as fields are added, removed, or re-ordered.
118 fn child(&self, number: u64) -> Self;
119 /// This one is tricky, as it involves hashing a set online.
120 /// In this case, each member of the set has the same FieldAddress, but work must
121 /// be done to relate multiple field addresses within the same set. The first return
122 /// value is for a member of the set, and the second for relating members within the set.
123 /// This is confusing. Consider the following example _without_ having two values returned
124 /// here:
125 ///
126 /// { ("a", 1), ("b", 2) }
127 /// { ("b", 1), ("a", 2) }
128 ///
129 /// See that these collide unless we do something to "relate" children within different
130 /// members of the same set even while the members must have the same address to keep the
131 /// set unordered.
132 ///
133 /// Understand ye, and despair. The other option was to sort the set. But, this has two
134 /// drawbacks. First, it would require `StableHash: Ord`. (Ironically, unordered sets like
135 /// HashMap and HashSet where this is relevant do not implement Ord... so this is a no-go but
136 /// even if they did we still would prefer to not have this constraint). The second problem
137 /// with sorting is that it is less online. Within the graph-node Proof of Indexing (the flagship
138 /// use-case which this crate was implemented for) we need to be able to add and remove items
139 /// from very large unordered sets without re-sorting and re-calculating everything up to the root.
140 /// This ability becomes necessary when indexing off-chain data sources with unreliable availability.
141 ///
142 /// Please avoid implementing or calling this function unless you know what you are doing.
143 /// See also a817fb02-7c77-41d6-98e4-dee123884287
144 fn unordered(&self) -> (Self, Self);
145}
146
147pub fn fast_stable_hash<T: StableHash>(value: &T) -> u128 {
148 profile_fn!(fast_stable_hash);
149 generic_stable_hash::<T, crate::fast::FastStableHasher>(value)
150}
151
152pub fn crypto_stable_hash<T: StableHash>(value: &T) -> [u8; 32] {
153 profile_fn!(crypto_stable_hash);
154 generic_stable_hash::<T, crate::crypto::CryptoStableHasher>(value)
155}
156
157#[cfg(test)]
158mod tests {
159 use std::fmt::Debug;
160
161 use rand::thread_rng as rng;
162 use rand::Rng as _;
163
164 use crate::crypto::CryptoStableHasher;
165 use crate::fast::FastStableHasher;
166 use crate::StableHasher;
167
168 #[test]
169 fn unmix_fast() {
170 unmix_fuzz(1000, FastStableHasher::rand);
171 }
172
173 #[test]
174 fn unmix_crypto() {
175 unmix_fuzz(30, CryptoStableHasher::rand);
176 }
177
178 fn unmix_fuzz<T, F>(count: u32, f: F)
179 where
180 F: Fn() -> T,
181 T: StableHasher + Eq + Debug + Clone,
182 {
183 let rand_vec = || {
184 let mut v = Vec::new();
185 for _ in 0..rng().gen_range(0..15) {
186 v.push(f());
187 }
188 v
189 };
190 let take_rand = |v: &mut Vec<T>| {
191 if v.len() == 0 {
192 return None;
193 }
194 let i = rng().gen_range(0..v.len());
195 Some(v.swap_remove(i))
196 };
197
198 for _ in 0..count {
199 let mut mixins = rand_vec();
200 let mut mixouts = Vec::<T>::new();
201
202 let mut mixin_only = T::new();
203 let mut complete = T::new();
204
205 while mixins.len() + mixouts.len() > 0 {
206 if rng().gen() {
207 if let Some(mixin) = take_rand(&mut mixins) {
208 // Include duplicates sometimes to demonstrate this is a multiset.
209 if rng().gen_range(0..5) == 0 {
210 mixins.push(mixin.clone());
211 }
212 complete.mixin(&mixin);
213 if rng().gen() {
214 mixin_only.mixin(&mixin);
215 } else {
216 mixouts.push(mixin);
217 }
218 }
219 } else {
220 if let Some(mixout) = take_rand(&mut mixouts) {
221 complete.unmix(&mixout);
222 }
223 }
224 }
225
226 assert_eq!(complete, mixin_only);
227 }
228 }
229}