rustc_stable_hash/
stable_hasher.rs

1//! Stable hasher adapted for cross-platform independent hash.
2
3use std::fmt;
4use std::hash::Hasher;
5
6#[cfg(test)]
7mod tests;
8
/// A [`Hasher`] extension meant to be driven by [`StableHasher`].
///
/// [`Hasher::finish`] can only ever hand back a `u64`. Implementors of this
/// trait choose their own output type through [`Self::Hash`] instead, which
/// helps when the hasher's natural representation is something else.
///
/// # Example
///
/// ```
/// use std::hash::Hasher;
/// use rustc_stable_hash::ExtendedHasher;
///
/// struct BogusHasher(u128);
///
/// impl Hasher for BogusHasher {
///     fn write(&mut self, a: &[u8]) {
///         # self.0 = a.iter().fold(0u128, |acc, a| acc + (*a as u128)) + self.0;
///         // ...
///     }
///
///     fn finish(&self) -> u64 {
///         self.0 as u64 // really bogus
///     }
/// }
///
/// impl ExtendedHasher for BogusHasher {
///     type Hash = u128;
///
///     fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
///         self.write(&bytes)
///     }
///
///     fn finish(self) -> Self::Hash {
///         self.0
///     }
/// }
/// ```
pub trait ExtendedHasher: Hasher {
    /// Output type produced by [`ExtendedHasher::finish`].
    type Hash;

    /// Feeds a small, fixed-size chunk of bytes into the hasher.
    ///
    /// This is the counterpart of [`Hasher::write`] for short inputs;
    /// implementors may override it with an optimized path. The provided
    /// implementation simply forwards `bytes` to [`Hasher::write`].
    fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
        Hasher::write(self, &bytes[..]);
    }

    /// Consumes the hasher and returns its final [`Self::Hash`] value.
    fn finish(self) -> Self::Hash;
}
58
59/// A Stable Hasher adapted for cross-platform independent hash.
60///
61/// When hashing something that ends up affecting properties like symbol names,
62/// we want these symbol names to be calculated independently of other factors
63/// like what architecture you're compiling *from*.
64///
65/// To that end we always convert integers to little-endian format before
66/// hashing and the architecture dependent `isize` and `usize` types are
67/// extended to 64 bits if needed.
68///
69/// # Example
70///
71/// ```
72/// use rustc_stable_hash::hashers::{StableSipHasher128, SipHasher128Hash};
73/// use rustc_stable_hash::{StableHasher, FromStableHash};
74/// use std::hash::Hasher;
75///
76/// struct Hash128([u64; 2]);
77/// impl FromStableHash for Hash128 {
78///     type Hash = SipHasher128Hash;
79///
80///     fn from(SipHasher128Hash(hash): SipHasher128Hash) -> Hash128 {
81///         Hash128(hash)
82///     }
83/// }
84///
85/// let mut hasher = StableSipHasher128::new();
86/// hasher.write_usize(0xFA);
87///
88/// let hash: Hash128 = hasher.finish();
89/// ```
90#[must_use]
91#[derive(Clone)]
92pub struct StableHasher<H: ExtendedHasher> {
93    state: H,
94}
95
/// Conversion from the raw result of a stable hashing operation into a
/// caller-chosen hash type.
///
/// # Example
///
/// ```
/// use rustc_stable_hash::{StableHasher, FromStableHash};
///
/// struct Hash128(u128);
///
/// impl FromStableHash for Hash128 {
///     type Hash = [u64; 2];
///
///     fn from(hash: [u64; 2]) -> Hash128 {
///         let upper: u128 = hash[0] as u128;
///         let lower: u128 = hash[1] as u128;
///
///         Hash128((upper << 64) | lower)
///     }
/// }
/// ```
pub trait FromStableHash
where
    Self: Sized,
{
    /// Raw hash representation accepted by [`FromStableHash::from`].
    type Hash;

    /// Builds a `Self` holding the processed hash from the finalized
    /// state of a [`StableHasher`].
    fn from(hash: Self::Hash) -> Self;
}
123
124impl<H: ExtendedHasher + Default> StableHasher<H> {
125    /// Creates a new [`StableHasher`].
126    ///
127    /// To be used with the [`Hasher`] implementation and [`StableHasher::finish`].
128    #[inline]
129    pub fn new() -> Self {
130        Default::default()
131    }
132}
133
134impl<H: ExtendedHasher + Default> Default for StableHasher<H> {
135    /// Creates a new [`StableHasher`].
136    ///
137    /// To be used with the [`Hasher`] implementation and [`StableHasher::finish`].
138    #[inline]
139    fn default() -> Self {
140        StableHasher {
141            state: Default::default(),
142        }
143    }
144}
145
146impl<H: ExtendedHasher> StableHasher<H> {
147    /// Creates a new [`StableHasher`] from an already created [`ExtendedHasher`].
148    ///
149    /// Useful when wanting to initialize a hasher with different parameters/keys.
150    ///
151    /// **Important**: Any use of the hasher before being given to a [`StableHasher`]
152    /// is not covered by this crate guarentees and will make the resulting hash
153    /// NOT platform independent.
154    #[inline]
155    pub fn with_hasher(state: H) -> Self {
156        StableHasher { state }
157    }
158
159    /// Returns the typed-hash value for the values written.
160    ///
161    /// The resulting typed-hash value is constructed from an
162    /// [`FromStableHash`] implemenation.
163    ///
164    /// To be used in-place of [`Hasher::finish`].
165    #[inline]
166    #[must_use]
167    pub fn finish<W: FromStableHash<Hash = H::Hash>>(self) -> W {
168        W::from(self.state.finish())
169    }
170}
171
172impl<H: ExtendedHasher + fmt::Debug> fmt::Debug for StableHasher<H> {
173    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
174        write!(f, "{:?}", self.state)
175    }
176}
177
178impl<H: ExtendedHasher> Hasher for StableHasher<H> {
179    /// Returns a combined hash.
180    ///
181    /// For greater precision use instead [`StableHasher::finish`].
182    fn finish(&self) -> u64 {
183        Hasher::finish(&self.state)
184    }
185
186    #[inline]
187    fn write(&mut self, bytes: &[u8]) {
188        self.state.write(bytes);
189    }
190
191    #[cfg(feature = "nightly")]
192    #[inline]
193    fn write_str(&mut self, s: &str) {
194        self.state.write_str(s);
195    }
196
197    #[cfg(feature = "nightly")]
198    #[inline]
199    fn write_length_prefix(&mut self, len: usize) {
200        // Our impl for `usize` will extend it if needed.
201        self.write_usize(len);
202    }
203
204    #[inline]
205    fn write_u8(&mut self, i: u8) {
206        self.state.write_u8(i);
207    }
208
209    #[inline]
210    fn write_u16(&mut self, i: u16) {
211        self.state.short_write(i.to_le_bytes());
212    }
213
214    #[inline]
215    fn write_u32(&mut self, i: u32) {
216        self.state.short_write(i.to_le_bytes());
217    }
218
219    #[inline]
220    fn write_u64(&mut self, i: u64) {
221        self.state.short_write(i.to_le_bytes());
222    }
223
224    #[inline]
225    fn write_u128(&mut self, i: u128) {
226        self.write_u64(i as u64);
227        self.write_u64((i >> 64) as u64);
228    }
229
230    #[inline]
231    fn write_usize(&mut self, i: usize) {
232        // Always treat usize as u64 so we get the same results on 32 and 64 bit
233        // platforms. This is important for symbol hashes when cross compiling,
234        // for example.
235        self.state.short_write((i as u64).to_le_bytes());
236    }
237
238    #[inline]
239    fn write_i8(&mut self, i: i8) {
240        self.state.write_i8(i);
241    }
242
243    #[inline]
244    fn write_i16(&mut self, i: i16) {
245        self.state.short_write((i as u16).to_le_bytes());
246    }
247
248    #[inline]
249    fn write_i32(&mut self, i: i32) {
250        self.state.short_write((i as u32).to_le_bytes());
251    }
252
253    #[inline]
254    fn write_i64(&mut self, i: i64) {
255        self.state.short_write((i as u64).to_le_bytes());
256    }
257
258    #[inline]
259    fn write_i128(&mut self, i: i128) {
260        self.state.write(&(i as u128).to_le_bytes());
261    }
262
263    #[inline]
264    fn write_isize(&mut self, i: isize) {
265        // Always treat isize as a 64-bit number so we get the same results on 32 and 64 bit
266        // platforms. This is important for symbol hashes when cross compiling,
267        // for example. Sign extending here is preferable as it means that the
268        // same negative number hashes the same on both 32 and 64 bit platforms.
269        let value = i as u64;
270
271        // Cold path
272        #[cold]
273        #[inline(never)]
274        fn hash_value<H: ExtendedHasher>(state: &mut H, value: u64) {
275            state.write_u8(0xFF);
276            state.short_write(value.to_le_bytes());
277        }
278
279        // `isize` values often seem to have a small (positive) numeric value in practice.
280        // To exploit this, if the value is small, we will hash a smaller amount of bytes.
281        // However, we cannot just skip the leading zero bytes, as that would produce the same hash
282        // e.g. if you hash two values that have the same bit pattern when they are swapped.
283        // See https://github.com/rust-lang/rust/pull/93014 for context.
284        //
285        // Therefore, we employ the following strategy:
286        // 1) When we encounter a value that fits within a single byte (the most common case), we
287        // hash just that byte. This is the most common case that is being optimized. However, we do
288        // not do this for the value 0xFF, as that is a reserved prefix (a bit like in UTF-8).
289        // 2) When we encounter a larger value, we hash a "marker" 0xFF and then the corresponding
290        // 8 bytes. Since this prefix cannot occur when we hash a single byte, when we hash two
291        // `isize`s that fit within a different amount of bytes, they should always produce a different
292        // byte stream for the hasher.
293        if value < 0xFF {
294            self.state.write_u8(value as u8);
295        } else {
296            hash_value(&mut self.state, value);
297        }
298    }
299}