1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
//! Stable hasher adapted for cross-platform independent hash.

use std::fmt;
use std::hash::Hasher;

#[cfg(test)]
mod tests;

/// A [`Hasher`] extension consumed by [`StableHasher`].
///
/// [`Hasher::finish`] can only return a `u64`. Implementors of this trait
/// additionally pick their own [`Self::Hash`] output type, which is handy
/// when the hasher's native result uses a different representation.
///
/// # Example
///
/// ```
/// use std::hash::Hasher;
/// use rustc_stable_hash::ExtendedHasher;
///
/// struct BogusHasher(u128);
///
/// impl Hasher for BogusHasher {
///     fn write(&mut self, a: &[u8]) {
///         # self.0 = a.iter().fold(0u128, |acc, a| acc + (*a as u128)) + self.0;
///         // ...
///     }
///
///     fn finish(&self) -> u64 {
///         self.0 as u64 // really bogus
///     }
/// }
///
/// impl ExtendedHasher for BogusHasher {
///     type Hash = u128;
///
///     fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
///         self.write(&bytes)
///     }
///
///     fn finish(self) -> Self::Hash {
///         self.0
///     }
/// }
/// ```
pub trait ExtendedHasher: Hasher {
    /// Output type produced when the hasher is finalized.
    type Hash;

    /// Variant of [`Hasher::write`] meant for small, fixed-size writes.
    ///
    /// The provided implementation forwards the bytes to [`Hasher::write`];
    /// implementors may override it with a specialized path.
    fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
        Hasher::write(self, &bytes[..]);
    }

    /// Consumes the hasher and returns its final [`Self::Hash`] value.
    fn finish(self) -> Self::Hash;
}

/// A Stable Hasher adapted for cross-platform independent hash.
///
/// When hashing something that ends up affecting properties like symbol names,
/// we want these symbol names to be calculated independently of other factors
/// like what architecture you're compiling *from*.
///
/// To that end we always convert integers to little-endian format before
/// hashing and the architecture dependent `isize` and `usize` types are
/// extended to 64 bits if needed.
///
/// # Example
///
/// ```
/// use rustc_stable_hash::hashers::{StableSipHasher128, SipHasher128Hash};
/// use rustc_stable_hash::{StableHasher, FromStableHash};
/// use std::hash::Hasher;
///
/// struct Hash128([u64; 2]);
/// impl FromStableHash for Hash128 {
///     type Hash = SipHasher128Hash;
///
///     fn from(SipHasher128Hash(hash): SipHasher128Hash) -> Hash128 {
///         Hash128(hash)
///     }
/// }
///
/// let mut hasher = StableSipHasher128::new();
/// hasher.write_usize(0xFA);
///
/// let hash: Hash128 = hasher.finish();
/// ```
#[must_use]
pub struct StableHasher<H: ExtendedHasher> {
    /// Wrapped hasher that receives the bytes of every write made through
    /// this type.
    state: H,
}

/// Trait for processing the result of the stable hashing operation.
///
/// # Example
///
/// ```
/// use rustc_stable_hash::{StableHasher, FromStableHash};
///
/// struct Hash128(u128);
///
/// impl FromStableHash for Hash128 {
///     type Hash = [u64; 2];
///
///     fn from(hash: [u64; 2]) -> Hash128 {
///         let upper: u128 = hash[0] as u128;
///         let lower: u128 = hash[1] as u128;
///
///         Hash128((upper << 64) | lower)
///     }
/// }
/// ```
pub trait FromStableHash: Sized {
    /// Raw hash type accepted by [`FromStableHash::from`].
    ///
    /// [`StableHasher::finish`] requires it to be the same type as the
    /// wrapped hasher's [`ExtendedHasher::Hash`].
    type Hash;

    /// Converts the finalized hash of a [`StableHasher`] into a [`Self`]
    /// containing the processed hash.
    fn from(hash: Self::Hash) -> Self;
}

impl<H> StableHasher<H>
where
    H: ExtendedHasher + Default,
{
    /// Builds a [`StableHasher`] through its [`Default`] implementation.
    ///
    /// Feed it through the [`Hasher`] methods, then obtain the result with
    /// [`StableHasher::finish`].
    #[inline]
    pub fn new() -> Self {
        <Self as Default>::default()
    }
}

impl<H> Default for StableHasher<H>
where
    H: ExtendedHasher + Default,
{
    /// Builds a [`StableHasher`] around a default-constructed inner hasher.
    ///
    /// Feed it through the [`Hasher`] methods, then obtain the result with
    /// [`StableHasher::finish`].
    #[inline]
    fn default() -> Self {
        let state = H::default();
        Self { state }
    }
}

impl<H: ExtendedHasher> StableHasher<H> {
    /// Creates a new [`StableHasher`] from an already created [`ExtendedHasher`].
    ///
    /// Useful when wanting to initialize a hasher with different parameters/keys.
    ///
    /// **Important**: Any use of the hasher before being given to a [`StableHasher`]
    /// is not covered by this crate guarentees and will make the resulting hash
    /// NOT platform independent.
    #[inline]
    pub fn with_hasher(state: H) -> Self {
        StableHasher { state }
    }

    /// Returns the typed-hash value for the values written.
    ///
    /// The resulting typed-hash value is constructed from an
    /// [`FromStableHash`] implemenation.
    ///
    /// To be used in-place of [`Hasher::finish`].
    #[inline]
    #[must_use]
    pub fn finish<W: FromStableHash<Hash = H::Hash>>(self) -> W {
        W::from(self.state.finish())
    }
}

impl<H: ExtendedHasher + fmt::Debug> fmt::Debug for StableHasher<H> {
    /// Formats the hasher transparently, as the wrapped state.
    ///
    /// The caller's formatter is handed straight to the inner state so that
    /// flags such as the alternate (`{:#?}`) form or a width are honored
    /// instead of being reset by a nested `write!`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.state, f)
    }
}

impl<H: ExtendedHasher> Hasher for StableHasher<H> {
    /// Returns the `u64` hash reported by the wrapped hasher.
    ///
    /// Prefer the inherent [`StableHasher::finish`] when the full-width
    /// [`ExtendedHasher::Hash`] is wanted.
    fn finish(&self) -> u64 {
        <H as Hasher>::finish(&self.state)
    }

    /// Forwards the raw bytes to the wrapped hasher unchanged.
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        Hasher::write(&mut self.state, bytes);
    }

    #[cfg(feature = "nightly")]
    #[inline]
    fn write_str(&mut self, s: &str) {
        self.state.write_str(s);
    }

    #[cfg(feature = "nightly")]
    #[inline]
    fn write_length_prefix(&mut self, len: usize) {
        // Routed through our own `write_usize`, which widens the length to
        // 64 bits when needed.
        self.write_usize(len);
    }

    #[inline]
    fn write_u8(&mut self, i: u8) {
        Hasher::write_u8(&mut self.state, i);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        let le = i.to_le_bytes();
        self.state.short_write(le);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        let le = i.to_le_bytes();
        self.state.short_write(le);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        let le = i.to_le_bytes();
        self.state.short_write(le);
    }

    #[inline]
    fn write_u128(&mut self, i: u128) {
        // Low half first, then high half: together this is the
        // little-endian layout of the full 128-bit value.
        let (low, high) = (i as u64, (i >> 64) as u64);
        self.write_u64(low);
        self.write_u64(high);
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        // `usize` is always hashed as a `u64` so 32-bit and 64-bit hosts
        // agree. This matters for symbol hashes when cross compiling, for
        // example.
        let widened = i as u64;
        self.state.short_write(widened.to_le_bytes());
    }

    #[inline]
    fn write_i8(&mut self, i: i8) {
        Hasher::write_i8(&mut self.state, i);
    }

    #[inline]
    fn write_i16(&mut self, i: i16) {
        // Two's complement: same bytes as the unsigned reinterpretation.
        self.state.short_write(i.to_le_bytes());
    }

    #[inline]
    fn write_i32(&mut self, i: i32) {
        // Two's complement: same bytes as the unsigned reinterpretation.
        self.state.short_write(i.to_le_bytes());
    }

    #[inline]
    fn write_i64(&mut self, i: i64) {
        // Two's complement: same bytes as the unsigned reinterpretation.
        self.state.short_write(i.to_le_bytes());
    }

    #[inline]
    fn write_i128(&mut self, i: i128) {
        let le = i.to_le_bytes();
        self.state.write(&le);
    }

    #[inline]
    fn write_isize(&mut self, i: isize) {
        // Out-of-line slow path: the reserved `0xFF` marker followed by all
        // eight little-endian bytes of the value.
        #[cold]
        #[inline(never)]
        fn write_marked<H: ExtendedHasher>(hasher: &mut H, wide: u64) {
            hasher.write_u8(0xFF);
            hasher.short_write(wide.to_le_bytes());
        }

        // `isize` is always treated as a 64-bit number so 32-bit and 64-bit
        // hosts agree (important for symbol hashes when cross compiling).
        // The cast sign-extends, so a given negative number hashes the same
        // on both kinds of platform.
        let wide = i as u64;

        // In practice `isize` values tend to be small and positive, so small
        // values are hashed as a single byte. Simply dropping leading zero
        // bytes is not an option: two different sequences of values could
        // then feed the hasher the same byte stream. See
        // https://github.com/rust-lang/rust/pull/93014 for context.
        //
        // The scheme is therefore:
        // 1) A value that fits in one byte (the common case) is hashed as
        //    just that byte, except for 0xFF, which is reserved as a prefix
        //    (a bit like in UTF-8).
        // 2) Any other value is hashed as the 0xFF marker followed by its
        //    8 bytes. Because a single-byte value can never be 0xFF, values
        //    of different encoded widths always yield different byte
        //    streams.
        match wide {
            0..=0xFE => self.state.write_u8(wide as u8),
            _ => write_marked(&mut self.state, wide),
        }
    }
}