Skip to main content

typst_utils/
hash.rs

1use std::fmt::{self, Debug};
2use std::hash::{Hash, Hasher};
3use std::ops::{Deref, DerefMut};
4use std::sync::atomic::Ordering;
5
6use portable_atomic::AtomicU128;
7use siphasher::sip128::{Hasher128, SipHasher13};
8
9/// Calculate a 128-bit siphash of a value.
10///
11/// To make the hash stable between 64-bit and 32-bit architectures, usize is
12/// hashed as u64.
13pub fn hash128<T: Hash + ?Sized>(value: &T) -> u128 {
14    struct StableHasher(SipHasher13);
15
16    impl Hasher for StableHasher {
17        fn finish(&self) -> u64 {
18            self.0.finish()
19        }
20
21        fn write(&mut self, bytes: &[u8]) {
22            self.0.write(bytes);
23        }
24
25        fn write_usize(&mut self, i: usize) {
26            self.0.write_u64(i as u64);
27        }
28    }
29
30    let mut state = StableHasher(SipHasher13::new());
31    value.hash(&mut state);
32    state.0.finish128().as_u128()
33}
34
35/// A wrapper type with lazily-computed hash.
36///
37/// This is useful if you want to pass large values of `T` to memoized
38/// functions. Especially recursive structures like trees benefit from
39/// intermediate prehashed nodes.
40///
41/// Note that for a value `v` of type `T`, `hash(v)` is not necessarily equal to
42/// `hash(LazyHash::new(v))`. Writing the precomputed hash into a hasher's
43/// state produces different output than writing the value's parts directly.
44/// However, that seldom matters as you are typically either dealing with values
45/// of type `T` or with values of type `LazyHash<T>`, not a mix of both.
46///
47/// # Equality
48/// Because Typst uses high-quality 128 bit hashes in all places, the risk of a
49/// hash collision is reduced to an absolute minimum. Therefore, this type
50/// additionally provides `PartialEq` and `Eq` implementations that compare by
51/// hash instead of by value. For this to be correct, your hash implementation
52/// **must feed all information relevant to the `PartialEq` impl to the
53/// hasher.**
54///
55/// # Usage
56/// If the value is expected to be cloned, it is best used inside of an `Arc`
57/// or `Rc` to best re-use the hash once it has been computed.
58///
59/// # Unsized coercions
60/// The `LazyHash` type supports unsized payload types and coercions to such.
61/// For instance, a `LazyHash<&'static str>` can be coerced to a
62/// `LazyHash<dyn YourTrait>` when `&'static str: YourTrait`. When it is hashed,
63/// a `LazyHash` will always use the [`Hash`] impl of the underlying type. This
64/// underlying type changes through an unsized coercion. When coercing a
65/// [`LazyHash`] that has an already populated internal hash, you'll thus get a
66/// cached hash that was hashed with another impl than a fresh hash would have
67/// used. To avoid this, when performing unsized coercions, avoid hashing the
68/// value before the coercion and overall try to minimize the timespan in which
69/// the original type is active. Typical usages of unsized coercions have a very
70/// minimal lifetime of the original type only upon construction.
71#[derive(Clone)]
72pub struct LazyHash<T: ?Sized> {
73    /// The hash for the value.
74    hash: HashLock,
75    /// The underlying value.
76    value: T,
77}
78
79impl<T: Default> Default for LazyHash<T> {
80    #[inline]
81    fn default() -> Self {
82        Self::new(Default::default())
83    }
84}
85
86impl<T> LazyHash<T> {
87    /// Wraps an item without pre-computed hash.
88    #[inline]
89    pub fn new(value: T) -> Self {
90        Self { hash: HashLock::new(), value }
91    }
92
93    /// Returns the wrapped value.
94    #[inline]
95    pub fn into_inner(self) -> T {
96        self.value
97    }
98}
99
100impl<T: Hash + ?Sized + 'static> LazyHash<T> {
101    /// Get the hash or compute it if not set yet.
102    #[inline]
103    fn load_or_compute_hash(&self) -> u128 {
104        self.hash.get_or_insert_with(|| hash128(&self.value))
105    }
106}
107
108impl<T: Hash + ?Sized + 'static> Hash for LazyHash<T> {
109    #[inline]
110    fn hash<H: Hasher>(&self, state: &mut H) {
111        state.write_u128(self.load_or_compute_hash());
112    }
113}
114
115impl<T> From<T> for LazyHash<T> {
116    #[inline]
117    fn from(value: T) -> Self {
118        Self::new(value)
119    }
120}
121
122impl<T: Hash + ?Sized + 'static> Eq for LazyHash<T> {}
123
124impl<T: Hash + ?Sized + 'static> PartialEq for LazyHash<T> {
125    #[inline]
126    fn eq(&self, other: &Self) -> bool {
127        self.load_or_compute_hash() == other.load_or_compute_hash()
128    }
129}
130
131impl<T: ?Sized> Deref for LazyHash<T> {
132    type Target = T;
133
134    #[inline]
135    fn deref(&self) -> &Self::Target {
136        &self.value
137    }
138}
139
140impl<T: ?Sized + 'static> DerefMut for LazyHash<T> {
141    #[inline]
142    fn deref_mut(&mut self) -> &mut Self::Target {
143        self.hash.reset();
144        &mut self.value
145    }
146}
147
148impl<T: Debug> Debug for LazyHash<T> {
149    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
150        self.value.fmt(f)
151    }
152}
153
154/// A wrapper type with a manually computed hash.
155///
156/// This can be used to turn an unhashable type into a hashable one where the
157/// hash is provided manually. Typically, the hash is derived from the data
158/// which was used to construct to the unhashable type.
159///
160/// For instance, you could hash the bytes that were parsed into an unhashable
161/// data structure.
162///
163/// # Equality
164/// Because Typst uses high-quality 128 bit hashes in all places, the risk of a
165/// hash collision is reduced to an absolute minimum. Therefore, this type
166/// additionally provides `PartialEq` and `Eq` implementations that compare by
167/// hash instead of by value. For this to be correct, your hash implementation
168/// **must feed all information relevant to the `PartialEq` impl to the
169/// hasher.**
170#[derive(Clone)]
171pub struct ManuallyHash<T: ?Sized> {
172    /// A manually computed hash.
173    hash: u128,
174    /// The underlying value.
175    value: T,
176}
177
178impl<T> ManuallyHash<T> {
179    /// Wraps an item with a pre-computed hash.
180    ///
181    /// The hash should be computed with `typst_utils::hash128`.
182    #[inline]
183    pub fn new(value: T, hash: u128) -> Self {
184        Self { hash, value }
185    }
186
187    /// Returns the wrapped value.
188    #[inline]
189    pub fn into_inner(self) -> T {
190        self.value
191    }
192}
193
194impl<T: ?Sized> Hash for ManuallyHash<T> {
195    #[inline]
196    fn hash<H: Hasher>(&self, state: &mut H) {
197        state.write_u128(self.hash);
198    }
199}
200
201impl<T: ?Sized> Eq for ManuallyHash<T> {}
202
203impl<T: ?Sized> PartialEq for ManuallyHash<T> {
204    #[inline]
205    fn eq(&self, other: &Self) -> bool {
206        self.hash == other.hash
207    }
208}
209
210impl<T: ?Sized> Deref for ManuallyHash<T> {
211    type Target = T;
212
213    #[inline]
214    fn deref(&self) -> &Self::Target {
215        &self.value
216    }
217}
218
219impl<T: Debug> Debug for ManuallyHash<T> {
220    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
221        self.value.fmt(f)
222    }
223}
224
225/// Storage for lazy hash computation.
226pub struct HashLock(AtomicU128);
227
228impl HashLock {
229    /// Create a new unset hash cell.
230    pub const fn new() -> Self {
231        Self(AtomicU128::new(0))
232    }
233
234    /// Get the hash or compute it if not set yet.
235    #[inline]
236    pub fn get_or_insert_with(&self, f: impl FnOnce() -> u128) -> u128 {
237        let mut hash = self.get();
238        if hash == 0 {
239            hash = f();
240            self.0.store(hash, Ordering::Relaxed);
241        }
242        hash
243    }
244
245    /// Reset the hash to unset.
246    #[inline]
247    pub fn reset(&mut self) {
248        // Because we have a mutable reference, we can skip the atomic.
249        *self.0.get_mut() = 0;
250    }
251
252    /// Get the hash, returns zero if not computed yet.
253    #[inline]
254    fn get(&self) -> u128 {
255        // We only need atomicity and no synchronization of other operations, so
256        // `Relaxed` is fine.
257        self.0.load(Ordering::Relaxed)
258    }
259}
260
261impl Default for HashLock {
262    fn default() -> Self {
263        Self::new()
264    }
265}
266
267impl Clone for HashLock {
268    fn clone(&self) -> Self {
269        Self(AtomicU128::new(self.get()))
270    }
271}