typst_utils/
hash.rs

use std::any::Any;
use std::fmt::{self, Debug};
use std::hash::{Hash, Hasher};
use std::ops::{Deref, DerefMut};
use std::sync::atomic::Ordering;

use portable_atomic::AtomicU128;
use siphasher::sip128::{Hasher128, SipHasher13};

/// A wrapper type with a lazily-computed hash.
///
/// This is useful if you want to pass large values of `T` to memoized
/// functions. Especially recursive structures like trees benefit from
/// intermediate prehashed nodes.
///
/// Note that for a value `v` of type `T`, `hash(v)` is not necessarily equal to
/// `hash(LazyHash::new(v))`. Writing the precomputed hash into a hasher's
/// state produces different output than writing the value's parts directly.
/// However, that seldom matters, as you are typically dealing either with
/// values of type `T` or with values of type `LazyHash<T>`, not a mix of both.
///
/// # Equality
/// Because Typst uses high-quality 128-bit hashes in all places, the risk of a
/// hash collision is reduced to an absolute minimum. Therefore, this type
/// additionally provides `PartialEq` and `Eq` implementations that compare by
/// hash instead of by value. For this to be correct, your hash implementation
/// **must feed all information relevant to the `PartialEq` impl to the
/// hasher.**
///
/// # Usage
/// If the value is expected to be cloned, it is best wrapped in an `Arc` or
/// `Rc` so that the hash is computed only once and then reused.
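///
/// # Example
/// A minimal sketch of typical usage, assuming `LazyHash` is re-exported at
/// the crate root as `typst_utils::LazyHash`:
/// ```
/// use std::sync::Arc;
/// use typst_utils::LazyHash;
///
/// // Wrap a large value; the hash is computed on first use and then cached.
/// let shared = Arc::new(LazyHash::new(vec![1u8; 1024]));
/// let clone = Arc::clone(&shared);
///
/// // Equality compares the cached 128-bit hashes, not the values themselves.
/// assert_eq!(*clone, LazyHash::new(vec![1u8; 1024]));
/// ```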
#[derive(Clone)]
pub struct LazyHash<T: ?Sized> {
    /// The hash for the value.
    hash: HashLock,
    /// The underlying value.
    value: T,
}

impl<T: Default> Default for LazyHash<T> {
    #[inline]
    fn default() -> Self {
        Self::new(Default::default())
    }
}

impl<T> LazyHash<T> {
    /// Wraps an item without a pre-computed hash.
    #[inline]
    pub fn new(value: T) -> Self {
        Self { hash: HashLock::new(), value }
    }

    /// Returns the wrapped value.
    #[inline]
    pub fn into_inner(self) -> T {
        self.value
    }
}

impl<T: Hash + ?Sized + 'static> LazyHash<T> {
    /// Get the hash or compute it if not set yet.
    #[inline]
    fn load_or_compute_hash(&self) -> u128 {
        self.hash.get_or_insert_with(|| hash_item(&self.value))
    }
}

/// Hash the item.
#[inline]
fn hash_item<T: Hash + ?Sized + 'static>(item: &T) -> u128 {
    // Also hash the TypeId because the type might be converted
    // through an unsized coercion.
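    // For example, a `[u8; 2]` array and the `[u8]` slice it coerces to feed
    // identical bytes to the hasher, so without the `TypeId` the following
    // two hashes would collide:
    //
    //     let array: [u8; 2] = [1, 2];
    //     let slice: &[u8] = &array;
    //     assert_ne!(hash_item(&array), hash_item(slice));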
    let mut state = SipHasher13::new();
    item.type_id().hash(&mut state);
    item.hash(&mut state);
    state.finish128().as_u128()
}

impl<T: Hash + ?Sized + 'static> Hash for LazyHash<T> {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        state.write_u128(self.load_or_compute_hash());
    }
}

impl<T> From<T> for LazyHash<T> {
    #[inline]
    fn from(value: T) -> Self {
        Self::new(value)
    }
}

impl<T: Hash + ?Sized + 'static> Eq for LazyHash<T> {}

impl<T: Hash + ?Sized + 'static> PartialEq for LazyHash<T> {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.load_or_compute_hash() == other.load_or_compute_hash()
    }
}

impl<T: ?Sized> Deref for LazyHash<T> {
    type Target = T;

    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.value
    }
}

impl<T: ?Sized + 'static> DerefMut for LazyHash<T> {
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
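        // Mutable access may change the value, so the cached hash must be
        // invalidated and recomputed on the next use.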
        self.hash.reset();
        &mut self.value
    }
}

impl<T: Debug> Debug for LazyHash<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.value.fmt(f)
    }
}

/// A wrapper type with a manually computed hash.
///
/// This can be used to turn an unhashable type into a hashable one where the
/// hash is provided manually. Typically, the hash is derived from the data
/// that was used to construct the unhashable type.
///
/// For instance, you could hash the bytes that were parsed into an unhashable
/// data structure.
///
/// # Equality
/// Because Typst uses high-quality 128-bit hashes in all places, the risk of a
/// hash collision is reduced to an absolute minimum. Therefore, this type
/// additionally provides `PartialEq` and `Eq` implementations that compare by
/// hash instead of by value. For this to be correct, your hash implementation
/// **must feed all information relevant to the `PartialEq` impl to the
/// hasher.**
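///
/// # Example
/// A minimal sketch, assuming `ManuallyHash` and `hash128` are re-exported at
/// the crate root (the parsed value here is just a stand-in):
/// ```
/// use typst_utils::{hash128, ManuallyHash};
///
/// // A value that does not implement `Hash` itself (e.g. something parsed
/// // from raw bytes), keyed by the hash of the bytes it came from.
/// let bytes = b"raw file contents";
/// let parsed = 1.5_f64; // stand-in for an unhashable parsed value
/// let wrapped = ManuallyHash::new(parsed, hash128(bytes));
/// assert_eq!(*wrapped, 1.5);
/// ```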
#[derive(Clone)]
pub struct ManuallyHash<T: ?Sized> {
    /// A manually computed hash.
    hash: u128,
    /// The underlying value.
    value: T,
}

impl<T> ManuallyHash<T> {
    /// Wraps an item with a pre-computed hash.
    ///
    /// The hash should be computed with `typst_utils::hash128`.
    #[inline]
    pub fn new(value: T, hash: u128) -> Self {
        Self { hash, value }
    }

    /// Returns the wrapped value.
    #[inline]
    pub fn into_inner(self) -> T {
        self.value
    }
}

impl<T: ?Sized> Hash for ManuallyHash<T> {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        state.write_u128(self.hash);
    }
}

impl<T: ?Sized> Eq for ManuallyHash<T> {}

impl<T: ?Sized> PartialEq for ManuallyHash<T> {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.hash == other.hash
    }
}

impl<T: ?Sized> Deref for ManuallyHash<T> {
    type Target = T;

    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.value
    }
}

impl<T: Debug> Debug for ManuallyHash<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.value.fmt(f)
    }
}

/// Storage for lazy hash computation.
pub struct HashLock(AtomicU128);

impl HashLock {
    /// Create a new unset hash cell.
    pub const fn new() -> Self {
        Self(AtomicU128::new(0))
    }

    /// Get the hash or compute it if not set yet.
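    ///
    /// A stored value of zero means "not computed yet". As a consequence, if
    /// the closure ever produces a hash of exactly zero, it will simply be
    /// recomputed on every call; this costs time but does not affect
    /// correctness.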
    #[inline]
    pub fn get_or_insert_with(&self, f: impl FnOnce() -> u128) -> u128 {
        let mut hash = self.get();
        if hash == 0 {
            hash = f();
            self.0.store(hash, Ordering::Relaxed);
        }
        hash
    }

    /// Reset the hash to unset.
    #[inline]
    pub fn reset(&mut self) {
        // Because we have a mutable reference, we can skip the atomic.
        *self.0.get_mut() = 0;
    }

    /// Get the hash, or zero if it has not been computed yet.
    #[inline]
    fn get(&self) -> u128 {
        // We only need atomicity and no synchronization of other operations, so
        // `Relaxed` is fine.
        self.0.load(Ordering::Relaxed)
    }
}

impl Default for HashLock {
    fn default() -> Self {
        Self::new()
    }
}

impl Clone for HashLock {
    fn clone(&self) -> Self {
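        // Cloning carries over the already computed hash (or the unset state),
        // so a clone never needs to recompute a hash that is already known.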
        Self(AtomicU128::new(self.get()))
    }
}