typst_utils/
hash.rs

1use std::any::Any;
2use std::fmt::{self, Debug};
3use std::hash::{Hash, Hasher};
4use std::ops::{Deref, DerefMut};
5use std::sync::atomic::Ordering;
6
7use portable_atomic::AtomicU128;
8use siphasher::sip128::{Hasher128, SipHasher13};
9
10/// A wrapper type with lazily-computed hash.
11///
12/// This is useful if you want to pass large values of `T` to memoized
13/// functions. Especially recursive structures like trees benefit from
14/// intermediate prehashed nodes.
15///
16/// Note that for a value `v` of type `T`, `hash(v)` is not necessarily equal to
17/// `hash(LazyHash::new(v))`. Writing the precomputed hash into a hasher's
18/// state produces different output than writing the value's parts directly.
19/// However, that seldom matters as you are typically either dealing with values
20/// of type `T` or with values of type `LazyHash<T>`, not a mix of both.
21///
22/// # Equality
23/// Because Typst uses high-quality 128 bit hashes in all places, the risk of a
24/// hash collision is reduced to an absolute minimum. Therefore, this type
25/// additionally provides `PartialEq` and `Eq` implementations that compare by
26/// hash instead of by value. For this to be correct, your hash implementation
27/// **must feed all information relevant to the `PartialEq` impl to the
28/// hasher.**
29///
30/// # Usage
31/// If the value is expected to be cloned, it is best used inside of an `Arc`
32/// or `Rc` to best re-use the hash once it has been computed.
33pub struct LazyHash<T: ?Sized> {
34    /// The hash for the value.
35    hash: AtomicU128,
36    /// The underlying value.
37    value: T,
38}
39
40impl<T: Default> Default for LazyHash<T> {
41    #[inline]
42    fn default() -> Self {
43        Self::new(Default::default())
44    }
45}
46
47impl<T> LazyHash<T> {
48    /// Wraps an item without pre-computed hash.
49    #[inline]
50    pub fn new(value: T) -> Self {
51        Self { hash: AtomicU128::new(0), value }
52    }
53
54    /// Wrap an item with a pre-computed hash.
55    ///
56    /// **Important:** The hash must be correct for the value. This cannot be
57    /// enforced at compile time, so use with caution.
58    #[inline]
59    pub fn reuse<U: ?Sized>(value: T, existing: &LazyHash<U>) -> Self {
60        LazyHash { hash: AtomicU128::new(existing.load_hash()), value }
61    }
62
63    /// Returns the wrapped value.
64    #[inline]
65    pub fn into_inner(self) -> T {
66        self.value
67    }
68}
69
70impl<T: ?Sized> LazyHash<T> {
71    /// Get the hash, returns zero if not computed yet.
72    #[inline]
73    fn load_hash(&self) -> u128 {
74        // We only need atomicity and no synchronization of other operations, so
75        // `Relaxed` is fine.
76        self.hash.load(Ordering::Relaxed)
77    }
78}
79
80impl<T: Hash + ?Sized + 'static> LazyHash<T> {
81    /// Get the hash or compute it if not set yet.
82    #[inline]
83    fn load_or_compute_hash(&self) -> u128 {
84        let mut hash = self.load_hash();
85        if hash == 0 {
86            hash = hash_item(&self.value);
87            self.hash.store(hash, Ordering::Relaxed);
88        }
89        hash
90    }
91
92    /// Reset the hash to zero.
93    #[inline]
94    fn reset_hash(&mut self) {
95        // Because we have a mutable reference, we can skip the atomic.
96        *self.hash.get_mut() = 0;
97    }
98}
99
100/// Hash the item.
101#[inline]
102fn hash_item<T: Hash + ?Sized + 'static>(item: &T) -> u128 {
103    // Also hash the TypeId because the type might be converted
104    // through an unsized coercion.
105    let mut state = SipHasher13::new();
106    item.type_id().hash(&mut state);
107    item.hash(&mut state);
108    state.finish128().as_u128()
109}
110
111impl<T: Hash + ?Sized + 'static> Hash for LazyHash<T> {
112    #[inline]
113    fn hash<H: Hasher>(&self, state: &mut H) {
114        state.write_u128(self.load_or_compute_hash());
115    }
116}
117
118impl<T> From<T> for LazyHash<T> {
119    #[inline]
120    fn from(value: T) -> Self {
121        Self::new(value)
122    }
123}
124
125impl<T: Hash + ?Sized + 'static> Eq for LazyHash<T> {}
126
127impl<T: Hash + ?Sized + 'static> PartialEq for LazyHash<T> {
128    #[inline]
129    fn eq(&self, other: &Self) -> bool {
130        self.load_or_compute_hash() == other.load_or_compute_hash()
131    }
132}
133
134impl<T: ?Sized> Deref for LazyHash<T> {
135    type Target = T;
136
137    #[inline]
138    fn deref(&self) -> &Self::Target {
139        &self.value
140    }
141}
142
143impl<T: Hash + ?Sized + 'static> DerefMut for LazyHash<T> {
144    #[inline]
145    fn deref_mut(&mut self) -> &mut Self::Target {
146        self.reset_hash();
147        &mut self.value
148    }
149}
150
151impl<T: Hash + Clone + 'static> Clone for LazyHash<T> {
152    fn clone(&self) -> Self {
153        Self {
154            hash: AtomicU128::new(self.load_hash()),
155            value: self.value.clone(),
156        }
157    }
158}
159
160impl<T: Debug> Debug for LazyHash<T> {
161    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
162        self.value.fmt(f)
163    }
164}
165
166/// A wrapper type with a manually computed hash.
167///
168/// This can be used to turn an unhashable type into a hashable one where the
169/// hash is provided manually. Typically, the hash is derived from the data
170/// which was used to construct to the unhashable type.
171///
172/// For instance, you could hash the bytes that were parsed into an unhashable
173/// data structure.
174///
175/// # Equality
176/// Because Typst uses high-quality 128 bit hashes in all places, the risk of a
177/// hash collision is reduced to an absolute minimum. Therefore, this type
178/// additionally provides `PartialEq` and `Eq` implementations that compare by
179/// hash instead of by value. For this to be correct, your hash implementation
180/// **must feed all information relevant to the `PartialEq` impl to the
181/// hasher.**
182#[derive(Clone)]
183pub struct ManuallyHash<T: ?Sized> {
184    /// A manually computed hash.
185    hash: u128,
186    /// The underlying value.
187    value: T,
188}
189
190impl<T> ManuallyHash<T> {
191    /// Wraps an item with a pre-computed hash.
192    ///
193    /// The hash should be computed with `typst_utils::hash128`.
194    #[inline]
195    pub fn new(value: T, hash: u128) -> Self {
196        Self { hash, value }
197    }
198
199    /// Returns the wrapped value.
200    #[inline]
201    pub fn into_inner(self) -> T {
202        self.value
203    }
204}
205
206impl<T: ?Sized> Hash for ManuallyHash<T> {
207    #[inline]
208    fn hash<H: Hasher>(&self, state: &mut H) {
209        state.write_u128(self.hash);
210    }
211}
212
213impl<T: ?Sized> Eq for ManuallyHash<T> {}
214
215impl<T: ?Sized> PartialEq for ManuallyHash<T> {
216    #[inline]
217    fn eq(&self, other: &Self) -> bool {
218        self.hash == other.hash
219    }
220}
221
222impl<T: ?Sized> Deref for ManuallyHash<T> {
223    type Target = T;
224
225    #[inline]
226    fn deref(&self) -> &Self::Target {
227        &self.value
228    }
229}
230
231impl<T: Debug> Debug for ManuallyHash<T> {
232    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
233        self.value.fmt(f)
234    }
235}