typst_utils/hash.rs
1use std::fmt::{self, Debug};
2use std::hash::{Hash, Hasher};
3use std::ops::{Deref, DerefMut};
4use std::sync::atomic::Ordering;
5
6use portable_atomic::AtomicU128;
7use siphasher::sip128::{Hasher128, SipHasher13};
8
9/// Calculate a 128-bit siphash of a value.
10///
11/// To make the hash stable between 64-bit and 32-bit architectures, usize is
12/// hashed as u64.
13pub fn hash128<T: Hash + ?Sized>(value: &T) -> u128 {
14 struct StableHasher(SipHasher13);
15
16 impl Hasher for StableHasher {
17 fn finish(&self) -> u64 {
18 self.0.finish()
19 }
20
21 fn write(&mut self, bytes: &[u8]) {
22 self.0.write(bytes);
23 }
24
25 fn write_usize(&mut self, i: usize) {
26 self.0.write_u64(i as u64);
27 }
28 }
29
30 let mut state = StableHasher(SipHasher13::new());
31 value.hash(&mut state);
32 state.0.finish128().as_u128()
33}
34
35/// A wrapper type with lazily-computed hash.
36///
37/// This is useful if you want to pass large values of `T` to memoized
38/// functions. Especially recursive structures like trees benefit from
39/// intermediate prehashed nodes.
40///
41/// Note that for a value `v` of type `T`, `hash(v)` is not necessarily equal to
42/// `hash(LazyHash::new(v))`. Writing the precomputed hash into a hasher's
43/// state produces different output than writing the value's parts directly.
44/// However, that seldom matters as you are typically either dealing with values
45/// of type `T` or with values of type `LazyHash<T>`, not a mix of both.
46///
47/// # Equality
48/// Because Typst uses high-quality 128 bit hashes in all places, the risk of a
49/// hash collision is reduced to an absolute minimum. Therefore, this type
50/// additionally provides `PartialEq` and `Eq` implementations that compare by
51/// hash instead of by value. For this to be correct, your hash implementation
52/// **must feed all information relevant to the `PartialEq` impl to the
53/// hasher.**
54///
55/// # Usage
56/// If the value is expected to be cloned, it is best used inside of an `Arc`
57/// or `Rc` to best re-use the hash once it has been computed.
58///
59/// # Unsized coercions
60/// The `LazyHash` type supports unsized payload types and coercions to such.
61/// For instance, a `LazyHash<&'static str>` can be coerced to a
62/// `LazyHash<dyn YourTrait>` when `&'static str: YourTrait`. When it is hashed,
63/// a `LazyHash` will always use the [`Hash`] impl of the underlying type. This
64/// underlying type changes through an unsized coercion. When coercing a
65/// [`LazyHash`] that has an already populated internal hash, you'll thus get a
66/// cached hash that was hashed with another impl than a fresh hash would have
67/// used. To avoid this, when performing unsized coercions, avoid hashing the
68/// value before the coercion and overall try to minimize the timespan in which
69/// the original type is active. Typical usages of unsized coercions have a very
70/// minimal lifetime of the original type only upon construction.
71#[derive(Clone)]
72pub struct LazyHash<T: ?Sized> {
73 /// The hash for the value.
74 hash: HashLock,
75 /// The underlying value.
76 value: T,
77}
78
79impl<T: Default> Default for LazyHash<T> {
80 #[inline]
81 fn default() -> Self {
82 Self::new(Default::default())
83 }
84}
85
86impl<T> LazyHash<T> {
87 /// Wraps an item without pre-computed hash.
88 #[inline]
89 pub fn new(value: T) -> Self {
90 Self { hash: HashLock::new(), value }
91 }
92
93 /// Returns the wrapped value.
94 #[inline]
95 pub fn into_inner(self) -> T {
96 self.value
97 }
98}
99
100impl<T: Hash + ?Sized + 'static> LazyHash<T> {
101 /// Get the hash or compute it if not set yet.
102 #[inline]
103 fn load_or_compute_hash(&self) -> u128 {
104 self.hash.get_or_insert_with(|| hash128(&self.value))
105 }
106}
107
108impl<T: Hash + ?Sized + 'static> Hash for LazyHash<T> {
109 #[inline]
110 fn hash<H: Hasher>(&self, state: &mut H) {
111 state.write_u128(self.load_or_compute_hash());
112 }
113}
114
115impl<T> From<T> for LazyHash<T> {
116 #[inline]
117 fn from(value: T) -> Self {
118 Self::new(value)
119 }
120}
121
122impl<T: Hash + ?Sized + 'static> Eq for LazyHash<T> {}
123
124impl<T: Hash + ?Sized + 'static> PartialEq for LazyHash<T> {
125 #[inline]
126 fn eq(&self, other: &Self) -> bool {
127 self.load_or_compute_hash() == other.load_or_compute_hash()
128 }
129}
130
131impl<T: ?Sized> Deref for LazyHash<T> {
132 type Target = T;
133
134 #[inline]
135 fn deref(&self) -> &Self::Target {
136 &self.value
137 }
138}
139
140impl<T: ?Sized + 'static> DerefMut for LazyHash<T> {
141 #[inline]
142 fn deref_mut(&mut self) -> &mut Self::Target {
143 self.hash.reset();
144 &mut self.value
145 }
146}
147
148impl<T: Debug> Debug for LazyHash<T> {
149 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
150 self.value.fmt(f)
151 }
152}
153
154/// A wrapper type with a manually computed hash.
155///
156/// This can be used to turn an unhashable type into a hashable one where the
157/// hash is provided manually. Typically, the hash is derived from the data
158/// which was used to construct to the unhashable type.
159///
160/// For instance, you could hash the bytes that were parsed into an unhashable
161/// data structure.
162///
163/// # Equality
164/// Because Typst uses high-quality 128 bit hashes in all places, the risk of a
165/// hash collision is reduced to an absolute minimum. Therefore, this type
166/// additionally provides `PartialEq` and `Eq` implementations that compare by
167/// hash instead of by value. For this to be correct, your hash implementation
168/// **must feed all information relevant to the `PartialEq` impl to the
169/// hasher.**
170#[derive(Clone)]
171pub struct ManuallyHash<T: ?Sized> {
172 /// A manually computed hash.
173 hash: u128,
174 /// The underlying value.
175 value: T,
176}
177
178impl<T> ManuallyHash<T> {
179 /// Wraps an item with a pre-computed hash.
180 ///
181 /// The hash should be computed with `typst_utils::hash128`.
182 #[inline]
183 pub fn new(value: T, hash: u128) -> Self {
184 Self { hash, value }
185 }
186
187 /// Returns the wrapped value.
188 #[inline]
189 pub fn into_inner(self) -> T {
190 self.value
191 }
192}
193
194impl<T: ?Sized> Hash for ManuallyHash<T> {
195 #[inline]
196 fn hash<H: Hasher>(&self, state: &mut H) {
197 state.write_u128(self.hash);
198 }
199}
200
201impl<T: ?Sized> Eq for ManuallyHash<T> {}
202
203impl<T: ?Sized> PartialEq for ManuallyHash<T> {
204 #[inline]
205 fn eq(&self, other: &Self) -> bool {
206 self.hash == other.hash
207 }
208}
209
210impl<T: ?Sized> Deref for ManuallyHash<T> {
211 type Target = T;
212
213 #[inline]
214 fn deref(&self) -> &Self::Target {
215 &self.value
216 }
217}
218
219impl<T: Debug> Debug for ManuallyHash<T> {
220 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
221 self.value.fmt(f)
222 }
223}
224
225/// Storage for lazy hash computation.
226pub struct HashLock(AtomicU128);
227
228impl HashLock {
229 /// Create a new unset hash cell.
230 pub const fn new() -> Self {
231 Self(AtomicU128::new(0))
232 }
233
234 /// Get the hash or compute it if not set yet.
235 #[inline]
236 pub fn get_or_insert_with(&self, f: impl FnOnce() -> u128) -> u128 {
237 let mut hash = self.get();
238 if hash == 0 {
239 hash = f();
240 self.0.store(hash, Ordering::Relaxed);
241 }
242 hash
243 }
244
245 /// Reset the hash to unset.
246 #[inline]
247 pub fn reset(&mut self) {
248 // Because we have a mutable reference, we can skip the atomic.
249 *self.0.get_mut() = 0;
250 }
251
252 /// Get the hash, returns zero if not computed yet.
253 #[inline]
254 fn get(&self) -> u128 {
255 // We only need atomicity and no synchronization of other operations, so
256 // `Relaxed` is fine.
257 self.0.load(Ordering::Relaxed)
258 }
259}
260
261impl Default for HashLock {
262 fn default() -> Self {
263 Self::new()
264 }
265}
266
267impl Clone for HashLock {
268 fn clone(&self) -> Self {
269 Self(AtomicU128::new(self.get()))
270 }
271}