tor_basic_utils/intern.rs
1//! Declare types for interning various objects.
2
3use std::fmt::Debug;
4use std::hash::Hash;
5use std::sync::{Arc, Mutex, MutexGuard, OnceLock, Weak};
6
7use derive_deftly::define_derive_deftly;
8use derive_more::{Deref, Display, Into};
9
10/// Alias to force use of RandomState, regardless of features enabled in `weak_tables`.
11///
12/// See <https://github.com/tov/weak-table-rs/issues/23> for discussion.
13type WeakHashSet<T> = weak_table::WeakHashSet<T, std::hash::RandomState>;
14
15/// A wrapper around [`Arc`] representing owned [`InternCache`] entries.
16///
17/// The wrapper type serves the purpose of semantic meaning only, implying that
18/// this value is cached in some way or another by this module.
19///
20/// We only conveniently allow obtaining the underlying [`Arc`] with a [`From`] but not the
21/// other way around. This means that interfacing code can make the type to
22/// "forget" it originated from an [`InternCache`] but not the other way around,
23/// i.e. cannot accidentally create fake entries that look like they came from an
24/// [`InternCache`]. If one really has to circumvent this, then the
25/// [`Intern::new_uncached_uninterned()`] method exists.
26///
27/// This ensures that interning is done everywhere that it's expected,
28/// avoiding excess memory usage.
29//
30// Right now, this is the bare minimum of derives; it may need more in the
31// future. If so, just add them.
32#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Display, Into, Deref)]
33pub struct Intern<T: ?Sized>(Arc<T>);
34
35impl<T: ?Sized> Intern<T> {
36 /// Creates an [`Intern`] from an arbitrary [`Arc`].
37 ///
38 /// The use of this is generally discouraged, as it effectively destroys
39 /// the boundary of implying that certain cache entries come from the
40 /// [`InternCache`].
41 pub fn new_uncached_uninterned(value: Arc<T>) -> Intern<T> {
42 Intern(value)
43 }
44}
45
46// Some Arti code is pretty keen on using &Arc<T>.
47impl<'a, T: ?Sized> From<&'a Intern<T>> for &'a Arc<T> {
48 fn from(value: &'a Intern<T>) -> Self {
49 &value.0
50 }
51}
52
53/// Offers access to globally available cache for [`InternCache`].
54///
55/// Typically derived using [`crate::derive_deftly_template_GloballyInternable`].
56pub trait GloballyInternable: Sized {
57 /// Returns a reference to the global cache instance of this type.
58 ///
59 /// Implemented by implementors of this trait.
60 /// Users of the trait should usually use [`GloballyInternable::into_intern()`].
61 fn intern_cache() -> &'static InternCache<Self>;
62
63 /// Places `self` into the global cache.
64 ///
65 /// Please use this instead of `T::intern_cache().intern(value)`.
66 fn into_intern(self) -> Intern<Self>
67 where
68 Self: Eq + Hash + 'static,
69 {
70 Self::intern_cache().intern(self)
71 }
72}
73
74define_derive_deftly! {
75 /// Implement the [`GloballyInternable`] trait for a specific type.
76 ///
77 /// The implementation in itself is trivial and straightforward with this
78 /// macro primarily serving as a convenience method.
79 export GloballyInternable for struct:
80
81 impl $crate::intern::GloballyInternable for $ttype {
82 fn intern_cache() -> &'static $crate::intern::InternCache<Self> {
83 static S: $crate::intern::InternCache::<$ttype> = $crate::intern::InternCache::new();
84 &S
85 }
86 }
87}
88
89/// An InternCache is a lazily-constructed weak set of objects.
90///
91/// Let's break that down! It's "lazily constructed" because it
92/// doesn't actually allocate anything until you use it for the first
93/// time. That allows it to have a const [`new`](InternCache::new)
94/// method, so you can make these static.
95///
96/// It's "weak" because it only holds weak references to its objects;
97/// once every strong reference is gone, the object is unallocated.
98/// Later, the hash entry is (lazily) removed.
99pub struct InternCache<T: ?Sized> {
100 /// Underlying hashset for interned objects
101 //
102 // TODO: If WeakHashSet::new is someday const, we can do away with OnceLock here.
103 cache: OnceLock<Mutex<WeakHashSet<Weak<T>>>>,
104}
105
106impl<T: ?Sized> InternCache<T> {
107 /// Create a new, empty, InternCache.
108 pub const fn new() -> Self {
109 InternCache {
110 cache: OnceLock::new(),
111 }
112 }
113}
114
115impl<T: ?Sized> Default for InternCache<T> {
116 fn default() -> Self {
117 Self::new()
118 }
119}
120
121impl<T: Eq + Hash + ?Sized> InternCache<T> {
122 /// Helper: initialize the cache if needed, then lock it.
123 fn cache(&self) -> MutexGuard<'_, WeakHashSet<Weak<T>>> {
124 let cache = self.cache.get_or_init(|| Mutex::new(WeakHashSet::new()));
125 cache.lock().expect("Poisoned lock lock for cache")
126 }
127}
128
129impl<T: Eq + Hash> InternCache<T> {
130 /// Intern a given value into this cache.
131 ///
132 /// If `value` is already stored in this cache, we return a
133 /// reference to the stored value. Otherwise, we insert `value`
134 /// into the cache, and return that.
135 pub fn intern(&self, value: T) -> Intern<T> {
136 let mut cache = self.cache();
137 if let Some(pp) = cache.get(&value) {
138 Intern(pp)
139 } else {
140 let arc = Arc::new(value);
141 cache.insert(Arc::clone(&arc));
142 Intern(arc)
143 }
144 }
145}
146
147impl<T: Hash + Eq + ?Sized> InternCache<T> {
148 /// Intern an object by reference.
149 ///
150 /// Works with unsized types, but requires that the reference implements
151 /// `Into<Arc<T>>`.
152 pub fn intern_ref<'a, V>(&self, value: &'a V) -> Intern<T>
153 where
154 V: Hash + Eq + ?Sized,
155 &'a V: Into<Arc<T>>,
156 T: std::borrow::Borrow<V>,
157 {
158 let mut cache = self.cache();
159 if let Some(arc) = cache.get(value) {
160 Intern(arc)
161 } else {
162 let arc = value.into();
163 cache.insert(Arc::clone(&arc));
164 Intern(arc)
165 }
166 }
167}
168
169#[cfg(test)]
170mod test {
171 // @@ begin test lint list maintained by maint/add_warning @@
172 #![allow(clippy::bool_assert_comparison)]
173 #![allow(clippy::clone_on_copy)]
174 #![allow(clippy::dbg_macro)]
175 #![allow(clippy::mixed_attributes_style)]
176 #![allow(clippy::print_stderr)]
177 #![allow(clippy::print_stdout)]
178 #![allow(clippy::single_char_pattern)]
179 #![allow(clippy::unwrap_used)]
180 #![allow(clippy::unchecked_time_subtraction)]
181 #![allow(clippy::useless_vec)]
182 #![allow(clippy::needless_pass_by_value)]
183 #![allow(clippy::string_slice)] // See arti#2571
184 //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
185 use super::*;
186
187 #[test]
188 fn interning_by_value() {
189 // "intern" case.
190 let c: InternCache<String> = InternCache::new();
191
192 let s1: Arc<String> = c.intern("abc".to_string()).into();
193 let s2 = c.intern("def".to_string()).into();
194 let s3 = c.intern("abc".to_string()).into();
195 assert!(Arc::ptr_eq(&s1, &s3));
196 assert!(!Arc::ptr_eq(&s1, &s2));
197 assert_eq!(s2.as_ref(), "def");
198 assert_eq!(s3.as_ref(), "abc");
199 }
200
201 #[test]
202 fn interning_by_ref() {
203 // "intern" case.
204 let c: InternCache<str> = InternCache::new();
205
206 let s1: Arc<str> = c.intern_ref("abc").into();
207 let s2 = c.intern_ref("def").into();
208 let s3 = c.intern_ref("abc").into();
209 assert!(Arc::ptr_eq(&s1, &s3));
210 assert!(!Arc::ptr_eq(&s1, &s2));
211 assert_eq!(&*s2, "def");
212 assert_eq!(&*s3, "abc");
213 }
214}