refuse_pool/
lib.rs

1//! Garbage-collected "interned" strings.
2//!
3//! Interning is a process of making many equal things share the same underlying
4//! resource. This crate introduces two types that are powered by the
5//! [Refuse][refuse] garbage collector:
6//!
7//! - [`RootString`]: A `Root<String>`-like type that ensures all instances of
8//!       the same exact byte sequence refer to the same allocation.
9//! - [`RefString`]: A `Ref<String>` type that is a reference to a
10//!       [`RootString`].
11//!
12//! ```rust
13//! use refuse::CollectionGuard;
14//! use refuse_pool::{RefString, RootString};
15//!
16//! let a = RootString::from("a");
17//! let a_again = RootString::from(String::from("a"));
18//!
19//! // Both a and a_again point to the same underlying storage.
20//! assert_eq!(a.root_count(), 2);
21//! // Comparing two RootStrings is cheap.
22//! assert_eq!(a, a_again);
23//!
24//! // a_ref can be used to gain a reference to a string,
25//! // but only until the string is unreachable.
26//! let a_ref = a.downgrade();
27//!
28//! let mut guard = CollectionGuard::acquire();
29//! assert_eq!(a_ref.load(&guard), Some("a"));
30//!
31//! drop(a);
32//! drop(a_again);
33//! guard.collect();
34//! assert_eq!(a_ref.load(&guard), None);
35//! ```
36//!
37//! [refuse]: https://github.com/khonsulabs/refuse
38
39use std::borrow::Cow;
40use std::fmt::{Debug, Display};
41use std::hash::{Hash, Hasher};
42use std::ops::Deref;
43use std::sync::OnceLock;
44
45use ahash::AHasher;
46use hashbrown::{hash_table, HashTable};
47use parking_lot::Mutex;
48use refuse::{AnyRef, AnyRoot, CollectionGuard, LocalPool, Ref, Root, SimpleType, Trace};
49
50enum PoolEntry {
51    Rooted(RootString),
52    Weak(RefString, u64),
53}
54
55impl PoolEntry {
56    fn equals(&self, s: &str, guard: &CollectionGuard<'_>) -> bool {
57        match self {
58            PoolEntry::Rooted(r) => r == s,
59            PoolEntry::Weak(r, _) => r.load(guard).map_or(false, |r| r == s),
60        }
61    }
62
63    fn hash(&self) -> u64 {
64        match self {
65            PoolEntry::Rooted(r) => r.0.hash,
66            PoolEntry::Weak(_, hash) => *hash,
67        }
68    }
69}
70
71impl PartialEq<Root<PooledString>> for PoolEntry {
72    fn eq(&self, other: &Root<PooledString>) -> bool {
73        match self {
74            PoolEntry::Rooted(this) => this.0 == *other,
75            PoolEntry::Weak(this, _) => this.0 == *other,
76        }
77    }
78}
79
80#[derive(Default)]
81struct StringPool {
82    allocator: LocalPool,
83    strings: HashTable<PoolEntry>,
84}
85
86impl StringPool {
87    fn global() -> &'static Mutex<StringPool> {
88        static POOL: OnceLock<Mutex<StringPool>> = OnceLock::new();
89        POOL.get_or_init(Mutex::default)
90    }
91
92    fn intern(&mut self, key: Cow<'_, str>, hash: u64, guard: &CollectionGuard) -> &RootString {
93        match self
94            .strings
95            .entry(hash, |a| a.equals(&key, guard), PoolEntry::hash)
96        {
97            hash_table::Entry::Occupied(entry) => {
98                let entry = entry.into_mut();
99                match entry {
100                    PoolEntry::Rooted(root) => root,
101                    PoolEntry::Weak(weak, _) => {
102                        if let Some(upgraded) = weak.as_root(guard) {
103                            *entry = PoolEntry::Rooted(upgraded);
104                        } else {
105                            *entry = PoolEntry::Rooted(RootString(Root::new(
106                                PooledString::new(hash, key),
107                                guard,
108                            )));
109                        }
110                        let PoolEntry::Rooted(entry) = entry else {
111                            unreachable!("just set")
112                        };
113                        entry
114                    }
115                }
116            }
117            hash_table::Entry::Vacant(entry) => {
118                let PoolEntry::Rooted(root) = entry
119                    .insert(PoolEntry::Rooted(RootString(Root::new(
120                        PooledString::new(hash, key),
121                        &self.allocator.enter(),
122                    ))))
123                    .into_mut()
124                else {
125                    unreachable!("just set")
126                };
127                root
128            }
129        }
130    }
131}
132
133fn hash_str(str: &str) -> u64 {
134    let mut hasher = AHasher::default();
135    str.hash(&mut hasher);
136    hasher.finish()
137}
138
139/// A "root" reference to a garbage collected, interned string.
140///
141/// This type is cheap to check equality because it ensures each unique string
142/// is allocated only once, and references are reused automatically.
143#[derive(Clone, Trace)]
144pub struct RootString(Root<PooledString>);
145
146impl RootString {
147    /// Returns a root reference to a garbage collected string that contains
148    /// `s`.
149    ///
150    /// If another [`RootString`] or [`RefString`] exists already with the same
151    /// contents as `s`, it will be returned and `s` will be dropped.
152    pub fn new<'a>(s: impl Into<Cow<'a, str>>, guard: &impl AsRef<CollectionGuard<'a>>) -> Self {
153        let s = s.into();
154        let hash = hash_str(&s);
155        let mut pool = StringPool::global().lock();
156        pool.intern(s, hash, guard.as_ref()).clone()
157    }
158
159    /// Returns a reference to this root string.
160    #[must_use]
161    pub const fn downgrade(&self) -> RefString {
162        RefString(self.0.downgrade())
163    }
164
165    /// Returns a typeless reference to this string.
166    #[must_use]
167    pub const fn downgrade_any(&self) -> AnyRef {
168        self.0.downgrade_any()
169    }
170
171    /// Returns the number of root references to this string, `self` included.
172    #[must_use]
173    pub fn root_count(&self) -> u64 {
174        // We subtract one because the string pool always contains a reference.
175        // Including it in the publicly viewable count would just lead to
176        // confusion, as it did for @ecton when writing the first doctest for
177        // this crate.
178        self.0.root_count() - 1
179    }
180
181    /// Try to convert a typeless root reference to a root string reference.
182    ///
183    /// # Errors
184    ///
185    /// Returns `Err(root)` if `root` does not contain a pooled string.
186    pub fn try_from_any<'a>(
187        root: AnyRoot,
188        guard: &impl AsRef<CollectionGuard<'a>>,
189    ) -> Result<Self, AnyRoot> {
190        Root::try_from_any(root, guard).map(Self)
191    }
192}
193
194impl Drop for RootString {
195    fn drop(&mut self) {
196        if self.0.root_count() == 2 {
197            // This is the last `RootString` aside from the one stored in the
198            // pool, so we should remove the pool entry.
199            let mut pool = StringPool::global().lock();
200            let entry = pool
201                .strings
202                .find_entry(self.0.hash, |s| s == &self.0)
203                .ok()
204                .map(|mut entry| {
205                    let PoolEntry::Rooted(root) = entry.get() else {
206                        return None;
207                    };
208                    let weak = root.downgrade();
209                    let hash = root.0.hash;
210                    Some(std::mem::replace(
211                        entry.get_mut(),
212                        PoolEntry::Weak(weak, hash),
213                    ))
214                });
215            drop(pool);
216            // We delay dropping the removed entry to ensure that we don't
217            // re-enter this block and cause a deadlock.
218            drop(entry);
219        }
220    }
221}
222
223impl Debug for RootString {
224    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
225        Debug::fmt(&self.0.string, f)
226    }
227}
228
229impl Display for RootString {
230    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
231        Display::fmt(&self.0.string, f)
232    }
233}
234
235impl From<&'_ str> for RootString {
236    fn from(value: &'_ str) -> Self {
237        let guard = CollectionGuard::acquire();
238        Self::new(value, &guard)
239    }
240}
241
242impl From<&'_ String> for RootString {
243    fn from(value: &'_ String) -> Self {
244        let guard = CollectionGuard::acquire();
245        Self::new(value, &guard)
246    }
247}
248
249impl From<String> for RootString {
250    fn from(value: String) -> Self {
251        let guard = CollectionGuard::acquire();
252        Self::new(value, &guard)
253    }
254}
255
256impl Hash for RootString {
257    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
258        self.0.downgrade().hash(state);
259    }
260}
261
262impl Eq for RootString {}
263
264impl PartialEq for RootString {
265    fn eq(&self, other: &Self) -> bool {
266        Root::ptr_eq(&self.0, &other.0)
267    }
268}
269
270impl PartialEq<str> for RootString {
271    fn eq(&self, other: &str) -> bool {
272        &*self.0.string == other
273    }
274}
275
276impl PartialEq<&'_ str> for RootString {
277    fn eq(&self, other: &&'_ str) -> bool {
278        self == *other
279    }
280}
281
282impl PartialEq<String> for RootString {
283    fn eq(&self, other: &String) -> bool {
284        self == other.as_str()
285    }
286}
287
288impl PartialEq<&'_ String> for RootString {
289    fn eq(&self, other: &&'_ String) -> bool {
290        self == *other
291    }
292}
293
294impl Ord for RootString {
295    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
296        self.0.string.cmp(&other.0.string)
297    }
298}
299
300impl PartialOrd for RootString {
301    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
302        Some(self.cmp(other))
303    }
304}
305
306impl Deref for RootString {
307    type Target = str;
308
309    fn deref(&self) -> &Self::Target {
310        &self.0
311    }
312}
313
314#[derive(Eq, PartialEq, Debug)]
315struct PooledString {
316    hash: u64,
317    string: Box<str>,
318}
319
320impl PooledString {
321    fn new(hash: u64, s: Cow<'_, str>) -> Self {
322        Self {
323            hash,
324            string: match s {
325                Cow::Borrowed(str) => Box::from(str),
326                Cow::Owned(str) => str.into_boxed_str(),
327            },
328        }
329    }
330}
331
332impl SimpleType for PooledString {}
333
334impl Deref for PooledString {
335    type Target = str;
336
337    fn deref(&self) -> &Self::Target {
338        &self.string
339    }
340}
341
342/// A weak reference to a garbage collected, interned string.
343#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Trace)]
344pub struct RefString(Ref<PooledString>);
345
346impl RefString {
347    /// Returns a reference to a garbage collected string that contains `s`.
348    ///
349    /// If another [`RootString`] or [`RefString`] exists already with the same
350    /// contents as `s`, it will be returned and `s` will be dropped.
351    pub fn new<'a>(s: impl Into<Cow<'a, str>>) -> Self {
352        let s = s.into();
353        let hash = hash_str(&s);
354        let guard = CollectionGuard::acquire();
355        let mut pool = StringPool::global().lock();
356        pool.intern(s, hash, &guard).downgrade()
357    }
358
359    /// Upgrades a typeless reference to a pooled string reference.
360    ///
361    /// Returns `None` if `any` is not a pooled string.
362    pub fn from_any(any: AnyRef) -> Option<Self> {
363        any.downcast_checked().map(Self)
364    }
365
366    /// Loads a reference to the underlying string, if the string hasn't been
367    /// freed.
368    #[must_use]
369    pub fn load<'guard>(&self, guard: &'guard CollectionGuard) -> Option<&'guard str> {
370        self.0.load(guard).map(|pooled| &*pooled.string)
371    }
372
373    /// Loads this string as a root, if the string hasn't been freed.
374    #[must_use]
375    pub fn as_root(&self, guard: &CollectionGuard) -> Option<RootString> {
376        self.0.as_root(guard).map(RootString)
377    }
378
379    /// Returns a typeless reference to this string.
380    #[must_use]
381    pub fn as_any(&self) -> AnyRef {
382        self.0.as_any()
383    }
384}
385
386impl PartialEq<RootString> for RefString {
387    fn eq(&self, other: &RootString) -> bool {
388        self.0 == other.0.downgrade()
389    }
390}
391
392impl PartialEq<RefString> for RootString {
393    fn eq(&self, other: &RefString) -> bool {
394        *other == *self
395    }
396}
397
398impl From<&'_ str> for RefString {
399    fn from(value: &'_ str) -> Self {
400        Self::new(value)
401    }
402}
403
404impl From<&'_ String> for RefString {
405    fn from(value: &'_ String) -> Self {
406        Self::new(value)
407    }
408}
409
410impl From<String> for RefString {
411    fn from(value: String) -> Self {
412        Self::new(value)
413    }
414}
415
416impl Debug for RefString {
417    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
418        if let Some(guard) = CollectionGuard::try_acquire() {
419            if let Some(s) = self.load(&guard) {
420                Debug::fmt(s, f)
421            } else {
422                f.write_str("string freed")
423            }
424        } else {
425            f.write_str("RefString(<gc locked>)")
426        }
427    }
428}
429
430#[test]
431fn intern() {
432    let mut guard = CollectionGuard::acquire();
433    let a = RootString::from("a");
434    let b = RootString::from("a");
435    assert!(Root::ptr_eq(&a.0, &b.0));
436
437    let as_ref = a.downgrade();
438    drop(a);
439    drop(b);
440    assert_eq!(as_ref.load(&guard), Some("a"));
441
442    guard.collect();
443
444    let _a = RootString::from("a");
445    assert!(as_ref.load(&guard).is_none());
446}
447#[test]
448fn reintern_nocollect() {
449    let guard = CollectionGuard::acquire();
450    let a = RootString::from("reintern");
451    let original = a.downgrade();
452    drop(a);
453    let a = RootString::from("reintern");
454    assert_eq!(a.0, original.0);
455    drop(guard);
456}