str_intern/
sync.rs

1/*!
2 * A thread-safe variant of the interner.
3 * Also provides a global interner (when the `global` feature is enabled), which comes with a free function `intern`, as well as an `intern` method for a few string types.
4 */
5
6use std::cmp::Ordering;
7use std::collections::HashSet;
8use std::collections::hash_map::RandomState;
9use std::collections::hash_set::{Iter as SetIter, IntoIter as SetIntoIter};
10use std::fmt::{self, Debug, Formatter};
11use std::hash::BuildHasher;
12use std::iter::{Sum, Product, FusedIterator};
13#[cfg(feature = "global")]
14use std::ops::Deref;
15use std::sync::{Arc, OnceLock, Mutex, MutexGuard};
16
17/**
18 * The type of strings that have been interned.
19 * 
20 * Currently just a type alias, but I might change that if I find a good reason.
21 */
22pub type InternedStr = Arc<str>;
23
24/**
25 * An interner will keep track of strings and ensure there is only one allocation for any given string contents.
26 * 
27 * For example:
28 * ```rust
29 * # use str_intern::sync::{Interner, InternedStr};
30 * let interner = Interner::new();
31 * let foo0 = interner.intern(String::from("foo"));
32 * let foo1 = interner.intern(String::from("foo"));
33 * assert!(InternedStr::ptr_eq(&foo0, &foo1));
34 * ```
35 * Because `foo0` and `foo1` have the same contents, they become a single allocation.
36 * 
37 * Interned strings are immutable, which means that you must construct the finished string before interning it.
38 * 
39 * This is useful if you have many instances of the same strings
40 * (e.g., if 200 different structs contain the string `"foo"`, an interner allows there to be 200 pointers to one allocation, rather than 200 different allocations).
41 * 
42 * This `Interner` is thread-safe, meaning that it implements both [`Send`] and [`Sync`] (when S implements [`Send`], which the default does).
43 */
44#[repr(transparent)]
45pub struct Interner<S = RandomState> {
46  
47  strings: Mutex<HashSet<InternedStr, S>>
48  
49}
50
51impl Interner {
52  
53  /**
54   * Constructs a new `Interner`.
55   */
56  pub fn new() -> Self {
57    Self::from_set(HashSet::new())
58  }
59  
60}
61
62impl<S> Interner<S> {
63  
64  const POISON_MESSAGE: &'static str = "Interner mutex was poisoned";
65  
66  /**
67   * Constructs a new `Interner` with the given hasher. See [`BuildHasher`] for more information.
68   */
69  pub fn with_hasher(hasher: S) -> Self {
70    Self::from_set(HashSet::with_hasher(hasher))
71  }
72  
73  /**
74   * Construct a new `Interner` with the given set's contents already interned.
75   * The new `Interner` will also use the given set's hasher.
76   */
77  pub fn from_set(strings: HashSet<InternedStr, S>) -> Self {
78    Self { strings: Mutex::new(strings) }
79  }
80  
81  /**
82   * Consume this `Interner` and return a set containing all of strings that were interned.
83   * The returned set also uses the same hasher.
84   * 
85   * # Panics
86   * This method panics if this `Interner` has been poisoned.
87   */
88  pub fn into_set(self) -> HashSet<InternedStr, S> {
89    self.strings.into_inner().expect(Self::POISON_MESSAGE)
90  }
91  
92  fn strings(&self) -> MutexGuard<HashSet<InternedStr, S>> {
93    self.strings.lock().expect(Self::POISON_MESSAGE)
94  }
95  
96  /**
97   * Locks this `Interner` and removes all of the interned strings, or blocks until it is able to do so.
98   * 
99   * `interner.clear()` is equivalent to `intenerer.lock().clear()`.
100   * (See [`LockedInterner::clear`].)
101   * 
102   * # Panics
103   * This method panics if this `Interner` has been poisoned, and it may panic if this `Interner` is already locked on this thread.
104   */
105  pub fn clear(&self) {
106    self.strings().clear();
107  }
108  
109  /**
110   * Locks this `Interner` on the current thread until the returned [`LockedInterner`] is dropped, or blocks until it is able to do so.
111   * 
112   * While it is locked, the current thread has exclusive access to this `Interner`'s methods
113   * (accessible from the [`LockedInterner`]; any methods used directly on `self` may panic).
114   * This enables some additional functionality, most notably [`LockedInterner::iter`].
115   * 
116   * If a panic occurs on the current thread while this `Interner` is locked, it will become [poisoned](https://doc.rust-lang.org/std/sync/struct.Mutex.html#poisoning).
117   * 
118   * # Panics
119   * This method panics if this `Interner` has been poisoned, and it may panic if this `Interner` is already locked on this thread.
120   */
121  pub fn lock(&self) -> LockedInterner<S> {
122    LockedInterner::new(self.strings())
123  }
124  
125}
126
127impl<S: BuildHasher> Interner<S> {
128  
129  /**
130   * Locks this `Interner`, saves the given string if it is not already saved, and returns a reference to the saved allocation, or blocks until it is able to do so.
131   * 
132   * `interner.intern(string)` is equivalent to `interner.lock().intern(string)`.
133   * (See [`LockedInterner::intern`].)
134   * 
135   * # Panics
136   * This method panics if this `Interner` has been poisoned, and it may panic if this `Interner` is already locked on this thread.
137   */
138  pub fn intern(&self, string: impl AsRef<str>) -> InternedStr where S: BuildHasher {
139    self.lock().intern(string)
140  }
141  
142  /**
143   * Returns whether the given string has already been saved, or blocks until it is able to do so.
144   */
145  pub fn contains(&self, string: impl AsRef<str>) -> bool {
146    self.lock().contains(string)
147  }
148  
149  /**
150   * If the given string has already been saved, returns a reference to the saved allocation, or `None` otherwise, or blocks until it is able to do so.
151   */
152  pub fn get(&self, string: impl AsRef<str>) -> Option<InternedStr> {
153    self.lock().get(string)
154  }
155  
156}
157
158impl<S: Clone> Clone for Interner<S> {
159  
160  fn clone(&self) -> Self {
161    Interner { strings: Mutex::new(self.strings().clone()) }
162  }
163  
164  fn clone_from(&mut self, source: &Self) {
165    self.strings().clone_from(&source.strings())
166  }
167  
168}
169
170impl<S: BuildHasher> PartialEq for Interner<S> {
171  
172  fn eq(&self, other: &Self) -> bool {
173    self.strings().eq(&other.strings())
174  }
175  
176  fn ne(&self, other: &Self) -> bool {
177    self.strings().ne(&other.strings())
178  }
179  
180}
181
182impl<S: BuildHasher> Eq for Interner<S> {}
183
184impl<S> Debug for Interner<S> {
185  
186  fn fmt(&self, f: &mut Formatter) -> fmt::Result {
187    f.debug_tuple("Interner").field(&self.strings()).finish()
188  }
189  
190}
191
192impl<S: Default> Default for Interner<S> {
193  
194  fn default() -> Self {
195    Self { strings: Mutex::default() }
196  }
197  
198}
199
200impl<S> IntoIterator for Interner<S> {
201  
202  type Item = InternedStr;
203  type IntoIter = IntoIter;
204  
205  fn into_iter(self) -> IntoIter {
206    IntoIter::new(self.into_set().into_iter())
207  }
208  
209}
210
211impl<A, S> FromIterator<A> for Interner<S> where HashSet<InternedStr, S>: FromIterator<A> {
212  
213  fn from_iter<T: IntoIterator<Item = A>>(iter: T) -> Self {
214    Self::from_set(HashSet::from_iter(iter))
215  }
216  
217}
218
219/**
220 * A locked [`Interner`]. This `struct` is created by [`Interner::lock`]; see its documentation for more details.
221 */
222#[repr(transparent)]
223pub struct LockedInterner<'a, S = RandomState> {
224  
225  strings: MutexGuard<'a, HashSet<InternedStr, S>>
226  
227}
228
229impl<'a, S> LockedInterner<'a, S> {
230  
231  fn new(strings: MutexGuard<'a, HashSet<InternedStr, S>>) -> Self {
232    Self { strings }
233  }
234  
235  /**
236   * Removes all of the interned strings.
237   */
238  pub fn clear(&mut self) {
239    self.strings.clear();
240  }
241  
242  /**
243   * An iterator over all of the currently interned strings.
244   */
245  pub fn iter(&self) -> Iter {
246    Iter::new(self.strings.iter())
247  }
248  
249}
250
251impl<'a, S: BuildHasher> LockedInterner<'a, S> {
252  
253  /**
254   * Saves the given string if it is not already saved, and returns a reference to the saved allocation.
255   */
256  pub fn intern(&mut self, string: impl AsRef<str>) -> InternedStr {
257    // Sorrow abounds, for behold: HashSet::get_or_insert_with doesn't exist yet.
258    let string = string.as_ref();
259    match self.strings.get(string) {
260      Some(string) => string.clone(),
261      None => {
262        let string = InternedStr::from(string);
263        self.strings.insert(InternedStr::clone(&string));
264        string
265      }
266    }
267  }
268  
269  /**
270   * Returns whether the given string has already been saved.
271   */
272  pub fn contains(&self, string: impl AsRef<str>) -> bool {
273    self.strings.contains(string.as_ref())
274  }
275  
276  /**
277   * If the given string has already been saved, returns a reference to the saved allocation, or `None` otherwise.
278   */
279  pub fn get(&self, string: impl AsRef<str>) -> Option<InternedStr> {
280    self.strings.get(string.as_ref()).cloned()
281  }
282  
283}
284
285impl<'a, S: BuildHasher> PartialEq for LockedInterner<'a, S> {
286  
287  fn eq(&self, other: &Self) -> bool {
288    self.strings.eq(&other.strings)
289  }
290  
291  fn ne(&self, other: &Self) -> bool {
292    self.strings.ne(&other.strings)
293  }
294  
295}
296
297impl<'a, S: BuildHasher> Eq for LockedInterner<'a, S> {}
298
299impl<'a, S> Debug for LockedInterner<'a, S> {
300  
301  fn fmt(&self, f: &mut Formatter) -> fmt::Result {
302    f.debug_tuple("Interner").field(&self.strings).finish()
303  }
304  
305}
306
307impl<'a, 'b, S> IntoIterator for &'b LockedInterner<'a, S> {
308  
309  type Item = &'b InternedStr;
310  type IntoIter = Iter<'b>;
311  
312  fn into_iter(self) -> Iter<'b> {
313    Iter::new(self.strings.iter())
314  }
315  
316}
317
318/**
319 * An iterator over the strings in a `LockedInterner`.
320 * 
321 * This `struct` is created by the [`iter`](LockedInterner::iter) method on `LockedInterner`.
322 */
323#[repr(transparent)]
324#[derive(Clone, Debug)]
325pub struct Iter<'a> {
326  
327  iter: SetIter<'a, InternedStr>
328  
329}
330
331impl<'a> Iter<'a> {
332  
333  fn new(iter: SetIter<'a, InternedStr>) -> Self {
334    Self { iter }
335  }
336  
337}
338
339impl<'a> Iterator for Iter<'a> {
340  
341  type Item =  &'a InternedStr;
342  
343  fn next(&mut self) -> Option<Self::Item> {
344    self.iter.next()
345  }
346  
347  fn size_hint(&self) -> (usize, Option<usize>) {
348    self.iter.size_hint()
349  }
350  
351  fn count(self) -> usize {
352    self.iter.count()
353  }
354  
355  fn last(self) -> Option<Self::Item> {
356    self.iter.last()
357  }
358  
359  fn nth(&mut self, n: usize) -> Option<Self::Item> {
360    self.iter.nth(n)
361  }
362  
363  fn for_each<F: FnMut(Self::Item)>(self, f: F) {
364    self.iter.for_each(f)
365  }
366  
367  fn collect<B: FromIterator<Self::Item>>(self) -> B {
368    self.iter.collect()
369  }
370  
371  fn partition<B: Default + Extend<Self::Item>, F: FnMut(&Self::Item) -> bool>(self, f: F) -> (B, B) {
372    self.iter.partition(f)
373  }
374  
375  fn fold<B, F: FnMut(B, Self::Item) -> B>(self, init: B, f: F) -> B {
376    self.iter.fold(init, f)
377  }
378  
379  fn reduce<F: FnMut(Self::Item, Self::Item) -> Self::Item>(self, f: F) -> Option<Self::Item> {
380    self.iter.reduce(f)
381  }
382  
383  fn all<F: FnMut(Self::Item) -> bool>(&mut self, f: F) -> bool {
384    self.iter.all(f)
385  }
386  
387  fn any<F: FnMut(Self::Item) -> bool>(&mut self, f: F) -> bool {
388    self.iter.any(f)
389  }
390  
391  fn find<P: FnMut(&Self::Item) -> bool>(&mut self, predicate: P) -> Option<Self::Item> {
392    self.iter.find(predicate)
393  }
394  
395  fn find_map<B, F: FnMut(Self::Item) -> Option<B>>(&mut self, f: F) -> Option<B> {
396    self.iter.find_map(f)
397  }
398  
399  fn position<P: FnMut(Self::Item) -> bool>(&mut self, predicate: P) -> Option<usize> {
400    self.iter.position(predicate)
401  }
402  
403  fn max(self) -> Option<Self::Item> where Self::Item: Ord {
404    self.iter.max()
405  }
406  
407  fn min(self) -> Option<Self::Item> where Self::Item: Ord {
408    self.iter.min()
409  }
410  
411  fn max_by_key<B: Ord, F: FnMut(&Self::Item) -> B>(self, f: F) -> Option<Self::Item> {
412    self.iter.max_by_key(f)
413  }
414  
415  fn max_by<F: FnMut(&Self::Item, &Self::Item) -> Ordering>(self, compare: F) -> Option<Self::Item> {
416    self.iter.max_by(compare)
417  }
418  
419  fn min_by_key<B: Ord, F: FnMut(&Self::Item) -> B>(self, f: F) -> Option<Self::Item> {
420    self.iter.min_by_key(f)
421  }
422  
423  fn min_by<F: FnMut(&Self::Item, &Self::Item) -> Ordering>(self, compare: F) -> Option<Self::Item> {
424    self.iter.min_by(compare)
425  }
426  
427  fn sum<S: Sum<Self::Item>>(self) -> S {
428    self.iter.sum()
429  }
430  
431  fn product<P: Product<Self::Item>>(self) -> P {
432    self.iter.product()
433  }
434  
435  fn cmp<I: IntoIterator<Item = Self::Item>>(self, other: I) -> Ordering where Self::Item: Ord {
436    self.iter.cmp(other)
437  }
438  
439  fn partial_cmp<I: IntoIterator>(self, other: I) -> Option<Ordering> where Self::Item: PartialOrd<I::Item> {
440    self.iter.partial_cmp(other)
441  }
442  
443  fn eq<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialEq<I::Item> {
444    self.iter.eq(other)
445  }
446  
447  fn ne<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialEq<I::Item> {
448    self.iter.ne(other)
449  }
450  
451  fn lt<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
452    self.iter.lt(other)
453  }
454  
455  fn le<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
456    self.iter.le(other)
457  }
458  
459  fn gt<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
460    self.iter.gt(other)
461  }
462  
463  fn ge<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
464    self.iter.ge(other)
465  }
466  
467}
468
469impl<'a> ExactSizeIterator for Iter<'a> {
470  
471  fn len(&self) -> usize {
472    self.iter.len()
473  }
474  
475}
476
477impl<'a> FusedIterator for Iter<'a> {}
478
479/**
480 * An owning iterator over the strings that were in an `Interner`.
481 * 
482 * This `struct` is created by the [`into_iter`](IntoIterator::into_iter) method on [`Interner`]
483 * (provided by the [`IntoIterator`] trait).
484 */
485#[repr(transparent)]
486#[derive(Debug)]
487pub struct IntoIter {
488  
489  iter: SetIntoIter<InternedStr>
490  
491}
492
493impl IntoIter {
494  
495  fn new(iter: SetIntoIter<InternedStr>) -> Self {
496    Self { iter }
497  }
498  
499}
500
501impl Iterator for IntoIter {
502  
503  type Item = InternedStr;
504  
505  fn next(&mut self) -> Option<Self::Item> {
506    self.iter.next()
507  }
508  
509  fn size_hint(&self) -> (usize, Option<usize>) {
510    self.iter.size_hint()
511  }
512  
513  fn count(self) -> usize {
514    self.iter.count()
515  }
516  
517  fn last(self) -> Option<Self::Item> {
518    self.iter.last()
519  }
520  
521  fn nth(&mut self, n: usize) -> Option<Self::Item> {
522    self.iter.nth(n)
523  }
524  
525  fn for_each<F: FnMut(Self::Item)>(self, f: F) {
526    self.iter.for_each(f)
527  }
528  
529  fn collect<B: FromIterator<Self::Item>>(self) -> B {
530    self.iter.collect()
531  }
532  
533  fn partition<B: Default + Extend<Self::Item>, F: FnMut(&Self::Item) -> bool>(self, f: F) -> (B, B) {
534    self.iter.partition(f)
535  }
536  
537  fn fold<B, F: FnMut(B, Self::Item) -> B>(self, init: B, f: F) -> B {
538    self.iter.fold(init, f)
539  }
540  
541  fn reduce<F: FnMut(Self::Item, Self::Item) -> Self::Item>(self, f: F) -> Option<Self::Item> {
542    self.iter.reduce(f)
543  }
544  
545  fn all<F: FnMut(Self::Item) -> bool>(&mut self, f: F) -> bool {
546    self.iter.all(f)
547  }
548  
549  fn any<F: FnMut(Self::Item) -> bool>(&mut self, f: F) -> bool {
550    self.iter.any(f)
551  }
552  
553  fn find<P: FnMut(&Self::Item) -> bool>(&mut self, predicate: P) -> Option<Self::Item> {
554    self.iter.find(predicate)
555  }
556  
557  fn find_map<B, F: FnMut(Self::Item) -> Option<B>>(&mut self, f: F) -> Option<B> {
558    self.iter.find_map(f)
559  }
560  
561  fn position<P: FnMut(Self::Item) -> bool>(&mut self, predicate: P) -> Option<usize> {
562    self.iter.position(predicate)
563  }
564  
565  fn max(self) -> Option<Self::Item> where Self::Item: Ord {
566    self.iter.max()
567  }
568  
569  fn min(self) -> Option<Self::Item> where Self::Item: Ord {
570    self.iter.min()
571  }
572  
573  fn max_by_key<B: Ord, F: FnMut(&Self::Item) -> B>(self, f: F) -> Option<Self::Item> {
574    self.iter.max_by_key(f)
575  }
576  
577  fn max_by<F: FnMut(&Self::Item, &Self::Item) -> Ordering>(self, compare: F) -> Option<Self::Item> {
578    self.iter.max_by(compare)
579  }
580  
581  fn min_by_key<B: Ord, F: FnMut(&Self::Item) -> B>(self, f: F) -> Option<Self::Item> {
582    self.iter.min_by_key(f)
583  }
584  
585  fn min_by<F: FnMut(&Self::Item, &Self::Item) -> Ordering>(self, compare: F) -> Option<Self::Item> {
586    self.iter.min_by(compare)
587  }
588  
589  fn sum<S: Sum<Self::Item>>(self) -> S {
590    self.iter.sum()
591  }
592  
593  fn product<P: Product<Self::Item>>(self) -> P {
594    self.iter.product()
595  }
596  
597  fn cmp<I: IntoIterator<Item = Self::Item>>(self, other: I) -> Ordering where Self::Item: Ord {
598    self.iter.cmp(other)
599  }
600  
601  fn partial_cmp<I: IntoIterator>(self, other: I) -> Option<Ordering> where Self::Item: PartialOrd<I::Item> {
602    self.iter.partial_cmp(other)
603  }
604  
605  fn eq<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialEq<I::Item> {
606    self.iter.eq(other)
607  }
608  
609  fn ne<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialEq<I::Item> {
610    self.iter.ne(other)
611  }
612  
613  fn lt<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
614    self.iter.lt(other)
615  }
616  
617  fn le<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
618    self.iter.le(other)
619  }
620  
621  fn gt<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
622    self.iter.gt(other)
623  }
624  
625  fn ge<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
626    self.iter.ge(other)
627  }
628  
629}
630
631impl ExactSizeIterator for IntoIter {
632  
633  fn len(&self) -> usize {
634    self.iter.len()
635  }
636  
637}
638
639impl FusedIterator for IntoIter {}
640
641#[cfg(feature = "global")]
642static GLOBAL: OnceLock<Interner> = OnceLock::new();
643
644/**
645 * A global [`Interner`], just for convenience.
646 * 
647 * `GlobalInterner` functions just like any other `Interner`,
648 * so a string interned in another interner will not be automatically interned into this one.
649 * 
650 * For most purposes, [`intern`] will be sufficient.
651 */
652#[cfg(feature = "global")]
653pub struct GlobalInterner;
654
655#[cfg(feature = "global")]
656impl Deref for GlobalInterner {
657  
658  type Target = Interner;
659  
660  fn deref(&self) -> &Interner {
661    GLOBAL.get_or_init(Interner::new)
662  }
663  
664}
665
666/**
667 * Locks the [`GlobalInterner`], saves the given string if it is not already saved, and returns the saved string, or blocks until it is able to do so.
668 * 
669 * `intern(string)` is equivalent to `GlobalInterner.intern(string)`, which is transitively equivalent to `GlobalInterner.lock().intern(string)`.
670 * (See [`Interner::intern`] and [`LockedInterner::intern`].)
671 * 
672 * # Panics
673 * This method panics if the [`GlobalInterner`] has been poisoned, and it may panic if the [`GlobalInterner`] is already locked on this thread.
674 */
675#[cfg(feature = "global")]
676#[inline]
677pub fn intern(string: impl AsRef<str>) -> InternedStr {
678  GlobalInterner.intern(string)
679}
680
681/**
682 * An "extension trait" to add a the [`intern`](InternExt::intern) method to [`str`],
683 * which effectively adds it to all types that directly or transitively implement [`Deref<Target = str>`](std::ops::Deref),
684 * which includes [`String`], references, and  smart pointers to [`str`] or [`String`].
685 * 
686 * Ideally, I would like to ban [`Rc`](std::rc::Rc), but that would require auto traits or negative `impl`s or something.
687 * My reasoning for this is that I suspect it will be a bit of a footgun,
688 * or at least an unintuitive behavior if [`Rc`](std::rc::Rc) becomes an [`Arc`] when it gets interned.
689 */
690#[cfg(feature = "global")]
691pub trait InternExt: AsRef<str> {
692  
693  /**
694   * Equivalent to `intern(self)`.
695   * 
696   * See [`intern`].
697   */
698  #[inline]
699  fn intern(&self) -> InternedStr {
700    intern(self)
701  }
702  
703}
704
705#[cfg(feature = "global")]
706impl InternExt for str {}