str_intern/
lib.rs

1#![cfg_attr(docs_rs, feature(doc_auto_cfg))]
2#![warn(missing_docs)]
3#![doc = include_str!("../README.md")]
4
5pub mod sync;
6
7use std::cmp::Ordering;
8use std::collections::HashSet;
9use std::collections::hash_map::RandomState;
10use std::collections::hash_set::{Iter as SetIter, IntoIter as SetIntoIter};
11use std::fmt::{self, Debug, Formatter};
12use std::hash::BuildHasher;
13use std::iter::{Sum, Product, FusedIterator};
14use std::rc::Rc;
15
16/**
17 * The type of strings that have been interned.
18 * 
19 * Currently just a type alias, but I might change that if I find a good reason.
20 */
21pub type InternedStr = Rc<str>;
22
23/**
24 * An interner will keep track of strings and ensure there is only one allocation for any given string contents.
25 * 
26 * For example:
27 * ```rust
28 * # use str_intern::{Interner, InternedStr};
29 * let mut interner = Interner::new();
30 * let foo0 = interner.intern(String::from("foo"));
31 * let foo1 = interner.intern(String::from("foo"));
32 * assert!(InternedStr::ptr_eq(&foo0, &foo1));
33 * ```
34 * Because `foo0` and `foo1` have the same contents, they become a single allocation.
35 * 
36 * Interned strings are immutable, which means that you must construct the finished string before interning it.
37 * 
38 * This is useful if you have many instances of the same strings
39 * (e.g., if 200 different structs contain the string `"foo"`, an interner allows there to be 200 pointers to one allocation, rather than 200 different allocations).
40 * 
41 * This `Interner` is not thread-safe (which is to say, it is implements neither [`Send`] nor [`Sync`]). For a thread-safe variant, see the [`sync`] module.
42 */
43#[repr(transparent)]
44pub struct Interner<S = RandomState> {
45  
46  strings: HashSet<InternedStr, S>
47  
48}
49
50impl Interner {
51  
52  /**
53   * Constructs a new `Interner`.
54   */
55  pub fn new() -> Self {
56    Self::from_set(HashSet::new())
57  }
58  
59}
60
61impl<S> Interner<S> {
62  
63  /**
64   * Constructs a new `Interner` with the given hasher. See [`BuildHasher`] for more information.
65   */
66  pub fn with_hasher(hasher: S) -> Self {
67    Self::from_set(HashSet::with_hasher(hasher))
68  }
69  
70  /**
71   * Construct a new `Interner` with the given set's contents already interned.
72   * The new `Interner` will also use the given set's hasher.
73   */
74  pub fn from_set(strings: HashSet<InternedStr, S>) -> Self {
75    Self { strings }
76  }
77  
78  /**
79   * Consume this `Interner` and return a set containing all of strings that were interned.
80   * The returned set also uses the same hasher.
81   */
82  pub fn into_set(self) -> HashSet<InternedStr, S> {
83    self.strings
84  }
85  
86  /**
87   * Removes all of the interned strings.
88   */
89  pub fn clear(&mut self) {
90    self.strings.clear();
91  }
92  
93  /**
94   * An iterator over all of the currently interned strings.
95   */
96  pub fn iter(&self) -> Iter {
97    Iter::new(self.strings.iter())
98  }
99  
100}
101
102impl<S: BuildHasher> Interner<S> {
103  
104  /**
105   * Saves the given string if it is not already saved, and returns a reference the saved allocation.
106   */
107  pub fn intern(&mut self, string: impl AsRef<str>) -> InternedStr {
108    // Sorrow abounds, for behold: HashSet::get_or_insert_with doesn't exist yet.
109    let string = string.as_ref();
110    match self.strings.get(string) {
111      Some(string) => string.clone(),
112      None => {
113        let string = InternedStr::from(string);
114        self.strings.insert(InternedStr::clone(&string));
115        string
116      }
117    }
118  }
119  
120  /**
121   * Returns whether the given string has already been saved.
122   */
123  pub fn contains(&self, string: impl AsRef<str>) -> bool {
124    self.strings.contains(string.as_ref())
125  }
126  
127  /**
128   * If the given string has already been saved, returns a reference to the saved allocation, or `None` otherwise.
129   */
130  pub fn get(&self, string: impl AsRef<str>) -> Option<InternedStr> {
131    self.strings.get(string.as_ref()).cloned()
132  }
133  
134}
135
136impl<S: Clone> Clone for Interner<S> {
137  
138  fn clone(&self) -> Self {
139    Interner { strings: self.strings.clone() }
140  }
141  
142  fn clone_from(&mut self, source: &Self) {
143    self.strings.clone_from(&source.strings)
144  }
145  
146}
147
148impl<S: BuildHasher> PartialEq for Interner<S> {
149  
150  fn eq(&self, other: &Self) -> bool {
151    self.strings.eq(&other.strings)
152  }
153  
154  fn ne(&self, other: &Self) -> bool {
155    self.strings.ne(&other.strings)
156  }
157  
158}
159
160impl<S: BuildHasher> Eq for Interner<S> {}
161
162impl<S> Debug for Interner<S> {
163  
164  fn fmt(&self, f: &mut Formatter) -> fmt::Result {
165    f.debug_tuple("Interner").field(&self.strings).finish()
166  }
167  
168}
169
170impl<S: Default> Default for Interner<S> {
171  
172  fn default() -> Self {
173    Self { strings: HashSet::default() }
174  }
175  
176}
177
178impl<S> IntoIterator for Interner<S> {
179  
180  type Item = InternedStr;
181  type IntoIter = IntoIter;
182  
183  fn into_iter(self) -> IntoIter {
184    IntoIter::new(self.strings.into_iter())
185  }
186  
187}
188
189impl<'a, S> IntoIterator for &'a Interner<S> {
190  
191  type Item = &'a InternedStr;
192  type IntoIter = Iter<'a>;
193  
194  fn into_iter(self) -> Iter<'a> {
195    Iter::new(self.strings.iter())
196  }
197  
198}
199
200impl<A, S> FromIterator<A> for Interner<S> where HashSet<InternedStr, S>: FromIterator<A> {
201  
202  fn from_iter<T: IntoIterator<Item = A>>(iter: T) -> Self {
203    Self::from_set(HashSet::from_iter(iter))
204  }
205  
206}
207
208/**
209 * An iterator over the strings in an `Interner`.
210 * 
211 * This `struct` is created by the [`iter`](Interner::iter) method on `Interner`.
212 */
213#[repr(transparent)]
214#[derive(Clone, Debug)]
215pub struct Iter<'a> {
216  
217  iter: SetIter<'a, InternedStr>
218  
219}
220
221impl<'a> Iter<'a> {
222  
223  fn new(iter: SetIter<'a, InternedStr>) -> Self {
224    Self { iter }
225  }
226  
227}
228
229impl<'a> Iterator for Iter<'a> {
230  
231  type Item =  &'a InternedStr;
232  
233  fn next(&mut self) -> Option<Self::Item> {
234    self.iter.next()
235  }
236  
237  fn size_hint(&self) -> (usize, Option<usize>) {
238    self.iter.size_hint()
239  }
240  
241  fn count(self) -> usize {
242    self.iter.count()
243  }
244  
245  fn last(self) -> Option<Self::Item> {
246    self.iter.last()
247  }
248  
249  fn nth(&mut self, n: usize) -> Option<Self::Item> {
250    self.iter.nth(n)
251  }
252  
253  fn for_each<F: FnMut(Self::Item)>(self, f: F) {
254    self.iter.for_each(f)
255  }
256  
257  fn collect<B: FromIterator<Self::Item>>(self) -> B {
258    self.iter.collect()
259  }
260  
261  fn partition<B: Default + Extend<Self::Item>, F: FnMut(&Self::Item) -> bool>(self, f: F) -> (B, B) {
262    self.iter.partition(f)
263  }
264  
265  fn fold<B, F: FnMut(B, Self::Item) -> B>(self, init: B, f: F) -> B {
266    self.iter.fold(init, f)
267  }
268  
269  fn reduce<F: FnMut(Self::Item, Self::Item) -> Self::Item>(self, f: F) -> Option<Self::Item> {
270    self.iter.reduce(f)
271  }
272  
273  fn all<F: FnMut(Self::Item) -> bool>(&mut self, f: F) -> bool {
274    self.iter.all(f)
275  }
276  
277  fn any<F: FnMut(Self::Item) -> bool>(&mut self, f: F) -> bool {
278    self.iter.any(f)
279  }
280  
281  fn find<P: FnMut(&Self::Item) -> bool>(&mut self, predicate: P) -> Option<Self::Item> {
282    self.iter.find(predicate)
283  }
284  
285  fn find_map<B, F: FnMut(Self::Item) -> Option<B>>(&mut self, f: F) -> Option<B> {
286    self.iter.find_map(f)
287  }
288  
289  fn position<P: FnMut(Self::Item) -> bool>(&mut self, predicate: P) -> Option<usize> {
290    self.iter.position(predicate)
291  }
292  
293  fn max(self) -> Option<Self::Item> where Self::Item: Ord {
294    self.iter.max()
295  }
296  
297  fn min(self) -> Option<Self::Item> where Self::Item: Ord {
298    self.iter.min()
299  }
300  
301  fn max_by_key<B: Ord, F: FnMut(&Self::Item) -> B>(self, f: F) -> Option<Self::Item> {
302    self.iter.max_by_key(f)
303  }
304  
305  fn max_by<F: FnMut(&Self::Item, &Self::Item) -> Ordering>(self, compare: F) -> Option<Self::Item> {
306    self.iter.max_by(compare)
307  }
308  
309  fn min_by_key<B: Ord, F: FnMut(&Self::Item) -> B>(self, f: F) -> Option<Self::Item> {
310    self.iter.min_by_key(f)
311  }
312  
313  fn min_by<F: FnMut(&Self::Item, &Self::Item) -> Ordering>(self, compare: F) -> Option<Self::Item> {
314    self.iter.min_by(compare)
315  }
316  
317  fn sum<S: Sum<Self::Item>>(self) -> S {
318    self.iter.sum()
319  }
320  
321  fn product<P: Product<Self::Item>>(self) -> P {
322    self.iter.product()
323  }
324  
325  fn cmp<I: IntoIterator<Item = Self::Item>>(self, other: I) -> Ordering where Self::Item: Ord {
326    self.iter.cmp(other)
327  }
328  
329  fn partial_cmp<I: IntoIterator>(self, other: I) -> Option<Ordering> where Self::Item: PartialOrd<I::Item> {
330    self.iter.partial_cmp(other)
331  }
332  
333  fn eq<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialEq<I::Item> {
334    self.iter.eq(other)
335  }
336  
337  fn ne<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialEq<I::Item> {
338    self.iter.ne(other)
339  }
340  
341  fn lt<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
342    self.iter.lt(other)
343  }
344  
345  fn le<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
346    self.iter.le(other)
347  }
348  
349  fn gt<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
350    self.iter.gt(other)
351  }
352  
353  fn ge<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
354    self.iter.ge(other)
355  }
356  
357}
358
359impl<'a> ExactSizeIterator for Iter<'a> {
360  
361  fn len(&self) -> usize {
362    self.iter.len()
363  }
364  
365}
366
367impl<'a> FusedIterator for Iter<'a> {}
368
369/**
370 * An owning iterator over the strings that were in an `Interner`.
371 * 
372 * This `struct` is created by the [`into_iter`](IntoIterator::into_iter) method on [`Interner`]
373 * (provided by the [`IntoIterator`] trait).
374 */
375#[repr(transparent)]
376#[derive(Debug)]
377pub struct IntoIter {
378  
379  iter: SetIntoIter<InternedStr>
380  
381}
382
383impl IntoIter {
384  
385  fn new(iter: SetIntoIter<InternedStr>) -> Self {
386    Self { iter }
387  }
388  
389}
390
391impl Iterator for IntoIter {
392  
393  type Item = InternedStr;
394  
395  fn next(&mut self) -> Option<Self::Item> {
396    self.iter.next()
397  }
398  
399  fn size_hint(&self) -> (usize, Option<usize>) {
400    self.iter.size_hint()
401  }
402  
403  fn count(self) -> usize {
404    self.iter.count()
405  }
406  
407  fn last(self) -> Option<Self::Item> {
408    self.iter.last()
409  }
410  
411  fn nth(&mut self, n: usize) -> Option<Self::Item> {
412    self.iter.nth(n)
413  }
414  
415  fn for_each<F: FnMut(Self::Item)>(self, f: F) {
416    self.iter.for_each(f)
417  }
418  
419  fn collect<B: FromIterator<Self::Item>>(self) -> B {
420    self.iter.collect()
421  }
422  
423  fn partition<B: Default + Extend<Self::Item>, F: FnMut(&Self::Item) -> bool>(self, f: F) -> (B, B) {
424    self.iter.partition(f)
425  }
426  
427  fn fold<B, F: FnMut(B, Self::Item) -> B>(self, init: B, f: F) -> B {
428    self.iter.fold(init, f)
429  }
430  
431  fn reduce<F: FnMut(Self::Item, Self::Item) -> Self::Item>(self, f: F) -> Option<Self::Item> {
432    self.iter.reduce(f)
433  }
434  
435  fn all<F: FnMut(Self::Item) -> bool>(&mut self, f: F) -> bool {
436    self.iter.all(f)
437  }
438  
439  fn any<F: FnMut(Self::Item) -> bool>(&mut self, f: F) -> bool {
440    self.iter.any(f)
441  }
442  
443  fn find<P: FnMut(&Self::Item) -> bool>(&mut self, predicate: P) -> Option<Self::Item> {
444    self.iter.find(predicate)
445  }
446  
447  fn find_map<B, F: FnMut(Self::Item) -> Option<B>>(&mut self, f: F) -> Option<B> {
448    self.iter.find_map(f)
449  }
450  
451  fn position<P: FnMut(Self::Item) -> bool>(&mut self, predicate: P) -> Option<usize> {
452    self.iter.position(predicate)
453  }
454  
455  fn max(self) -> Option<Self::Item> where Self::Item: Ord {
456    self.iter.max()
457  }
458  
459  fn min(self) -> Option<Self::Item> where Self::Item: Ord {
460    self.iter.min()
461  }
462  
463  fn max_by_key<B: Ord, F: FnMut(&Self::Item) -> B>(self, f: F) -> Option<Self::Item> {
464    self.iter.max_by_key(f)
465  }
466  
467  fn max_by<F: FnMut(&Self::Item, &Self::Item) -> Ordering>(self, compare: F) -> Option<Self::Item> {
468    self.iter.max_by(compare)
469  }
470  
471  fn min_by_key<B: Ord, F: FnMut(&Self::Item) -> B>(self, f: F) -> Option<Self::Item> {
472    self.iter.min_by_key(f)
473  }
474  
475  fn min_by<F: FnMut(&Self::Item, &Self::Item) -> Ordering>(self, compare: F) -> Option<Self::Item> {
476    self.iter.min_by(compare)
477  }
478  
479  fn sum<S: Sum<Self::Item>>(self) -> S {
480    self.iter.sum()
481  }
482  
483  fn product<P: Product<Self::Item>>(self) -> P {
484    self.iter.product()
485  }
486  
487  fn cmp<I: IntoIterator<Item = Self::Item>>(self, other: I) -> Ordering where Self::Item: Ord {
488    self.iter.cmp(other)
489  }
490  
491  fn partial_cmp<I: IntoIterator>(self, other: I) -> Option<Ordering> where Self::Item: PartialOrd<I::Item> {
492    self.iter.partial_cmp(other)
493  }
494  
495  fn eq<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialEq<I::Item> {
496    self.iter.eq(other)
497  }
498  
499  fn ne<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialEq<I::Item> {
500    self.iter.ne(other)
501  }
502  
503  fn lt<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
504    self.iter.lt(other)
505  }
506  
507  fn le<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
508    self.iter.le(other)
509  }
510  
511  fn gt<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
512    self.iter.gt(other)
513  }
514  
515  fn ge<I: IntoIterator>(self, other: I) -> bool where Self::Item: PartialOrd<I::Item> {
516    self.iter.ge(other)
517  }
518  
519}
520
521impl ExactSizeIterator for IntoIter {
522  
523  fn len(&self) -> usize {
524    self.iter.len()
525  }
526  
527}
528
529impl FusedIterator for IntoIter {}