simple_interner/
lib.rs

//! A very simplistic interner based around giving out references rather than
//! some placeholder symbol. This means that you can mostly transparently add
//! interning into a system without requiring rewriting all of the code to work
//! on a new `Symbol` type, asking the interner to concretize the symbols.
5//!
6//! The typical use case for something like this is text processing chunks,
7//! where chunks are very likely to be repeated. For example, when parsing
8//! source code, identifiers are likely to come up multiple times. Rather than
9//! have a `String` allocated for every occurrence of the identifier separately,
10//! interners allow you to store `Symbol`. This additionally allows comparing
11//! symbols to be much quicker than comparing the full interned string.
12//!
//! This crate exists to give the option of using the simplest interface. For
//! a more featureful interner, consider using a different crate, such as one
//! of those compared in the table below:
15//!
16//! |             crate |    global   |  local | `'static` opt[^1] | `str`-only |  symbol size  | symbols deref |
17//! | ----------------: | :---------: | :----: | :---------------: | :--------: | :-----------: | :-----------: |
18//! |   simple-interner |  manual[^2] |   yes  |         no        |     no     |     `&T`      |      yes      |
19//! |        [intaglio] |     no      |   yes  |         yes       |     yes    |     `u32`     |      no       |
20//! |      [internment] |    rc[^3]   |   yes  |         no        |     no     |     `&T`      |      yes      |
21//! |           [lasso] |     no      |   yes  |         yes       |     yes    | `u8`–`usize`  |      no       |
22//! | [string-interner] |     no      |   yes  |     optionally    |     yes    | `u16`–`usize` |      no       |
23//! |    [string_cache] | static only | rc[^3] |     buildscript   |     yes    |     `u64`     |      yes      |
24//! |    [symbol_table] |     yes     |   yes  |         no        |     yes    |     `u32`     |  global only  |
25//! |            [ustr] |     yes     |   no   |         no        |     yes    |    `usize`    |      yes      |
26//!
27//! (PRs to this table are welcome!) <!-- crate must have seen activity in the last year -->
28//!
29//! [^1]: The interner stores `&'static` references without copying the pointee
30//!     into the store, e.g. storing `Cow<'static, str>` instead of `Box<str>`.
31//!
32//! [^2]: At the moment, creating the `Interner` inside a `static`, using
33//!     `Interner::with_hasher`, requires the `hashbrown` feature to be enabled.
34//!
35//! [^3]: Uses reference counting to collect globally unused symbols.
36//!
37//! [intaglio]: https://lib.rs/crates/intaglio
38//! [lasso]: https://lib.rs/crates/lasso
39//! [internment]: https://lib.rs/crates/internment
40//! [string-interner]: https://lib.rs/crates/string-interner
41//! [string_cache]: https://lib.rs/crates/string_cache
42//! [symbol_table]: https://lib.rs/crates/symbol_table
43//! [ustr]: https://lib.rs/crates/ustr
44
45#![forbid(unconditional_recursion, future_incompatible)]
46#![warn(unsafe_code, bad_style, missing_docs, missing_debug_implementations)]
47
48#[cfg(feature = "parking_lot")]
49mod parking_lot_shim;
50
51mod interner;
52pub use interner::Interner;
53
54mod interned;
55pub use interned::Interned;
56
#[cfg(test)]
mod tests {
    use super::*;

    /// Equal `str` contents must resolve to one shared pointer, whether the
    /// value is handed over as `Box<str>`, `String`, or `&str`, and whether it
    /// is reached through `intern` or `get`.
    #[test]
    fn str_usage() {
        let interner = Interner::new();

        // First round: each value enters through a different argument type.
        let a1 = interner.intern(Box::<str>::from("a"));
        let b1 = interner.intern(String::from("b"));
        let c1 = interner.intern("c");

        // Second round: intern the same contents again from plain `&str`.
        let a2 = interner.intern("a");
        let b2 = interner.intern("b");
        let c2 = interner.intern("c");

        // Third round: look the values up with `get`; each must be present.
        let a3 = interner.get("a").unwrap();
        let b3 = interner.get("b").unwrap();
        let c3 = interner.get("c").unwrap();

        // Every round has to point at the very same data.
        let pointer_triples = [
            (a1.as_ptr(), a2.as_ptr(), a3.as_ptr()),
            (b1.as_ptr(), b2.as_ptr(), b3.as_ptr()),
            (c1.as_ptr(), c2.as_ptr(), c3.as_ptr()),
        ];
        for (first, again, found) in pointer_triples {
            assert_eq!(first, again);
            assert_eq!(again, found);
        }
    }

    /// Same check as `str_usage`, but for byte slices handed over as
    /// `Box<[u8]>`, `Vec`, a fixed-size array, and a borrowed slice.
    #[test]
    fn slice_usage() {
        let interner = Interner::new();

        // First round: each slice enters through a different argument type.
        let a1 = interner.intern(Box::<[u8]>::from([0]));
        let b1 = interner.intern(Vec::from([1]));
        let c1 = interner.intern::<[u8; 1]>([2]);
        let d1 = interner.intern::<&[u8]>(&[3][..]);

        // Second round: intern the same contents again from arrays.
        let a2 = interner.intern([0]);
        let b2 = interner.intern([1]);
        let c2 = interner.intern([2]);
        let d2 = interner.intern([3]);

        // Third round: look the slices up with `get`; each must be present.
        let a3 = interner.get(&[0][..]).unwrap();
        let b3 = interner.get(&[1]).unwrap();
        let c3 = interner.get(&[2]).unwrap();
        let d3 = interner.get(&[3]).unwrap();

        // Every round has to point at the very same data.
        let pointer_triples = [
            (a1.as_ptr(), a2.as_ptr(), a3.as_ptr()),
            (b1.as_ptr(), b2.as_ptr(), b3.as_ptr()),
            (c1.as_ptr(), c2.as_ptr(), c3.as_ptr()),
            (d1.as_ptr(), d2.as_ptr(), d3.as_ptr()),
        ];
        for (first, again, found) in pointer_triples {
            assert_eq!(first, again);
            assert_eq!(again, found);
        }
    }

    /// An interner stored in a `static` hands back data that can be bound as
    /// `&'static str`, even when the input was a `String` built at runtime.
    #[cfg(feature = "hashbrown")]
    #[test]
    fn static_interner() {
        use hash32::{BuildHasherDefault, FnvHasher};

        static INTERNER: Interner<str, BuildHasherDefault<FnvHasher>> =
            Interner::with_hasher(BuildHasherDefault::new());

        let owned = String::from("a");
        let symbol = INTERNER.intern(owned);

        // The explicit annotation is the point of the test: it only
        // type-checks if the returned reference is `'static`.
        let text: &'static str = Interned::get(&symbol);

        assert_eq!(text, "a");
    }
}