simple_interner/lib.rs
1//! A very simplistic interner based around giving out references rather than
2//! some placeholder symbol. This means that you can mostly transparently add
3//! interning into a system without requiring rewriting all of the code to work
4//! on a new `Symbol` type, asking the interner to concretize the symbols.
5//!
6//! The typical use case for something like this is text processing chunks,
7//! where chunks are very likely to be repeated. For example, when parsing
8//! source code, identifiers are likely to come up multiple times. Rather than
9//! have a `String` allocated for every occurrence of the identifier separately,
10//! interners allow you to store a `Symbol`. This additionally makes comparing
11//! symbols much quicker than comparing the full interned strings.
12//!
13//! This crate exists to give the option of using the simplest interface. For
14//! a more featureful interner, consider one of the crates compared below.
15//!
16//! | crate | global | local | `'static` opt[^1] | `str`-only | symbol size | symbols deref |
17//! | ----------------: | :---------: | :----: | :---------------: | :--------: | :-----------: | :-----------: |
18//! | simple-interner | manual[^2] | yes | no | no | `&T` | yes |
19//! | [intaglio] | no | yes | yes | yes | `u32` | no |
20//! | [internment] | rc[^3] | yes | no | no | `&T` | yes |
21//! | [lasso] | no | yes | yes | yes | `u8`–`usize` | no |
22//! | [string-interner] | no | yes | optionally | yes | `u16`–`usize` | no |
23//! | [string_cache] | static only | rc[^3] | buildscript | yes | `u64` | yes |
24//! | [symbol_table] | yes | yes | no | yes | `u32` | global only |
25//! | [ustr] | yes | no | no | yes | `usize` | yes |
26//!
27//! (PRs to this table are welcome!) <!-- crate must have seen activity in the last year -->
28//!
29//! [^1]: The interner stores `&'static` references without copying the pointee
30//! into the store, e.g. storing `Cow<'static, str>` instead of `Box<str>`.
31//!
32//! [^2]: At the moment, creating the `Interner` inside a `static`, using
33//! `Interner::with_hasher`, requires the `hashbrown` feature to be enabled.
34//!
35//! [^3]: Uses reference counting to collect globally unused symbols.
36//!
37//! [intaglio]: https://lib.rs/crates/intaglio
38//! [lasso]: https://lib.rs/crates/lasso
39//! [internment]: https://lib.rs/crates/internment
40//! [string-interner]: https://lib.rs/crates/string-interner
41//! [string_cache]: https://lib.rs/crates/string_cache
42//! [symbol_table]: https://lib.rs/crates/symbol_table
43//! [ustr]: https://lib.rs/crates/ustr
44
45#![forbid(unconditional_recursion, future_incompatible)]
46#![warn(unsafe_code, bad_style, missing_docs, missing_debug_implementations)]
47
48#[cfg(feature = "parking_lot")]
49mod parking_lot_shim;
50
51mod interner;
52pub use interner::Interner;
53
54mod interned;
55pub use interned::Interned;
56
#[cfg(test)]
mod tests {
    use super::*;

    /// Equal `str` values must come back at the same address, whether they
    /// were handed over as a `Box<str>`, a `String`, or a `&str`, and whether
    /// they are fetched through `intern` or through `get`.
    #[test]
    fn str_usage() {
        let interner = Interner::new();

        // First sighting of each string, supplied as three different types.
        let a1 = interner.intern(Box::<str>::from("a"));
        let b1 = interner.intern(String::from("b"));
        let c1 = interner.intern("c");

        // Intern an equal string a second time, now from a plain `&str`.
        let a2 = interner.intern("a");
        let b2 = interner.intern("b");
        let c2 = interner.intern("c");

        // Look the same strings up through `get`.
        let a3 = interner.get("a").unwrap();
        let b3 = interner.get("b").unwrap();
        let c3 = interner.get("c").unwrap();

        // One row per string: all three handles have to share one address,
        // otherwise the interner handed out duplicates.
        let addresses = [
            [a1.as_ptr(), a2.as_ptr(), a3.as_ptr()],
            [b1.as_ptr(), b2.as_ptr(), b3.as_ptr()],
            [c1.as_ptr(), c2.as_ptr(), c3.as_ptr()],
        ];
        for &[first, second, third] in addresses.iter() {
            assert_eq!(first, second);
            assert_eq!(second, third);
        }
    }

    /// Byte-slice counterpart of `str_usage`: equal `[u8]` values must come
    /// back at the same address regardless of the input type used.
    #[test]
    fn slice_usage() {
        let interner = Interner::new();

        // First sighting of each slice: boxed slice, `Vec`, array by value,
        // and borrowed slice.
        let a1 = interner.intern(Box::<[u8]>::from([0]));
        let b1 = interner.intern(Vec::from([1]));
        let c1 = interner.intern::<[u8; 1]>([2]);
        let d1 = interner.intern::<&[u8]>(&[3][..]);

        // Intern equal contents a second time, now from arrays.
        let a2 = interner.intern([0]);
        let b2 = interner.intern([1]);
        let c2 = interner.intern([2]);
        let d2 = interner.intern([3]);

        // Look the same contents up through `get`, once with a slice and
        // otherwise with array references.
        let a3 = interner.get(&[0][..]).unwrap();
        let b3 = interner.get(&[1]).unwrap();
        let c3 = interner.get(&[2]).unwrap();
        let d3 = interner.get(&[3]).unwrap();

        // One row per slice: all three handles have to share one address.
        let addresses = [
            [a1.as_ptr(), a2.as_ptr(), a3.as_ptr()],
            [b1.as_ptr(), b2.as_ptr(), b3.as_ptr()],
            [c1.as_ptr(), c2.as_ptr(), c3.as_ptr()],
            [d1.as_ptr(), d2.as_ptr(), d3.as_ptr()],
        ];
        for &[first, second, third] in addresses.iter() {
            assert_eq!(first, second);
            assert_eq!(second, third);
        }
    }

    /// An `Interner` placed in a `static` must accept a non-`'static` input
    /// and hand back a `&'static str`.
    #[cfg(feature = "hashbrown")]
    #[test]
    fn static_interner() {
        use hash32::{BuildHasherDefault, FnvHasher};

        static INTERNER: Interner<str, BuildHasherDefault<FnvHasher>> =
            Interner::with_hasher(BuildHasherDefault::new());

        // The input is a freshly allocated `String` local to this test.
        let handle = INTERNER.intern(String::from("a"));

        // The explicit `&'static str` annotation is the real check here: it
        // only compiles if the reference is tied to the static interner.
        let text: &'static str = Interned::get(&handle);

        assert_eq!(text, "a");
    }
}