string_hash_interner/
lib.rs

1#![no_std]
2#![warn(unsafe_op_in_unsafe_fn, clippy::redundant_closure_for_method_calls)]
3
4//! Caches strings efficiently, with a minimal memory footprint, and associates them with unique symbols.
5//! These symbols allow constant-time comparisons and look-ups of the underlying interned strings.
6//!
7//! ### Example: Interning & Symbols
8//!
9//! ```
10//! // An interner with default symbol type and hasher
11//! use string_hash_interner::DefaultStringInterner;
12//!
13//! let mut interner = DefaultStringInterner::default();
14//! let sym0 = interner.intern("Elephant");
15//! let sym1 = interner.intern("Tiger");
16//! let sym2 = interner.intern("Horse");
17//! let sym3 = interner.intern("Tiger");
18//! assert_ne!(sym0, sym1);
19//! assert_ne!(sym0, sym2);
20//! assert_ne!(sym1, sym2);
21//! assert_eq!(sym1, sym3); // same!
22//! ```
23//!
24//! ### Example: Creation by `FromIterator`
25//!
26//! ```
27//! # use string_hash_interner::DefaultStringInterner;
28//! let interner = ["Elephant", "Tiger", "Horse", "Tiger"]
29//!     .into_iter()
30//!     .collect::<DefaultStringInterner>();
31//! ```
32//!
33//! ### Example: Look-up
34//!
35//! ```
36//! # use string_hash_interner::DefaultStringInterner;
37//! let mut interner = DefaultStringInterner::default();
38//! let sym = interner.intern("Banana");
39//! assert_eq!(interner.resolve(sym), Some("Banana"));
40//! ```
41//!
42//! ### Example: Iteration
43//!
44//! ```
45//! # use string_hash_interner::{DefaultStringInterner, Symbol};
46//! let interner = DefaultStringInterner::from_iter(["Earth", "Water", "Fire", "Air"]);
47//! for (sym, str) in &interner {
48//!     println!("{} = {}", sym.to_usize(), str);
49//! }
50//! ```
51//!
52//! ### Example: Use different symbols and hashers
53//!
54//! ```
55//! # use string_hash_interner::StringInterner;
56//! use string_hash_interner::symbol::SymbolU16;
57//! use fxhash::FxBuildHasher;
58//! let mut interner = StringInterner::<SymbolU16, FxBuildHasher>::new();
59//! let sym = interner.intern("Fire Fox");
60//! assert_eq!(interner.resolve(sym), Some("Fire Fox"));
61//! assert_eq!(size_of_val(&sym), 2);
62//! ```
63//!
64//! ### Example: Intern different types of strings
65//!
66//! ```
67//! use string_hash_interner::Interner;
68//! use std::ffi::CStr;
69//!
70//! let strings = <Interner<CStr>>::from_iter([c"Earth", c"Water", c"Fire", c"Air"]);
71//!
72//! for (_sym, str) in &strings {
73//!     println!("This is a C string: {:?}", str);
74//! }
75//! ```
76//!
77//! ### Example: Use cached hashes for faster hashmap lookups
78//!
79//! ```
80//! # use string_hash_interner::DefaultStringInterner;
81//! # use string_hash_interner::DefaultHashBuilder;
82//! # use hashbrown::hash_map::RawEntryMut;
83//! // `DefaultHashBuilder` uses random state, so we need to use
84//! // the same instance in order for hashes to match.
85//! let build_hasher = DefaultHashBuilder::default();
86//!
87//! let mut hashmap = hashbrown::HashMap::with_hasher(build_hasher);
88//! hashmap.extend([("Earth", 1), ("Water", 2), ("Fire", 3), ("Air", 4)]);
89//!
90//! let mut interner = DefaultStringInterner::with_hasher(build_hasher);
91//! let sym = interner.intern("Water");
92//!
93//! // Now, if we need to look up the entry in the hashmap and we
94//! // only have the symbol, we don't need to recompute the hash.
95//!
96//! let string = interner.resolve(sym).unwrap();
97//! let hash = interner.get_hash(sym).unwrap();
98//!
99//! let (k, v) = hashmap
100//!     .raw_entry()
101//!     .from_key_hashed_nocheck(hash, string)
102//!     .unwrap();
103//!
104//! assert_eq!(*k, "Water");
105//! assert_eq!(*v, 2)
106//! ```
107//!
108//! ### Example: Hashmap with only interned strings
109//!
110//! ```
111//! # use string_hash_interner::symbol::DefaultSymbol;
112//! # use string_hash_interner::DefaultStringInterner;
113//! # use hashbrown::hash_map::RawEntryMut;
114//! let mut interner = DefaultStringInterner::default();
115//!
116//! let symbols = ["Earth", "Water", "Fire", "Air", "Air", "Water"].map(|s| interner.intern(s));
117//!
118//! // Now, using symbols we can fill the hashmap without ever recomputing hashes.
119//!
120//! // Use `()` as a hasher, as we'll be using cached hashes.
121//! let mut counts = hashbrown::HashMap::<DefaultSymbol, usize, ()>::default();
122//!
123//! for symbol in symbols {
124//!     // SAFETY: we know these symbols are coming from this interner
125//!     let hash = unsafe { interner.get_hash_unchecked(symbol) };
126//!     let hasher = |sym: &DefaultSymbol| unsafe { interner.get_hash_unchecked(*sym) };
127//!
128//!     match counts.raw_entry_mut().from_key_hashed_nocheck(hash, &symbol) {
129//!         RawEntryMut::Occupied(mut entry) => {
130//!             *entry.get_mut() += 1;
131//!         }
132//!         RawEntryMut::Vacant(entry) => {
133//!             entry.insert_with_hasher(hash, symbol, 1, hasher);
134//!         }
135//!     }
136//! }
137//!
138//! for (sym, count) in &counts {
139//!     println!("{:?} appeared {} times", interner.resolve(*sym).unwrap(), count);
140//! }
141//! ```
142//!
143
144extern crate alloc;
145#[cfg(feature = "std")]
146extern crate std;
147
148#[cfg(feature = "serde")]
149mod serde_impl;
150
151mod backend;
152mod intern;
153mod interner;
154pub mod symbol;
155
156#[doc(inline)]
157pub use self::{
158    backend::{Iter, IterWithHashes},
159    intern::Intern,
160    interner::Interner,
161    symbol::{DefaultSymbol, Symbol},
162};
163
164#[doc(inline)]
165pub use hashbrown::DefaultHashBuilder;
166
167/// An [`Interner`] specialized for [`str`], generic over the symbol type `S` (defaults to [`DefaultSymbol`]) and the hash builder `H` (defaults to [`DefaultHashBuilder`]).
168pub type StringInterner<S = DefaultSymbol, H = DefaultHashBuilder> = Interner<str, S, H>;
169
170/// A [`StringInterner`] using the default symbol type ([`DefaultSymbol`]) and the default hash builder ([`DefaultHashBuilder`]).
171pub type DefaultStringInterner = StringInterner;