interns/lib.rs
1/* Copyright (C) 2025 Saúl Valdelvira
2 *
3 * This program is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, version 3.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program. If not, see <https://www.gnu.org/licenses/>. */
14
15//! An object interner library
16//!
17//! The main element of this crate is the [`Interner`] struct.
18//!
19//! It allows to build a "storage" of any kind of object that
20//! avoids repetition and memory waste.
21//!
22//! # Example (String interner)
23//! ```
24//! use interns::*;
25//!
26//! let mut interner = Interner::<str>::default();
27//!
28//! let a = interner.get_or_intern("hello");
29//! let b = interner.get_or_intern("world");
30//! let c = interner.get_or_intern("hello");
31//!
32//! let a_resolv = interner.resolve(a);
33//! let b_resolv = interner.resolve(b);
34//! let c_resolv = interner.resolve(c);
35//!
36//! assert_eq!(a_resolv, Some("hello"));
37//! assert_eq!(b_resolv, Some("world"));
38//! assert_eq!(c_resolv, Some("hello"));
39//!
40//! assert_eq!(a, c);
41//! assert_ne!(a, b);
42//! assert_ne!(b, c);
43//! ```
44
45use hashbrown::hash_map::RawEntryMut;
46use hashbrown::HashMap;
47use core::borrow::Borrow;
48use std::hash::{BuildHasher, Hash, RandomState};
49
50pub mod backend;
51pub use backend::{Backend, DefaultBackendBuilder, StringBackend};
52
53use crate::backend::Internable;
54
55pub type Symbol<T, B = <T as DefaultBackendBuilder>::Backend> = <B as Backend<T>>::Symbol;
56
57pub type StringInterner = Interner<str,StringBackend>;
58
59/// Interner
60///
61/// This struct is responsible for tracking objects and
62/// interning them.
63///
64/// # Example
65/// ```
66/// use interns::*;
67///
68/// let mut interner = Interner::<str>::default();
69///
70/// let a = interner.get_or_intern("hello");
71/// let b = interner.get_or_intern("world");
72/// let c = interner.get_or_intern("hello");
73///
74/// let a_resolv = interner.resolve(a);
75/// let b_resolv = interner.resolve(b);
76/// let c_resolv = interner.resolve(c);
77///
78/// assert_eq!(a_resolv, Some("hello"));
79/// assert_eq!(b_resolv, Some("world"));
80/// assert_eq!(c_resolv, Some("hello"));
81///
82/// assert_eq!(a, c);
83/// assert_ne!(a, b);
84/// assert_ne!(b, c);
85/// ```
86pub struct Interner<
87 T,
88 B = <T as DefaultBackendBuilder>::Backend,
89 H = RandomState
90>
91where
92 T: Hash + Eq + PartialEq + ?Sized,
93 H: BuildHasher,
94 B: Backend<T>,
95{
96 backend: B,
97 set: HashMap<B::Symbol, (), ()>,
98 hasher: H,
99}
100
101impl<T, B, H> Interner<T, B, H>
102where
103 T: Hash + Eq + PartialEq + ?Sized,
104 H: BuildHasher,
105 B: Backend<T>,
106{
107 /// Create a new Interner with a default [backend](Backend)
108 /// and [hasher](BuildHasher)
109 pub fn new() -> Self
110 where
111 B: Default,
112 H: Default,
113 {
114 Self {
115 backend: B::default(),
116 set: HashMap::default(),
117 hasher: H::default(),
118 }
119 }
120
121 /// Create a new Interner with a default [backend](Backend) and
122 /// the given [hasher](BuildHasher)
123 pub fn with_hasher(hasher: H) -> Self
124 where
125 B: Default,
126 {
127 Self {
128 backend: B::default(),
129 set: HashMap::default(),
130 hasher,
131 }
132 }
133
134 /// Create a new Interner with a default [hasher](BuildHasher) and
135 /// the given [backend](Backend)
136 pub fn with_backend(backend: B) -> Self
137 where
138 H: Default,
139 {
140 Self {
141 backend,
142 set: HashMap::default(),
143 hasher: H::default(),
144 }
145 }
146
147 /// Create a new Interner with the given [backend](Backend)
148 /// and [hasher](BuildHasher)
149 pub const fn with_backend_and_hasher(backend: B, hasher: H) -> Self {
150 Self {
151 backend,
152 hasher,
153 set: HashMap::with_hasher(()),
154 }
155 }
156
157 /// Gets the [Symbol](Backend::Symbol) for `src`, interning it if it doesn't exist.
158 ///
159 /// # Example
160 /// ```
161 /// use interns::Interner;
162 ///
163 /// let mut interner = Interner::<str>::new();
164 /// let name = interner.get_or_intern("Abcd");
165 /// let name_again = interner.get_or_intern("Abcd");
166 /// assert_eq!(name, name_again);
167 /// ```
168 pub fn get_or_intern<Ref>(&mut self, src: &Ref) -> B::Symbol
169 where
170 Ref: Internable<T, B> + ?Sized + Hash + Eq,
171 T: Borrow<Ref>,
172 {
173 /* We are doing shenanigans here.
174 *
175 * We are storing B::Symbol as the key, but we don't hash the
176 * Symbol itself. `src` is a reference to T, so we have no way
177 * of getting a Symbol from `src`.
178 *
179 * When we look for a symbol, we pass the hash or `src`, and also provide
180 * a custom function to check if the keys match (in case of collision).
181 * This function must resolve the Symbol to a value of T, and test it against `src`.
182 *
183 * When we insert a new element, we also need to provide a custom hasher function.
184 * This is because an insertion could cause the table to resize, thus causing all
185 * keys to be rehashed. If we don't provide a custom hasher function, a resize
186 * would reallocate the keys according to the Symbol, not the `T` value they
187 * resolve to, making it imposible to retrive those symbols from a `T` reference
188 * in the future.
189 *
190 * For this trick to work, we need to make sure that we _always_ access the table
191 * with a custom function, that resolves the Symbols before hashing/comparing.
192 */
193
194 let Self {
195 backend,
196 set,
197 hasher,
198 } = self;
199
200 let hash = hasher.hash_one(src);
201
202 let entry = set
203 .raw_entry_mut()
204 .from_hash(hash, |&sym| {
205 /* SAFETY: If the symbol is on the table it must also be on the backend. */
206 src == unsafe { backend.get_unchecked(sym) }.borrow()
207 });
208
209 let k = match entry {
210 RawEntryMut::Occupied(occupied) => occupied.into_key(),
211 RawEntryMut::Vacant(vacant) => {
212 let sym = backend.intern(src);
213 vacant
214 .insert_with_hasher(hash, sym, (), |sym| {
215 /* SAFETY: We've interned the symbol on the call to `Backed::intern` above */
216 let src = unsafe { backend.get_unchecked(*sym) };
217 hasher.hash_one(src)
218 })
219 .0
220 }
221 };
222
223 *k
224 }
225
226 /// Resolves the [symbol](Backend::Symbol) into a reference of T
227 ///
228 /// # Example
229 /// ```
230 /// use interns::Interner;
231 ///
232 /// let mut interner = Interner::<str>::new();
233 /// let name = interner.get_or_intern("Abcd");
234 /// let resolved = interner.resolve(name);
235 /// assert_eq!(resolved, Some("Abcd"));
236 /// ```
237 pub fn resolve(&self, sym: B::Symbol) -> Option<&T> {
238 self.backend.get(sym)
239 }
240}
241
242impl<T,B> Default for Interner<T,B>
243where
244 T: Hash + Eq + PartialEq + ?Sized,
245 B: Backend<T> + Default
246{
247 fn default() -> Self {
248 Self::new()
249 }
250}
251
252#[cfg(test)]
253mod test;