interns/
lib.rs

1/*  Copyright (C) 2025 Saúl Valdelvira
2 *
3 *  This program is free software: you can redistribute it and/or modify
4 *  it under the terms of the GNU General Public License as published by
5 *  the Free Software Foundation, version 3.
6 *
7 *  This program is distributed in the hope that it will be useful,
8 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *  GNU General Public License for more details.
11 *
12 *  You should have received a copy of the GNU General Public License
13 *  along with this program.  If not, see <https://www.gnu.org/licenses/>. */
14
15//! An object interner library
16//!
17//! The main element of this crate is the [`Interner`] struct.
18//!
19//! It allows to build a "storage" of any kind of object that
20//! avoids repetition and memory waste.
21//!
22//! # Example (String interner)
23//! ```
24//! use interns::*;
25//!
26//! let mut interner = Interner::<str>::default();
27//!
28//! let a = interner.get_or_intern("hello");
29//! let b = interner.get_or_intern("world");
30//! let c = interner.get_or_intern("hello");
31//!
32//! let a_resolv = interner.resolve(a);
33//! let b_resolv = interner.resolve(b);
34//! let c_resolv = interner.resolve(c);
35//!
36//! assert_eq!(a_resolv, Some("hello"));
37//! assert_eq!(b_resolv, Some("world"));
38//! assert_eq!(c_resolv, Some("hello"));
39//!
40//! assert_eq!(a, c);
41//! assert_ne!(a, b);
42//! assert_ne!(b, c);
43//! ```
44
45use hashbrown::hash_map::RawEntryMut;
46use hashbrown::HashMap;
47use core::borrow::Borrow;
48use std::hash::{BuildHasher, Hash, RandomState};
49
50pub mod backend;
51pub use backend::{Backend, DefaultBackendBuilder, StringBackend};
52
53use crate::backend::Internable;
54
55pub type Symbol<T, B = <T as DefaultBackendBuilder>::Backend> = <B as Backend<T>>::Symbol;
56
57pub type StringInterner = Interner<str,StringBackend>;
58
59/// Interner
60///
61/// This struct is responsible for tracking objects and
62/// interning them.
63///
64/// # Example
65/// ```
66/// use interns::*;
67///
68/// let mut interner = Interner::<str>::default();
69///
70/// let a = interner.get_or_intern("hello");
71/// let b = interner.get_or_intern("world");
72/// let c = interner.get_or_intern("hello");
73///
74/// let a_resolv = interner.resolve(a);
75/// let b_resolv = interner.resolve(b);
76/// let c_resolv = interner.resolve(c);
77///
78/// assert_eq!(a_resolv, Some("hello"));
79/// assert_eq!(b_resolv, Some("world"));
80/// assert_eq!(c_resolv, Some("hello"));
81///
82/// assert_eq!(a, c);
83/// assert_ne!(a, b);
84/// assert_ne!(b, c);
85/// ```
86pub struct Interner<
87    T,
88    B = <T as DefaultBackendBuilder>::Backend,
89    H = RandomState
90>
91where
92    T: Hash + Eq + PartialEq + ?Sized,
93    H: BuildHasher,
94    B: Backend<T>,
95{
96    backend: B,
97    set: HashMap<B::Symbol, (), ()>,
98    hasher: H,
99}
100
101impl<T, B, H> Interner<T, B, H>
102where
103    T: Hash + Eq + PartialEq + ?Sized,
104    H: BuildHasher,
105    B: Backend<T>,
106{
107    /// Create a new Interner with a default [backend](Backend)
108    /// and [hasher](BuildHasher)
109    pub fn new() -> Self
110    where
111        B: Default,
112        H: Default,
113    {
114        Self {
115            backend: B::default(),
116            set: HashMap::default(),
117            hasher: H::default(),
118        }
119    }
120
121    /// Create a new Interner with a default [backend](Backend) and
122    /// the given [hasher](BuildHasher)
123    pub fn with_hasher(hasher: H) -> Self
124    where
125        B: Default,
126    {
127        Self {
128            backend: B::default(),
129            set: HashMap::default(),
130            hasher,
131        }
132    }
133
134    /// Create a new Interner with a default [hasher](BuildHasher) and
135    /// the given [backend](Backend)
136    pub fn with_backend(backend: B) -> Self
137    where
138        H: Default,
139    {
140        Self {
141            backend,
142            set: HashMap::default(),
143            hasher: H::default(),
144        }
145    }
146
147    /// Create a new Interner with the given [backend](Backend)
148    /// and [hasher](BuildHasher)
149    pub const fn with_backend_and_hasher(backend: B, hasher: H) -> Self {
150        Self {
151            backend,
152            hasher,
153            set: HashMap::with_hasher(()),
154        }
155    }
156
157    /// Gets the [Symbol](Backend::Symbol) for `src`, interning it if it doesn't exist.
158    ///
159    /// # Example
160    /// ```
161    /// use interns::Interner;
162    ///
163    /// let mut interner = Interner::<str>::new();
164    /// let name = interner.get_or_intern("Abcd");
165    /// let name_again = interner.get_or_intern("Abcd");
166    /// assert_eq!(name, name_again);
167    /// ```
168    pub fn get_or_intern<Ref>(&mut self, src: &Ref) -> B::Symbol
169    where
170        Ref: Internable<T, B> + ?Sized + Hash + Eq,
171        T: Borrow<Ref>,
172    {
173        /* We are doing shenanigans here.
174         *
175         * We are storing B::Symbol as the key, but we don't hash the
176         * Symbol itself. `src` is a reference to T, so we have no way
177         * of getting a Symbol from `src`.
178         *
179         * When we look for a symbol, we pass the hash or `src`, and also provide
180         * a custom function to check if the keys match (in case of collision).
181         * This function must resolve the Symbol to a value of T, and test it against `src`.
182         *
183         * When we insert a new element, we also need to provide a custom hasher function.
184         * This is because an insertion could cause the table to resize, thus causing all
185         * keys to be rehashed. If we don't provide a custom hasher function, a resize
186         * would reallocate the keys according to the Symbol, not the `T` value they
187         * resolve to, making it imposible to retrive those symbols from a `T` reference
188         * in the future.
189         *
190         * For this trick to work, we need to make sure that we _always_ access the table
191         * with a custom function, that resolves the Symbols before hashing/comparing.
192         */
193
194        let Self {
195            backend,
196            set,
197            hasher,
198        } = self;
199
200        let hash = hasher.hash_one(src);
201
202        let entry = set
203            .raw_entry_mut()
204            .from_hash(hash, |&sym| {
205                /* SAFETY: If the symbol is on the table it must also be on the backend. */
206                src == unsafe { backend.get_unchecked(sym) }.borrow()
207            });
208
209        let k = match entry {
210            RawEntryMut::Occupied(occupied) => occupied.into_key(),
211            RawEntryMut::Vacant(vacant) => {
212                let sym = backend.intern(src);
213                vacant
214                    .insert_with_hasher(hash, sym, (), |sym| {
215                        /* SAFETY: We've interned the symbol on the call to `Backed::intern` above */
216                        let src = unsafe { backend.get_unchecked(*sym) };
217                        hasher.hash_one(src)
218                    })
219                    .0
220            }
221        };
222
223        *k
224    }
225
226    /// Resolves the [symbol](Backend::Symbol) into a reference of T
227    ///
228    /// # Example
229    /// ```
230    /// use interns::Interner;
231    ///
232    /// let mut interner = Interner::<str>::new();
233    /// let name = interner.get_or_intern("Abcd");
234    /// let resolved = interner.resolve(name);
235    /// assert_eq!(resolved, Some("Abcd"));
236    /// ```
237    pub fn resolve(&self, sym: B::Symbol) -> Option<&T> {
238        self.backend.get(sym)
239    }
240}
241
242impl<T,B> Default for Interner<T,B>
243where
244    T: Hash + Eq + PartialEq + ?Sized,
245    B: Backend<T> + Default
246{
247    fn default() -> Self {
248        Self::new()
249    }
250}
251
252#[cfg(test)]
253mod test;