Skip to main content

logicaffeine_base/
intern.rs

1//! String interning for O(1) equality comparison.
2//!
3//! Symbols are lightweight integer handles that point to interned strings.
4//! By storing each unique string exactly once and comparing integer handles,
5//! we achieve O(1) equality checks regardless of string length.
6//!
7//! ## Example
8//!
9//! ```
10//! use logicaffeine_base::{Interner, Symbol};
11//!
12//! let mut interner = Interner::new();
13//!
14//! let s1 = interner.intern("hello");
15//! let s2 = interner.intern("hello");  // Same string
16//! let s3 = interner.intern("world");  // Different string
17//!
18//! // Same strings produce same symbols (O(1) comparison)
19//! assert_eq!(s1, s2);
20//! assert_ne!(s1, s3);
21//!
22//! // Resolve back to strings when needed
23//! assert_eq!(interner.resolve(s1), "hello");
24//! ```
25//!
26//! ## Use Cases
27//!
28//! - **Variable names**: Compared frequently during scope lookup
29//! - **Keywords**: Compared during lexing and parsing
30//! - **Type names**: Compared during type checking
31
32use std::collections::HashMap;
33
34/// A lightweight handle to an interned string.
35///
36/// Symbols are `Copy` and compare in O(1) time regardless of string length.
37/// Use [`Interner::resolve`] to retrieve the original string.
38#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
39pub struct Symbol(u32);
40
41impl Symbol {
42    /// The empty string symbol, always at index 0.
43    pub const EMPTY: Symbol = Symbol(0);
44
45    /// Returns the internal index of this symbol.
46    ///
47    /// Useful for dense storage (e.g., indexing into a `Vec` instead of a `HashMap`).
48    pub fn index(self) -> usize {
49        self.0 as usize
50    }
51}
52
53impl Default for Symbol {
54    fn default() -> Self {
55        Self::EMPTY
56    }
57}
58
59/// A string interner providing O(1) equality comparison via [`Symbol`] handles.
60///
61/// Each unique string is stored exactly once. Interning the same string twice
62/// returns the same symbol, enabling fast equality checks by comparing integers.
63#[derive(Clone)]
64pub struct Interner {
65    map: HashMap<String, Symbol>,
66    vec: Vec<String>,
67}
68
69impl Interner {
70    /// Creates an interner with only the empty string pre-interned.
71    pub fn new() -> Self {
72        let mut interner = Interner {
73            map: HashMap::new(),
74            vec: Vec::new(),
75        };
76        interner.vec.push(String::new());
77        interner
78    }
79
80    /// Interns a string, returning its symbol.
81    ///
82    /// Returns the existing symbol if the string was already interned.
83    pub fn intern(&mut self, s: &str) -> Symbol {
84        if let Some(&sym) = self.map.get(s) {
85            return sym;
86        }
87        let sym = Symbol(self.vec.len() as u32);
88        self.vec.push(s.to_string());
89        self.map.insert(s.to_string(), sym);
90        sym
91    }
92
93    /// Returns the string for the given symbol.
94    ///
95    /// # Panics
96    ///
97    /// Panics if `sym` was not created by this interner.
98    pub fn resolve(&self, sym: Symbol) -> &str {
99        &self.vec[sym.0 as usize]
100    }
101
102    /// Looks up an existing interned string without creating a new entry.
103    ///
104    /// Returns `None` if the string has not been interned.
105    pub fn lookup(&self, s: &str) -> Option<Symbol> {
106        self.map.get(s).copied()
107    }
108
109    /// Returns the number of interned strings, including the empty string.
110    pub fn len(&self) -> usize {
111        self.vec.len()
112    }
113
114    /// Returns `true` if no strings have been interned (only the empty string is present).
115    pub fn is_empty(&self) -> bool {
116        self.vec.len() <= 1
117    }
118}
119
120impl Default for Interner {
121    fn default() -> Self {
122        Self::new()
123    }
124}
125
126/// Convenience trait for comparing a [`Symbol`] to a string literal.
127///
128/// Avoids the need to call `interner.resolve(sym) == "..."` repeatedly.
129pub trait SymbolEq {
130    /// Returns `true` if this symbol resolves to the given string.
131    fn is(&self, interner: &Interner, s: &str) -> bool;
132}
133
134impl SymbolEq for Symbol {
135    #[inline]
136    fn is(&self, interner: &Interner, s: &str) -> bool {
137        interner.resolve(*self) == s
138    }
139}
140
141#[cfg(test)]
142mod tests {
143    use super::*;
144
145    #[test]
146    fn intern_returns_same_symbol_for_same_string() {
147        let mut interner = Interner::new();
148        let s1 = interner.intern("hello");
149        let s2 = interner.intern("hello");
150        assert_eq!(s1, s2);
151    }
152
153    #[test]
154    fn intern_returns_different_symbols_for_different_strings() {
155        let mut interner = Interner::new();
156        let s1 = interner.intern("hello");
157        let s2 = interner.intern("world");
158        assert_ne!(s1, s2);
159    }
160
161    #[test]
162    fn resolve_returns_original_string() {
163        let mut interner = Interner::new();
164        let sym = interner.intern("test");
165        assert_eq!(interner.resolve(sym), "test");
166    }
167
168    #[test]
169    fn empty_symbol_resolves_to_empty_string() {
170        let interner = Interner::new();
171        assert_eq!(interner.resolve(Symbol::EMPTY), "");
172    }
173
174    #[test]
175    fn symbols_are_copy() {
176        let mut interner = Interner::new();
177        let s1 = interner.intern("copy_test");
178        let s2 = s1;
179        assert_eq!(s1, s2);
180        assert_eq!(interner.resolve(s1), interner.resolve(s2));
181    }
182
183    #[test]
184    fn symbol_equality_is_fast() {
185        let mut interner = Interner::new();
186        let s1 = interner.intern("a_very_long_string_that_would_be_slow_to_compare");
187        let s2 = interner.intern("a_very_long_string_that_would_be_slow_to_compare");
188        assert_eq!(s1, s2);
189    }
190
191    #[test]
192    fn len_tracks_interned_count() {
193        let mut interner = Interner::new();
194        assert_eq!(interner.len(), 1);
195        interner.intern("first");
196        assert_eq!(interner.len(), 2);
197        interner.intern("second");
198        assert_eq!(interner.len(), 3);
199        interner.intern("first");
200        assert_eq!(interner.len(), 3);
201    }
202
203    #[test]
204    fn is_empty_after_new() {
205        let interner = Interner::new();
206        assert!(interner.is_empty());
207    }
208
209    #[test]
210    fn not_empty_after_intern() {
211        let mut interner = Interner::new();
212        interner.intern("something");
213        assert!(!interner.is_empty());
214    }
215
216    #[test]
217    fn symbol_index_matches_position() {
218        let mut interner = Interner::new();
219        let s1 = interner.intern("first");
220        let s2 = interner.intern("second");
221        assert_eq!(s1.index(), 1);
222        assert_eq!(s2.index(), 2);
223    }
224
225    #[test]
226    fn symbol_is_matches_interned_string() {
227        let mut interner = Interner::new();
228        let sym = interner.intern("test");
229        assert!(sym.is(&interner, "test"));
230    }
231
232    #[test]
233    fn symbol_is_rejects_different_string() {
234        let mut interner = Interner::new();
235        let sym = interner.intern("hello");
236        assert!(!sym.is(&interner, "world"));
237    }
238
239    #[test]
240    fn symbol_is_case_sensitive() {
241        let mut interner = Interner::new();
242        let sym = interner.intern("Test");
243        assert!(!sym.is(&interner, "test"));
244        assert!(sym.is(&interner, "Test"));
245    }
246
247    #[test]
248    fn symbol_empty_is_empty_string() {
249        let interner = Interner::new();
250        assert!(Symbol::EMPTY.is(&interner, ""));
251    }
252}