Skip to main content

bhc_intern/
lib.rs

1//! String interning for efficient symbol handling.
2//!
3//! This crate provides interned strings (symbols) that enable O(1) equality
4//! comparisons and reduced memory usage for repeated strings.
5
6#![warn(missing_docs)]
7
8use parking_lot::RwLock;
9use rustc_hash::FxHashMap;
10use serde::{Deserialize, Serialize};
11use std::fmt;
12use std::sync::LazyLock;
13
14/// The global interner for symbols.
15static INTERNER: LazyLock<Interner> = LazyLock::new(Interner::new);
16
17/// An interned string symbol.
18///
19/// Symbols are cheap to copy and compare (O(1) equality).
20/// The actual string data is stored in a global interner.
21#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
22#[repr(transparent)]
23pub struct Symbol(u32);
24
25impl Symbol {
26    /// Intern a string and return its symbol.
27    #[must_use]
28    pub fn intern(s: &str) -> Self {
29        INTERNER.intern(s)
30    }
31
32    /// Get the string value of this symbol.
33    #[must_use]
34    pub fn as_str(self) -> &'static str {
35        INTERNER.get(self)
36    }
37
38    /// Get the raw index of this symbol.
39    #[must_use]
40    pub const fn as_u32(self) -> u32 {
41        self.0
42    }
43
44    /// Create a symbol from a raw index.
45    ///
46    /// # Safety
47    ///
48    /// The index must have been obtained from a valid symbol.
49    #[must_use]
50    pub const unsafe fn from_raw(idx: u32) -> Self {
51        Self(idx)
52    }
53
54    /// Check if this symbol is empty.
55    #[must_use]
56    pub fn is_empty(self) -> bool {
57        self.as_str().is_empty()
58    }
59
60    /// Get the length of the symbol's string.
61    #[must_use]
62    pub fn len(self) -> usize {
63        self.as_str().len()
64    }
65}
66
67impl fmt::Debug for Symbol {
68    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
69        write!(f, "Symbol({:?})", self.as_str())
70    }
71}
72
73impl fmt::Display for Symbol {
74    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75        write!(f, "{}", self.as_str())
76    }
77}
78
79impl From<&str> for Symbol {
80    fn from(s: &str) -> Self {
81        Self::intern(s)
82    }
83}
84
85impl From<String> for Symbol {
86    fn from(s: String) -> Self {
87        Self::intern(&s)
88    }
89}
90
91impl AsRef<str> for Symbol {
92    fn as_ref(&self) -> &str {
93        self.as_str()
94    }
95}
96
97impl PartialEq<str> for Symbol {
98    fn eq(&self, other: &str) -> bool {
99        self.as_str() == other
100    }
101}
102
103impl PartialEq<&str> for Symbol {
104    fn eq(&self, other: &&str) -> bool {
105        self.as_str() == *other
106    }
107}
108
109impl PartialOrd for Symbol {
110    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
111        Some(self.cmp(other))
112    }
113}
114
115impl Ord for Symbol {
116    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
117        self.as_str().cmp(other.as_str())
118    }
119}
120
121/// The string interner that stores all interned strings.
122struct Interner {
123    map: RwLock<FxHashMap<&'static str, Symbol>>,
124    strings: RwLock<Vec<&'static str>>,
125}
126
127impl Interner {
128    fn new() -> Self {
129        Self {
130            map: RwLock::new(FxHashMap::default()),
131            strings: RwLock::new(Vec::new()),
132        }
133    }
134
135    fn intern(&self, s: &str) -> Symbol {
136        // Fast path: check if already interned
137        {
138            let map = self.map.read();
139            if let Some(&sym) = map.get(s) {
140                return sym;
141            }
142        }
143
144        // Slow path: intern the string
145        let mut map = self.map.write();
146        let mut strings = self.strings.write();
147
148        // Double-check after acquiring write lock
149        if let Some(&sym) = map.get(s) {
150            return sym;
151        }
152
153        // Leak the string to get a static lifetime
154        let interned: &'static str = Box::leak(s.to_string().into_boxed_str());
155        let sym = Symbol(strings.len() as u32);
156
157        strings.push(interned);
158        map.insert(interned, sym);
159
160        sym
161    }
162
163    fn get(&self, sym: Symbol) -> &'static str {
164        let strings = self.strings.read();
165        strings[sym.0 as usize]
166    }
167}
168
169/// Pre-interned symbols for common identifiers.
170pub mod kw {
171    use super::Symbol;
172    use std::sync::LazyLock;
173
174    macro_rules! define_keywords {
175        ($($name:ident => $string:literal),* $(,)?) => {
176            $(
177                #[doc = concat!("The `", $string, "` keyword.")]
178                pub static $name: LazyLock<Symbol> = LazyLock::new(|| Symbol::intern($string));
179            )*
180
181            /// Intern all keywords. Call this at startup for better performance.
182            pub fn intern_all() {
183                $(
184                    let _ = *$name;
185                )*
186            }
187        };
188    }
189
190    define_keywords! {
191        // Haskell keywords
192        CASE => "case",
193        CLASS => "class",
194        DATA => "data",
195        DEFAULT => "default",
196        DERIVING => "deriving",
197        DO => "do",
198        ELSE => "else",
199        FORALL => "forall",
200        FOREIGN => "foreign",
201        IF => "if",
202        IMPORT => "import",
203        IN => "in",
204        INFIX => "infix",
205        INFIXL => "infixl",
206        INFIXR => "infixr",
207        INSTANCE => "instance",
208        LET => "let",
209        MODULE => "module",
210        NEWTYPE => "newtype",
211        OF => "of",
212        QUALIFIED => "qualified",
213        THEN => "then",
214        TYPE => "type",
215        WHERE => "where",
216
217        // BHC/H26 extensions
218        LAZY => "lazy",
219        STRICT => "strict",
220        PROFILE => "profile",
221        EDITION => "edition",
222
223        // Common type names
224        INT => "Int",
225        FLOAT => "Float",
226        DOUBLE => "Double",
227        BOOL => "Bool",
228        CHAR => "Char",
229        STRING => "String",
230        UNIT => "()",
231
232        // Common constructors
233        TRUE => "True",
234        FALSE => "False",
235        JUST => "Just",
236        NOTHING => "Nothing",
237        LEFT => "Left",
238        RIGHT => "Right",
239
240        // Underscore
241        UNDERSCORE => "_",
242    }
243}
244
245/// An identifier with a name symbol.
246#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
247pub struct Ident {
248    /// The symbol for this identifier's name.
249    pub name: Symbol,
250}
251
252impl Ident {
253    /// Create a new identifier.
254    #[must_use]
255    pub fn new(name: Symbol) -> Self {
256        Self { name }
257    }
258
259    /// Create an identifier from a string.
260    #[must_use]
261    pub fn from_str(s: &str) -> Self {
262        Self {
263            name: Symbol::intern(s),
264        }
265    }
266
267    /// Get the string value of this identifier.
268    #[must_use]
269    pub fn as_str(self) -> &'static str {
270        self.name.as_str()
271    }
272}
273
274impl fmt::Debug for Ident {
275    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
276        write!(f, "Ident({:?})", self.name.as_str())
277    }
278}
279
280impl fmt::Display for Ident {
281    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282        write!(f, "{}", self.name.as_str())
283    }
284}
285
286impl From<&str> for Ident {
287    fn from(s: &str) -> Self {
288        Self::from_str(s)
289    }
290}
291
292impl From<Symbol> for Ident {
293    fn from(name: Symbol) -> Self {
294        Self::new(name)
295    }
296}
297
298#[cfg(test)]
299mod tests {
300    use super::*;
301
302    #[test]
303    fn test_symbol_interning() {
304        let s1 = Symbol::intern("hello");
305        let s2 = Symbol::intern("hello");
306        let s3 = Symbol::intern("world");
307
308        assert_eq!(s1, s2);
309        assert_ne!(s1, s3);
310        assert_eq!(s1.as_str(), "hello");
311    }
312
313    #[test]
314    fn test_symbol_comparison() {
315        let s1 = Symbol::intern("apple");
316        let s2 = Symbol::intern("banana");
317
318        assert!(s1 < s2);
319        assert_eq!(s1, "apple");
320    }
321
322    #[test]
323    fn test_ident() {
324        let id = Ident::from_str("foo");
325        assert_eq!(id.as_str(), "foo");
326    }
327}