Skip to main content

syster/base/
intern.rs

1//! String interning for identifiers and paths.
2
3use parking_lot::RwLock;
4use rustc_hash::FxHashMap;
5use smol_str::SmolStr;
6use std::fmt;
7
8/// An interned identifier name.
9///
10/// `Name` is a lightweight handle (just a u32) that represents an identifier
11/// string. The actual string is stored in an [`Interner`].
12///
13/// Benefits:
14/// - O(1) equality comparison
15/// - 4 bytes storage vs variable-length string
16/// - Cheap to copy and hash
17#[derive(Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
18pub struct Name(u32);
19
20impl Name {
21    /// Create a Name from a raw index (used internally).
22    #[inline]
23    pub(crate) const fn from_raw(index: u32) -> Self {
24        Self(index)
25    }
26
27    /// Get the raw index.
28    #[inline]
29    pub const fn index(self) -> u32 {
30        self.0
31    }
32}
33
34impl fmt::Debug for Name {
35    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
36        write!(f, "Name({})", self.0)
37    }
38}
39
40/// String interner for deduplicating identifier strings.
41///
42/// Thread-safe via internal locking.
43#[derive(Default)]
44pub struct Interner {
45    inner: RwLock<InternerInner>,
46}
47
48#[derive(Default)]
49struct InternerInner {
50    /// Map from string to index
51    map: FxHashMap<SmolStr, u32>,
52    /// Storage of all interned strings
53    strings: Vec<SmolStr>,
54}
55
56impl Interner {
57    /// Create a new empty interner.
58    pub fn new() -> Self {
59        Self::default()
60    }
61
62    /// Intern a string, returning a `Name` handle.
63    ///
64    /// If the string has been interned before, returns the existing `Name`.
65    pub fn intern(&self, s: &str) -> Name {
66        // Fast path: check if already interned (read lock)
67        {
68            let inner = self.inner.read();
69            if let Some(&index) = inner.map.get(s) {
70                return Name::from_raw(index);
71            }
72        }
73
74        // Slow path: need to insert (write lock)
75        let mut inner = self.inner.write();
76
77        // Double-check after acquiring write lock
78        if let Some(&index) = inner.map.get(s) {
79            return Name::from_raw(index);
80        }
81
82        let smol = SmolStr::new(s);
83        let index = inner.strings.len() as u32;
84        inner.strings.push(smol.clone());
85        inner.map.insert(smol, index);
86
87        Name::from_raw(index)
88    }
89
90    /// Look up the string for a `Name`.
91    ///
92    /// Returns `None` if the `Name` was created by a different interner.
93    pub fn lookup(&self, name: Name) -> Option<SmolStr> {
94        let inner = self.inner.read();
95        inner.strings.get(name.0 as usize).cloned()
96    }
97
98    /// Look up the string for a `Name`, returning a reference.
99    ///
100    /// # Panics
101    /// Panics if the `Name` was not created by this interner.
102    pub fn get(&self, name: Name) -> SmolStr {
103        self.lookup(name).expect("Name not found in interner")
104    }
105
106    /// Get the number of interned strings.
107    pub fn len(&self) -> usize {
108        self.inner.read().strings.len()
109    }
110
111    /// Check if the interner is empty.
112    pub fn is_empty(&self) -> bool {
113        self.len() == 0
114    }
115}
116
117impl fmt::Debug for Interner {
118    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
119        let inner = self.inner.read();
120        f.debug_struct("Interner")
121            .field("count", &inner.strings.len())
122            .finish()
123    }
124}
125
126#[cfg(test)]
127mod tests {
128    use super::*;
129
130    #[test]
131    fn test_intern_same_string() {
132        let interner = Interner::new();
133
134        let a = interner.intern("hello");
135        let b = interner.intern("hello");
136
137        assert_eq!(a, b);
138        assert_eq!(interner.len(), 1);
139    }
140
141    #[test]
142    fn test_intern_different_strings() {
143        let interner = Interner::new();
144
145        let a = interner.intern("hello");
146        let b = interner.intern("world");
147
148        assert_ne!(a, b);
149        assert_eq!(interner.len(), 2);
150    }
151
152    #[test]
153    fn test_lookup() {
154        let interner = Interner::new();
155
156        let name = interner.intern("test");
157        let s = interner.get(name);
158
159        assert_eq!(s.as_str(), "test");
160    }
161
162    #[test]
163    fn test_name_size() {
164        assert_eq!(std::mem::size_of::<Name>(), 4);
165    }
166}