llmcc_core/
interner.rs

1use parking_lot::RwLock;
2use std::sync::Arc;
3
4use string_interner::StringInterner;
5use string_interner::backend::DefaultBackend;
6use string_interner::symbol::DefaultSymbol;
7
/// Interned string symbol backed by a `StringInterner`.
///
/// Alias for `string_interner`'s `DefaultSymbol`. This is the handle returned by
/// the `intern` / `intern_batch` methods in this module and accepted by
/// `resolve_owned` / `with_resolved`.
pub type InternedStr = DefaultSymbol;
10
/// Inner implementation of the string interner.
///
/// Owns the `StringInterner` behind a `parking_lot::RwLock`: lookups of strings
/// that are already interned and symbol resolution use the read lock, while
/// inserting a new string takes the write lock.
#[derive(Debug)]
pub struct InternPoolInner {
    /// The underlying interner; every access goes through this lock.
    interner: RwLock<StringInterner<DefaultBackend>>,
}
16
17impl InternPoolInner {
18    /// Create a new interner.
19    pub fn new() -> Self {
20        Self {
21            interner: RwLock::new(StringInterner::new()),
22        }
23    }
24
25    /// Intern the provided string slice and return its symbol.
26    #[inline]
27    pub fn intern<S>(&self, value: S) -> InternedStr
28    where
29        S: AsRef<str>,
30    {
31        {
32            let s = value.as_ref();
33            // Fast path: check if already interned with read lock
34            if let Some(symbol) = self.interner.read().get(s) {
35                return symbol;
36            }
37            // Slow path: take write lock and intern
38            self.interner.write().get_or_intern(s)
39        }
40    }
41
42    /// Intern multiple strings and return a vector of their symbols.
43    pub fn intern_batch<S>(&self, values: impl IntoIterator<Item = S>) -> Vec<InternedStr>
44    where
45        S: AsRef<str>,
46    {
47        values.into_iter().map(|v| self.intern(v)).collect()
48    }
49
50    /// Resolve an interned symbol back into an owned string.
51    ///
52    /// Clones the underlying string from the interner to avoid lifetime issues.
53    pub fn resolve_owned(&self, symbol: InternedStr) -> Option<String> {
54        self.interner.read().resolve(symbol).map(|s| s.to_owned())
55    }
56
57    /// Resolve an interned symbol and apply a closure while the borrow is active.
58    pub fn with_resolved<R, F>(&self, symbol: InternedStr, f: F) -> Option<R>
59    where
60        F: FnOnce(&str) -> R,
61    {
62        self.interner.read().resolve(symbol).map(f)
63    }
64}
65
66impl Default for InternPoolInner {
67    fn default() -> Self {
68        Self::new()
69    }
70}
71
/// Shared string interner used across the llmcc core.
///
/// Thread-safe wrapper around `InternPoolInner` using `Arc` for shared ownership.
/// Cloning an `InternPool` clones the `Arc`, so every clone reads from and
/// writes to the same underlying interner.
#[derive(Clone, Debug)]
pub struct InternPool {
    /// Reference-counted handle to the shared interner state.
    inner: Arc<InternPoolInner>,
}
79
80impl Default for InternPool {
81    fn default() -> Self {
82        Self::new()
83    }
84}
85
86impl InternPool {
87    /// Create a new shared interner pool.
88    pub fn new() -> Self {
89        Self {
90            inner: Arc::new(InternPoolInner::new()),
91        }
92    }
93
94    /// Intern the provided string slice and return its symbol.
95    pub fn intern<S>(&self, value: S) -> InternedStr
96    where
97        S: AsRef<str>,
98    {
99        self.inner.intern(value)
100    }
101
102    /// Intern multiple strings and return a vector of their symbols.
103    pub fn intern_batch<S>(&self, values: impl IntoIterator<Item = S>) -> Vec<InternedStr>
104    where
105        S: AsRef<str>,
106    {
107        self.inner.intern_batch(values)
108    }
109
110    /// Resolve an interned symbol back into an owned string.
111    ///
112    /// Clones the underlying string from the interner to avoid lifetime issues.
113    pub fn resolve_owned(&self, symbol: InternedStr) -> Option<String> {
114        self.inner.resolve_owned(symbol)
115    }
116
117    /// Resolve an interned symbol and apply a closure while the borrow is active.
118    pub fn with_resolved<R, F>(&self, symbol: InternedStr, f: F) -> Option<R>
119    where
120        F: FnOnce(&str) -> R,
121    {
122        self.inner.with_resolved(symbol, f)
123    }
124
125    /// Get the number of interned strings (for diagnostics).
126    pub fn len(&self) -> usize {
127        self.inner.interner.read().len()
128    }
129
130    /// Check if the pool is empty.
131    pub fn is_empty(&self) -> bool {
132        self.len() == 0
133    }
134}
135
#[cfg(test)]
mod tests {
    //! Unit tests for the interner: single-threaded contract checks first,
    //! then rayon-driven checks that exercise the locking under contention.
    //!
    //! The parallel tests compare resolved strings / symbols against the
    //! expected values rather than only checking `is_some()`, so a race that
    //! returned a symbol for the wrong string would be caught.

    use super::*;
    use rayon::prelude::*;

    /// Interning the same text twice yields the same symbol.
    #[test]
    fn interning_returns_stable_symbol() {
        let pool = InternPool::default();
        let first = pool.intern("foo");
        let second = pool.intern("foo");
        assert_eq!(
            first, second,
            "Interned symbols should be stable for the same string"
        );
    }

    /// `resolve_owned` gives back the original text.
    #[test]
    fn resolve_owned_recovers_string() {
        let pool = InternPool::default();
        let sym = pool.intern("bar");
        let resolved = pool
            .resolve_owned(sym)
            .expect("symbol should resolve to a string");
        assert_eq!(resolved, "bar");
    }

    /// `with_resolved` passes the interned text to the closure.
    #[test]
    fn with_resolved_provides_borrowed_str() {
        let pool = InternPool::default();
        let sym = pool.intern("baz");
        let length = pool
            .with_resolved(sym, |s| s.len())
            .expect("symbol should resolve to a closure result");
        assert_eq!(length, 3);
    }

    /// A batch returns one symbol per input, in input order.
    #[test]
    fn intern_batch_interns_multiple_strings() {
        let pool = InternPool::default();
        let strings = ["apple", "banana", "cherry"];
        let symbols = pool.intern_batch(strings.iter());

        assert_eq!(symbols.len(), strings.len());

        // Verify each symbol resolves to the string at the same position.
        for (sym, expected) in symbols.iter().zip(&strings) {
            let resolved = pool.resolve_owned(*sym).expect("symbol should resolve");
            assert_eq!(resolved, *expected);
        }
    }

    /// Repeated strings inside one batch share a symbol.
    #[test]
    fn intern_batch_with_duplicates() {
        let pool = InternPool::default();
        let strings = vec!["x", "y", "x", "z", "y"];
        let symbols = pool.intern_batch(strings);

        // Duplicates should map to the same symbol
        assert_eq!(
            symbols[0], symbols[2],
            "First and third 'x' should be the same symbol"
        );
        assert_eq!(
            symbols[1], symbols[4],
            "Second and fifth 'y' should be the same symbol"
        );
        assert_ne!(
            symbols[0], symbols[1],
            "Different strings should have different symbols"
        );
        // Only the three distinct strings were stored.
        assert_eq!(pool.len(), 3);
    }

    /// Compile-time check that the pool can cross and be shared between threads.
    #[test]
    fn send_sync_bounds_work() {
        // This test ensures InternPool is Send + Sync
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<InternPool>();
    }

    /// `len` / `is_empty` count distinct strings only.
    #[test]
    fn pool_length_tracking() {
        let pool = InternPool::default();
        assert!(pool.is_empty());
        assert_eq!(pool.len(), 0);

        pool.intern("first");
        assert_eq!(pool.len(), 1);

        pool.intern("second");
        assert_eq!(pool.len(), 2);

        // Interning the same string shouldn't increase count
        pool.intern("first");
        assert_eq!(pool.len(), 2);
    }

    /// Clones share one underlying interner.
    #[test]
    fn pool_cloning() {
        let pool1 = InternPool::default();
        let pool2 = pool1.clone();

        let sym1 = pool1.intern("shared");
        let sym2 = pool2.intern("shared");

        // Both should refer to the same interned string
        assert_eq!(sym1, sym2);
        assert_eq!(pool1.len(), 1);
        assert_eq!(pool2.len(), 1);
    }

    /// Many distinct strings interned concurrently all land in the pool.
    #[test]
    fn parallel_interning_many_strings() {
        let pool = InternPool::default();

        // Intern 1000 strings in parallel
        let symbols: Vec<_> = (0..1000)
            .into_par_iter()
            .map(|i| pool.intern(format!("string_{i}")))
            .collect();

        // Verify all were interned
        assert_eq!(symbols.len(), 1000);
        assert_eq!(pool.len(), 1000);

        // Verify each resolves correctly
        for (i, sym) in symbols.iter().enumerate() {
            let resolved = pool.resolve_owned(*sym).expect("should resolve");
            assert_eq!(resolved, format!("string_{i}"));
        }
    }

    /// Concurrent interning of a small set of strings stores each one once
    /// and every returned symbol maps back to the string it was created from.
    #[test]
    fn parallel_interning_with_duplicates() {
        let pool = InternPool::default();

        // Intern strings with many duplicates in parallel
        let base_strings = ["alpha", "beta", "gamma", "delta", "epsilon"];
        let symbols: Vec<_> = (0..500usize)
            .into_par_iter()
            .map(|i| pool.intern(base_strings[i % base_strings.len()]))
            .collect();

        // Should only have 5 unique strings
        assert_eq!(pool.len(), 5);
        assert_eq!(symbols.len(), 500);

        // The indexed collect keeps input order, so position `i` must resolve
        // to the string that was interned for `i`.
        for (i, sym) in symbols.iter().enumerate() {
            let resolved = pool.resolve_owned(*sym).expect("symbol should resolve");
            assert_eq!(resolved, base_strings[i % base_strings.len()]);
        }
    }

    /// Batches interned from several threads share symbols across batches.
    #[test]
    fn parallel_batch_interning() {
        let pool = InternPool::default();

        let batches: Vec<Vec<&str>> = (0..10)
            .map(|_batch_idx| {
                (0..100)
                    .map(|i| if i % 2 == 0 { "even" } else { "odd" })
                    .collect()
            })
            .collect();

        // Intern batches in parallel
        let all_symbols: Vec<_> = batches
            .into_par_iter()
            .flat_map(|batch| pool.intern_batch(batch))
            .collect();

        // Should have 1000 symbols total but only 2 unique strings
        assert_eq!(all_symbols.len(), 1000);
        assert_eq!(pool.len(), 2);

        // Order-independent content check: half of the symbols are "even",
        // the other half "odd".
        let even = pool.intern("even");
        let odd = pool.intern("odd");
        assert_ne!(even, odd);
        assert_eq!(all_symbols.iter().filter(|sym| **sym == even).count(), 500);
        assert_eq!(all_symbols.iter().filter(|sym| **sym == odd).count(), 500);
    }

    /// Interleaved intern / resolve calls from many threads stay consistent.
    #[test]
    fn parallel_mixed_operations() {
        let pool = InternPool::default();

        // Perform mixed operations in parallel
        (0..100).into_par_iter().for_each(|i| {
            let s = format!("item_{}", i % 10);
            let sym = pool.intern(&s);

            // The symbol must resolve to exactly the string just interned.
            assert_eq!(pool.resolve_owned(sym).as_deref(), Some(s.as_str()));

            // Use with_resolved as well
            assert_eq!(pool.with_resolved(sym, |text| text.len()), Some(s.len()));
        });

        // Should have exactly 10 unique strings
        assert_eq!(pool.len(), 10);
    }

    /// Every thread hammering the same string gets the same symbol.
    #[test]
    fn parallel_interning_high_contention() {
        let pool = InternPool::default();

        // Very high contention: all threads intern the same string repeatedly
        let symbols: Vec<_> = (0..1000)
            .into_par_iter()
            .map(|_| pool.intern("hotspot"))
            .collect();

        // Should still only have 1 string
        assert_eq!(pool.len(), 1);

        // ...and every caller must have been handed that one symbol.
        let expected = pool.intern("hotspot");
        assert!(symbols.iter().all(|sym| *sym == expected));
    }

    /// Clones created on worker threads all write into the original pool.
    #[test]
    fn parallel_clone_and_intern() {
        let pool_original = InternPool::default();

        // Create multiple clones and intern in parallel
        (0..100).into_par_iter().for_each(|i| {
            let pool = pool_original.clone();
            let s = format!("cloned_{}", i % 5);
            let sym = pool.intern(&s);
            assert_eq!(pool.resolve_owned(sym).as_deref(), Some(s.as_str()));
        });

        // All clones share the same inner pool
        assert_eq!(pool_original.len(), 5);
    }

    /// `InternPoolInner` is usable on its own, without the `Arc` wrapper.
    #[test]
    fn intern_pool_inner_direct_usage() {
        let inner = InternPoolInner::new();

        let sym1 = inner.intern("direct");
        let sym2 = inner.intern("direct");
        assert_eq!(sym1, sym2);

        let resolved = inner.resolve_owned(sym1).expect("should resolve");
        assert_eq!(resolved, "direct");
    }
}