Skip to main content

oxilean_kernel/string_intern/
interner.rs

//! Global interning pool singleton and its public API (`intern` / `resolve` /
//! `try_resolve`), plus the `InternedStr` handle and the `StringInterner` wrapper.
2
3use super::pool::InternPool;
4use std::sync::{Arc, Mutex, OnceLock};
5
6// ---------------------------------------------------------------------------
7// InternedStr — lightweight handle
8// ---------------------------------------------------------------------------
9
/// A lightweight, `Copy` handle to an interned string.
///
/// The wrapped `u32` is a raw pool index. Equality, ordering and hashing are
/// all derived, so they compare that index and never the string content.
///
/// Handles returned by the free function [`intern`] belong to the global pool
/// and are looked up with [`resolve`] / [`try_resolve`]. Handles returned by
/// [`StringInterner::intern`] belong to that interner's pool and should be
/// looked up with [`StringInterner::resolve`] on the same interner.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct InternedStr(u32);

impl InternedStr {
    /// Create an `InternedStr` from a raw pool index.
    ///
    /// The index is stored as-is; nothing checks that any pool has an entry
    /// for it. In normal usage callers should obtain handles via [`intern`]
    /// rather than constructing them manually.
    ///
    /// This is a `const fn`, so well-known handles can be declared as
    /// `const` / `static` items.
    #[inline]
    #[must_use]
    pub const fn from_raw(idx: u32) -> Self {
        Self(idx)
    }

    /// Return the raw pool index this handle wraps.
    #[inline]
    #[must_use]
    pub const fn raw(self) -> u32 {
        self.0
    }
}
33
34impl std::fmt::Display for InternedStr {
35    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
36        // Attempt resolution; fall back to showing the raw index.
37        match try_resolve(*self) {
38            Some(s) => f.write_str(s),
39            None => write!(f, "<InternedStr#{}>", self.0),
40        }
41    }
42}
43
44// ---------------------------------------------------------------------------
45// StringInterner — thin wrapper owning the Arc<Mutex<InternPool>>
46// ---------------------------------------------------------------------------
47
48/// A thread-safe string interning pool.
49///
50/// Wraps an `Arc<Mutex<InternPool>>` so multiple owners can share the same
51/// underlying storage. The global singleton is accessible via [`intern`] and
52/// [`resolve`]; `StringInterner` can also be used as a standalone local
53/// interner in tests or as an injected dependency.
54pub struct StringInterner {
55    pool: Arc<Mutex<InternPool>>,
56}
57
58impl StringInterner {
59    /// Creates a new `StringInterner` backed by a fresh `InternPool`.
60    pub fn new() -> Self {
61        Self {
62            pool: Arc::new(Mutex::new(InternPool::new())),
63        }
64    }
65
66    /// Creates a `StringInterner` sharing the backing pool with `other`.
67    pub fn shared_with(other: &StringInterner) -> Self {
68        Self {
69            pool: Arc::clone(&other.pool),
70        }
71    }
72
73    /// Interns `s` and returns an `InternedStr` handle.
74    ///
75    /// Acquiring the lock returns an error only if the mutex is poisoned (a
76    /// panic occurred while the lock was held). In that case this method
77    /// panics with a descriptive message rather than silently continuing.
78    pub fn intern(&self, s: &str) -> InternedStr {
79        let mut guard = self
80            .pool
81            .lock()
82            .expect("StringInterner pool mutex was poisoned");
83        InternedStr::from_raw(guard.intern_str(s))
84    }
85
86    /// Resolves `handle` to its `&'static str` content.
87    ///
88    /// Returns `None` if the handle was not produced by this interner.
89    pub fn resolve(&self, handle: InternedStr) -> Option<&'static str> {
90        let mut guard = self
91            .pool
92            .lock()
93            .expect("StringInterner pool mutex was poisoned");
94        guard.resolve_str(handle.raw())
95    }
96
97    /// Returns the number of distinct strings stored.
98    pub fn len(&self) -> usize {
99        let guard = self
100            .pool
101            .lock()
102            .expect("StringInterner pool mutex was poisoned");
103        guard.len()
104    }
105
106    /// Returns `true` when no strings have been interned.
107    pub fn is_empty(&self) -> bool {
108        self.len() == 0
109    }
110
111    /// Returns the total byte count of all interned strings.
112    pub fn total_bytes(&self) -> usize {
113        let guard = self
114            .pool
115            .lock()
116            .expect("StringInterner pool mutex was poisoned");
117        guard.total_bytes()
118    }
119
120    /// Clones the underlying `Arc` so the pool can be shared across threads.
121    pub fn arc_clone(&self) -> Arc<Mutex<InternPool>> {
122        Arc::clone(&self.pool)
123    }
124}
125
126impl Default for StringInterner {
127    fn default() -> Self {
128        Self::new()
129    }
130}
131
132// ---------------------------------------------------------------------------
133// Global singleton
134// ---------------------------------------------------------------------------
135
136/// Global `InternPool` guarded by a `Mutex`.
137///
138/// Initialised exactly once on the first call to [`intern`] or [`resolve`].
139static GLOBAL_POOL: OnceLock<Arc<Mutex<InternPool>>> = OnceLock::new();
140
141fn global_pool() -> &'static Arc<Mutex<InternPool>> {
142    GLOBAL_POOL.get_or_init(|| Arc::new(Mutex::new(InternPool::new())))
143}
144
145/// Interns `s` into the global pool and returns an [`InternedStr`] handle.
146///
147/// Identical strings always return the same handle.
148///
149/// # Panics
150///
151/// Panics if the global pool mutex is poisoned (another thread panicked while
152/// holding the lock, which should never happen under normal usage).
153pub fn intern(s: &str) -> InternedStr {
154    let mut guard = global_pool()
155        .lock()
156        .expect("global StringInterner mutex was poisoned");
157    InternedStr::from_raw(guard.intern_str(s))
158}
159
160/// Resolves `handle` to its `&'static str` content using the global pool.
161///
162/// The returned slice has `'static` lifetime because the string is leaked
163/// on first resolution and thereafter cached as a static pointer.
164///
165/// # Panics
166///
167/// Panics if the global pool mutex is poisoned or if `handle` was not
168/// produced by [`intern`] (i.e., the raw index is out of range).
169pub fn resolve(handle: InternedStr) -> &'static str {
170    let mut guard = global_pool()
171        .lock()
172        .expect("global StringInterner mutex was poisoned");
173    guard
174        .resolve_str(handle.raw())
175        .expect("InternedStr handle is out of range in the global pool")
176}
177
178/// Like [`resolve`] but returns `None` instead of panicking when the handle
179/// is out of range.
180pub fn try_resolve(handle: InternedStr) -> Option<&'static str> {
181    let mut guard = global_pool().lock().ok()?;
182    guard.resolve_str(handle.raw())
183}
184
185// ---------------------------------------------------------------------------
186// Tests
187// ---------------------------------------------------------------------------
188
#[cfg(test)]
mod tests {
    use super::*;

    // --- InternedStr ---

    /// Binding a handle a second time must leave the first binding usable.
    #[test]
    fn test_interned_str_is_copy() {
        let original = InternedStr::from_raw(0);
        let duplicate = original; // `Copy`: `original` is still valid below
        assert_eq!(original, duplicate);
    }

    /// `raw` returns exactly what `from_raw` was given.
    #[test]
    fn test_interned_str_raw_round_trip() {
        assert_eq!(InternedStr::from_raw(42).raw(), 42);
    }

    /// Handles order by their raw index.
    #[test]
    fn test_interned_str_ordering() {
        let (low, high) = (InternedStr::from_raw(0), InternedStr::from_raw(1));
        assert!(low < high);
    }

    // --- StringInterner (local, isolated) ---

    /// Interning the same text twice yields equal handles.
    #[test]
    fn test_local_interner_deduplicates() {
        let interner = StringInterner::new();
        let first = interner.intern("apple");
        let second = interner.intern("apple");
        assert_eq!(first, second);
    }

    /// Different texts yield different handles.
    #[test]
    fn test_local_interner_unique_for_distinct_strings() {
        let interner = StringInterner::new();
        let cat = interner.intern("cat");
        let dog = interner.intern("dog");
        assert_ne!(cat, dog);
    }

    /// A handle resolves back to the text it was created from.
    #[test]
    fn test_local_interner_resolve() {
        let interner = StringInterner::new();
        let handle = interner.intern("oxilean");
        assert_eq!(interner.resolve(handle), Some("oxilean"));
    }

    /// `len` counts distinct strings only.
    #[test]
    fn test_local_interner_len() {
        let interner = StringInterner::new();
        // "x" appears twice on purpose; the repeat must not be counted.
        for text in ["x", "y", "x"] {
            interner.intern(text);
        }
        assert_eq!(interner.len(), 2);
    }

    /// `total_bytes` sums the byte lengths of the stored strings.
    #[test]
    fn test_local_interner_total_bytes() {
        let interner = StringInterner::new();
        interner.intern("abc"); // 3 bytes
        interner.intern("de"); // 2 bytes
        assert_eq!(interner.total_bytes(), 5);
    }

    /// A fresh interner reports itself as empty.
    #[test]
    fn test_local_interner_is_empty_initially() {
        assert!(StringInterner::new().is_empty());
    }

    /// Interners built with `shared_with` hand out the same handles.
    #[test]
    fn test_local_interner_shared_with() {
        let owner = StringInterner::new();
        let from_owner = owner.intern("shared");
        let sharer = StringInterner::shared_with(&owner);
        let from_sharer = sharer.intern("shared");
        assert_eq!(
            from_owner, from_sharer,
            "shared interners must return identical handles"
        );
    }

    /// An index that was never issued resolves to `None`.
    #[test]
    fn test_local_interner_resolve_out_of_range() {
        let interner = StringInterner::new();
        let bogus = InternedStr::from_raw(999);
        assert!(interner.resolve(bogus).is_none());
    }

    /// The empty string can be interned and resolved like any other.
    #[test]
    fn test_local_interner_empty_string() {
        let interner = StringInterner::new();
        let handle = interner.intern("");
        assert_eq!(interner.resolve(handle), Some(""));
    }

    /// Multi-byte UTF-8 text survives the round trip.
    #[test]
    fn test_local_interner_unicode() {
        let interner = StringInterner::new();
        let handle = interner.intern("αβγ");
        assert_eq!(interner.resolve(handle), Some("αβγ"));
    }

    /// One hundred distinct strings get one hundred distinct handles, each
    /// resolving to its own text.
    #[test]
    fn test_local_interner_many_strings() {
        let interner = StringInterner::new();
        let inputs: Vec<String> = (0..100).map(|n| format!("str_{}", n)).collect();

        let mut issued: Vec<InternedStr> = Vec::with_capacity(inputs.len());
        for text in &inputs {
            issued.push(interner.intern(text));
        }

        // All unique
        let distinct: std::collections::HashSet<InternedStr> = issued.iter().copied().collect();
        assert_eq!(distinct.len(), 100);

        // All resolve correctly
        for (text, handle) in inputs.iter().zip(&issued) {
            let resolved = interner.resolve(*handle).expect("should resolve");
            assert_eq!(resolved, text.as_str());
        }
    }

    // --- Global API ---

    /// The global `intern` deduplicates as well.
    #[test]
    fn test_global_intern_deduplicates() {
        let first = intern("__global_test_string_A__");
        let second = intern("__global_test_string_A__");
        assert_eq!(first, second);
    }

    /// The global `resolve` returns the interned text.
    #[test]
    fn test_global_resolve_returns_correct_content() {
        let handle = intern("__global_test_string_B__");
        assert_eq!(resolve(handle), "__global_test_string_B__");
    }

    /// `try_resolve` yields `None` for a handle the global pool cannot know.
    #[test]
    fn test_try_resolve_missing_handle_returns_none() {
        // A handle from a fresh local interner could collide with a valid
        // global index, so a very large raw index is used instead.
        let unknown = InternedStr::from_raw(u32::MAX);
        assert!(try_resolve(unknown).is_none());
    }

    /// Eight threads interning the same text all get the same handle.
    #[test]
    fn test_global_intern_concurrent() {
        let workers: Vec<std::thread::JoinHandle<InternedStr>> = (0..8)
            .map(|_| std::thread::spawn(|| intern("concurrent_intern_test")))
            .collect();

        let mut outcomes: Vec<InternedStr> = Vec::with_capacity(workers.len());
        for worker in workers {
            outcomes.push(worker.join().expect("thread should not panic"));
        }

        // All threads must get the same handle.
        let expected = outcomes[0];
        assert!(outcomes.iter().all(|got| *got == expected));
    }

    /// `Display` prints the interned text for a global handle.
    #[test]
    fn test_display_interned_str() {
        let rendered = intern("display_test").to_string();
        assert_eq!(rendered, "display_test");
    }
}