Skip to main content

mir_codebase/
interner.rs

1use std::sync::{Arc, RwLock};
2
3use dashmap::DashMap;
4
5/// Thread-safe string interner — maps `Arc<str>` ↔ `u32` IDs.
6///
7/// Interning replaces repeated `Arc<str>` pointers (16 bytes each) with 4-byte
8/// `u32` IDs. The same string always maps to the same ID for the lifetime of
9/// this interner.
10///
11/// # Concurrency
12///
13/// - Fast path (already interned): lock-free read from the `DashMap`.
14/// - Slow path (new string): acquires a `RwLock` write guard, re-checks under
15///   the lock to handle races, then assigns an ID atomically.
16/// - `get(id)` acquires a read guard; multiple concurrent readers are allowed.
17#[derive(Debug, Default)]
18pub struct Interner {
19    /// Fast read path: string → ID.  Written only while holding `to_str` write lock.
20    to_id: DashMap<Arc<str>, u32>,
21    /// ID → string table.  The write lock also serialises ID assignment.
22    to_str: RwLock<Vec<Arc<str>>>,
23}
24
25impl Interner {
26    /// Intern `s` and return its ID. Idempotent: the same string always returns
27    /// the same ID.
28    pub fn intern(&self, s: Arc<str>) -> u32 {
29        // Fast path — already interned, no allocation needed.
30        if let Some(id) = self.to_id.get(s.as_ref()) {
31            return *id;
32        }
33        // Slow path — serialise ID assignment under the write lock.
34        let mut vec = self.to_str.write().expect("interner lock poisoned");
35        // Re-check: another thread may have raced between the fast-path read
36        // and our acquisition of the write lock.
37        if let Some(id) = self.to_id.get(s.as_ref()) {
38            return *id;
39        }
40        let id = vec.len() as u32;
41        vec.push(s.clone());
42        // Insert into DashMap while still holding the write lock so that any
43        // thread doing `get(id)` after seeing this entry in `to_id` is
44        // guaranteed to find the string already in `vec`.
45        self.to_id.insert(s, id);
46        id
47    }
48
49    /// Intern from a `&str` without allocating an `Arc` when the string is
50    /// already interned.
51    pub fn intern_str(&self, s: &str) -> u32 {
52        // `Arc<str>: Borrow<str>`, so DashMap lets us look up with `&str` directly.
53        if let Some(id) = self.to_id.get(s) {
54            return *id;
55        }
56        self.intern(Arc::from(s))
57    }
58
59    /// Resolve an ID back to its string. Panics if `id` is out of range.
60    pub fn get(&self, id: u32) -> Arc<str> {
61        self.to_str.read().expect("interner lock poisoned")[id as usize].clone()
62    }
63
64    /// Return the ID for `s` if it has already been interned, or `None`.
65    pub fn get_id(&self, s: &str) -> Option<u32> {
66        self.to_id.get(s).map(|id| *id)
67    }
68}
69
70#[cfg(test)]
71mod tests {
72    use super::*;
73
74    #[test]
75    fn same_string_gives_same_id() {
76        let interner = Interner::default();
77        let a = interner.intern_str("Foo::bar");
78        let b = interner.intern_str("Foo::bar");
79        assert_eq!(a, b);
80    }
81
82    #[test]
83    fn different_strings_give_different_ids() {
84        let interner = Interner::default();
85        let a = interner.intern_str("Foo::bar");
86        let b = interner.intern_str("Foo::baz");
87        assert_ne!(a, b);
88    }
89
90    #[test]
91    fn get_roundtrips_id_to_string() {
92        let interner = Interner::default();
93        let id = interner.intern_str("App\\Service");
94        assert_eq!(interner.get(id).as_ref(), "App\\Service");
95    }
96
97    #[test]
98    fn get_id_returns_none_for_unknown_string() {
99        let interner = Interner::default();
100        assert!(interner.get_id("unknown").is_none());
101    }
102
103    #[test]
104    fn intern_and_intern_str_agree() {
105        let interner = Interner::default();
106        let id_arc = interner.intern(Arc::from("hello"));
107        let id_str = interner.intern_str("hello");
108        assert_eq!(id_arc, id_str);
109    }
110
111    #[test]
112    fn concurrent_intern_is_consistent() {
113        use std::sync::Arc as StdArc;
114        let interner = StdArc::new(Interner::default());
115        let handles: Vec<_> = (0..8)
116            .map(|_| {
117                let i = interner.clone();
118                std::thread::spawn(move || i.intern_str("shared"))
119            })
120            .collect();
121        let ids: Vec<u32> = handles.into_iter().map(|h| h.join().unwrap()).collect();
122        assert!(
123            ids.iter().all(|&id| id == ids[0]),
124            "all threads must see the same ID"
125        );
126    }
127}