lean_agentic/
symbol.rs

1//! Symbol interning for efficient name representation
2//!
3//! Names are interned in a global table to avoid duplicating strings
4//! and enable fast equality comparison via integer IDs.
5
use std::collections::HashMap;
use std::convert::TryFrom;
use std::hash::BuildHasherDefault;
use std::hash::DefaultHasher;
use std::sync::{Arc, RwLock};
10
/// Hash map alias used for the symbol table's string-to-ID index.
///
/// NOTE(review): despite the `Fx` prefix, this alias is built on the standard
/// library's `DefaultHasher` (through `BuildHasherDefault`), not on an Fx hash
/// implementation — confirm whether the name or the hasher is the intended one.
type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<DefaultHasher>>;
12
/// Compact handle identifying one interned name.
///
/// The handle wraps a single `u32`, so copying, comparing, ordering and
/// hashing a `SymbolId` work on that integer rather than on string contents.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct SymbolId(u32);

impl SymbolId {
    /// Wraps a raw `u32` as a `SymbolId`.
    ///
    /// Visible only inside this crate; no validation is performed on `id`.
    pub(crate) fn new(id: u32) -> Self {
        SymbolId(id)
    }

    /// Returns the `u32` wrapped by this ID.
    pub fn raw(self) -> u32 {
        let SymbolId(value) = self;
        value
    }
}
28
29/// Represents an interned symbol (name)
30#[derive(Debug, Clone, PartialEq, Eq)]
31pub struct Symbol {
32    id: SymbolId,
33    name: Arc<str>,
34}
35
36impl Symbol {
37    /// Get the symbol's ID
38    pub fn id(&self) -> SymbolId {
39        self.id
40    }
41
42    /// Get the symbol's string representation
43    pub fn as_str(&self) -> &str {
44        &self.name
45    }
46}
47
48/// Global symbol table for interning strings
49pub struct SymbolTable {
50    strings: RwLock<Vec<Arc<str>>>,
51    lookup: RwLock<FxHashMap<Arc<str>, SymbolId>>,
52}
53
54impl SymbolTable {
55    /// Create a new symbol table
56    pub fn new() -> Self {
57        Self {
58            strings: RwLock::new(Vec::new()),
59            lookup: RwLock::new(FxHashMap::default()),
60        }
61    }
62
63    /// Intern a string and return its symbol ID
64    pub fn intern(&self, s: &str) -> SymbolId {
65        // Fast path: check if already interned (read lock)
66        {
67            let lookup = self.lookup.read().unwrap();
68            if let Some(&id) = lookup.get(s) {
69                return id;
70            }
71        }
72
73        // Slow path: need to intern (write lock)
74        let mut lookup = self.lookup.write().unwrap();
75        let mut strings = self.strings.write().unwrap();
76
77        // Double-check in case another thread interned it
78        if let Some(&id) = lookup.get(s) {
79            return id;
80        }
81
82        let arc_str: Arc<str> = Arc::from(s);
83        let id = SymbolId::new(strings.len() as u32);
84
85        strings.push(arc_str.clone());
86        lookup.insert(arc_str, id);
87
88        id
89    }
90
91    /// Get a symbol by its ID
92    pub fn get(&self, id: SymbolId) -> Option<Symbol> {
93        let strings = self.strings.read().unwrap();
94        strings.get(id.0 as usize).map(|name| Symbol {
95            id,
96            name: name.clone(),
97        })
98    }
99
100    /// Resolve a symbol ID to its string
101    pub fn resolve(&self, id: SymbolId) -> Option<Arc<str>> {
102        let strings = self.strings.read().unwrap();
103        strings.get(id.0 as usize).cloned()
104    }
105
106    /// Get the number of interned symbols
107    pub fn len(&self) -> usize {
108        self.strings.read().unwrap().len()
109    }
110
111    /// Check if the symbol table is empty
112    pub fn is_empty(&self) -> bool {
113        self.len() == 0
114    }
115}
116
117impl Default for SymbolTable {
118    fn default() -> Self {
119        Self::new()
120    }
121}
122
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning equal text twice yields one ID; different text yields a different ID.
    #[test]
    fn test_symbol_interning() {
        let interner = SymbolTable::new();

        let hello_first = interner.intern("hello");
        let world = interner.intern("world");
        // Second request for "hello" must come back with the first ID.
        let hello_again = interner.intern("hello");

        assert_eq!(hello_first, hello_again);
        assert_ne!(hello_first, world);
        // Only the two distinct strings are counted.
        assert_eq!(interner.len(), 2);
    }

    /// A symbol fetched by ID reports both the original text and the same ID.
    #[test]
    fn test_symbol_resolution() {
        let interner = SymbolTable::new();
        let test_id = interner.intern("test");

        let resolved = interner.get(test_id).unwrap();

        assert_eq!(resolved.as_str(), "test");
        assert_eq!(resolved.id(), test_id);
    }
}