anomaly_grid/
string_interner.rs

1//! String Interning System for Memory Optimization
2//!
3//! This module provides a string interning system that replaces duplicate string
4//! storage with compact integer IDs, significantly reducing memory usage.
5
6use std::collections::HashMap;
7use std::sync::{Arc, RwLock};
8
9/// Compact identifier for interned strings
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
11pub struct StateId(u32);
12
13impl StateId {
14    /// Create a new StateId (internal use only)
15    pub(crate) fn new(id: u32) -> Self {
16        Self(id)
17    }
18
19    /// Get the raw ID value
20    pub fn as_u32(self) -> u32 {
21        self.0
22    }
23}
24
25impl std::fmt::Display for StateId {
26    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27        write!(f, "StateId({})", self.0)
28    }
29}
30
31/// Thread-safe string interning system
32#[derive(Debug, Clone)]
33pub struct StringInterner {
34    inner: Arc<RwLock<InternerInner>>,
35}
36
37#[derive(Debug)]
38struct InternerInner {
39    /// Storage for interned strings
40    strings: Vec<String>,
41    /// Mapping from string to ID for deduplication
42    string_to_id: HashMap<String, StateId>,
43}
44
45impl StringInterner {
46    /// Create a new string interner
47    pub fn new() -> Self {
48        Self {
49            inner: Arc::new(RwLock::new(InternerInner {
50                strings: Vec::new(),
51                string_to_id: HashMap::new(),
52            })),
53        }
54    }
55
56    /// Intern a string and return its ID
57    ///
58    /// If the string is already interned, returns the existing ID.
59    /// Otherwise, creates a new ID and stores the string.
60    pub fn get_or_intern(&self, s: &str) -> StateId {
61        // Try read-only access first (common case)
62        {
63            let inner = self.inner.read().unwrap();
64            if let Some(&id) = inner.string_to_id.get(s) {
65                return id;
66            }
67        }
68
69        // Need write access to intern new string
70        let mut inner = self.inner.write().unwrap();
71
72        // Double-check in case another thread interned it
73        if let Some(&id) = inner.string_to_id.get(s) {
74            return id;
75        }
76
77        // Create new ID and intern the string
78        let id = StateId::new(inner.strings.len() as u32);
79        inner.strings.push(s.to_string());
80        inner.string_to_id.insert(s.to_string(), id);
81
82        id
83    }
84
85    /// Get the string for a given ID
86    ///
87    /// Returns None if the ID is invalid.
88    pub fn get_string(&self, id: StateId) -> Option<String> {
89        let inner = self.inner.read().unwrap();
90        inner.strings.get(id.0 as usize).cloned()
91    }
92
93    /// Get the number of interned strings
94    pub fn len(&self) -> usize {
95        let inner = self.inner.read().unwrap();
96        inner.strings.len()
97    }
98
99    /// Check if the interner is empty
100    pub fn is_empty(&self) -> bool {
101        self.len() == 0
102    }
103
104    /// Get all interned strings with their IDs
105    pub fn iter(&self) -> Vec<(StateId, String)> {
106        let inner = self.inner.read().unwrap();
107        inner
108            .strings
109            .iter()
110            .enumerate()
111            .map(|(i, s)| (StateId::new(i as u32), s.clone()))
112            .collect()
113    }
114
115    /// Estimate memory usage of the interner
116    pub fn estimate_memory_usage(&self) -> usize {
117        let inner = self.inner.read().unwrap();
118        let strings_memory: usize = inner.strings.iter().map(|s| s.capacity()).sum();
119        let hashmap_memory = inner.string_to_id.capacity()
120            * (std::mem::size_of::<String>() + std::mem::size_of::<StateId>());
121        let vec_memory = inner.strings.capacity() * std::mem::size_of::<String>();
122
123        strings_memory + hashmap_memory + vec_memory
124    }
125}
126
127impl Default for StringInterner {
128    fn default() -> Self {
129        Self::new()
130    }
131}
132
133/// Helper trait for converting between strings and StateIds
134pub trait StateIdConversion {
135    /// Convert a string slice to a StateId using the interner
136    fn to_state_id(&self, interner: &StringInterner) -> StateId;
137
138    /// Convert a StateId back to a string using the interner
139    fn from_state_id(id: StateId, interner: &StringInterner) -> Option<String>;
140}
141
142impl StateIdConversion for str {
143    fn to_state_id(&self, interner: &StringInterner) -> StateId {
144        interner.get_or_intern(self)
145    }
146
147    fn from_state_id(id: StateId, interner: &StringInterner) -> Option<String> {
148        interner.get_string(id)
149    }
150}
151
152impl StateIdConversion for String {
153    fn to_state_id(&self, interner: &StringInterner) -> StateId {
154        interner.get_or_intern(self)
155    }
156
157    fn from_state_id(id: StateId, interner: &StringInterner) -> Option<String> {
158        interner.get_string(id)
159    }
160}
161
162/// Convert a vector of strings to StateIds
163pub fn strings_to_state_ids(strings: &[String], interner: &StringInterner) -> Vec<StateId> {
164    strings.iter().map(|s| interner.get_or_intern(s)).collect()
165}
166
167/// Convert a vector of StateIds back to strings
168pub fn state_ids_to_strings(ids: &[StateId], interner: &StringInterner) -> Option<Vec<String>> {
169    ids.iter().map(|&id| interner.get_string(id)).collect()
170}
171
172#[cfg(test)]
173mod tests {
174    use super::*;
175
176    #[test]
177    fn test_basic_interning() {
178        let interner = StringInterner::new();
179
180        let id1 = interner.get_or_intern("hello");
181        let id2 = interner.get_or_intern("world");
182        let id3 = interner.get_or_intern("hello"); // Should reuse id1
183
184        assert_eq!(id1, id3);
185        assert_ne!(id1, id2);
186
187        assert_eq!(interner.get_string(id1), Some("hello".to_string()));
188        assert_eq!(interner.get_string(id2), Some("world".to_string()));
189
190        assert_eq!(interner.len(), 2);
191    }
192
193    #[test]
194    fn test_thread_safety() {
195        use std::thread;
196
197        let interner = StringInterner::new();
198        let interner_clone = interner.clone();
199
200        let handle = thread::spawn(move || interner_clone.get_or_intern("thread_string"));
201
202        let id1 = interner.get_or_intern("main_string");
203        let id2 = handle.join().unwrap();
204
205        assert_ne!(id1, id2);
206        assert_eq!(interner.len(), 2);
207    }
208
209    #[test]
210    fn test_memory_estimation() {
211        let interner = StringInterner::new();
212
213        let initial_memory = interner.estimate_memory_usage();
214
215        interner.get_or_intern("test_string_1");
216        interner.get_or_intern("test_string_2");
217
218        let after_memory = interner.estimate_memory_usage();
219
220        assert!(after_memory > initial_memory);
221    }
222
223    #[test]
224    fn test_conversion_helpers() {
225        let interner = StringInterner::new();
226
227        let strings = vec!["A".to_string(), "B".to_string(), "C".to_string()];
228        let ids = strings_to_state_ids(&strings, &interner);
229        let recovered = state_ids_to_strings(&ids, &interner).unwrap();
230
231        assert_eq!(strings, recovered);
232    }
233
234    #[test]
235    fn test_state_id_conversion_trait() {
236        let interner = StringInterner::new();
237
238        let id = "test".to_state_id(&interner);
239        let recovered = String::from_state_id(id, &interner).unwrap();
240
241        assert_eq!(recovered, "test");
242    }
243
244    #[test]
245    fn test_iterator() {
246        let interner = StringInterner::new();
247
248        interner.get_or_intern("first");
249        interner.get_or_intern("second");
250        interner.get_or_intern("third");
251
252        let items = interner.iter();
253        assert_eq!(items.len(), 3);
254
255        // Check that all strings are present
256        let strings: Vec<String> = items.into_iter().map(|(_, s)| s).collect();
257        assert!(strings.contains(&"first".to_string()));
258        assert!(strings.contains(&"second".to_string()));
259        assert!(strings.contains(&"third".to_string()));
260    }
261
262    #[test]
263    fn test_invalid_state_id() {
264        let interner = StringInterner::new();
265
266        let invalid_id = StateId::new(999);
267        assert_eq!(interner.get_string(invalid_id), None);
268    }
269
270    #[test]
271    fn test_memory_efficiency() {
272        let interner = StringInterner::new();
273
274        // Intern the same string multiple times
275        let test_string = "repeated_string";
276        let mut ids = Vec::new();
277
278        for _ in 0..1000 {
279            ids.push(interner.get_or_intern(test_string));
280        }
281
282        // Should only have one unique string stored
283        assert_eq!(interner.len(), 1);
284
285        // All IDs should be the same
286        let first_id = ids[0];
287        assert!(ids.iter().all(|&id| id == first_id));
288    }
289}