anomaly_grid/
string_interner.rs1use std::collections::HashMap;
7use std::sync::{Arc, RwLock};
8
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
11pub struct StateId(u32);
12
13impl StateId {
14 pub(crate) fn new(id: u32) -> Self {
16 Self(id)
17 }
18
19 pub fn as_u32(self) -> u32 {
21 self.0
22 }
23}
24
25impl std::fmt::Display for StateId {
26 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27 write!(f, "StateId({})", self.0)
28 }
29}
30
31#[derive(Debug, Clone)]
33pub struct StringInterner {
34 inner: Arc<RwLock<InternerInner>>,
35}
36
37#[derive(Debug)]
38struct InternerInner {
39 strings: Vec<String>,
41 string_to_id: HashMap<String, StateId>,
43}
44
45impl StringInterner {
46 pub fn new() -> Self {
48 Self {
49 inner: Arc::new(RwLock::new(InternerInner {
50 strings: Vec::new(),
51 string_to_id: HashMap::new(),
52 })),
53 }
54 }
55
56 pub fn get_or_intern(&self, s: &str) -> StateId {
61 {
63 let inner = self.inner.read().unwrap();
64 if let Some(&id) = inner.string_to_id.get(s) {
65 return id;
66 }
67 }
68
69 let mut inner = self.inner.write().unwrap();
71
72 if let Some(&id) = inner.string_to_id.get(s) {
74 return id;
75 }
76
77 let id = StateId::new(inner.strings.len() as u32);
79 inner.strings.push(s.to_string());
80 inner.string_to_id.insert(s.to_string(), id);
81
82 id
83 }
84
85 pub fn get_string(&self, id: StateId) -> Option<String> {
89 let inner = self.inner.read().unwrap();
90 inner.strings.get(id.0 as usize).cloned()
91 }
92
93 pub fn len(&self) -> usize {
95 let inner = self.inner.read().unwrap();
96 inner.strings.len()
97 }
98
99 pub fn is_empty(&self) -> bool {
101 self.len() == 0
102 }
103
104 pub fn iter(&self) -> Vec<(StateId, String)> {
106 let inner = self.inner.read().unwrap();
107 inner
108 .strings
109 .iter()
110 .enumerate()
111 .map(|(i, s)| (StateId::new(i as u32), s.clone()))
112 .collect()
113 }
114
115 pub fn estimate_memory_usage(&self) -> usize {
117 let inner = self.inner.read().unwrap();
118 let strings_memory: usize = inner.strings.iter().map(|s| s.capacity()).sum();
119 let hashmap_memory = inner.string_to_id.capacity()
120 * (std::mem::size_of::<String>() + std::mem::size_of::<StateId>());
121 let vec_memory = inner.strings.capacity() * std::mem::size_of::<String>();
122
123 strings_memory + hashmap_memory + vec_memory
124 }
125}
126
127impl Default for StringInterner {
128 fn default() -> Self {
129 Self::new()
130 }
131}
132
133pub trait StateIdConversion {
135 fn to_state_id(&self, interner: &StringInterner) -> StateId;
137
138 fn from_state_id(id: StateId, interner: &StringInterner) -> Option<String>;
140}
141
142impl StateIdConversion for str {
143 fn to_state_id(&self, interner: &StringInterner) -> StateId {
144 interner.get_or_intern(self)
145 }
146
147 fn from_state_id(id: StateId, interner: &StringInterner) -> Option<String> {
148 interner.get_string(id)
149 }
150}
151
152impl StateIdConversion for String {
153 fn to_state_id(&self, interner: &StringInterner) -> StateId {
154 interner.get_or_intern(self)
155 }
156
157 fn from_state_id(id: StateId, interner: &StringInterner) -> Option<String> {
158 interner.get_string(id)
159 }
160}
161
162pub fn strings_to_state_ids(strings: &[String], interner: &StringInterner) -> Vec<StateId> {
164 strings.iter().map(|s| interner.get_or_intern(s)).collect()
165}
166
167pub fn state_ids_to_strings(ids: &[StateId], interner: &StringInterner) -> Option<Vec<String>> {
169 ids.iter().map(|&id| interner.get_string(id)).collect()
170}
171
172#[cfg(test)]
173mod tests {
174 use super::*;
175
176 #[test]
177 fn test_basic_interning() {
178 let interner = StringInterner::new();
179
180 let id1 = interner.get_or_intern("hello");
181 let id2 = interner.get_or_intern("world");
182 let id3 = interner.get_or_intern("hello"); assert_eq!(id1, id3);
185 assert_ne!(id1, id2);
186
187 assert_eq!(interner.get_string(id1), Some("hello".to_string()));
188 assert_eq!(interner.get_string(id2), Some("world".to_string()));
189
190 assert_eq!(interner.len(), 2);
191 }
192
193 #[test]
194 fn test_thread_safety() {
195 use std::thread;
196
197 let interner = StringInterner::new();
198 let interner_clone = interner.clone();
199
200 let handle = thread::spawn(move || interner_clone.get_or_intern("thread_string"));
201
202 let id1 = interner.get_or_intern("main_string");
203 let id2 = handle.join().unwrap();
204
205 assert_ne!(id1, id2);
206 assert_eq!(interner.len(), 2);
207 }
208
209 #[test]
210 fn test_memory_estimation() {
211 let interner = StringInterner::new();
212
213 let initial_memory = interner.estimate_memory_usage();
214
215 interner.get_or_intern("test_string_1");
216 interner.get_or_intern("test_string_2");
217
218 let after_memory = interner.estimate_memory_usage();
219
220 assert!(after_memory > initial_memory);
221 }
222
223 #[test]
224 fn test_conversion_helpers() {
225 let interner = StringInterner::new();
226
227 let strings = vec!["A".to_string(), "B".to_string(), "C".to_string()];
228 let ids = strings_to_state_ids(&strings, &interner);
229 let recovered = state_ids_to_strings(&ids, &interner).unwrap();
230
231 assert_eq!(strings, recovered);
232 }
233
234 #[test]
235 fn test_state_id_conversion_trait() {
236 let interner = StringInterner::new();
237
238 let id = "test".to_state_id(&interner);
239 let recovered = String::from_state_id(id, &interner).unwrap();
240
241 assert_eq!(recovered, "test");
242 }
243
244 #[test]
245 fn test_iterator() {
246 let interner = StringInterner::new();
247
248 interner.get_or_intern("first");
249 interner.get_or_intern("second");
250 interner.get_or_intern("third");
251
252 let items = interner.iter();
253 assert_eq!(items.len(), 3);
254
255 let strings: Vec<String> = items.into_iter().map(|(_, s)| s).collect();
257 assert!(strings.contains(&"first".to_string()));
258 assert!(strings.contains(&"second".to_string()));
259 assert!(strings.contains(&"third".to_string()));
260 }
261
262 #[test]
263 fn test_invalid_state_id() {
264 let interner = StringInterner::new();
265
266 let invalid_id = StateId::new(999);
267 assert_eq!(interner.get_string(invalid_id), None);
268 }
269
270 #[test]
271 fn test_memory_efficiency() {
272 let interner = StringInterner::new();
273
274 let test_string = "repeated_string";
276 let mut ids = Vec::new();
277
278 for _ in 0..1000 {
279 ids.push(interner.get_or_intern(test_string));
280 }
281
282 assert_eq!(interner.len(), 1);
284
285 let first_id = ids[0];
287 assert!(ids.iter().all(|&id| id == first_id));
288 }
289}