formualizer_eval/engine/arena/
string_interner.rs1use rustc_hash::FxHashMap;
4use std::{fmt, sync::Arc};
5
6#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
8pub struct StringId(u32);
9
10impl StringId {
11 pub const MAX: u32 = u32::MAX - 1;
13
14 pub const INVALID: StringId = StringId(u32::MAX);
16
17 pub fn as_u32(self) -> u32 {
18 self.0
19 }
20
21 pub fn from_raw(raw: u32) -> Self {
22 StringId(raw)
23 }
24
25 pub fn is_valid(self) -> bool {
26 self.0 != u32::MAX
27 }
28}
29
30impl fmt::Display for StringId {
31 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
32 write!(f, "StringId({})", self.0)
33 }
34}
35
36#[derive(Debug)]
38pub struct StringInterner {
39 strings: Vec<Arc<str>>,
41 lookup: FxHashMap<Arc<str>, StringId>,
43}
44
45impl StringInterner {
46 pub fn new() -> Self {
47 Self {
48 strings: Vec::new(),
49 lookup: FxHashMap::default(),
50 }
51 }
52
53 pub fn with_capacity(cap: usize) -> Self {
54 Self {
55 strings: Vec::with_capacity(cap),
56 lookup: FxHashMap::with_capacity_and_hasher(cap, Default::default()),
57 }
58 }
59
60 pub fn intern(&mut self, s: &str) -> StringId {
63 if let Some(&id) = self.lookup.get(s) {
65 return id;
66 }
67
68 let index = self.strings.len() as u32;
70 if index > StringId::MAX {
71 panic!("String interner overflow: too many strings");
72 }
73
74 let id = StringId(index);
76 let boxed: Arc<str> = s.into();
77
78 self.lookup.insert(boxed.clone(), id);
81 self.strings.push(boxed);
82
83 id
84 }
85
86 #[inline]
88 pub fn get(&self, id: StringId) -> Option<&str> {
89 if id.is_valid() {
90 self.strings.get(id.0 as usize).map(|s| &**s)
91 } else {
92 None
93 }
94 }
95
96 #[inline]
98 pub fn resolve(&self, id: StringId) -> &str {
99 self.get(id)
100 .unwrap_or_else(|| panic!("Invalid string ID: {id:?}"))
101 }
102
103 pub fn contains(&self, s: &str) -> bool {
105 self.lookup.contains_key(s)
106 }
107
108 pub fn get_id(&self, s: &str) -> Option<StringId> {
110 self.lookup.get(s).copied()
111 }
112
113 pub fn len(&self) -> usize {
115 self.strings.len()
116 }
117
118 pub fn is_empty(&self) -> bool {
120 self.strings.is_empty()
121 }
122
123 pub fn memory_usage(&self) -> usize {
125 let vec_overhead = self.strings.capacity() * std::mem::size_of::<Box<str>>();
127
128 let string_data: usize = self
130 .strings
131 .iter()
132 .map(|s| s.len() + std::mem::size_of::<Box<str>>())
133 .sum();
134
135 let map_overhead = self.lookup.capacity()
137 * (std::mem::size_of::<Box<str>>() + std::mem::size_of::<StringId>());
138
139 vec_overhead + string_data + map_overhead
140 }
141
142 pub fn clear(&mut self) {
144 self.strings.clear();
145 self.lookup.clear();
146 }
147
148 pub fn iter(&self) -> impl Iterator<Item = (StringId, &str)> + '_ {
150 self.strings
151 .iter()
152 .enumerate()
153 .map(|(i, s)| (StringId(i as u32), &**s))
154 }
155
156 pub fn stats(&self) -> InternerStats {
158 let total_bytes: usize = self.strings.iter().map(|s| s.len()).sum();
159 let unique_count = self.strings.len();
160
161 InternerStats {
162 unique_count,
163 total_bytes,
164 average_length: if unique_count > 0 {
165 total_bytes as f64 / unique_count as f64
166 } else {
167 0.0
168 },
169 }
170 }
171}
172
173impl Default for StringInterner {
174 fn default() -> Self {
175 Self::new()
176 }
177}
178
179#[derive(Debug, Clone)]
181pub struct InternerStats {
182 pub unique_count: usize,
183 pub total_bytes: usize,
184 pub average_length: f64,
185}
186
187#[cfg(test)]
188mod tests {
189 use super::*;
190
191 #[test]
192 fn test_string_interning() {
193 let mut interner = StringInterner::new();
194
195 let s1 = interner.intern("Hello");
196 let s2 = interner.intern("Hello");
197 let s3 = interner.intern("World");
198
199 assert_eq!(s1, s2);
200 assert_ne!(s1, s3);
201 assert_eq!(interner.get(s1), Some("Hello"));
202 assert_eq!(interner.get(s3), Some("World"));
203 }
204
205 #[test]
206 fn test_string_memory_efficiency() {
207 let mut interner = StringInterner::new();
208
209 let refs: Vec<_> = (0..1000).map(|_| interner.intern("repeated")).collect();
211
212 assert_eq!(interner.len(), 1);
214
215 for r in &refs[1..] {
217 assert_eq!(*r, refs[0]);
218 }
219
220 assert!(interner.memory_usage() < 1000);
222 }
223
224 #[test]
225 fn test_string_interner_capacity() {
226 let mut interner = StringInterner::with_capacity(100);
227
228 for i in 0..1000 {
230 let s = format!("string_{i}");
231 let id = interner.intern(&s);
232 assert_eq!(interner.get(id), Some(s.as_str()));
233 }
234
235 assert_eq!(interner.len(), 1000);
236 }
237
238 #[test]
239 fn test_string_interner_contains() {
240 let mut interner = StringInterner::new();
241
242 interner.intern("exists");
243
244 assert!(interner.contains("exists"));
245 assert!(!interner.contains("not_exists"));
246 }
247
248 #[test]
249 fn test_string_interner_get_id() {
250 let mut interner = StringInterner::new();
251
252 let id = interner.intern("test");
253
254 assert_eq!(interner.get_id("test"), Some(id));
255 assert_eq!(interner.get_id("not_interned"), None);
256 }
257
258 #[test]
259 fn test_string_interner_clear() {
260 let mut interner = StringInterner::new();
261
262 interner.intern("one");
263 interner.intern("two");
264 interner.intern("three");
265
266 assert_eq!(interner.len(), 3);
267
268 interner.clear();
269
270 assert_eq!(interner.len(), 0);
271 assert!(interner.is_empty());
272 }
273
274 #[test]
275 fn test_string_interner_iter() {
276 let mut interner = StringInterner::new();
277
278 let id1 = interner.intern("first");
279 let id2 = interner.intern("second");
280 let id3 = interner.intern("third");
281
282 let items: Vec<_> = interner.iter().collect();
283
284 assert_eq!(items.len(), 3);
285 assert_eq!(items[0], (id1, "first"));
286 assert_eq!(items[1], (id2, "second"));
287 assert_eq!(items[2], (id3, "third"));
288 }
289
290 #[test]
291 fn test_string_interner_stats() {
292 let mut interner = StringInterner::new();
293
294 interner.intern("short");
295 interner.intern("medium_length");
296 interner.intern("this_is_a_longer_string");
297
298 let stats = interner.stats();
299
300 assert_eq!(stats.unique_count, 3);
301 assert_eq!(stats.total_bytes, 5 + 13 + 23);
302 assert!((stats.average_length - 13.67).abs() < 0.01);
303 }
304
305 #[test]
306 fn test_invalid_string_id() {
307 let interner = StringInterner::new();
308
309 assert!(StringId::INVALID.0 == u32::MAX);
310 assert!(!StringId::INVALID.is_valid());
311 assert_eq!(interner.get(StringId::INVALID), None);
312 }
313
314 #[test]
315 #[should_panic(expected = "Invalid string ID")]
316 fn test_resolve_invalid_id() {
317 let interner = StringInterner::new();
318 interner.resolve(StringId::INVALID);
319 }
320
321 #[test]
322 fn test_string_id_ordering() {
323 let mut interner = StringInterner::new();
324
325 let id1 = interner.intern("a");
326 let id2 = interner.intern("b");
327 let id3 = interner.intern("c");
328
329 assert!(id1 < id2);
330 assert!(id2 < id3);
331 assert!(id1 < id3);
332 }
333
334 #[test]
335 fn test_empty_string() {
336 let mut interner = StringInterner::new();
337
338 let id = interner.intern("");
339 assert_eq!(interner.get(id), Some(""));
340
341 let id2 = interner.intern("");
343 assert_eq!(id, id2);
344 }
345
346 #[test]
347 fn test_unicode_strings() {
348 let mut interner = StringInterner::new();
349
350 let id1 = interner.intern("Hello δΈη");
351 let id2 = interner.intern("π¦ Rust");
352 let id3 = interner.intern("Hello δΈη");
353
354 assert_eq!(id1, id3);
355 assert_ne!(id1, id2);
356
357 assert_eq!(interner.get(id1), Some("Hello δΈη"));
358 assert_eq!(interner.get(id2), Some("π¦ Rust"));
359 }
360}