1use std::collections::HashMap;
4
#[allow(dead_code)]
/// Handle naming one interned string inside a [`StringPool`].
///
/// The wrapped `u32` is the position of the string in the pool's storage
/// vector. The handle is `Copy`, comparable and hashable, so it can be passed
/// around and used as a map key in place of the string itself.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StringId(pub u32);
9
10#[allow(dead_code)]
12#[derive(Clone, Debug)]
13pub struct StringPool {
14 lookup: HashMap<String, StringId>,
16 strings: Vec<String>,
18}
19
20#[allow(dead_code)]
26pub fn new_string_pool() -> StringPool {
27 StringPool {
28 lookup: HashMap::new(),
29 strings: Vec::new(),
30 }
31}
32
33#[allow(dead_code)]
40pub fn intern(pool: &mut StringPool, s: &str) -> StringId {
41 if let Some(&id) = pool.lookup.get(s) {
42 return id;
43 }
44 let id = StringId(pool.strings.len() as u32);
45 pool.strings.push(s.to_string());
46 pool.lookup.insert(s.to_string(), id);
47 id
48}
49
50#[allow(dead_code)]
53pub fn resolve(pool: &StringPool, id: StringId) -> Option<&str> {
54 pool.strings.get(id.0 as usize).map(|s| s.as_str())
55}
56
57#[allow(dead_code)]
59pub fn contains(pool: &StringPool, s: &str) -> bool {
60 pool.lookup.contains_key(s)
61}
62
63#[allow(dead_code)]
65pub fn pool_size(pool: &StringPool) -> usize {
66 pool.strings.len()
67}
68
69#[allow(dead_code)]
71pub fn total_bytes(pool: &StringPool) -> usize {
72 pool.strings.iter().map(|s| s.len()).sum()
73}
74
75#[allow(dead_code)]
77pub fn intern_many(pool: &mut StringPool, strings: &[&str]) -> Vec<StringId> {
78 strings.iter().map(|s| intern(pool, s)).collect()
79}
80
81#[allow(dead_code)]
87pub fn remove_unused(pool: &mut StringPool, keep: &[StringId]) -> usize {
88 let keep_set: std::collections::HashSet<u32> = keep.iter().map(|id| id.0).collect();
89 let original_count = pool.strings.len();
90
91 let retained: Vec<String> = pool
92 .strings
93 .iter()
94 .enumerate()
95 .filter(|(i, _)| keep_set.contains(&(*i as u32)))
96 .map(|(_, s)| s.clone())
97 .collect();
98
99 pool.strings = retained;
100 pool.lookup.clear();
101 for (i, s) in pool.strings.iter().enumerate() {
102 pool.lookup.insert(s.clone(), StringId(i as u32));
103 }
104
105 original_count - pool.strings.len()
106}
107
108#[allow(dead_code)]
110pub fn string_id_valid(pool: &StringPool, id: StringId) -> bool {
111 (id.0 as usize) < pool.strings.len()
112}
113
114#[allow(dead_code)]
116pub fn pool_stats_json(pool: &StringPool) -> String {
117 let count = pool_size(pool);
118 let bytes = total_bytes(pool);
119 let avg = if count > 0 {
120 bytes as f64 / count as f64
121 } else {
122 0.0
123 };
124 format!(
125 "{{\"unique_strings\":{},\"total_bytes\":{},\"average_length\":{:.2}}}",
126 count, bytes, avg
127 )
128}
129
130#[allow(dead_code)]
132pub fn clear_pool(pool: &mut StringPool) {
133 pool.strings.clear();
134 pool.lookup.clear();
135}
136
137#[allow(dead_code)]
140pub fn merge_pools(dst: &mut StringPool, src: &StringPool) -> usize {
141 let before = pool_size(dst);
142 for s in &src.strings {
143 intern(dst, s);
144 }
145 pool_size(dst) - before
146}
147
148#[allow(dead_code)]
151pub fn find_by_prefix(pool: &StringPool, prefix: &str) -> Vec<StringId> {
152 pool.strings
153 .iter()
154 .enumerate()
155 .filter(|(_, s)| s.starts_with(prefix))
156 .map(|(i, _)| StringId(i as u32))
157 .collect()
158}
159
#[cfg(test)]
// Unit tests for the string pool. Beyond sizes and counts, the tests pin the
// actual IDs and contents so that a wrong mapping cannot pass unnoticed.
mod tests {
    use super::*;

    #[test]
    fn test_new_string_pool_empty() {
        let pool = new_string_pool();
        assert_eq!(pool_size(&pool), 0);
        assert_eq!(total_bytes(&pool), 0);
    }

    #[test]
    fn test_intern_returns_id() {
        let mut pool = new_string_pool();
        let first = intern(&mut pool, "hello");
        let second = intern(&mut pool, "world");
        // IDs are handed out sequentially, starting at zero.
        assert_eq!(first.0, 0);
        assert_eq!(second.0, 1);
    }

    #[test]
    fn test_intern_deduplicates() {
        let mut pool = new_string_pool();
        let id1 = intern(&mut pool, "hello");
        let id2 = intern(&mut pool, "hello");
        assert_eq!(id1, id2);
        assert_eq!(pool_size(&pool), 1);
    }

    #[test]
    fn test_resolve_valid() {
        let mut pool = new_string_pool();
        let id = intern(&mut pool, "world");
        assert_eq!(resolve(&pool, id), Some("world"));
    }

    #[test]
    fn test_resolve_invalid() {
        let mut pool = new_string_pool();
        assert_eq!(resolve(&pool, StringId(999)), None);
        // The first index past the end is invalid as well.
        intern(&mut pool, "only");
        assert_eq!(resolve(&pool, StringId(1)), None);
    }

    #[test]
    fn test_contains() {
        let mut pool = new_string_pool();
        intern(&mut pool, "abc");
        assert!(contains(&pool, "abc"));
        assert!(!contains(&pool, "xyz"));
    }

    #[test]
    fn test_pool_size() {
        let mut pool = new_string_pool();
        intern(&mut pool, "a");
        intern(&mut pool, "b");
        intern(&mut pool, "c");
        assert_eq!(pool_size(&pool), 3);
    }

    #[test]
    fn test_total_bytes() {
        let mut pool = new_string_pool();
        intern(&mut pool, "ab");
        intern(&mut pool, "cde");
        assert_eq!(total_bytes(&pool), 5);
    }

    #[test]
    fn test_intern_many() {
        let mut pool = new_string_pool();
        let ids = intern_many(&mut pool, &["x", "y", "z"]);
        assert_eq!(ids, vec![StringId(0), StringId(1), StringId(2)]);
        assert_eq!(pool_size(&pool), 3);

        // Already-known strings map back to their existing IDs.
        let again = intern_many(&mut pool, &["z", "x"]);
        assert_eq!(again, vec![StringId(2), StringId(0)]);
        assert_eq!(pool_size(&pool), 3);
    }

    #[test]
    fn test_remove_unused() {
        let mut pool = new_string_pool();
        let id0 = intern(&mut pool, "keep");
        intern(&mut pool, "remove");
        let removed = remove_unused(&mut pool, &[id0]);
        assert_eq!(removed, 1);
        assert_eq!(pool_size(&pool), 1);
        assert!(contains(&pool, "keep"));
        assert!(!contains(&pool, "remove"));
        assert_eq!(resolve(&pool, id0), Some("keep"));
    }

    #[test]
    fn test_remove_unused_compacts_ids() {
        let mut pool = new_string_pool();
        intern(&mut pool, "a");
        intern(&mut pool, "b");
        let last = intern(&mut pool, "c");
        let removed = remove_unused(&mut pool, &[last]);
        assert_eq!(removed, 2);
        // The survivor is renumbered to the front of the pool.
        assert_eq!(resolve(&pool, StringId(0)), Some("c"));
        assert_eq!(intern(&mut pool, "c"), StringId(0));
        assert!(!string_id_valid(&pool, last));
    }

    #[test]
    fn test_string_id_valid() {
        let mut pool = new_string_pool();
        let id = intern(&mut pool, "test");
        assert!(string_id_valid(&pool, id));
        assert!(!string_id_valid(&pool, StringId(1)));
        assert!(!string_id_valid(&pool, StringId(100)));
    }

    #[test]
    fn test_pool_stats_json() {
        let mut pool = new_string_pool();
        intern(&mut pool, "abc");
        let json = pool_stats_json(&pool);
        assert!(json.contains("\"unique_strings\":1"));
        assert!(json.contains("\"total_bytes\":3"));
        assert!(json.contains("\"average_length\":3.00"));
    }

    #[test]
    fn test_pool_stats_json_empty() {
        let pool = new_string_pool();
        assert_eq!(
            pool_stats_json(&pool),
            "{\"unique_strings\":0,\"total_bytes\":0,\"average_length\":0.00}"
        );
    }

    #[test]
    fn test_clear_pool() {
        let mut pool = new_string_pool();
        intern(&mut pool, "a");
        intern(&mut pool, "b");
        clear_pool(&mut pool);
        assert_eq!(pool_size(&pool), 0);
        assert_eq!(total_bytes(&pool), 0);
        assert!(!contains(&pool, "a"));
    }

    #[test]
    fn test_merge_pools() {
        let mut pool1 = new_string_pool();
        intern(&mut pool1, "a");
        let mut pool2 = new_string_pool();
        intern(&mut pool2, "b");
        intern(&mut pool2, "c");
        let added = merge_pools(&mut pool1, &pool2);
        assert_eq!(added, 2);
        assert_eq!(pool_size(&pool1), 3);
        assert!(contains(&pool1, "b"));
        assert!(contains(&pool1, "c"));
    }

    #[test]
    fn test_merge_pools_no_duplicates() {
        let mut pool1 = new_string_pool();
        intern(&mut pool1, "shared");
        let mut pool2 = new_string_pool();
        intern(&mut pool2, "shared");
        intern(&mut pool2, "new");
        let added = merge_pools(&mut pool1, &pool2);
        assert_eq!(added, 1);
        assert_eq!(pool_size(&pool1), 2);
    }

    #[test]
    fn test_find_by_prefix() {
        let mut pool = new_string_pool();
        intern(&mut pool, "morph_face");
        intern(&mut pool, "morph_body");
        intern(&mut pool, "texture_skin");
        let results = find_by_prefix(&pool, "morph_");
        assert_eq!(results, vec![StringId(0), StringId(1)]);
    }

    #[test]
    fn test_find_by_prefix_empty() {
        let mut pool = new_string_pool();
        intern(&mut pool, "abc");
        let results = find_by_prefix(&pool, "xyz");
        assert!(results.is_empty());
    }
}