1use dashmap::DashMap;
8use serde::{Deserialize, Serialize};
9use std::sync::atomic::{AtomicU64, Ordering};
10use std::sync::{Arc, OnceLock};
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct StringPoolStats {
15 pub unique_strings: usize,
17 pub intern_operations: u64,
19 pub cache_hits: u64,
21 pub memory_saved_bytes: u64,
23 pub average_string_length: f64,
25}
26
27struct PoolMetrics {
29 intern_count: AtomicU64,
30 cache_hits: AtomicU64,
31 total_string_bytes: AtomicU64,
32 duplicate_bytes_saved: AtomicU64,
33}
34
35impl PoolMetrics {
36 fn new() -> Self {
37 Self {
38 intern_count: AtomicU64::new(0),
39 cache_hits: AtomicU64::new(0),
40 total_string_bytes: AtomicU64::new(0),
41 duplicate_bytes_saved: AtomicU64::new(0),
42 }
43 }
44
45 fn record_intern(&self, string_len: usize, was_cached: bool) {
46 self.intern_count.fetch_add(1, Ordering::Relaxed);
47
48 if was_cached {
49 self.cache_hits.fetch_add(1, Ordering::Relaxed);
50 self.duplicate_bytes_saved
51 .fetch_add(string_len as u64, Ordering::Relaxed);
52 } else {
53 self.total_string_bytes
54 .fetch_add(string_len as u64, Ordering::Relaxed);
55 }
56 }
57}
58
59#[cfg(test)]
60mod string_pool_tests {
61 use super::*;
62 use std::sync::Arc;
63 use std::thread;
64
65 #[test]
66 fn test_string_pool_deduplication() {
67 let pool = StringPool::new();
68
69 let str1 = pool.intern("hello world");
71 let str2 = pool.intern("hello world");
72
73 assert!(Arc::ptr_eq(&str1, &str2));
74
75 let str3 = pool.intern("different string");
77 assert!(!Arc::ptr_eq(&str1, &str3));
78 }
79
80 #[test]
81 fn test_string_pool_memory_efficiency() {
82 let pool = StringPool::new();
83
84 let original = "this is a test string for memory efficiency";
86 let mut handles = Vec::new();
87
88 for _ in 0..1000 {
89 handles.push(pool.intern(original));
90 }
91
92 for handle in &handles[1..] {
94 assert!(Arc::ptr_eq(&handles[0], handle));
95 }
96
97 let stats = pool.get_stats();
99 assert_eq!(stats.unique_strings, 1);
100 assert_eq!(stats.intern_operations, 1000);
101 }
102
103 #[test]
104 fn test_string_pool_concurrent_access() {
105 let pool = Arc::new(StringPool::new());
106 let mut handles = Vec::new();
107
108 for _i in 0..10 {
110 let pool_clone = Arc::clone(&pool);
111 let handle = thread::spawn(move || {
112 let mut results = Vec::new();
113 for j in 0..100 {
114 let s = if j % 2 == 0 {
115 "common_string_a"
116 } else {
117 "common_string_b"
118 };
119 results.push(pool_clone.intern(s));
120 }
121 results
122 });
123 handles.push(handle);
124 }
125
126 let mut all_results = Vec::new();
128 for handle in handles {
129 all_results.extend(handle.join().unwrap());
130 }
131
132 let string_a_refs: Vec<_> = all_results
134 .iter()
135 .filter(|s| &***s == "common_string_a")
136 .collect();
137 let string_b_refs: Vec<_> = all_results
138 .iter()
139 .filter(|s| &***s == "common_string_b")
140 .collect();
141
142 for ref_a in &string_a_refs[1..] {
144 assert!(Arc::ptr_eq(string_a_refs[0], ref_a));
145 }
146 for ref_b in &string_b_refs[1..] {
147 assert!(Arc::ptr_eq(string_b_refs[0], ref_b));
148 }
149
150 let stats = pool.get_stats();
151 assert_eq!(stats.unique_strings, 2); }
153
154 #[test]
155 fn test_string_pool_memory_usage_calculation() {
156 let pool = StringPool::new();
157
158 let _short_str = pool.intern("hi");
159 let _long_str = pool.intern("this is a much longer string that takes more memory");
160
161 let stats = pool.get_stats();
162
163 assert_eq!(stats.unique_strings, 2);
165 }
166
167 #[test]
168 fn test_string_pool_cleanup_on_drop() {
169 let pool = StringPool::new();
170
171 {
172 let _temp_str = pool.intern("temporary string");
173 let stats = pool.get_stats();
174 assert_eq!(stats.unique_strings, 1);
175 }
176
177 let stats = pool.get_stats();
180 assert_eq!(stats.unique_strings, 1);
181
182 }
185
186 #[test]
187 fn test_string_pool_large_strings() {
188 let pool = StringPool::new();
189
190 let large_string = "x".repeat(10000);
192 let str1 = pool.intern(&large_string);
193 let str2 = pool.intern(&large_string);
194
195 assert!(Arc::ptr_eq(&str1, &str2));
196
197 let stats = pool.get_stats();
198 assert_eq!(stats.unique_strings, 1);
199 }
200
201 #[test]
202 fn test_string_pool_empty_strings() {
203 let pool = StringPool::new();
204
205 let empty1 = pool.intern("");
206 let empty2 = pool.intern("");
207
208 assert!(Arc::ptr_eq(&empty1, &empty2));
209 assert_eq!(empty1.as_ref(), "");
210
211 let stats = pool.get_stats();
212 assert_eq!(stats.unique_strings, 1);
213 }
214
215 #[test]
216 fn test_string_pool_unicode_strings() {
217 let pool = StringPool::new();
218
219 let unicode1 = pool.intern("Hello world 🌍");
220 let unicode2 = pool.intern("Hello world 🌍");
221 let different_unicode = pool.intern("Hello world 🌎");
222
223 assert!(Arc::ptr_eq(&unicode1, &unicode2));
224 assert!(!Arc::ptr_eq(&unicode1, &different_unicode));
225
226 let stats = pool.get_stats();
227 assert_eq!(stats.unique_strings, 2);
228 }
229
230 #[test]
231 fn test_string_pool_stats_accuracy() {
232 let pool = StringPool::new();
233
234 let initial_stats = pool.get_stats();
236 assert_eq!(initial_stats.unique_strings, 0);
237 assert_eq!(initial_stats.intern_operations, 0);
238
239 let str1 = pool.intern("first");
241 let str2 = pool.intern("second");
242 let str1_again = pool.intern("first"); let stats = pool.get_stats();
245 assert_eq!(stats.unique_strings, 2); assert!(stats.intern_operations >= 2); assert_eq!(str1.as_ref(), "first");
250 assert_eq!(str2.as_ref(), "second");
251 assert!(Arc::ptr_eq(&str1, &str1_again));
252 }
253
254 #[test]
255 fn test_global_string_pool_singleton() {
256 let str1 = intern_string("global test");
258 let str2 = intern_string("global test");
259
260 assert!(Arc::ptr_eq(&str1, &str2));
261
262 let pool1 = get_global_pool();
264 let pool2 = get_global_pool();
265
266 let str3 = pool1.intern("another global test");
267 let str4 = pool2.intern("another global test");
268
269 assert!(Arc::ptr_eq(&str3, &str4));
270 }
271
272 #[test]
273 fn test_string_pool_performance_characteristics() {
274 let pool = StringPool::new();
275
276 let test_string = "performance test string";
278
279 let start = std::time::Instant::now();
280 for _ in 0..10000 {
281 pool.intern(test_string);
282 }
283 let duration = start.elapsed();
284
285 assert!(
287 duration.as_millis() < 100,
288 "String pool lookup too slow: {duration:?}",
289 );
290
291 let stats = pool.get_stats();
292 assert_eq!(stats.unique_strings, 1);
293 }
294
295 #[test]
296 fn test_string_pool_memory_overhead() {
297 let pool = StringPool::new();
298
299 let test_strings = vec![
301 "short",
302 "medium length string",
303 "this is a much longer string that should still be handled efficiently",
304 ];
305
306 let mut total_string_bytes = 0;
307 for s in &test_strings {
308 pool.intern(s);
309 total_string_bytes += s.len();
310 }
311
312 let stats = pool.get_stats();
313
314 let overhead_ratio = (stats.memory_saved_bytes + total_string_bytes as u64) as f64
316 / total_string_bytes as f64;
317 assert!(
318 overhead_ratio < 2.0,
319 "Memory overhead too high: {overhead_ratio:.2}x"
320 );
321 assert!(
322 overhead_ratio >= 1.0,
323 "Memory usage calculation seems wrong"
324 );
325 }
326}
327
328pub struct StringPool {
330 pool: DashMap<String, Arc<str>>,
332 metrics: PoolMetrics,
334}
335
336impl StringPool {
337 fn new() -> Self {
339 Self {
340 pool: DashMap::new(),
341 metrics: PoolMetrics::new(),
342 }
343 }
344
345 pub fn intern(&self, s: &str) -> Arc<str> {
350 let string_len = s.len();
351
352 if let Some(existing) = self.pool.get(s) {
354 self.metrics.record_intern(string_len, true);
355 return existing.clone();
356 }
357
358 let arc_str: Arc<str> = Arc::from(s);
360
361 let result = self
363 .pool
364 .entry(s.to_string())
365 .or_insert_with(|| Arc::clone(&arc_str))
366 .clone();
367
368 self.metrics.record_intern(string_len, false);
369 result
370 }
371
372 pub fn get_stats(&self) -> StringPoolStats {
374 let unique_strings = self.pool.len();
375 let intern_operations = self.metrics.intern_count.load(Ordering::Relaxed);
376 let cache_hits = self.metrics.cache_hits.load(Ordering::Relaxed);
377 let total_bytes = self.metrics.total_string_bytes.load(Ordering::Relaxed);
378 let saved_bytes = self.metrics.duplicate_bytes_saved.load(Ordering::Relaxed);
379
380 let average_length = if unique_strings > 0 {
381 total_bytes as f64 / unique_strings as f64
382 } else {
383 0.0
384 };
385
386 StringPoolStats {
387 unique_strings,
388 intern_operations,
389 cache_hits,
390 memory_saved_bytes: saved_bytes,
391 average_string_length: average_length,
392 }
393 }
394
395 #[cfg(test)]
397 pub fn clear(&self) {
398 self.pool.clear();
399 }
400
401 pub fn len(&self) -> usize {
403 self.pool.len()
404 }
405
406 pub fn is_empty(&self) -> bool {
408 self.pool.is_empty()
409 }
410}
411
412static GLOBAL_STRING_POOL: OnceLock<StringPool> = OnceLock::new();
414
415fn get_global_pool() -> &'static StringPool {
417 GLOBAL_STRING_POOL.get_or_init(StringPool::new)
418}
419
420pub fn intern_string(s: &str) -> Arc<str> {
437 let start_time = std::time::Instant::now();
438 let result = get_global_pool().intern(s);
439
440 let duration_ns = start_time.elapsed().as_nanos() as u64;
442 crate::core::string_pool_monitor::record_intern_operation(duration_ns);
443
444 result
445}
446
447pub fn get_string_pool_stats() -> StringPoolStats {
449 get_global_pool().get_stats()
450}
451
452#[cfg(test)]
454pub fn clear_string_pool() {
455 if let Some(pool) = GLOBAL_STRING_POOL.get() {
456 pool.clear();
457 }
458}
459
460#[cfg(test)]
461mod tests {
462 use super::*;
463
464 #[test]
465 fn test_string_interning() {
466 clear_string_pool();
467
468 let s1 = intern_string("test");
469 let s2 = intern_string("test");
470 let s3 = intern_string("different");
471
472 assert!(Arc::ptr_eq(&s1, &s2));
474
475 assert!(!Arc::ptr_eq(&s1, &s3));
477
478 assert_eq!(&*s1, "test");
480 assert_eq!(&*s3, "different");
481 }
482
483 #[test]
484 fn test_string_pool_stats() {
485 let pool = StringPool::new();
487
488 let initial_stats = pool.get_stats();
489
490 let s1 = pool.intern("unique_test_string_1");
491 let s2 = pool.intern("unique_test_string_1"); let s3 = pool.intern("unique_test_string_2");
493
494 let stats = pool.get_stats();
495
496 assert!(stats.intern_operations >= initial_stats.intern_operations + 3);
498 assert!(stats.unique_strings >= initial_stats.unique_strings + 2);
500 assert!(stats.cache_hits > initial_stats.cache_hits);
502
503 assert_eq!(&*s1, "unique_test_string_1");
505 assert_eq!(&*s2, "unique_test_string_1");
506 assert_eq!(&*s3, "unique_test_string_2");
507 }
508
509 #[test]
510 fn test_empty_string_interning() {
511 clear_string_pool();
512
513 let s1 = intern_string("");
514 let s2 = intern_string("");
515
516 assert!(Arc::ptr_eq(&s1, &s2));
517 assert_eq!(&*s1, "");
518 }
519
520 #[test]
521 fn test_concurrent_interning() {
522 use std::thread;
523
524 let handles: Vec<_> = (0..10)
527 .map(|i| {
528 thread::spawn(move || {
529 let s = format!("concurrent_test_{}", i % 3); intern_string(&s)
531 })
532 })
533 .collect();
534
535 let results: Vec<_> = handles.into_iter().filter_map(|h| h.join().ok()).collect();
536
537 for (i, arc_str) in results.iter().enumerate() {
539 let expected = format!("concurrent_test_{}", i % 3);
540 assert_eq!(arc_str.as_ref(), expected);
541 }
542
543 assert_eq!(results.len(), 10);
545 }
546}