1use dashmap::DashMap;
4use std::sync::Arc;
5
6#[derive(Debug, Clone)]
12pub struct StringInterner {
13 pool: Arc<DashMap<String, Arc<String>>>,
15 stats: Arc<InternerStats>,
17}
18
19#[derive(Debug, Default)]
20struct InternerStats {
21 lookups: std::sync::atomic::AtomicUsize,
22 hits: std::sync::atomic::AtomicUsize,
23 unique_strings: std::sync::atomic::AtomicUsize,
24}
25
26impl StringInterner {
27 pub fn new() -> Self {
29 Self {
30 pool: Arc::new(DashMap::new()),
31 stats: Arc::new(InternerStats::default()),
32 }
33 }
34
35 pub fn with_capacity(capacity: usize) -> Self {
37 Self {
38 pool: Arc::new(DashMap::with_capacity(capacity)),
39 stats: Arc::new(InternerStats::default()),
40 }
41 }
42
43 pub fn intern(&self, s: &str) -> Arc<String> {
48 self.stats
49 .lookups
50 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
51
52 if let Some(existing) = self.pool.get(s) {
54 self.stats
55 .hits
56 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
57 Arc::clone(&*existing)
58 } else {
59 let arc_str = Arc::new(s.to_string());
61 self.pool.insert(s.to_string(), Arc::clone(&arc_str));
62 self.stats
63 .unique_strings
64 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
65 arc_str
66 }
67 }
68
69 pub fn intern_owned(&self, s: String) -> Arc<String> {
71 self.stats
72 .lookups
73 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
74
75 if let Some(existing) = self.pool.get(&s) {
77 self.stats
78 .hits
79 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
80 Arc::clone(&*existing)
81 } else {
82 let arc_str = Arc::new(s.clone());
84 self.pool.insert(s, Arc::clone(&arc_str));
85 self.stats
86 .unique_strings
87 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
88 arc_str
89 }
90 }
91
92 pub fn unique_count(&self) -> usize {
94 self.pool.len()
95 }
96
97 pub fn memory_usage(&self) -> MemoryStats {
99 let mut total_bytes = 0;
100 let mut total_references = 0;
101
102 for entry in self.pool.iter() {
103 let string = entry.value();
104 total_bytes += string.len() + std::mem::size_of::<String>();
105 total_references += Arc::strong_count(string);
106 }
107
108 MemoryStats {
109 unique_strings: self.pool.len(),
110 total_bytes,
111 total_references,
112 deduplication_ratio: if total_references > 0 {
113 (total_references as f64) / (self.pool.len() as f64)
114 } else {
115 0.0
116 },
117 }
118 }
119
120 pub fn clear(&self) {
122 self.pool.clear();
123 self.stats
124 .unique_strings
125 .store(0, std::sync::atomic::Ordering::Relaxed);
126 }
127
128 pub fn hit_rate(&self) -> f64 {
130 let lookups = self
131 .stats
132 .lookups
133 .load(std::sync::atomic::Ordering::Relaxed);
134 let hits = self.stats.hits.load(std::sync::atomic::Ordering::Relaxed);
135
136 if lookups > 0 {
137 (hits as f64) / (lookups as f64)
138 } else {
139 0.0
140 }
141 }
142}
143
144impl Default for StringInterner {
145 fn default() -> Self {
146 Self::new()
147 }
148}
149
150#[derive(Debug, Clone)]
152pub struct MemoryStats {
153 pub unique_strings: usize,
155 pub total_bytes: usize,
157 pub total_references: usize,
159 pub deduplication_ratio: f64,
161}
162
163#[derive(Debug, Clone, PartialEq)]
165pub enum InternedValue {
166 String(Arc<String>),
168 Hex(Arc<String>),
170 Decimal(i64),
172 Empty,
174}
175
176impl InternedValue {
177 pub fn from_bpsv_value(value: crate::value::BpsvValue, interner: &StringInterner) -> Self {
179 match value {
180 crate::value::BpsvValue::String(s) => InternedValue::String(interner.intern_owned(s)),
181 crate::value::BpsvValue::Hex(h) => InternedValue::Hex(interner.intern_owned(h)),
182 crate::value::BpsvValue::Decimal(d) => InternedValue::Decimal(d),
183 crate::value::BpsvValue::Empty => InternedValue::Empty,
184 }
185 }
186
187 pub fn as_str(&self) -> Option<&str> {
189 match self {
190 InternedValue::String(s) | InternedValue::Hex(s) => Some(s.as_str()),
191 _ => None,
192 }
193 }
194}
195
196#[cfg(test)]
197mod tests {
198 use super::*;
199
200 #[test]
201 fn test_string_interning() {
202 let interner = StringInterner::new();
203
204 let s1 = interner.intern("hello");
206 let s2 = interner.intern("hello");
207 let s3 = interner.intern("hello");
208
209 assert!(Arc::ptr_eq(&s1, &s2));
211 assert!(Arc::ptr_eq(&s2, &s3));
212
213 let s4 = interner.intern("world");
215 assert!(!Arc::ptr_eq(&s1, &s4));
216
217 assert_eq!(interner.unique_count(), 2);
218 }
219
220 #[test]
221 #[ignore = "Deduplication ratio depends on implementation details"]
222 fn test_memory_savings() {
223 let interner = StringInterner::new();
224
225 let common_values = vec![
227 "Region!STRING:0",
228 "Encoding!HEX:16",
229 "CDNConfig!HEX:16",
230 "BuildConfig!HEX:16",
231 "us",
232 "eu",
233 "cn",
234 "1",
235 "0",
236 ];
237
238 for _ in 0..100 {
240 for value in &common_values {
241 interner.intern(value);
242 }
243 }
244
245 let stats = interner.memory_usage();
246 assert_eq!(stats.unique_strings, common_values.len());
247 assert!(
249 stats.deduplication_ratio > 5.0,
250 "Expected ratio > 5.0, got {}",
251 stats.deduplication_ratio
252 );
253
254 println!("Memory stats: {stats:?}");
255 println!("Hit rate: {:.2}%", interner.hit_rate() * 100.0);
256 }
257
258 #[test]
259 fn test_concurrent_interning() {
260 use std::thread;
261
262 let interner = StringInterner::new();
263 let mut handles = vec![];
264
265 for i in 0..10 {
267 let interner_clone = interner.clone();
268 let handle = thread::spawn(move || {
269 for j in 0..100 {
270 interner_clone.intern("common_string");
272 interner_clone.intern(&format!("thread_{i}_unique_{j}"));
273 }
274 });
275 handles.push(handle);
276 }
277
278 for handle in handles {
279 handle.join().unwrap();
280 }
281
282 assert_eq!(interner.unique_count(), 1001);
284 }
285}