mecab_ko_dict/
string_pool.rs1use std::collections::HashMap;
26use std::sync::Arc;
27
28#[derive(Debug, Default)]
35pub struct StringPool {
36 strings: HashMap<Arc<str>, ()>,
38}
39
40impl StringPool {
41 #[must_use]
43 pub fn new() -> Self {
44 Self {
45 strings: HashMap::new(),
46 }
47 }
48
49 #[must_use]
51 pub fn with_capacity(capacity: usize) -> Self {
52 Self {
53 strings: HashMap::with_capacity(capacity),
54 }
55 }
56
57 pub fn intern(&mut self, s: &str) -> Arc<str> {
62 if let Some((existing, ())) = self.strings.get_key_value(s) {
64 return Arc::clone(existing);
65 }
66
67 let arc: Arc<str> = Arc::from(s);
69 self.strings.insert(Arc::clone(&arc), ());
70 arc
71 }
72
73 pub fn intern_string(&mut self, s: String) -> Arc<str> {
78 if let Some((existing, ())) = self.strings.get_key_value(s.as_str()) {
80 return Arc::clone(existing);
81 }
82
83 let arc: Arc<str> = Arc::from(s);
85 self.strings.insert(Arc::clone(&arc), ());
86 arc
87 }
88
89 #[must_use]
91 pub fn len(&self) -> usize {
92 self.strings.len()
93 }
94
95 #[must_use]
97 pub fn is_empty(&self) -> bool {
98 self.strings.is_empty()
99 }
100
101 pub fn clear(&mut self) {
103 self.strings.clear();
104 }
105
106 #[must_use]
110 pub fn memory_usage(&self) -> usize {
111 let mut total = std::mem::size_of::<Self>();
112
113 total += self.strings.capacity() * std::mem::size_of::<(Arc<str>, ())>();
115
116 for (s, ()) in &self.strings {
118 total += std::mem::size_of::<Arc<str>>() + s.len();
119 }
120
121 total
122 }
123
124 #[must_use]
126 pub fn stats(&self) -> StringPoolStats {
127 let mut total_string_bytes = 0;
128 let mut min_len = usize::MAX;
129 let mut max_len = 0;
130
131 for (s, ()) in &self.strings {
132 let len = s.len();
133 total_string_bytes += len;
134 min_len = min_len.min(len);
135 max_len = max_len.max(len);
136 }
137
138 if self.strings.is_empty() {
139 min_len = 0;
140 }
141
142 StringPoolStats {
143 count: self.strings.len(),
144 total_bytes: total_string_bytes,
145 avg_length: if self.strings.is_empty() {
146 0.0
147 } else {
148 total_string_bytes as f64 / self.strings.len() as f64
149 },
150 min_length: min_len,
151 max_length: max_len,
152 memory_usage: self.memory_usage(),
153 }
154 }
155}
156
157#[derive(Debug, Clone, Copy, PartialEq)]
159pub struct StringPoolStats {
160 pub count: usize,
162 pub total_bytes: usize,
164 pub avg_length: f64,
166 pub min_length: usize,
168 pub max_length: usize,
170 pub memory_usage: usize,
172}
173
174#[derive(Debug, Default)]
179pub struct ConcurrentStringPool {
180 inner: std::sync::RwLock<StringPool>,
181}
182
183impl ConcurrentStringPool {
184 #[must_use]
186 pub fn new() -> Self {
187 Self {
188 inner: std::sync::RwLock::new(StringPool::new()),
189 }
190 }
191
192 #[must_use]
194 pub fn with_capacity(capacity: usize) -> Self {
195 Self {
196 inner: std::sync::RwLock::new(StringPool::with_capacity(capacity)),
197 }
198 }
199
200 pub fn intern(&self, s: &str) -> Arc<str> {
202 {
204 let pool = self.inner.read().unwrap();
205 if let Some((existing, ())) = pool.strings.get_key_value(s) {
206 return Arc::clone(existing);
207 }
208 }
209
210 let mut pool = self.inner.write().unwrap();
212 pool.intern(s)
213 }
214
215 #[must_use]
217 pub fn len(&self) -> usize {
218 self.inner.read().unwrap().len()
219 }
220
221 #[must_use]
223 pub fn is_empty(&self) -> bool {
224 self.inner.read().unwrap().is_empty()
225 }
226
227 #[must_use]
229 pub fn stats(&self) -> StringPoolStats {
230 self.inner.read().unwrap().stats()
231 }
232}
233
234#[cfg(test)]
235mod tests {
236 use super::*;
237
238 #[test]
239 fn test_string_pool_basic() {
240 let mut pool = StringPool::new();
241
242 let s1 = pool.intern("hello");
243 let s2 = pool.intern("hello");
244 let s3 = pool.intern("world");
245
246 assert!(Arc::ptr_eq(&s1, &s2));
248
249 assert!(!Arc::ptr_eq(&s1, &s3));
251
252 assert_eq!(pool.len(), 2);
254 }
255
256 #[test]
257 fn test_string_pool_korean() {
258 let mut pool = StringPool::new();
259
260 let s1 = pool.intern("안녕하세요");
261 let s2 = pool.intern("안녕하세요");
262 let s3 = pool.intern("감사합니다");
263
264 assert!(Arc::ptr_eq(&s1, &s2));
265 assert!(!Arc::ptr_eq(&s1, &s3));
266 assert_eq!(pool.len(), 2);
267 }
268
269 #[test]
270 fn test_string_pool_stats() {
271 let mut pool = StringPool::new();
272
273 pool.intern("a");
274 pool.intern("bb");
275 pool.intern("ccc");
276
277 let stats = pool.stats();
278 assert_eq!(stats.count, 3);
279 assert_eq!(stats.total_bytes, 6); assert!((stats.avg_length - 2.0).abs() < f64::EPSILON);
281 assert_eq!(stats.min_length, 1);
282 assert_eq!(stats.max_length, 3);
283 }
284
285 #[test]
286 fn test_string_pool_intern_string() {
287 let mut pool = StringPool::new();
288
289 let owned = String::from("test");
290 let s1 = pool.intern_string(owned);
291 let s2 = pool.intern("test");
292
293 assert!(Arc::ptr_eq(&s1, &s2));
294 assert_eq!(pool.len(), 1);
295 }
296
297 #[test]
298 fn test_concurrent_string_pool() {
299 let pool = ConcurrentStringPool::new();
300
301 let s1 = pool.intern("test");
302 let s2 = pool.intern("test");
303
304 assert!(Arc::ptr_eq(&s1, &s2));
305 assert_eq!(pool.len(), 1);
306 }
307
308 #[test]
309 fn test_string_pool_memory_usage() {
310 let mut pool = StringPool::new();
311
312 let initial = pool.memory_usage();
313
314 pool.intern("a short string");
315 pool.intern("another string");
316
317 let after = pool.memory_usage();
318
319 assert!(after > initial);
321 }
322
323 #[test]
324 fn test_string_pool_clear() {
325 let mut pool = StringPool::new();
326
327 pool.intern("test1");
328 pool.intern("test2");
329 assert_eq!(pool.len(), 2);
330
331 pool.clear();
332 assert!(pool.is_empty());
333 }
334}