1use parking_lot::Mutex;
44use std::cell::RefCell;
45use std::sync::Arc;
46use string_interner::{DefaultBackend, DefaultSymbol, StringInterner as Interner};
47
48use crate::lattice::Node;
49use crate::tokenizer::Token;
50
/// Handle identifying an interned string.
///
/// This is the symbol type produced by [`SharedStringInterner::intern`] and
/// accepted by [`SharedStringInterner::resolve`].
pub type Symbol = DefaultSymbol;
53
/// String interner that can be shared between owners.
///
/// The interner lives behind an `Arc<Mutex<..>>`, so cloning this handle
/// produces another handle to the *same* underlying table rather than a copy
/// of it, and every access goes through the mutex.
#[derive(Debug, Clone)]
pub struct SharedStringInterner {
    /// Shared, lock-protected interner backing every clone of this handle.
    interner: Arc<Mutex<Interner<DefaultBackend>>>,
}
62
63impl SharedStringInterner {
64 #[must_use]
66 pub fn new() -> Self {
67 Self {
68 interner: Arc::new(Mutex::new(Interner::new())),
69 }
70 }
71
72 #[must_use]
77 pub fn intern(&self, s: &str) -> Symbol {
78 self.interner.lock().get_or_intern(s)
79 }
80
81 #[must_use]
83 pub fn resolve(&self, symbol: Symbol) -> Option<String> {
84 self.interner
85 .lock()
86 .resolve(symbol)
87 .map(ToString::to_string)
88 }
89
90 #[must_use]
92 pub fn len(&self) -> usize {
93 self.interner.lock().len()
94 }
95
96 #[must_use]
98 pub fn is_empty(&self) -> bool {
99 self.interner.lock().is_empty()
100 }
101
102 #[must_use]
104 pub fn memory_usage(&self) -> usize {
105 let interner = self.interner.lock();
106 interner.len() * 20
108 }
109}
110
111impl Default for SharedStringInterner {
112 fn default() -> Self {
113 Self::new()
114 }
115}
116
/// Pool of reusable [`Token`] values.
///
/// Tokens handed back through `release` are reset and kept for later
/// `acquire` calls. The pool uses a `RefCell` for interior mutability, so it
/// is not `Sync` and must be used from one thread at a time.
pub struct TokenPool {
    /// Idle tokens waiting to be handed out again.
    pool: RefCell<Vec<Token>>,
    /// Upper bound on the number of idle tokens kept; tokens released while
    /// the pool is at this size are dropped instead of stored.
    max_size: usize,
}
124
125impl TokenPool {
126 #[must_use]
128 pub fn new() -> Self {
129 Self::with_capacity(128)
130 }
131
132 #[must_use]
134 pub fn with_capacity(capacity: usize) -> Self {
135 Self {
136 pool: RefCell::new(Vec::with_capacity(capacity)),
137 max_size: capacity * 2, }
139 }
140
141 pub fn acquire(&self) -> Token {
146 self.pool
147 .borrow_mut()
148 .pop()
149 .unwrap_or_else(|| Token::new(String::new(), String::new(), 0, 0, 0, 0))
150 }
151
152 pub fn release(&self, mut token: Token) {
158 let mut pool = self.pool.borrow_mut();
159
160 if pool.len() >= self.max_size {
162 return;
163 }
164
165 token.surface.clear();
167 token.pos.clear();
168 token.start_pos = 0;
169 token.end_pos = 0;
170 token.start_byte = 0;
171 token.end_byte = 0;
172 token.reading = None;
173 token.lemma = None;
174 token.cost = 0;
175 token.features.clear();
176 token.normalized = None;
177
178 pool.push(token);
179 }
180
181 pub fn size(&self) -> usize {
183 self.pool.borrow().len()
184 }
185
186 pub fn clear(&self) {
188 self.pool.borrow_mut().clear();
189 }
190
191 pub fn memory_usage(&self) -> usize {
193 self.pool.borrow().len() * std::mem::size_of::<Token>()
195 }
196}
197
198impl Default for TokenPool {
199 fn default() -> Self {
200 Self::new()
201 }
202}
203
/// Pool of reusable `Vec<Node>` buffers.
///
/// Buffers handed back through `release` are emptied and kept so later
/// `acquire` calls can reuse them. The pool uses a `RefCell` for interior
/// mutability, so it is not `Sync`.
pub struct NodeVecPool {
    /// Idle buffers waiting to be handed out again.
    pool: RefCell<Vec<Vec<Node>>>,
    /// Upper bound on the number of idle buffers kept; buffers released while
    /// the pool is at this size are dropped instead of stored.
    max_size: usize,
}
211
212impl NodeVecPool {
213 #[must_use]
215 pub fn new() -> Self {
216 Self::with_capacity(32)
217 }
218
219 #[must_use]
221 pub fn with_capacity(capacity: usize) -> Self {
222 Self {
223 pool: RefCell::new(Vec::with_capacity(capacity)),
224 max_size: capacity * 2,
225 }
226 }
227
228 pub fn acquire(&self) -> Vec<Node> {
230 self.pool.borrow_mut().pop().unwrap_or_default()
231 }
232
233 pub fn release(&self, mut vec: Vec<Node>) {
235 let mut pool = self.pool.borrow_mut();
236
237 if pool.len() >= self.max_size {
238 return;
239 }
240
241 vec.clear();
243
244 pool.push(vec);
245 }
246
247 pub fn size(&self) -> usize {
249 self.pool.borrow().len()
250 }
251
252 pub fn clear(&self) {
254 self.pool.borrow_mut().clear();
255 }
256
257 pub fn memory_usage(&self) -> usize {
259 let pool = self.pool.borrow();
260 pool.iter()
261 .map(|v| v.capacity() * std::mem::size_of::<Node>())
262 .sum()
263 }
264}
265
266impl Default for NodeVecPool {
267 fn default() -> Self {
268 Self::new()
269 }
270}
271
/// Pool of reusable `Vec<u32>` buffers.
///
/// Buffers handed back through [`IdVecPool::release`] are emptied and kept so
/// later [`IdVecPool::acquire`] calls can reuse their allocations. The pool
/// uses a `RefCell` for interior mutability, so it is not `Sync`.
pub struct IdVecPool {
    /// Idle buffers waiting to be handed out again.
    pool: RefCell<Vec<Vec<u32>>>,
    /// Upper bound on the number of idle buffers kept; buffers released while
    /// the pool is at this size are dropped instead of stored.
    max_size: usize,
}

impl IdVecPool {
    /// Creates a pool with the default initial capacity of 64 buffers.
    #[must_use]
    pub fn new() -> Self {
        Self::with_capacity(64)
    }

    /// Creates a pool whose backing storage is pre-allocated for `capacity`
    /// buffers.
    ///
    /// The pool retains at most `capacity * 2` idle buffers.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            pool: RefCell::new(Vec::with_capacity(capacity)),
            max_size: capacity * 2,
        }
    }

    /// Hands out an empty buffer, reusing an idle one when available and
    /// creating a new `Vec` otherwise.
    pub fn acquire(&self) -> Vec<u32> {
        self.pool.borrow_mut().pop().unwrap_or_default()
    }

    /// Returns `vec` to the pool for later reuse.
    ///
    /// The buffer is emptied (its allocation is kept) before being stored. If
    /// the pool already holds `max_size` idle buffers, `vec` is dropped.
    pub fn release(&self, mut vec: Vec<u32>) {
        let mut pool = self.pool.borrow_mut();

        if pool.len() >= self.max_size {
            return;
        }

        vec.clear();
        pool.push(vec);
    }

    /// Number of idle buffers currently held by the pool.
    pub fn size(&self) -> usize {
        self.pool.borrow().len()
    }

    /// Drops every idle buffer held by the pool.
    pub fn clear(&self) {
        self.pool.borrow_mut().clear();
    }

    /// Estimated memory, in bytes, reserved by the idle buffers.
    ///
    /// Each buffer contributes its capacity multiplied by the size of a
    /// `u32`, mirroring how the sibling `NodeVecPool` reports its usage.
    #[must_use]
    pub fn memory_usage(&self) -> usize {
        self.pool
            .borrow()
            .iter()
            .map(|ids| ids.capacity() * std::mem::size_of::<u32>())
            .sum()
    }
}

/// The default pool is the one built by [`IdVecPool::new`].
impl Default for IdVecPool {
    fn default() -> Self {
        Self::new()
    }
}
329
/// Bundle of every pool plus the shared string interner.
///
/// All members are public so callers can use each pool directly; the manager
/// itself adds aggregate statistics and bulk clearing.
#[derive(Default)]
pub struct PoolManager {
    /// Pool of reusable tokens.
    pub token_pool: TokenPool,
    /// Pool of reusable `Vec<Node>` buffers.
    pub node_vec_pool: NodeVecPool,
    /// Pool of reusable `Vec<u32>` buffers.
    pub id_vec_pool: IdVecPool,
    /// Shared interner for strings.
    pub string_interner: SharedStringInterner,
}
344
345impl PoolManager {
346 #[must_use]
348 pub fn new() -> Self {
349 Self::default()
350 }
351
352 pub fn stats(&self) -> PoolStats {
354 PoolStats {
355 token_pool_size: self.token_pool.size(),
356 node_vec_pool_size: self.node_vec_pool.size(),
357 id_vec_pool_size: self.id_vec_pool.size(),
358 interned_strings: self.string_interner.len(),
359 total_memory: self.total_memory_usage(),
360 }
361 }
362
363 pub fn clear_all(&self) {
365 self.token_pool.clear();
366 self.node_vec_pool.clear();
367 self.id_vec_pool.clear();
368 }
369
370 pub fn total_memory_usage(&self) -> usize {
372 self.token_pool.memory_usage()
373 + self.node_vec_pool.memory_usage()
374 + self.id_vec_pool.size() * std::mem::size_of::<Vec<u32>>()
375 + self.string_interner.memory_usage()
376 }
377}
378
/// Point-in-time snapshot of pool occupancy and estimated memory use.
#[derive(Debug, Clone, Copy)]
pub struct PoolStats {
    /// Number of idle tokens in the token pool.
    pub token_pool_size: usize,
    /// Number of idle buffers in the node-vector pool.
    pub node_vec_pool_size: usize,
    /// Number of idle buffers in the ID-vector pool.
    pub id_vec_pool_size: usize,
    /// Number of strings held by the interner.
    pub interned_strings: usize,
    /// Estimated total memory use, in bytes.
    pub total_memory: usize,
}

impl PoolStats {
    /// Renders the snapshot as a single summary line.
    ///
    /// Memory is shown in whole kibibytes (bytes divided by 1024, rounded
    /// down) and labelled "KB".
    #[must_use]
    pub fn format_human_readable(&self) -> String {
        let Self {
            token_pool_size,
            node_vec_pool_size,
            id_vec_pool_size,
            interned_strings,
            total_memory,
        } = *self;
        let memory_kb = total_memory / 1024;

        format!(
            "Token Pool: {}, Node Vec Pool: {}, ID Vec Pool: {}, Interned Strings: {}, Memory: {} KB",
            token_pool_size, node_vec_pool_size, id_vec_pool_size, interned_strings, memory_kb
        )
    }
}
408
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning the same text twice yields one symbol; distinct texts get
    /// distinct symbols and resolve back to their original strings.
    #[test]
    fn test_string_interner() {
        let interner = SharedStringInterner::new();

        let s1 = interner.intern("NNG");
        let s2 = interner.intern("NNG");
        let s3 = interner.intern("VV");

        assert_eq!(s1, s2);
        assert_ne!(s1, s3);

        assert_eq!(interner.resolve(s1), Some("NNG".to_string()));
        assert_eq!(interner.resolve(s3), Some("VV".to_string()));

        assert_eq!(interner.len(), 2);
    }

    /// Released tokens are reused and come back blank.
    #[test]
    fn test_token_pool() {
        let pool = TokenPool::new();

        let token1 = pool.acquire();
        assert_eq!(pool.size(), 0);

        pool.release(token1);
        assert_eq!(pool.size(), 1);

        let mut token2 = pool.acquire();
        assert_eq!(pool.size(), 0);

        token2.surface = "테스트".to_string();
        pool.release(token2);

        let token3 = pool.acquire();
        assert!(token3.surface.is_empty());
    }

    /// Released node buffers are reused and come back empty.
    #[test]
    fn test_node_vec_pool() {
        let pool = NodeVecPool::new();

        let mut vec1 = pool.acquire();
        assert_eq!(pool.size(), 0);

        vec1.push(Node::bos());
        vec1.push(Node::eos(1, 10, 30));

        pool.release(vec1);
        assert_eq!(pool.size(), 1);

        let vec2 = pool.acquire();
        assert_eq!(vec2.len(), 0);
    }

    /// Released ID buffers are reused and come back empty.
    #[test]
    fn test_id_vec_pool() {
        let pool = IdVecPool::new();

        let mut ids = pool.acquire();
        assert_eq!(pool.size(), 0);

        ids.extend_from_slice(&[1, 2, 3]);
        pool.release(ids);
        assert_eq!(pool.size(), 1);

        let reused = pool.acquire();
        assert!(reused.is_empty());
        assert_eq!(pool.size(), 0);
    }

    /// The manager's statistics reflect the state of each member.
    #[test]
    fn test_pool_manager() {
        let manager = PoolManager::new();

        let token = manager.token_pool.acquire();
        manager.token_pool.release(token);

        let vec = manager.node_vec_pool.acquire();
        manager.node_vec_pool.release(vec);

        let _s1 = manager.string_interner.intern("NNG");
        let _s2 = manager.string_interner.intern("VV");

        let stats = manager.stats();
        assert_eq!(stats.token_pool_size, 1);
        assert_eq!(stats.node_vec_pool_size, 1);
        assert_eq!(stats.id_vec_pool_size, 0);
        assert_eq!(stats.interned_strings, 2);
    }

    /// The pool never keeps more than `capacity * 2` idle tokens.
    #[test]
    fn test_pool_max_size() {
        let pool = TokenPool::with_capacity(2);

        // Hold all tokens at once so that releasing them actually pushes the
        // pool against its limit; acquiring and releasing one at a time would
        // never leave more than a single idle token.
        let tokens: Vec<_> = (0..10).map(|_| pool.acquire()).collect();
        for token in tokens {
            pool.release(token);
        }

        assert_eq!(pool.size(), 4);
    }

    /// `clear` drops every idle token.
    #[test]
    fn test_pool_clear() {
        let pool = TokenPool::new();

        let mut tokens = Vec::new();
        for _ in 0..5 {
            tokens.push(pool.acquire());
        }

        for token in tokens {
            pool.release(token);
        }

        assert_eq!(pool.size(), 5);

        pool.clear();
        assert_eq!(pool.size(), 0);
    }

    /// `clear_all` empties every pool owned by the manager.
    #[test]
    fn test_pool_manager_clear_all() {
        let manager = PoolManager::new();

        let token = manager.token_pool.acquire();
        manager.token_pool.release(token);

        let vec = manager.node_vec_pool.acquire();
        manager.node_vec_pool.release(vec);

        let ids = manager.id_vec_pool.acquire();
        manager.id_vec_pool.release(ids);

        assert!(manager.token_pool.size() > 0);
        assert!(manager.node_vec_pool.size() > 0);
        assert!(manager.id_vec_pool.size() > 0);

        manager.clear_all();

        assert_eq!(manager.token_pool.size(), 0);
        assert_eq!(manager.node_vec_pool.size(), 0);
        assert_eq!(manager.id_vec_pool.size(), 0);
    }
}