use parking_lot::Mutex;
use std::cell::RefCell;
use std::sync::Arc;
use string_interner::{DefaultBackend, DefaultSymbol, StringInterner as Interner};
use crate::lattice::Node;
use crate::tokenizer::Token;
/// Handle type returned by [`SharedStringInterner::intern`] and accepted by
/// [`SharedStringInterner::resolve`]; an alias for the interner crate's `DefaultSymbol`.
pub type Symbol = DefaultSymbol;
/// A clonable handle to a mutex-guarded string interner.
///
/// The derived `Clone` copies the inner `Arc`, so every clone interns into and
/// resolves from the same underlying table.
#[derive(Debug, Clone)]
pub struct SharedStringInterner {
    /// The interner table, shared between clones and locked on every access.
    interner: Arc<Mutex<Interner<DefaultBackend>>>,
}

impl SharedStringInterner {
    /// Creates a handle to a new, empty interner.
    #[must_use]
    pub fn new() -> Self {
        let table: Interner<DefaultBackend> = Interner::new();
        Self {
            interner: Arc::new(Mutex::new(table)),
        }
    }

    /// Returns the symbol for `s`, inserting `s` into the table if it is not
    /// already present.
    #[must_use]
    pub fn intern(&self, s: &str) -> Symbol {
        let mut table = self.interner.lock();
        table.get_or_intern(s)
    }

    /// Looks up the text behind `symbol`.
    ///
    /// Returns an owned copy of the string, or `None` when the interner does
    /// not resolve the symbol.
    #[must_use]
    pub fn resolve(&self, symbol: Symbol) -> Option<String> {
        let table = self.interner.lock();
        table.resolve(symbol).map(String::from)
    }

    /// Number of strings currently interned.
    #[must_use]
    pub fn len(&self) -> usize {
        let table = self.interner.lock();
        table.len()
    }

    /// Returns `true` when nothing has been interned yet.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        let table = self.interner.lock();
        table.is_empty()
    }

    /// Rough memory estimate: the entry count multiplied by a flat per-entry
    /// figure. Actual string lengths are not inspected.
    #[must_use]
    pub fn memory_usage(&self) -> usize {
        // Flat per-entry byte figure used for the estimate.
        const BYTES_PER_ENTRY: usize = 20;

        let table = self.interner.lock();
        table.len() * BYTES_PER_ENTRY
    }
}

impl Default for SharedStringInterner {
    /// Equivalent to [`SharedStringInterner::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// A bounded free list of [`Token`] values that can be recycled instead of
/// being constructed from scratch.
///
/// Interior mutability comes from a `RefCell`, so all methods take `&self`.
pub struct TokenPool {
    /// Idle tokens waiting to be handed out again.
    pool: RefCell<Vec<Token>>,
    /// Maximum number of idle tokens kept; releases beyond this are dropped.
    max_size: usize,
}

impl TokenPool {
    /// Creates a pool with an initial capacity of 128 tokens.
    #[must_use]
    pub fn new() -> Self {
        const DEFAULT_CAPACITY: usize = 128;
        Self::with_capacity(DEFAULT_CAPACITY)
    }

    /// Creates a pool whose backing vector is pre-allocated for `capacity`
    /// tokens and which retains at most `capacity * 2` idle tokens.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        let idle = Vec::with_capacity(capacity);
        Self {
            pool: RefCell::new(idle),
            max_size: capacity * 2,
        }
    }

    /// Hands out an idle token if one is available, otherwise builds a new
    /// token from empty strings and zeroed numeric arguments.
    pub fn acquire(&self) -> Token {
        let recycled = self.pool.borrow_mut().pop();
        match recycled {
            Some(token) => token,
            None => Token::new(String::new(), String::new(), 0, 0, 0, 0),
        }
    }

    /// Returns `token` to the pool after resetting its fields.
    ///
    /// When the pool already holds `max_size` idle tokens the token is simply
    /// dropped.
    pub fn release(&self, mut token: Token) {
        // Resets every field so the next `acquire` observes a blank token.
        fn scrub(token: &mut Token) {
            token.surface.clear();
            token.pos.clear();
            token.start_pos = 0;
            token.end_pos = 0;
            token.start_byte = 0;
            token.end_byte = 0;
            token.reading = None;
            token.lemma = None;
            token.cost = 0;
            token.features.clear();
            token.normalized = None;
        }

        let mut idle = self.pool.borrow_mut();
        if idle.len() < self.max_size {
            scrub(&mut token);
            idle.push(token);
        }
    }

    /// Number of idle tokens currently held.
    pub fn size(&self) -> usize {
        let idle = self.pool.borrow();
        idle.len()
    }

    /// Drops every idle token.
    pub fn clear(&self) {
        let mut idle = self.pool.borrow_mut();
        idle.clear();
    }

    /// Idle token count multiplied by `size_of::<Token>()`; heap data owned
    /// by the tokens is not included.
    pub fn memory_usage(&self) -> usize {
        let idle_count = self.pool.borrow().len();
        idle_count * std::mem::size_of::<Token>()
    }
}

impl Default for TokenPool {
    /// Equivalent to [`TokenPool::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// A bounded free list of `Vec<Node>` buffers kept around for reuse.
///
/// Interior mutability comes from a `RefCell`, so all methods take `&self`.
pub struct NodeVecPool {
    /// Idle, already-cleared node vectors.
    pool: RefCell<Vec<Vec<Node>>>,
    /// Maximum number of idle vectors kept; releases beyond this are dropped.
    max_size: usize,
}

impl NodeVecPool {
    /// Creates a pool with an initial capacity of 32 vectors.
    #[must_use]
    pub fn new() -> Self {
        const DEFAULT_CAPACITY: usize = 32;
        Self::with_capacity(DEFAULT_CAPACITY)
    }

    /// Creates a pool pre-allocated for `capacity` idle vectors that retains
    /// at most `capacity * 2` of them.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        let idle = Vec::with_capacity(capacity);
        Self {
            pool: RefCell::new(idle),
            max_size: capacity * 2,
        }
    }

    /// Hands out an idle vector, or a new empty one when the pool is empty.
    pub fn acquire(&self) -> Vec<Node> {
        let recycled = self.pool.borrow_mut().pop();
        recycled.unwrap_or_default()
    }

    /// Clears `vec` and stores it for reuse, unless the pool already holds
    /// `max_size` vectors, in which case `vec` is dropped.
    pub fn release(&self, mut vec: Vec<Node>) {
        let mut idle = self.pool.borrow_mut();
        if idle.len() < self.max_size {
            vec.clear();
            idle.push(vec);
        }
    }

    /// Number of idle vectors currently held.
    pub fn size(&self) -> usize {
        let idle = self.pool.borrow();
        idle.len()
    }

    /// Drops every idle vector.
    pub fn clear(&self) {
        let mut idle = self.pool.borrow_mut();
        idle.clear();
    }

    /// Sum over all idle vectors of `capacity * size_of::<Node>()`.
    pub fn memory_usage(&self) -> usize {
        let node_size = std::mem::size_of::<Node>();
        let idle = self.pool.borrow();
        let mut bytes = 0usize;
        for buffer in idle.iter() {
            bytes += buffer.capacity() * node_size;
        }
        bytes
    }
}

impl Default for NodeVecPool {
    /// Equivalent to [`NodeVecPool::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// A bounded free list of `Vec<u32>` buffers kept around for reuse.
///
/// Interior mutability comes from a `RefCell`, so all methods take `&self`.
pub struct IdVecPool {
    /// Idle, already-cleared id vectors.
    pool: RefCell<Vec<Vec<u32>>>,
    /// Maximum number of idle vectors kept; releases beyond this are dropped.
    max_size: usize,
}

impl IdVecPool {
    /// Creates a pool with an initial capacity of 64 vectors.
    #[must_use]
    pub fn new() -> Self {
        Self::with_capacity(64)
    }

    /// Creates a pool pre-allocated for `capacity` idle vectors that retains
    /// at most `capacity * 2` of them.
    ///
    /// A `capacity` of zero yields a pool that never retains anything.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            pool: RefCell::new(Vec::with_capacity(capacity)),
            max_size: capacity * 2,
        }
    }

    /// Hands out an idle vector, or a new empty one when the pool is empty.
    ///
    /// A recycled vector is empty but keeps the capacity it had on release.
    pub fn acquire(&self) -> Vec<u32> {
        self.pool.borrow_mut().pop().unwrap_or_default()
    }

    /// Clears `vec` and stores it for reuse, unless the pool already holds
    /// `max_size` vectors, in which case `vec` is dropped.
    pub fn release(&self, mut vec: Vec<u32>) {
        let mut pool = self.pool.borrow_mut();
        if pool.len() >= self.max_size {
            return;
        }
        vec.clear();
        pool.push(vec);
    }

    /// Number of idle vectors currently held.
    pub fn size(&self) -> usize {
        self.pool.borrow().len()
    }

    /// Drops every idle vector.
    pub fn clear(&self) {
        self.pool.borrow_mut().clear();
    }

    /// Heap bytes retained by the idle vectors: the sum over all of them of
    /// `capacity * size_of::<u32>()`.
    ///
    /// Provided for parity with the `memory_usage` methods on the sibling
    /// pools in this module.
    pub fn memory_usage(&self) -> usize {
        self.pool
            .borrow()
            .iter()
            .map(|ids| ids.capacity() * std::mem::size_of::<u32>())
            .sum()
    }
}

impl Default for IdVecPool {
    /// Equivalent to [`IdVecPool::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Bundles the object pools and the shared string interner behind one value.
#[derive(Default)]
pub struct PoolManager {
    /// Pool of recycled tokens.
    pub token_pool: TokenPool,
    /// Pool of recycled node vectors.
    pub node_vec_pool: NodeVecPool,
    /// Pool of recycled id vectors.
    pub id_vec_pool: IdVecPool,
    /// Interner shared by whoever holds a clone of this handle.
    pub string_interner: SharedStringInterner,
}

impl PoolManager {
    /// Creates a manager whose pools and interner all use their defaults.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Takes a snapshot of the current pool sizes, the interned string count
    /// and the estimated total memory in bytes.
    pub fn stats(&self) -> PoolStats {
        PoolStats {
            token_pool_size: self.token_pool.size(),
            node_vec_pool_size: self.node_vec_pool.size(),
            id_vec_pool_size: self.id_vec_pool.size(),
            interned_strings: self.string_interner.len(),
            total_memory: self.total_memory_usage(),
        }
    }

    /// Empties the three object pools.
    ///
    /// The string interner is left untouched.
    pub fn clear_all(&self) {
        self.token_pool.clear();
        self.node_vec_pool.clear();
        self.id_vec_pool.clear();
    }

    /// Estimated bytes held by the pools and the interner.
    ///
    /// For the id-vector pool this counts, per idle vector, both the `Vec`
    /// header and the heap buffer it retains (`capacity * size_of::<u32>()`).
    /// Counting only the header would ignore the retained buffers.
    pub fn total_memory_usage(&self) -> usize {
        // Walk the idle id buffers directly so that both the `Vec` header and
        // its retained heap capacity are counted.
        let id_vec_bytes: usize = self
            .id_vec_pool
            .pool
            .borrow()
            .iter()
            .map(|ids| {
                std::mem::size_of::<Vec<u32>>() + ids.capacity() * std::mem::size_of::<u32>()
            })
            .sum();

        self.token_pool.memory_usage()
            + self.node_vec_pool.memory_usage()
            + id_vec_bytes
            + self.string_interner.memory_usage()
    }
}
/// Snapshot of pool occupancy and estimated memory use.
#[derive(Debug, Clone, Copy)]
pub struct PoolStats {
    /// Size reported for the token pool.
    pub token_pool_size: usize,
    /// Size reported for the node-vector pool.
    pub node_vec_pool_size: usize,
    /// Size reported for the id-vector pool.
    pub id_vec_pool_size: usize,
    /// Number of interned strings.
    pub interned_strings: usize,
    /// Estimated total memory in bytes (rendered as KB by
    /// [`PoolStats::format_human_readable`]).
    pub total_memory: usize,
}

impl PoolStats {
    /// Renders the snapshot as a single comma-separated line, with memory
    /// shown in whole kilobytes (integer division by 1024).
    #[must_use]
    pub fn format_human_readable(&self) -> String {
        let Self {
            token_pool_size,
            node_vec_pool_size,
            id_vec_pool_size,
            interned_strings,
            total_memory,
        } = *self;
        let memory_kb = total_memory / 1024;

        format!(
            "Token Pool: {}, Node Vec Pool: {}, ID Vec Pool: {}, Interned Strings: {}, Memory: {} KB",
            token_pool_size, node_vec_pool_size, id_vec_pool_size, interned_strings, memory_kb
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning equal text yields equal symbols; distinct text yields
    /// distinct symbols, and both resolve back to their text.
    #[test]
    fn test_string_interner() {
        let interner = SharedStringInterner::new();

        let first = interner.intern("NNG");
        let repeat = interner.intern("NNG");
        let other = interner.intern("VV");

        assert_eq!(first, repeat);
        assert_ne!(first, other);
        assert_eq!(interner.resolve(first).as_deref(), Some("NNG"));
        assert_eq!(interner.resolve(other).as_deref(), Some("VV"));
        assert_eq!(interner.len(), 2);
    }

    /// Acquire/release changes the idle count, and released tokens come back
    /// with an empty surface.
    #[test]
    fn test_token_pool() {
        let pool = TokenPool::new();

        let fresh = pool.acquire();
        assert_eq!(pool.size(), 0);
        pool.release(fresh);
        assert_eq!(pool.size(), 1);

        let mut recycled = pool.acquire();
        assert_eq!(pool.size(), 0);
        recycled.surface = String::from("테스트");
        pool.release(recycled);

        let scrubbed = pool.acquire();
        assert!(scrubbed.surface.is_empty());
    }

    /// A released node vector is handed back empty.
    #[test]
    fn test_node_vec_pool() {
        let pool = NodeVecPool::new();

        let mut nodes = pool.acquire();
        assert_eq!(pool.size(), 0);
        nodes.push(Node::bos());
        nodes.push(Node::eos(1, 10, 30));
        pool.release(nodes);
        assert_eq!(pool.size(), 1);

        let reused = pool.acquire();
        assert!(reused.is_empty());
    }

    /// `stats` reflects what was released and interned through the manager.
    #[test]
    fn test_pool_manager() {
        let manager = PoolManager::new();

        manager.token_pool.release(manager.token_pool.acquire());
        manager.node_vec_pool.release(manager.node_vec_pool.acquire());
        let _first = manager.string_interner.intern("NNG");
        let _second = manager.string_interner.intern("VV");

        let snapshot = manager.stats();
        assert_eq!(snapshot.token_pool_size, 1);
        assert_eq!(snapshot.node_vec_pool_size, 1);
        assert_eq!(snapshot.interned_strings, 2);
    }

    /// The idle count never exceeds twice the requested capacity.
    #[test]
    fn test_pool_max_size() {
        let pool = TokenPool::with_capacity(2);
        (0..10).for_each(|_| {
            let token = pool.acquire();
            pool.release(token);
        });
        assert!(pool.size() <= 4);
    }

    /// `clear` drops every idle token.
    #[test]
    fn test_pool_clear() {
        let pool = TokenPool::new();

        let checked_out: Vec<_> = (0..5).map(|_| pool.acquire()).collect();
        checked_out.into_iter().for_each(|token| pool.release(token));
        assert_eq!(pool.size(), 5);

        pool.clear();
        assert_eq!(pool.size(), 0);
    }

    /// `clear_all` empties the token and node-vector pools.
    #[test]
    fn test_pool_manager_clear_all() {
        let manager = PoolManager::new();

        let token = manager.token_pool.acquire();
        manager.token_pool.release(token);
        let nodes = manager.node_vec_pool.acquire();
        manager.node_vec_pool.release(nodes);
        assert!(manager.token_pool.size() > 0);
        assert!(manager.node_vec_pool.size() > 0);

        manager.clear_all();
        assert_eq!(manager.token_pool.size(), 0);
        assert_eq!(manager.node_vec_pool.size(), 0);
    }
}