use std::collections::HashMap;
use std::sync::Arc;
/// A single-threaded string interner.
///
/// Each distinct string is stored once as an `Arc<str>`; interning an equal
/// string again yields a clone of the stored `Arc`, so equal interned strings
/// share one allocation.
#[derive(Debug, Default)]
pub struct StringPool {
    /// The interned strings. Only the keys matter; the unit value turns the
    /// map into a set from which the stored key can be fetched back.
    strings: HashMap<Arc<str>, ()>,
}

impl StringPool {
    /// Creates an empty pool.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates an empty pool whose backing map is pre-sized to hold at least
    /// `capacity` strings.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        let strings = HashMap::with_capacity(capacity);
        Self { strings }
    }

    /// Returns the shared `Arc<str>` equal to `s`, storing a copy of `s` in
    /// the pool first if no equal string has been interned yet.
    pub fn intern(&mut self, s: &str) -> Arc<str> {
        match self.strings.get_key_value(s) {
            // Already interned: hand out another handle to the stored string.
            Some((shared, ())) => Arc::clone(shared),
            None => {
                let fresh: Arc<str> = Arc::from(s);
                self.strings.insert(Arc::clone(&fresh), ());
                fresh
            }
        }
    }

    /// Same as [`StringPool::intern`], but takes an owned `String`.
    ///
    /// The `String` is consumed whether or not an equal string was already
    /// present in the pool.
    pub fn intern_string(&mut self, s: String) -> Arc<str> {
        let cached = self
            .strings
            .get_key_value(s.as_str())
            .map(|(key, _)| Arc::clone(key));

        cached.unwrap_or_else(|| {
            let fresh: Arc<str> = Arc::from(s);
            self.strings.insert(Arc::clone(&fresh), ());
            fresh
        })
    }

    /// Number of distinct strings currently held by the pool.
    #[must_use]
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Returns `true` when the pool holds no strings.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }

    /// Removes every string from the pool.
    ///
    /// `Arc`s handed out earlier keep their contents; the pool merely drops
    /// its own handles.
    pub fn clear(&mut self) {
        self.strings.clear();
    }

    /// Estimates the pool's memory footprint in bytes.
    ///
    /// The estimate is the sum of: the size of the pool struct itself, one
    /// map entry per unit of map capacity, and for every stored string its
    /// byte length plus `size_of::<Arc<str>>()` (presumably standing in for
    /// the per-allocation bookkeeping of the `Arc`).
    #[must_use]
    pub fn memory_usage(&self) -> usize {
        let fixed = std::mem::size_of::<Self>();
        let table = self.strings.capacity() * std::mem::size_of::<(Arc<str>, ())>();
        let per_string: usize = self
            .strings
            .keys()
            .map(|key| std::mem::size_of::<Arc<str>>() + key.len())
            .sum();
        fixed + table + per_string
    }

    /// Computes summary statistics over the stored strings.
    ///
    /// All lengths are byte lengths. For an empty pool the average, minimum
    /// and maximum are all reported as zero.
    #[must_use]
    pub fn stats(&self) -> StringPoolStats {
        let count = self.strings.len();

        let mut total_bytes = 0;
        // `None` until the first string is seen, then `(shortest, longest)`.
        let mut bounds: Option<(usize, usize)> = None;
        for key in self.strings.keys() {
            let len = key.len();
            total_bytes += len;
            bounds = Some(bounds.map_or((len, len), |(lo, hi)| (lo.min(len), hi.max(len))));
        }
        let (min_length, max_length) = bounds.unwrap_or((0, 0));

        let avg_length = if count == 0 {
            0.0
        } else {
            total_bytes as f64 / count as f64
        };

        StringPoolStats {
            count,
            total_bytes,
            avg_length,
            min_length,
            max_length,
            memory_usage: self.memory_usage(),
        }
    }
}

/// Snapshot of a [`StringPool`]'s contents, as produced by
/// [`StringPool::stats`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct StringPoolStats {
    /// Number of distinct strings in the pool.
    pub count: usize,
    /// Sum of the byte lengths of all stored strings.
    pub total_bytes: usize,
    /// Mean byte length per string (`0.0` for an empty pool).
    pub avg_length: f64,
    /// Byte length of the shortest string (`0` for an empty pool).
    pub min_length: usize,
    /// Byte length of the longest string (`0` for an empty pool).
    pub max_length: usize,
    /// Value of [`StringPool::memory_usage`] at the time of the snapshot.
    pub memory_usage: usize,
}
/// A thread-safe string interner: a [`StringPool`] guarded by a
/// [`std::sync::RwLock`].
///
/// Lock poisoning is tolerated rather than propagated. The guarded pool is
/// only a set of independent strings, so a panic in some thread while it held
/// the lock does not stop other threads from using the pool; without this,
/// a single such panic would make every later call on every thread panic too.
#[derive(Debug, Default)]
pub struct ConcurrentStringPool {
    inner: std::sync::RwLock<StringPool>,
}

impl ConcurrentStringPool {
    /// Creates an empty pool.
    #[must_use]
    pub fn new() -> Self {
        Self {
            inner: std::sync::RwLock::new(StringPool::new()),
        }
    }

    /// Creates an empty pool whose underlying [`StringPool`] is built with
    /// `StringPool::with_capacity(capacity)`.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            inner: std::sync::RwLock::new(StringPool::with_capacity(capacity)),
        }
    }

    /// Acquires the shared (read) lock, recovering the guard if the lock was
    /// poisoned.
    fn read_pool(&self) -> std::sync::RwLockReadGuard<'_, StringPool> {
        self.inner
            .read()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// Acquires the exclusive (write) lock, recovering the guard if the lock
    /// was poisoned.
    fn write_pool(&self) -> std::sync::RwLockWriteGuard<'_, StringPool> {
        self.inner
            .write()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// Returns the shared `Arc<str>` equal to `s`, inserting it if needed.
    ///
    /// The lookup is first attempted under the read lock so that hits do not
    /// require exclusive access. On a miss the read lock is released and the
    /// write lock is taken; `StringPool::intern` then looks the string up
    /// again before inserting, which covers the case where another thread
    /// interned it between the two lock acquisitions.
    pub fn intern(&self, s: &str) -> Arc<str> {
        {
            let pool = self.read_pool();
            if let Some((existing, ())) = pool.strings.get_key_value(s) {
                return Arc::clone(existing);
            }
            // The read guard is dropped here, before the write lock is requested.
        }
        self.write_pool().intern(s)
    }

    /// Number of distinct strings currently held by the pool.
    #[must_use]
    pub fn len(&self) -> usize {
        self.read_pool().len()
    }

    /// Returns `true` when the pool holds no strings.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.read_pool().is_empty()
    }

    /// Computes summary statistics over the stored strings while holding the
    /// read lock.
    #[must_use]
    pub fn stats(&self) -> StringPoolStats {
        self.read_pool().stats()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Equal strings share one allocation; different strings do not.
    #[test]
    fn test_string_pool_basic() {
        let mut interner = StringPool::new();

        let first = interner.intern("hello");
        let repeat = interner.intern("hello");
        let other = interner.intern("world");

        assert!(Arc::ptr_eq(&first, &repeat));
        assert!(!Arc::ptr_eq(&first, &other));
        assert_eq!(interner.len(), 2);
    }

    /// Same as the basic test, with multi-byte (Korean) text.
    #[test]
    fn test_string_pool_korean() {
        let mut interner = StringPool::new();

        let greeting = interner.intern("안녕하세요");
        let greeting_again = interner.intern("안녕하세요");
        let thanks = interner.intern("감사합니다");

        assert!(Arc::ptr_eq(&greeting, &greeting_again));
        assert!(!Arc::ptr_eq(&greeting, &thanks));
        assert_eq!(interner.len(), 2);
    }

    /// Statistics over three strings of byte lengths 1, 2 and 3.
    #[test]
    fn test_string_pool_stats() {
        let mut interner = StringPool::new();
        for text in ["a", "bb", "ccc"] {
            interner.intern(text);
        }

        let summary = interner.stats();

        assert_eq!(summary.count, 3);
        assert_eq!(summary.total_bytes, 6);
        assert!((summary.avg_length - 2.0).abs() < f64::EPSILON);
        assert_eq!(summary.min_length, 1);
        assert_eq!(summary.max_length, 3);
    }

    /// Interning an owned `String` and a `&str` with equal contents yields the
    /// same allocation.
    #[test]
    fn test_string_pool_intern_string() {
        let mut interner = StringPool::new();

        let from_owned = interner.intern_string(String::from("test"));
        let from_borrowed = interner.intern("test");

        assert!(Arc::ptr_eq(&from_owned, &from_borrowed));
        assert_eq!(interner.len(), 1);
    }

    /// The lock-guarded pool deduplicates just like the plain one.
    #[test]
    fn test_concurrent_string_pool() {
        let shared_pool = ConcurrentStringPool::new();

        let first = shared_pool.intern("test");
        let repeat = shared_pool.intern("test");

        assert!(Arc::ptr_eq(&first, &repeat));
        assert_eq!(shared_pool.len(), 1);
    }

    /// The memory estimate grows once strings have been added.
    #[test]
    fn test_string_pool_memory_usage() {
        let mut interner = StringPool::new();
        let before = interner.memory_usage();

        interner.intern("a short string");
        interner.intern("another string");

        let grown = interner.memory_usage();
        assert!(grown > before);
    }

    /// `clear` empties a populated pool.
    #[test]
    fn test_string_pool_clear() {
        let mut interner = StringPool::new();
        interner.intern("test1");
        interner.intern("test2");
        assert_eq!(interner.len(), 2);

        interner.clear();

        assert!(interner.is_empty());
    }
}