use std::collections::HashSet;
use std::iter;
use std::sync::Arc;
/// A string interner that deduplicates strings behind shared [`Arc<str>`] handles.
///
/// The API is value-oriented: every interning method consumes the pool and
/// returns the (possibly grown) pool together with the handle, so callers
/// thread the pool through their computation, e.g. with `fold`.
#[derive(Debug, Clone, Default)]
pub struct StringPool {
    /// Canonical handle for every distinct string interned so far.
    pool: HashSet<Arc<str>>,
}

impl StringPool {
    /// Initial capacity used by [`StringPool::new`] and [`StringPool::clear`].
    const DEFAULT_CAPACITY: usize = 16;

    /// Creates an empty pool with the default initial capacity.
    #[must_use]
    pub fn new() -> Self {
        Self::with_capacity(Self::DEFAULT_CAPACITY)
    }

    /// Creates an empty pool whose backing set is created with
    /// `HashSet::with_capacity(capacity)`.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            pool: HashSet::with_capacity(capacity),
        }
    }

    /// Interns a borrowed string.
    ///
    /// Returns the pool and the canonical handle for `s`. If an equal string
    /// is already pooled, its existing handle is returned and nothing is
    /// allocated; otherwise `s` is copied into a new [`Arc<str>`] which is
    /// added to the pool.
    #[must_use]
    pub fn intern_str(self, s: &str) -> (Self, Arc<str>) {
        match self.pool.get(s).map(Arc::clone) {
            Some(existing) => (self, existing),
            None => self.insert_new(Arc::from(s)),
        }
    }

    /// Interns an owned string.
    ///
    /// Behaves like [`StringPool::intern_str`]; on a miss the `String` is
    /// converted into the pooled [`Arc<str>`].
    #[must_use]
    pub fn intern_string(self, s: String) -> (Self, Arc<str>) {
        match self.pool.get(s.as_str()).map(Arc::clone) {
            Some(existing) => (self, existing),
            None => self.insert_new(Arc::from(s)),
        }
    }

    /// Interns anything convertible into a `String`.
    ///
    /// The argument is converted with `Into<String>` *before* the lookup and
    /// then handed to [`StringPool::intern_string`], so passing a `&str` here
    /// allocates even when the string is already pooled; prefer
    /// [`StringPool::intern_str`] for borrowed input.
    #[must_use]
    pub fn intern(self, s: impl Into<String>) -> (Self, Arc<str>) {
        self.intern_string(s.into())
    }

    /// Returns the number of distinct strings held by the pool.
    #[must_use]
    pub fn len(&self) -> usize {
        self.pool.len()
    }

    /// Returns `true` when the pool holds no strings.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.pool.is_empty()
    }

    /// Consumes the pool and returns a fresh, empty one with the default
    /// initial capacity.
    ///
    /// Handles handed out earlier are independent `Arc`s and stay valid.
    #[must_use]
    pub fn clear(self) -> Self {
        Self::with_capacity(Self::DEFAULT_CAPACITY)
    }

    /// Adds a handle that is known not to be pooled yet and returns it with the pool.
    fn insert_new(mut self, interned: Arc<str>) -> (Self, Arc<str>) {
        // Grow the owned set in place. Rebuilding it through
        // `into_iter().chain(..).collect()` re-hashed every existing entry on
        // each miss, making n distinct inserts cost O(n^2).
        self.pool.extend(iter::once(Arc::clone(&interned)));
        (self, interned)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A pool built with `new` starts out empty.
    #[test]
    fn test_string_pool_new() {
        let pool = StringPool::new();
        assert_eq!(pool.len(), 0);
        assert!(pool.is_empty());
    }

    /// A pool built with an explicit capacity also starts out empty.
    #[test]
    fn test_string_pool_with_capacity() {
        let pool = StringPool::with_capacity(32);
        assert_eq!(pool.len(), 0);
        assert!(pool.is_empty());
    }

    /// Interning the same text twice yields the same allocation.
    #[test]
    fn test_identical_strings_return_same_arc() {
        let (pool, s1) = StringPool::new().intern_str("Development");
        let (pool, s2) = pool.intern_str("Development");
        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(*s1, *s2);
        assert_eq!(pool.len(), 1);
    }

    /// Distinct texts get distinct allocations and distinct pool entries.
    #[test]
    fn test_different_strings_return_different_arc() {
        let (pool, s1) = StringPool::new().intern_str("Development");
        let (pool, s2) = pool.intern_str("Review");
        assert!(!Arc::ptr_eq(&s1, &s2));
        assert_ne!(*s1, *s2);
        assert_eq!(pool.len(), 2);
    }

    /// Re-interning an existing string never grows the pool.
    #[test]
    fn test_pool_size_does_not_grow_for_repeated_strings() {
        let pool = (0..100).fold(StringPool::new(), |pool, _| {
            pool.intern_str("Development").0
        });
        assert_eq!(pool.len(), 1);
    }

    /// The generic `intern` entry point shares entries with `intern_str`.
    #[test]
    fn test_intern_different_string_types() {
        let (pool, s1) = StringPool::new().intern_str("test");
        let (pool, s2) = pool.intern("test".to_string());
        let (pool, s3) = pool.intern(String::from("test"));
        assert!(Arc::ptr_eq(&s1, &s2));
        assert!(Arc::ptr_eq(&s2, &s3));
        assert_eq!(pool.len(), 1);
    }

    /// `intern_str` and `intern_string` see each other's entries, whichever
    /// of the two inserted the string first.
    #[test]
    fn test_intern_str_and_intern_string_share_entries() {
        // Inserted via `intern_str`, looked up via `intern_string`.
        let (pool, s1) = StringPool::new().intern_str("test");
        let (pool, s2) = pool.intern_string("test".to_string());
        assert!(Arc::ptr_eq(&s1, &s2));

        // Inserted via `intern_string`, looked up via `intern_str`.
        let (pool, s3) = pool.intern_string(String::from("other"));
        let (pool, s4) = pool.intern_str("other");
        assert!(Arc::ptr_eq(&s3, &s4));

        assert_eq!(pool.len(), 2);
    }

    /// `clear` returns an empty pool.
    #[test]
    fn test_clear() {
        let pool = StringPool::new()
            .intern_str("Development")
            .0
            .intern_str("Review")
            .0;
        assert_eq!(pool.len(), 2);
        let pool = pool.clear();
        assert_eq!(pool.len(), 0);
        assert!(pool.is_empty());
    }

    /// The returned handle carries exactly the interned text.
    #[test]
    fn test_arc_content_matches_input() {
        let arc = StringPool::new().intern_str("Development").1;
        assert_eq!(&*arc, "Development");
    }

    /// Many lookups of one string all resolve to a single allocation.
    #[test]
    fn test_memory_efficiency_multiple_calls() {
        let pool = (0..1000).fold(StringPool::new(), |pool, _| {
            pool.intern_str("Development").0
        });
        assert_eq!(pool.len(), 1);
        let arcs: Vec<_> = (0..1000)
            .map(|_| pool.clone().intern_str("Development").1)
            .collect();
        let first = &arcs[0];
        assert!(arcs.iter().skip(1).all(|arc| Arc::ptr_eq(first, arc)));
    }

    /// The empty string is interned like any other string.
    #[test]
    fn test_empty_string() {
        let (pool, s1) = StringPool::new().intern_str("");
        let (pool, s2) = pool.intern_str("");
        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "");
        assert_eq!(pool.len(), 1);
    }

    /// A cloned pool has the same entries and hands out the same allocations.
    #[test]
    fn test_clone_pool() {
        let pool = StringPool::new()
            .intern_str("Development")
            .0
            .intern_str("Review")
            .0;
        let cloned = pool.clone();
        assert_eq!(pool.len(), cloned.len());

        let (_, original) = pool.intern_str("Development");
        let (_, from_clone) = cloned.intern_str("Development");
        assert!(Arc::ptr_eq(&original, &from_clone));
    }
}