use std::collections::HashMap;
/// Size summary of an `InternPool`, as returned by `InternPool::stats`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct InternPoolStats {
/// Number of strings stored in the pool.
pub len: usize,
/// Sum of the byte lengths of all strings stored in the pool.
pub total_bytes: usize,
}
/// String interner that maps each distinct string to a `u32` id.
///
/// `strings` and `static_ptrs` are parallel vectors: the methods of this type
/// always push to both together, so they are indexed by the same id.
pub struct InternPool {
/// Owned copy of every interned string; a string's id is its index here.
pub(super) strings: Vec<String>,
/// Reverse lookup from string contents to the id it was assigned.
pub(super) map: HashMap<String, u32>,
/// Per-id cache of the leaked `&'static str` returned by `resolve_str`;
/// `None` until that id has been resolved for the first time.
pub(super) static_ptrs: Vec<Option<&'static str>>,
}
impl InternPool {
    /// Creates an empty pool.
    pub fn new() -> Self {
        Self {
            strings: Vec::new(),
            map: HashMap::new(),
            static_ptrs: Vec::new(),
        }
    }

    /// Interns `s` and returns its id.
    ///
    /// Interning contents that are already present returns the id assigned
    /// the first time. New ids are the current number of stored strings, so
    /// they are handed out sequentially starting at 0.
    ///
    /// # Panics
    ///
    /// Panics if the number of stored strings no longer fits in a `u32`.
    /// The check happens before anything is stored, so the pool is left
    /// unchanged in that case.
    pub fn intern_str(&mut self, s: &str) -> u32 {
        if let Some(&id) = self.map.get(s) {
            return id;
        }

        // Checked conversion instead of `as u32`: a truncated id would alias
        // an entry that already exists. The trait is named by full path so
        // this does not depend on `TryFrom` being in the edition's prelude.
        let id = <u32 as ::std::convert::TryFrom<usize>>::try_from(self.strings.len())
            .expect("InternPool id space exhausted: string count does not fit in u32");

        self.strings.push(s.to_owned());
        self.static_ptrs.push(None);
        self.map.insert(s.to_owned(), id);
        id
    }

    /// Returns the string stored under `id` as a `&'static str`, or `None`
    /// when `id` is not a valid index into the pool.
    ///
    /// The first successful call for a given id clones the string and leaks
    /// the copy with `Box::leak`; the resulting reference is cached so later
    /// calls for the same id return the same pointer. The leaked copy is
    /// never freed by this type.
    pub fn resolve_str(&mut self, id: u32) -> Option<&'static str> {
        let idx = id as usize;
        let source = self.strings.get(idx)?;

        // The fields are visible to the parent module, so the cache may have
        // fallen behind `strings`; grow it instead of panicking on the index.
        if self.static_ptrs.len() <= idx {
            self.static_ptrs.resize(self.strings.len(), None);
        }

        let cached = self.static_ptrs[idx].get_or_insert_with(|| {
            let leaked: &'static str = Box::leak(source.clone().into_boxed_str());
            leaked
        });
        Some(*cached)
    }

    /// Returns the number of strings stored in the pool.
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Returns `true` when no string has been interned yet.
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }

    /// Returns the summed byte length of all stored strings.
    ///
    /// This walks every stored string on each call.
    pub fn total_bytes(&self) -> usize {
        self.strings.iter().map(String::len).sum()
    }

    /// Returns a snapshot of [`len`](Self::len) and
    /// [`total_bytes`](Self::total_bytes).
    pub fn stats(&self) -> InternPoolStats {
        InternPoolStats {
            len: self.len(),
            total_bytes: self.total_bytes(),
        }
    }
}
impl Default for InternPool {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // Interning the same contents twice must yield one id.
    #[test]
    fn test_pool_intern_returns_same_index_for_same_string() {
        let mut interner = InternPool::new();
        let first = interner.intern_str("hello");
        let second = interner.intern_str("hello");
        assert_eq!(first, second);
    }

    // Distinct contents must never share an id.
    #[test]
    fn test_pool_intern_returns_different_indices_for_different_strings() {
        let mut interner = InternPool::new();
        let hello_id = interner.intern_str("hello");
        let world_id = interner.intern_str("world");
        assert_ne!(hello_id, world_id);
    }

    // Resolving an id gives back the interned contents.
    #[test]
    fn test_pool_resolve_returns_correct_string() {
        let mut interner = InternPool::new();
        let kernel_id = interner.intern_str("kernel");
        assert_eq!(interner.resolve_str(kernel_id), Some("kernel"));
    }

    // An id that was never issued resolves to nothing.
    #[test]
    fn test_pool_resolve_out_of_range_returns_none() {
        let mut interner = InternPool::new();
        let missing = interner.resolve_str(99);
        assert!(missing.is_none());
    }

    // Duplicates do not inflate the entry count.
    #[test]
    fn test_pool_len_counts_unique_strings() {
        let mut interner = InternPool::new();
        interner.intern_str("a");
        interner.intern_str("b");
        interner.intern_str("a");
        assert_eq!(interner.len(), 2);
    }

    // Byte total is the sum over all stored strings.
    #[test]
    fn test_pool_total_bytes() {
        let mut interner = InternPool::new();
        interner.intern_str("abc");
        interner.intern_str("de");
        assert_eq!(interner.total_bytes(), 5);
    }

    // The stats snapshot reports both count and byte total.
    #[test]
    fn test_pool_stats_snapshot() {
        let mut interner = InternPool::new();
        interner.intern_str("hello");
        interner.intern_str("world");
        let InternPoolStats { len, total_bytes } = interner.stats();
        assert_eq!(len, 2);
        assert_eq!(total_bytes, 10);
    }

    // A pool built with `new` starts out empty.
    #[test]
    fn test_pool_is_empty_initially() {
        let interner = InternPool::new();
        assert!(interner.is_empty());
    }

    // A pool built with `Default` starts out empty.
    #[test]
    fn test_pool_default_is_empty() {
        let interner = InternPool::default();
        assert!(interner.is_empty());
    }

    // Ids are handed out in insertion order starting from zero.
    #[test]
    fn test_pool_sequential_indices() {
        let mut interner = InternPool::new();
        let mut issued: Vec<u32> = Vec::new();
        for name in ["x", "y", "z"].iter() {
            issued.push(interner.intern_str(name));
        }
        assert_eq!(issued, vec![0, 1, 2]);
    }

    // The empty string is a valid entry and round-trips.
    #[test]
    fn test_pool_empty_string_internable() {
        let mut interner = InternPool::new();
        let empty_id = interner.intern_str("");
        let resolved = interner.resolve_str(empty_id);
        assert_eq!(resolved, Some(""));
    }

    // Multi-byte UTF-8 contents round-trip unchanged.
    #[test]
    fn test_pool_unicode_string() {
        let mut interner = InternPool::new();
        let unicode_id = interner.intern_str("日本語");
        let resolved = interner.resolve_str(unicode_id);
        assert_eq!(resolved, Some("日本語"));
    }

    // Repeated resolution must hand back the cached pointer, not a new leak.
    #[test]
    fn test_pool_resolve_same_pointer_on_repeated_calls() {
        let mut interner = InternPool::new();
        let stable_id = interner.intern_str("stable");
        let first_ptr = interner.resolve_str(stable_id).map(str::as_ptr);
        let second_ptr = interner.resolve_str(stable_id).map(str::as_ptr);
        assert_eq!(
            first_ptr, second_ptr,
            "resolve must return the same static pointer each time"
        );
    }
}