use crate::stdlib::text::Text;
use std::sync::{Arc, RwLock, Mutex};
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::collections::hash_map::DefaultHasher;
/// Pool of shared strings handed out as `Arc<String>` handles.
pub struct StringInterningPool {
// Interned strings, keyed by the `u64` hash of their contents; behind a read/write lock.
pool: RwLock<HashMap<u64, Arc<String>>>,
// Request, hit and miss counters; guarded by their own mutex.
stats: Mutex<InterningStats>,
}
/// Counters describing how a `StringInterningPool` has been used.
#[derive(Debug, Default)]
pub struct InterningStats {
/// Number of `intern` calls recorded.
pub total_requests: usize,
/// Number of requests answered with an already pooled string.
pub cache_hits: usize,
/// Number of requests that were not answered from the pool.
pub cache_misses: usize,
/// Sum of the byte lengths of the incoming strings on cache hits.
pub memory_saved: usize,
}
/// Process-wide interning pool, created on first use by `StringInterningPool::global`.
static GLOBAL_INTERNING_POOL: std::sync::OnceLock<StringInterningPool> = std::sync::OnceLock::new();
impl StringInterningPool {
    /// Creates an empty pool with zeroed statistics.
    pub fn new() -> Self {
        Self {
            pool: RwLock::new(HashMap::new()),
            stats: Mutex::new(InterningStats::default()),
        }
    }

    /// Returns the process-wide pool, creating it on first use.
    pub fn global() -> &'static StringInterningPool {
        GLOBAL_INTERNING_POOL.get_or_init(StringInterningPool::new)
    }

    /// Interns `s` and returns a shared handle to it.
    ///
    /// Equal strings yield handles to the same allocation. The pool is keyed
    /// by a 64-bit hash of the contents, so the stored string is compared with
    /// `s` before it is returned. If a *different* string already owns the
    /// slot (a hash collision), `s` is returned in a fresh, un-pooled `Arc`
    /// and the request is counted as a miss. The result therefore always has
    /// the contents of `s`.
    ///
    /// # Panics
    ///
    /// Panics if one of the internal locks is poisoned.
    pub fn intern(&self, s: String) -> Arc<String> {
        let hash = self.hash_string(&s);

        // Fast path: a shared read lock is enough when the slot is occupied.
        {
            let pool = self.pool.read().unwrap();
            match pool.get(&hash) {
                Some(existing) if **existing == s => {
                    self.record_request(Some(s.len()));
                    return Arc::clone(existing);
                }
                Some(_) => {
                    // Collision: never hand out another string's contents.
                    self.record_request(None);
                    return Arc::new(s);
                }
                None => {}
            }
        }

        // Slow path: re-check under the write lock, because another thread may
        // have filled the slot between releasing the read lock and getting here.
        let mut pool = self.pool.write().unwrap();
        match pool.get(&hash) {
            Some(existing) if **existing == s => {
                self.record_request(Some(s.len()));
                Arc::clone(existing)
            }
            Some(_) => {
                self.record_request(None);
                Arc::new(s)
            }
            None => {
                let interned = Arc::new(s);
                pool.insert(hash, Arc::clone(&interned));
                self.record_request(None);
                interned
            }
        }
    }

    /// Interns a borrowed string by copying it into an owned `String` first.
    pub fn intern_str(&self, s: &str) -> Arc<String> {
        self.intern(s.to_string())
    }

    /// Returns a snapshot of the current counters.
    pub fn stats(&self) -> InterningStats {
        self.stats.lock().unwrap().clone()
    }

    /// Removes every pooled string and resets the counters to zero.
    ///
    /// Handles that were already given out stay valid; they just are no longer
    /// shared with later `intern` calls.
    pub fn clear(&self) {
        let mut pool = self.pool.write().unwrap();
        pool.clear();
        let mut stats = self.stats.lock().unwrap();
        *stats = InterningStats::default();
    }

    /// Records one request: `Some(bytes)` for a hit that avoided keeping
    /// `bytes` bytes, `None` for a miss.
    fn record_request(&self, saved_bytes: Option<usize>) {
        let mut stats = self.stats.lock().unwrap();
        stats.total_requests += 1;
        match saved_bytes {
            Some(bytes) => {
                stats.cache_hits += 1;
                stats.memory_saved += bytes;
            }
            None => stats.cache_misses += 1,
        }
    }

    /// Hashes `s` with the standard library's default hasher to get the pool key.
    fn hash_string(&self, s: &str) -> u64 {
        let mut hasher = DefaultHasher::new();
        s.hash(&mut hasher);
        hasher.finish()
    }
}
impl Default for StringInterningPool {
fn default() -> Self {
Self::new()
}
}
impl Clone for InterningStats {
fn clone(&self) -> Self {
Self {
total_requests: self.total_requests,
cache_hits: self.cache_hits,
cache_misses: self.cache_misses,
memory_saved: self.memory_saved,
}
}
}
/// Pool of reusable byte buffers, split into three size classes by capacity.
pub struct TextMemoryPool {
// Buffers whose capacity is at most 64 bytes.
small_pool: Mutex<Vec<Vec<u8>>>,
// Buffers whose capacity is between 65 and 1024 bytes.
medium_pool: Mutex<Vec<Vec<u8>>>,
// Buffers whose capacity is above 1024 bytes.
large_pool: Mutex<Vec<Vec<u8>>>,
// Allocation / reuse counters; guarded by their own mutex.
stats: Mutex<PoolStats>,
}
/// Counters describing how a `TextMemoryPool` has been used.
#[derive(Debug, Default)]
pub struct PoolStats {
/// Number of `allocate` calls served with a pooled buffer.
pub pool_allocations: usize,
/// Number of `allocate` calls served with a freshly created buffer.
pub new_allocations: usize,
/// Number of buffers that `deallocate` kept in the pool.
pub returns_to_pool: usize,
/// Sum of the capacities of all buffers handed out from the pool.
pub memory_reused: usize,
}
/// Process-wide buffer pool, created on first use by `TextMemoryPool::global`.
static GLOBAL_MEMORY_POOL: std::sync::OnceLock<TextMemoryPool> = std::sync::OnceLock::new();
impl TextMemoryPool {
    /// Maximum number of buffers retained per size class; further returns are dropped.
    const MAX_POOLED_PER_CLASS: usize = 100;

    /// Creates a pool with three empty size classes and zeroed statistics.
    pub fn new() -> Self {
        Self {
            small_pool: Mutex::new(Vec::new()),
            medium_pool: Mutex::new(Vec::new()),
            large_pool: Mutex::new(Vec::new()),
            stats: Mutex::new(PoolStats::default()),
        }
    }

    /// Returns the process-wide pool, creating it on first use.
    pub fn global() -> &'static TextMemoryPool {
        GLOBAL_MEMORY_POOL.get_or_init(TextMemoryPool::new)
    }

    /// Selects the size class responsible for buffers of the given capacity.
    fn bucket_for(&self, capacity: usize) -> &Mutex<Vec<Vec<u8>>> {
        match capacity {
            0..=64 => &self.small_pool,
            65..=1024 => &self.medium_pool,
            _ => &self.large_pool,
        }
    }

    /// Returns an empty buffer whose capacity is at least `capacity`.
    ///
    /// A pooled buffer from the matching size class is reused when any of them
    /// is large enough; otherwise a new buffer is allocated. The whole size
    /// class is searched (newest first), so one undersized buffer on top does
    /// not prevent reuse of larger buffers stored beneath it.
    ///
    /// # Panics
    ///
    /// Panics if one of the internal locks is poisoned.
    pub fn allocate(&self, capacity: usize) -> Vec<u8> {
        let mut bucket = self.bucket_for(capacity).lock().unwrap();
        let fitting = bucket.iter().rposition(|buf| buf.capacity() >= capacity);

        if let Some(index) = fitting {
            // Order inside a size class carries no meaning, so `swap_remove` is fine.
            let mut buf = bucket.swap_remove(index);
            drop(bucket);
            buf.clear();
            let mut stats = self.stats.lock().unwrap();
            stats.pool_allocations += 1;
            stats.memory_reused += buf.capacity();
            return buf;
        }

        // Nothing reusable: release the bucket before allocating.
        drop(bucket);
        self.stats.lock().unwrap().new_allocations += 1;
        Vec::with_capacity(capacity)
    }

    /// Hands `buf` back for later reuse.
    ///
    /// The buffer is cleared and filed under the size class of its *current*
    /// capacity. Buffers without an allocation (capacity 0) are ignored, and a
    /// buffer is dropped instead of pooled when its size class already holds
    /// `MAX_POOLED_PER_CLASS` buffers.
    pub fn deallocate(&self, mut buf: Vec<u8>) {
        let capacity = buf.capacity();
        if capacity == 0 {
            return;
        }
        buf.clear();

        let mut bucket = self.bucket_for(capacity).lock().unwrap();
        if bucket.len() < Self::MAX_POOLED_PER_CLASS {
            bucket.push(buf);
            self.stats.lock().unwrap().returns_to_pool += 1;
        }
    }

    /// Returns a snapshot of the current counters.
    pub fn stats(&self) -> PoolStats {
        self.stats.lock().unwrap().clone()
    }

    /// Drops every pooled buffer and resets the counters to zero.
    pub fn clear(&self) {
        self.small_pool.lock().unwrap().clear();
        self.medium_pool.lock().unwrap().clear();
        self.large_pool.lock().unwrap().clear();
        let mut stats = self.stats.lock().unwrap();
        *stats = PoolStats::default();
    }
}
impl Default for TextMemoryPool {
fn default() -> Self {
Self::new()
}
}
impl Clone for PoolStats {
fn clone(&self) -> Self {
Self {
pool_allocations: self.pool_allocations,
new_allocations: self.new_allocations,
returns_to_pool: self.returns_to_pool,
memory_reused: self.memory_reused,
}
}
}
/// Namespace for text helpers that take a byte-level fast path for ASCII input.
///
/// Despite the name, the code here uses no explicit SIMD intrinsics; any
/// vectorisation is left to the compiler.
pub struct SimdTextOps;

impl SimdTextOps {
    /// Counts how often `ch` occurs in `text`.
    ///
    /// ASCII characters are counted on the raw bytes, which is safe because an
    /// ASCII byte never appears inside a multi-byte UTF-8 sequence. Other
    /// characters are counted by decoding the text.
    pub fn count_char(text: &Text, ch: char) -> usize {
        let s = text.to_string();
        if ch.is_ascii() {
            Self::count_ascii_char(s.as_bytes(), ch as u8)
        } else {
            s.chars().filter(|&c| c == ch).count()
        }
    }

    /// Counts the bytes in `bytes` that are equal to `target`.
    fn count_ascii_char(bytes: &[u8], target: u8) -> usize {
        bytes.iter().filter(|&&b| b == target).count()
    }

    /// Finds the first occurrence of `needle` in `haystack`.
    ///
    /// Returns the position as a *character* index (number of `char`s before
    /// the match), not a byte offset, or `None` when there is no match.
    pub fn find_substring(haystack: &Text, needle: &Text) -> Option<usize> {
        let haystack_str = haystack.to_string();
        let needle_str = needle.to_string();
        // The former short-needle / long-needle branches were identical, so a
        // single search covers both.
        let byte_pos = haystack_str.find(&needle_str)?;
        Some(haystack_str[..byte_pos].chars().count())
    }

    /// Upper-cases `text`.
    ///
    /// Pure-ASCII text is converted with the ASCII-only routine; any other
    /// text is delegated to `Text::to_uppercase`.
    pub fn to_ascii_uppercase(text: &Text) -> Text {
        let s = text.to_string();
        if s.is_ascii() {
            Text::from_string(s.to_ascii_uppercase())
        } else {
            text.to_uppercase()
        }
    }

    /// Lower-cases `text`.
    ///
    /// Pure-ASCII text is converted with the ASCII-only routine; any other
    /// text is delegated to `Text::to_lowercase`.
    pub fn to_ascii_lowercase(text: &Text) -> Text {
        let s = text.to_string();
        if s.is_ascii() {
            Text::from_string(s.to_ascii_lowercase())
        } else {
            text.to_lowercase()
        }
    }
}
/// Collects named event counters and named timing samples.
pub struct TextPerformanceMonitor {
// Event counts per counter name.
counters: RwLock<HashMap<String, u64>>,
// Recorded durations (in nanoseconds, per the `record_timing` parameter name) per timing name.
timings: RwLock<HashMap<String, Vec<u64>>>,
}
impl TextPerformanceMonitor {
    /// Creates a monitor without any counters or timings.
    pub fn new() -> Self {
        Self {
            counters: RwLock::new(HashMap::new()),
            timings: RwLock::new(HashMap::new()),
        }
    }

    /// Adds one to the counter `name`, creating it at 1 if it does not exist.
    pub fn increment_counter(&self, name: &str) {
        let mut counters = self.counters.write().unwrap();
        // Look up by `&str` first so the key is only allocated for new counters.
        match counters.get_mut(name) {
            Some(count) => *count += 1,
            None => {
                counters.insert(name.to_string(), 1);
            }
        }
    }

    /// Appends one duration sample to the timing series `name`.
    pub fn record_timing(&self, name: &str, duration_nanos: u64) {
        let mut timings = self.timings.write().unwrap();
        // Look up by `&str` first so the key is only allocated for new series.
        match timings.get_mut(name) {
            Some(samples) => samples.push(duration_nanos),
            None => {
                timings.insert(name.to_string(), vec![duration_nanos]);
            }
        }
    }

    /// Returns the value of counter `name`, or 0 if it was never incremented.
    pub fn get_counter(&self, name: &str) -> u64 {
        let counters = self.counters.read().unwrap();
        counters.get(name).copied().unwrap_or(0)
    }

    /// Returns the arithmetic mean of the samples recorded under `name`.
    ///
    /// Returns `None` when the series does not exist or holds no samples.
    pub fn get_average_timing(&self, name: &str) -> Option<f64> {
        let timings = self.timings.read().unwrap();
        let samples = timings.get(name).filter(|samples| !samples.is_empty())?;
        // Accumulate in `u128`: a `u64` sum overflows once the samples add up
        // to more than `u64::MAX` nanoseconds.
        let total: u128 = samples.iter().map(|&nanos| u128::from(nanos)).sum();
        Some(total as f64 / samples.len() as f64)
    }

    /// Returns the names of all counters, in unspecified order.
    pub fn counter_names(&self) -> Vec<String> {
        let counters = self.counters.read().unwrap();
        counters.keys().cloned().collect()
    }

    /// Returns the names of all timing series, in unspecified order.
    pub fn timing_names(&self) -> Vec<String> {
        let timings = self.timings.read().unwrap();
        timings.keys().cloned().collect()
    }

    /// Removes every counter and every timing series.
    pub fn clear(&self) {
        self.counters.write().unwrap().clear();
        self.timings.write().unwrap().clear();
    }
}
impl Default for TextPerformanceMonitor {
fn default() -> Self {
Self::new()
}
}
/// Cache from string keys to values of type `T`, backed by `lru::LruCache`.
pub struct TextCache<T> {
// The cache itself; behind a read/write lock.
cache: RwLock<lru::LruCache<String, T>>,
// Hit / miss / eviction counters; guarded by their own mutex.
stats: Mutex<CacheStats>,
}
/// Counters describing how a `TextCache` has been used.
#[derive(Debug, Default)]
pub struct CacheStats {
/// Number of `get` calls that found a value.
pub hits: usize,
/// Number of `get` calls that found nothing.
pub misses: usize,
/// Counted by `put` when the cache is already at capacity.
pub evictions: usize,
}
impl<T: Clone> TextCache<T> {
    /// Creates an empty cache that holds at most `capacity` entries.
    ///
    /// # Panics
    ///
    /// Panics if `capacity` cannot be converted into the capacity type that
    /// `lru::LruCache::new` expects (presumably a non-zero integer, so a
    /// capacity of 0 is rejected; verify against the `lru` version in use).
    pub fn new(capacity: usize) -> Self {
        Self {
            cache: RwLock::new(lru::LruCache::new(
                capacity
                    .try_into()
                    .expect("TextCache capacity is not a valid LRU cache capacity"),
            )),
            stats: Mutex::new(CacheStats::default()),
        }
    }

    /// Returns a clone of the value stored under `key` and records a hit or a miss.
    ///
    /// The write lock is taken because `LruCache::get` needs mutable access to
    /// the cache.
    pub fn get(&self, key: &str) -> Option<T> {
        let mut cache = self.cache.write().unwrap();
        let result = cache.get(key).cloned();
        let mut stats = self.stats.lock().unwrap();
        if result.is_some() {
            stats.hits += 1;
        } else {
            stats.misses += 1;
        }
        result
    }

    /// Stores `value` under `key`.
    ///
    /// An eviction is counted only when the cache is full *and* `key` is not
    /// already present; overwriting an existing key pushes nothing out.
    pub fn put(&self, key: String, value: T) {
        let mut cache = self.cache.write().unwrap();
        let evicts_entry = cache.len() >= cache.cap().get() && !cache.contains(&key);
        if evicts_entry {
            self.stats.lock().unwrap().evictions += 1;
        }
        cache.put(key, value);
    }

    /// Returns a snapshot of the current counters.
    pub fn stats(&self) -> CacheStats {
        self.stats.lock().unwrap().clone()
    }

    /// Removes every cached entry and resets the counters to zero.
    pub fn clear(&self) {
        self.cache.write().unwrap().clear();
        let mut stats = self.stats.lock().unwrap();
        *stats = CacheStats::default();
    }
}
impl Clone for CacheStats {
fn clone(&self) -> Self {
Self {
hits: self.hits,
misses: self.misses,
evictions: self.evictions,
}
}
}
/// Incremental text builder whose byte buffer is obtained from the global `TextMemoryPool`.
pub struct OptimizedTextBuilder {
// UTF-8 bytes appended so far.
buffer: Vec<u8>,
// Running count of `char`s appended so far.
char_length: usize,
}
impl OptimizedTextBuilder {
    /// Creates a builder without requesting any particular capacity.
    pub fn new() -> Self {
        Self::with_capacity(0)
    }

    /// Creates a builder whose buffer comes from the global memory pool and
    /// can hold at least `capacity` bytes.
    pub fn with_capacity(capacity: usize) -> Self {
        let buffer = TextMemoryPool::global().allocate(capacity);
        Self {
            buffer,
            char_length: 0,
        }
    }

    /// Appends `s` and updates the character count.
    pub fn push_str(&mut self, s: &str) {
        self.buffer.extend_from_slice(s.as_bytes());
        self.char_length += s.chars().count();
    }

    /// Appends a single character.
    pub fn push_char(&mut self, ch: char) {
        let mut utf8 = [0; 4];
        let encoded = ch.encode_utf8(&mut utf8);
        self.buffer.extend_from_slice(encoded.as_bytes());
        // Exactly one character was added; no need to decode it again.
        self.char_length += 1;
    }

    /// Appends the contents of `text`.
    pub fn push_text(&mut self, text: &Text) {
        self.push_str(&text.to_string());
    }

    /// Finishes the builder and returns the accumulated text.
    ///
    /// The bytes are copied into a `String` and the buffer is handed back to
    /// the global memory pool. Results of at most 1024 bytes are additionally
    /// registered with the global interning pool.
    pub fn build(mut self) -> Text {
        // The buffer is only filled from `&str` data, so it is expected to be
        // valid UTF-8. Should that ever not hold, a lossy conversion keeps the
        // readable content instead of silently returning an empty text.
        let s = String::from_utf8_lossy(&self.buffer).into_owned();
        TextMemoryPool::global().deallocate(std::mem::take(&mut self.buffer));
        if s.len() <= 1024 {
            let interned = StringInterningPool::global().intern(s);
            // NOTE(review): the interned string is cloned right away, so the
            // returned `Text` does not share its allocation with the pool.
            // The call is kept because it updates the interning pool and its
            // statistics.
            Text::from_string((*interned).clone())
        } else {
            Text::from_string(s)
        }
    }

    /// Returns the length of the accumulated text in bytes.
    pub fn len(&self) -> usize {
        self.buffer.len()
    }

    /// Returns the length of the accumulated text in characters.
    pub fn char_len(&self) -> usize {
        self.char_length
    }

    /// Returns `true` when nothing has been appended (or everything was cleared).
    pub fn is_empty(&self) -> bool {
        self.buffer.is_empty()
    }

    /// Discards the accumulated text; the buffer's allocation is kept.
    pub fn clear(&mut self) {
        self.buffer.clear();
        self.char_length = 0;
    }
}
impl Default for OptimizedTextBuilder {
fn default() -> Self {
Self::new()
}
}
/// Returns the builder's buffer to the global memory pool when the builder is dropped.
impl Drop for OptimizedTextBuilder {
    fn drop(&mut self) {
        // Check the capacity rather than the length: a builder that was never
        // written to, or that was cleared, still owns an allocation worth
        // pooling. After `build` the buffer has been taken and has capacity 0,
        // so nothing is returned twice.
        if self.buffer.capacity() > 0 {
            TextMemoryPool::global().deallocate(std::mem::take(&mut self.buffer));
        }
    }
}
/// Collects the statistics of the global interning pool and the global memory
/// pool into a map of JSON objects.
///
/// The map has two entries: `"interning"` (raw counters plus `hit_rate`) and
/// `"memory_pool"` (raw counters plus `pool_efficiency`). Both derived ratios
/// are `0.0` while their denominator is zero.
pub fn get_performance_stats() -> HashMap<String, serde_json::Value> {
    /// `part / whole` as a fraction, or `0.0` when `whole` is zero.
    fn fraction(part: usize, whole: usize) -> f64 {
        if whole > 0 {
            part as f64 / whole as f64
        } else {
            0.0
        }
    }

    let interning = StringInterningPool::global().stats();
    let interning_report = serde_json::json!({
        "total_requests": interning.total_requests,
        "cache_hits": interning.cache_hits,
        "cache_misses": interning.cache_misses,
        "memory_saved": interning.memory_saved,
        "hit_rate": fraction(interning.cache_hits, interning.total_requests)
    });

    let memory = TextMemoryPool::global().stats();
    // Every `allocate` call is either served from the pool or freshly allocated.
    let served = memory.pool_allocations + memory.new_allocations;
    let memory_report = serde_json::json!({
        "pool_allocations": memory.pool_allocations,
        "new_allocations": memory.new_allocations,
        "returns_to_pool": memory.returns_to_pool,
        "memory_reused": memory.memory_reused,
        "pool_efficiency": fraction(memory.pool_allocations, served)
    });

    HashMap::from([
        ("interning".to_string(), interning_report),
        ("memory_pool".to_string(), memory_report),
    ])
}
/// Empties the global interning pool and the global memory pool and resets
/// the statistics of both.
pub fn clear_performance_caches() {
    let interning_pool = StringInterningPool::global();
    let memory_pool = TextMemoryPool::global();
    interning_pool.clear();
    memory_pool.clear();
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning the same contents twice yields one shared allocation.
    #[test]
    fn test_string_interning() {
        let pool = StringInterningPool::new();
        let s1 = pool.intern("hello".to_string());
        let s2 = pool.intern("hello".to_string());
        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(s1.as_str(), "hello");
        let stats = pool.stats();
        assert_eq!(stats.total_requests, 2);
        assert_eq!(stats.cache_hits, 1);
        assert_eq!(stats.cache_misses, 1);
    }

    /// A returned buffer is handed out again by the next fitting request.
    #[test]
    fn test_memory_pool() {
        let pool = TextMemoryPool::new();
        let buf1 = pool.allocate(100);
        assert!(buf1.capacity() >= 100);
        pool.deallocate(buf1);
        let buf2 = pool.allocate(100);
        assert!(buf2.capacity() >= 100);
        // Exact counters: the first request allocates, the second one reuses.
        // (The former `a > 0 || b > 0` check could never fail.)
        let stats = pool.stats();
        assert_eq!(stats.new_allocations, 1);
        assert_eq!(stats.returns_to_pool, 1);
        assert_eq!(stats.pool_allocations, 1);
        assert!(stats.memory_reused >= 100);
    }

    /// Character counting and substring search on ASCII text.
    #[test]
    fn test_simd_operations() {
        let text = Text::from_string_slice("hello world hello");
        let count = SimdTextOps::count_char(&text, 'l');
        assert_eq!(count, 5);
        let needle = Text::from_string_slice("world");
        let pos = SimdTextOps::find_substring(&text, &needle);
        assert_eq!(pos, Some(6));
    }

    /// Pieces pushed into the builder come out concatenated.
    #[test]
    fn test_optimized_text_builder() {
        let mut builder = OptimizedTextBuilder::new();
        builder.push_str("hello");
        builder.push_char(' ');
        builder.push_str("world");
        let text = builder.build();
        assert_eq!(text.to_string(), "hello world");
    }

    /// Lookups report hits and misses.
    #[test]
    fn test_text_cache() {
        let cache: TextCache<String> = TextCache::new(2);
        cache.put("key1".to_string(), "value1".to_string());
        cache.put("key2".to_string(), "value2".to_string());
        assert_eq!(cache.get("key1"), Some("value1".to_string()));
        assert_eq!(cache.get("key3"), None);
        let stats = cache.stats();
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.misses, 1);
    }

    /// Counters accumulate and timings are averaged.
    #[test]
    fn test_performance_monitor() {
        let monitor = TextPerformanceMonitor::new();
        monitor.increment_counter("test_op");
        monitor.increment_counter("test_op");
        monitor.record_timing("test_op", 1000);
        monitor.record_timing("test_op", 2000);
        assert_eq!(monitor.get_counter("test_op"), 2);
        assert_eq!(monitor.get_average_timing("test_op"), Some(1500.0));
    }
}