
tensorlogic_adapters/performance.rs

//! Performance optimizations for symbol tables.
//!
//! This module provides optimizations for:
//! - String interning to reduce memory usage
//! - Lookup caching for frequently accessed data
//! - Efficient data structure selection
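//!
//! A brief sketch combining the two helpers (crate-root re-exports assumed,
//! as in the type-level examples below):
//!
//! ```rust
//! use tensorlogic_adapters::{LookupCache, StringInterner};
//!
//! let mut interner = StringInterner::new();
//! let person = interner.intern("Person");
//!
//! let mut cache: LookupCache<String, usize> = LookupCache::new(16);
//! cache.insert("Person".to_string(), person);
//! assert_eq!(cache.get(&"Person".to_string()), Some(&person));
//! ```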

use std::collections::HashMap;
use std::sync::{Arc, RwLock};

/// String interner for reducing memory usage of repeated strings.
///
/// Domain and predicate names are often repeated throughout the symbol table.
/// This interner ensures each unique string is stored only once in memory.
///
/// # Example
///
/// ```rust
/// use tensorlogic_adapters::StringInterner;
///
/// let mut interner = StringInterner::new();
/// let id1 = interner.intern("Person");
/// let id2 = interner.intern("Person");
/// assert_eq!(id1, id2); // Same string gets same ID
///
/// assert_eq!(interner.resolve(id1).as_deref(), Some("Person"));
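///
/// // The interner derives `Clone` over `Arc`-backed storage, so clones share
/// // the same string table; an ID obtained through one handle resolves
/// // through any other (illustrative of the fields defined below).
/// let shared = interner.clone();
/// assert_eq!(shared.resolve(id1).as_deref(), Some("Person"));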
/// ```
#[derive(Clone, Debug, Default)]
pub struct StringInterner {
    strings: Arc<RwLock<HashMap<String, usize>>>,
    ids: Arc<RwLock<Vec<String>>>,
}

impl StringInterner {
    /// Create a new string interner.
    pub fn new() -> Self {
        Self::default()
    }

    /// Intern a string and return its unique ID.
    ///
    /// If the string already exists, returns the existing ID.
    /// Otherwise, allocates a new ID and stores the string.
    pub fn intern(&mut self, s: &str) -> usize {
        let mut strings = self.strings.write().unwrap();

        if let Some(&id) = strings.get(s) {
            return id;
        }

        let mut ids = self.ids.write().unwrap();
        let id = ids.len();
        ids.push(s.to_string());
        strings.insert(s.to_string(), id);
        id
    }

    /// Resolve an ID back to its string.
    ///
    /// Returns an owned copy of the string so that no lock guard or internal
    /// reference escapes the interner.
    pub fn resolve(&self, id: usize) -> Option<String> {
        let ids = self.ids.read().unwrap();
        ids.get(id).cloned()
    }

    /// Get the number of unique strings interned.
    pub fn len(&self) -> usize {
        self.ids.read().unwrap().len()
    }

    /// Check if the interner is empty.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Clear all interned strings.
    ///
    /// IDs handed out before the call are no longer valid afterwards.
    pub fn clear(&mut self) {
        self.strings.write().unwrap().clear();
        self.ids.write().unwrap().clear();
    }

    /// Get approximate memory usage statistics.
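    ///
    /// # Example
    ///
    /// A rough sketch of reading the stats; the overhead figures are
    /// estimates rather than exact allocator measurements:
    ///
    /// ```rust
    /// use tensorlogic_adapters::StringInterner;
    ///
    /// let mut interner = StringInterner::new();
    /// interner.intern("Person");
    /// interner.intern("Agent");
    ///
    /// let stats = interner.memory_usage();
    /// assert_eq!(stats.string_count, 2);
    /// assert!(stats.total_bytes() >= stats.total_string_bytes);
    /// ```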
    pub fn memory_usage(&self) -> MemoryStats {
        let ids = self.ids.read().unwrap();
        let total_string_bytes: usize = ids.iter().map(|s| s.len()).sum();
        let count = ids.len();

        MemoryStats {
            string_count: count,
            total_string_bytes,
            index_overhead_bytes: count * std::mem::size_of::<String>(),
            hash_table_overhead_bytes: count * std::mem::size_of::<(String, usize)>(),
        }
    }
}

/// Memory usage statistics for a string interner.
#[derive(Clone, Debug)]
pub struct MemoryStats {
    /// Number of unique strings interned.
    pub string_count: usize,
    /// Total bytes used by string data.
    pub total_string_bytes: usize,
    /// Approximate overhead of the index vector.
    pub index_overhead_bytes: usize,
    /// Approximate overhead of the hash table.
    pub hash_table_overhead_bytes: usize,
}

impl MemoryStats {
    /// Total memory usage in bytes.
    pub fn total_bytes(&self) -> usize {
        self.total_string_bytes + self.index_overhead_bytes + self.hash_table_overhead_bytes
    }
}

/// Cache for frequently accessed lookups.
///
/// This provides a simple frequency-based cache for domain and predicate
/// lookups to avoid repeated hash table accesses; when the cache is full,
/// the entry with the lowest access count is evicted.
///
/// # Example
///
/// ```rust
/// use tensorlogic_adapters::LookupCache;
///
/// let mut cache = LookupCache::new(100);
/// cache.insert("Person".to_string(), 42);
/// assert_eq!(cache.get(&"Person".to_string()), Some(&42));
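///
/// // When the cache is full, inserting a new key evicts the entry with the
/// // lowest access count; a minimal sketch of that policy:
/// let mut small = LookupCache::new(1);
/// small.insert("Person".to_string(), 1);
/// small.insert("Agent".to_string(), 2);
/// assert_eq!(small.get(&"Person".to_string()), None);
/// assert_eq!(small.get(&"Agent".to_string()), Some(&2));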
/// ```
#[derive(Clone, Debug)]
pub struct LookupCache<K, V> {
    cache: HashMap<K, V>,
    capacity: usize,
    access_count: HashMap<K, usize>,
}

impl<K: Clone + Eq + std::hash::Hash, V: Clone> LookupCache<K, V> {
    /// Create a new lookup cache with the given capacity.
    pub fn new(capacity: usize) -> Self {
        Self {
            cache: HashMap::with_capacity(capacity),
            capacity,
            access_count: HashMap::with_capacity(capacity),
        }
    }

    /// Insert a key-value pair into the cache.
    ///
    /// If the cache is at capacity and the key is not already present,
    /// evicts the entry with the lowest access count first.
    pub fn insert(&mut self, key: K, value: V) {
        if self.cache.len() >= self.capacity && !self.cache.contains_key(&key) {
            self.evict_least_used();
        }

        self.cache.insert(key.clone(), value);
        *self.access_count.entry(key).or_insert(0) += 1;
    }

    /// Get a value from the cache, updating access count.
    pub fn get(&mut self, key: &K) -> Option<&V> {
        if let Some(count) = self.access_count.get_mut(key) {
            *count += 1;
        }
        self.cache.get(key)
    }

    /// Remove the entry with the lowest access count from the cache.
    fn evict_least_used(&mut self) {
        if let Some((key_to_remove, _)) = self.access_count.iter().min_by_key(|(_, &count)| count) {
            let key_to_remove = key_to_remove.clone();
            self.cache.remove(&key_to_remove);
            self.access_count.remove(&key_to_remove);
        }
    }

    /// Clear the cache.
    pub fn clear(&mut self) {
        self.cache.clear();
        self.access_count.clear();
    }

    /// Get the number of items in the cache.
    pub fn len(&self) -> usize {
        self.cache.len()
    }

    /// Check if the cache is empty.
    pub fn is_empty(&self) -> bool {
        self.cache.is_empty()
    }

    /// Get cache usage statistics.
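    ///
    /// # Example
    ///
    /// A small sketch of reading the counters; `total_accesses` counts both
    /// inserts and lookups:
    ///
    /// ```rust
    /// use tensorlogic_adapters::LookupCache;
    ///
    /// let mut cache = LookupCache::new(8);
    /// cache.insert("Person".to_string(), 1);
    /// cache.get(&"Person".to_string());
    ///
    /// let stats = cache.stats();
    /// assert_eq!(stats.size, 1);
    /// assert_eq!(stats.capacity, 8);
    /// assert_eq!(stats.total_accesses, 2);
    /// ```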
    pub fn stats(&self) -> CacheStats {
        let total_accesses: usize = self.access_count.values().sum();
        CacheStats {
            size: self.cache.len(),
            capacity: self.capacity,
            total_accesses,
        }
    }
}

/// Cache statistics.
#[derive(Clone, Debug)]
pub struct CacheStats {
    /// Current number of items in cache.
    pub size: usize,
    /// Maximum cache capacity.
    pub capacity: usize,
    /// Total number of cache accesses.
    pub total_accesses: usize,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_string_interner_basic() {
        let mut interner = StringInterner::new();

        let id1 = interner.intern("Person");
        let id2 = interner.intern("Agent");
        let id3 = interner.intern("Person");

        assert_eq!(id1, id3);
        assert_ne!(id1, id2);
        assert_eq!(interner.resolve(id1).as_deref(), Some("Person"));
        assert_eq!(interner.resolve(id2).as_deref(), Some("Agent"));
    }
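
    #[test]
    fn test_string_interner_resolve_unknown() {
        // An ID that was never handed out resolves to None.
        let interner = StringInterner::new();
        assert_eq!(interner.resolve(0), None);
    }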

    #[test]
    fn test_string_interner_len() {
        let mut interner = StringInterner::new();
        assert_eq!(interner.len(), 0);

        interner.intern("Person");
        assert_eq!(interner.len(), 1);

        interner.intern("Person");
        assert_eq!(interner.len(), 1);

        interner.intern("Agent");
        assert_eq!(interner.len(), 2);
    }

    #[test]
    fn test_string_interner_clear() {
        let mut interner = StringInterner::new();
        interner.intern("Person");
        interner.intern("Agent");
        assert_eq!(interner.len(), 2);

        interner.clear();
        assert_eq!(interner.len(), 0);
    }

    #[test]
    fn test_string_interner_memory_stats() {
        let mut interner = StringInterner::new();
        interner.intern("Person");
        interner.intern("Agent");

        let stats = interner.memory_usage();
        assert_eq!(stats.string_count, 2);
        assert_eq!(stats.total_string_bytes, 6 + 5); // "Person" + "Agent"
        assert!(stats.total_bytes() > 0);
    }

    #[test]
    fn test_lookup_cache_basic() {
        let mut cache = LookupCache::new(3);

        cache.insert("key1", 1);
        cache.insert("key2", 2);

        assert_eq!(cache.get(&"key1"), Some(&1));
        assert_eq!(cache.get(&"key2"), Some(&2));
        assert_eq!(cache.get(&"key3"), None);
    }

    #[test]
    fn test_lookup_cache_eviction() {
        let mut cache = LookupCache::new(2);

        cache.insert("key1", 1);
        cache.insert("key2", 2);
        cache.get(&"key1"); // Access key1 multiple times
        cache.get(&"key1");
        cache.insert("key3", 3); // Should evict key2 (least accessed)

        assert_eq!(cache.get(&"key1"), Some(&1));
        assert_eq!(cache.get(&"key2"), None);
        assert_eq!(cache.get(&"key3"), Some(&3));
    }

    #[test]
    fn test_lookup_cache_clear() {
        let mut cache = LookupCache::new(10);
        cache.insert("key1", 1);
        cache.insert("key2", 2);
        assert_eq!(cache.len(), 2);

        cache.clear();
        assert_eq!(cache.len(), 0);
    }

    #[test]
    fn test_lookup_cache_stats() {
        let mut cache = LookupCache::new(10);
        cache.insert("key1", 1);
        cache.insert("key2", 2);
        cache.get(&"key1");
        cache.get(&"key1");

        let stats = cache.stats();
        assert_eq!(stats.size, 2);
        assert_eq!(stats.capacity, 10);
        assert_eq!(stats.total_accesses, 4); // 2 inserts + 2 gets
    }
}