Skip to main content

tensorlogic_adapters/
performance.rs

1//! Performance optimizations for symbol tables.
2//!
3//! This module provides optimizations for:
4//! - String interning to reduce memory usage
5//! - Lookup caching for frequently accessed data
6//! - Efficient data structure selection
7
8use std::collections::HashMap;
9use std::sync::{Arc, RwLock};
10
11/// String interner for reducing memory usage of repeated strings.
12///
13/// Domain and predicate names are often repeated throughout the symbol table.
14/// This interner ensures each unique string is stored only once in memory.
15///
16/// # Example
17///
18/// ```rust
19/// use tensorlogic_adapters::StringInterner;
20///
21/// let mut interner = StringInterner::new();
22/// let id1 = interner.intern("Person");
23/// let id2 = interner.intern("Person");
24/// assert_eq!(id1, id2); // Same string gets same ID
25///
26/// assert_eq!(interner.resolve(id1), Some("Person"));
27/// ```
28#[derive(Clone, Debug, Default)]
29pub struct StringInterner {
30    strings: Arc<RwLock<HashMap<String, usize>>>,
31    ids: Arc<RwLock<Vec<String>>>,
32}
33
34impl StringInterner {
35    /// Create a new string interner.
36    pub fn new() -> Self {
37        Self::default()
38    }
39
40    /// Intern a string and return its unique ID.
41    ///
42    /// If the string already exists, returns the existing ID.
43    /// Otherwise, allocates a new ID and stores the string.
44    pub fn intern(&mut self, s: &str) -> usize {
45        let mut strings = self.strings.write().expect("lock should not be poisoned");
46
47        if let Some(&id) = strings.get(s) {
48            return id;
49        }
50
51        let mut ids = self.ids.write().expect("lock should not be poisoned");
52        let id = ids.len();
53        ids.push(s.to_string());
54        strings.insert(s.to_string(), id);
55        id
56    }
57
58    /// Resolve an ID back to its string.
59    pub fn resolve(&self, id: usize) -> Option<&str> {
60        let ids = self.ids.read().expect("lock should not be poisoned");
61        ids.get(id).map(|s| {
62            // SAFETY: We're converting the reference to have a 'static lifetime.
63            // This is safe because:
64            // 1. The Arc ensures the data lives as long as any StringInterner instance
65            // 2. The RwLock ensures no concurrent modifications while reading
66            // 3. Strings in the interner are never removed, only added
67            unsafe { std::mem::transmute::<&str, &str>(s.as_str()) }
68        })
69    }
70
71    /// Get the number of unique strings interned.
72    pub fn len(&self) -> usize {
73        self.ids.read().expect("lock should not be poisoned").len()
74    }
75
76    /// Check if the interner is empty.
77    pub fn is_empty(&self) -> bool {
78        self.len() == 0
79    }
80
81    /// Clear all interned strings.
82    pub fn clear(&mut self) {
83        self.strings
84            .write()
85            .expect("lock should not be poisoned")
86            .clear();
87        self.ids
88            .write()
89            .expect("lock should not be poisoned")
90            .clear();
91    }
92
93    /// Get memory usage statistics.
94    pub fn memory_usage(&self) -> MemoryStats {
95        let ids = self.ids.read().expect("lock should not be poisoned");
96        let total_string_bytes: usize = ids.iter().map(|s| s.len()).sum();
97        let count = ids.len();
98
99        MemoryStats {
100            string_count: count,
101            total_string_bytes,
102            index_overhead_bytes: count * std::mem::size_of::<String>(),
103            hash_table_overhead_bytes: count * std::mem::size_of::<(String, usize)>(),
104        }
105    }
106}
107
108/// Memory usage statistics for a string interner.
109#[derive(Clone, Debug)]
110pub struct MemoryStats {
111    /// Number of unique strings interned.
112    pub string_count: usize,
113    /// Total bytes used by string data.
114    pub total_string_bytes: usize,
115    /// Overhead for the index vector.
116    pub index_overhead_bytes: usize,
117    /// Overhead for the hash table.
118    pub hash_table_overhead_bytes: usize,
119}
120
121impl MemoryStats {
122    /// Total memory usage in bytes.
123    pub fn total_bytes(&self) -> usize {
124        self.total_string_bytes + self.index_overhead_bytes + self.hash_table_overhead_bytes
125    }
126}
127
128/// Cache for frequently accessed lookups.
129///
130/// This provides a simple LRU-like cache for domain and predicate lookups
131/// to avoid repeated hash table accesses.
132///
133/// # Example
134///
135/// ```rust
136/// use tensorlogic_adapters::LookupCache;
137///
138/// let mut cache = LookupCache::new(100);
139/// cache.insert("Person".to_string(), 42);
140/// assert_eq!(cache.get(&"Person".to_string()), Some(&42));
141/// ```
142#[derive(Clone, Debug)]
143pub struct LookupCache<K, V> {
144    cache: HashMap<K, V>,
145    capacity: usize,
146    access_count: HashMap<K, usize>,
147}
148
149impl<K: Clone + Eq + std::hash::Hash, V: Clone> LookupCache<K, V> {
150    /// Create a new lookup cache with the given capacity.
151    pub fn new(capacity: usize) -> Self {
152        Self {
153            cache: HashMap::with_capacity(capacity),
154            capacity,
155            access_count: HashMap::with_capacity(capacity),
156        }
157    }
158
159    /// Insert a key-value pair into the cache.
160    ///
161    /// If the cache is at capacity, removes the least recently used item.
162    pub fn insert(&mut self, key: K, value: V) {
163        if self.cache.len() >= self.capacity && !self.cache.contains_key(&key) {
164            self.evict_lru();
165        }
166
167        self.cache.insert(key.clone(), value);
168        *self.access_count.entry(key).or_insert(0) += 1;
169    }
170
171    /// Get a value from the cache, updating access count.
172    pub fn get(&mut self, key: &K) -> Option<&V> {
173        if let Some(count) = self.access_count.get_mut(key) {
174            *count += 1;
175        }
176        self.cache.get(key)
177    }
178
179    /// Remove the least recently used item from the cache.
180    fn evict_lru(&mut self) {
181        if let Some((key_to_remove, _)) = self.access_count.iter().min_by_key(|(_, &count)| count) {
182            let key_to_remove = key_to_remove.clone();
183            self.cache.remove(&key_to_remove);
184            self.access_count.remove(&key_to_remove);
185        }
186    }
187
188    /// Clear the cache.
189    pub fn clear(&mut self) {
190        self.cache.clear();
191        self.access_count.clear();
192    }
193
194    /// Get the number of items in the cache.
195    pub fn len(&self) -> usize {
196        self.cache.len()
197    }
198
199    /// Check if the cache is empty.
200    pub fn is_empty(&self) -> bool {
201        self.cache.is_empty()
202    }
203
204    /// Get cache hit statistics.
205    pub fn stats(&self) -> CacheStats {
206        let total_accesses: usize = self.access_count.values().sum();
207        CacheStats {
208            size: self.cache.len(),
209            capacity: self.capacity,
210            total_accesses,
211        }
212    }
213}
214
215/// Cache statistics.
216#[derive(Clone, Debug)]
217pub struct CacheStats {
218    /// Current number of items in cache.
219    pub size: usize,
220    /// Maximum cache capacity.
221    pub capacity: usize,
222    /// Total number of cache accesses.
223    pub total_accesses: usize,
224}
225
226#[cfg(test)]
227mod tests {
228    use super::*;
229
230    #[test]
231    fn test_string_interner_basic() {
232        let mut interner = StringInterner::new();
233
234        let id1 = interner.intern("Person");
235        let id2 = interner.intern("Agent");
236        let id3 = interner.intern("Person");
237
238        assert_eq!(id1, id3);
239        assert_ne!(id1, id2);
240        assert_eq!(interner.resolve(id1), Some("Person"));
241        assert_eq!(interner.resolve(id2), Some("Agent"));
242    }
243
244    #[test]
245    fn test_string_interner_len() {
246        let mut interner = StringInterner::new();
247        assert_eq!(interner.len(), 0);
248
249        interner.intern("Person");
250        assert_eq!(interner.len(), 1);
251
252        interner.intern("Person");
253        assert_eq!(interner.len(), 1);
254
255        interner.intern("Agent");
256        assert_eq!(interner.len(), 2);
257    }
258
259    #[test]
260    fn test_string_interner_clear() {
261        let mut interner = StringInterner::new();
262        interner.intern("Person");
263        interner.intern("Agent");
264        assert_eq!(interner.len(), 2);
265
266        interner.clear();
267        assert_eq!(interner.len(), 0);
268    }
269
270    #[test]
271    fn test_string_interner_memory_stats() {
272        let mut interner = StringInterner::new();
273        interner.intern("Person");
274        interner.intern("Agent");
275
276        let stats = interner.memory_usage();
277        assert_eq!(stats.string_count, 2);
278        assert_eq!(stats.total_string_bytes, 6 + 5); // "Person" + "Agent"
279        assert!(stats.total_bytes() > 0);
280    }
281
282    #[test]
283    fn test_lookup_cache_basic() {
284        let mut cache = LookupCache::new(3);
285
286        cache.insert("key1", 1);
287        cache.insert("key2", 2);
288
289        assert_eq!(cache.get(&"key1"), Some(&1));
290        assert_eq!(cache.get(&"key2"), Some(&2));
291        assert_eq!(cache.get(&"key3"), None);
292    }
293
294    #[test]
295    fn test_lookup_cache_eviction() {
296        let mut cache = LookupCache::new(2);
297
298        cache.insert("key1", 1);
299        cache.insert("key2", 2);
300        cache.get(&"key1"); // Access key1 multiple times
301        cache.get(&"key1");
302        cache.insert("key3", 3); // Should evict key2 (least accessed)
303
304        assert_eq!(cache.get(&"key1"), Some(&1));
305        assert_eq!(cache.get(&"key2"), None);
306        assert_eq!(cache.get(&"key3"), Some(&3));
307    }
308
309    #[test]
310    fn test_lookup_cache_clear() {
311        let mut cache = LookupCache::new(10);
312        cache.insert("key1", 1);
313        cache.insert("key2", 2);
314        assert_eq!(cache.len(), 2);
315
316        cache.clear();
317        assert_eq!(cache.len(), 0);
318    }
319
320    #[test]
321    fn test_lookup_cache_stats() {
322        let mut cache = LookupCache::new(10);
323        cache.insert("key1", 1);
324        cache.insert("key2", 2);
325        cache.get(&"key1");
326        cache.get(&"key1");
327
328        let stats = cache.stats();
329        assert_eq!(stats.size, 2);
330        assert_eq!(stats.capacity, 10);
331        assert_eq!(stats.total_accesses, 4); // 2 inserts + 2 gets
332    }
333}