// llm_shield_models/cache.rs

//! Result caching with LRU eviction and TTL
//!
//! ## Design Philosophy
//!
//! This cache implementation follows enterprise-grade patterns:
//! - **Thread-Safe**: Uses `Arc` + `RwLock` for concurrent access
//! - **LRU Eviction**: Least Recently Used entries are evicted first
//! - **TTL Support**: Entries expire after the configured time-to-live
//! - **Statistics**: Tracks hits, misses, and hit rates
//! - **Lazy Cleanup**: Expired entries are cleaned on access (no background threads)
//!
//! ## Usage Example
//!
//! ```rust
//! use llm_shield_models::cache::{ResultCache, CacheConfig};
//! use llm_shield_core::ScanResult;
//! use std::time::Duration;
//!
//! let cache = ResultCache::new(CacheConfig {
//!     max_size: 1000,
//!     ttl: Duration::from_secs(300),
//! });
//!
//! // Insert a result
//! let result = ScanResult::pass("safe text".to_string());
//! cache.insert("key1".to_string(), result);
//!
//! // Retrieve it
//! if let Some(cached_result) = cache.get("key1") {
//!     println!("Cache hit!");
//! }
//!
//! // Check statistics
//! let stats = cache.stats();
//! println!("Hit rate: {:.2}%", stats.hit_rate() * 100.0);
//! ```

38use llm_shield_core::ScanResult;
39use std::collections::{HashMap, hash_map::DefaultHasher};
40use std::hash::{Hash, Hasher};
41use std::sync::{Arc, RwLock};
42use std::time::{Duration, Instant};
43
/// Tunable parameters for the result cache.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Upper bound on the number of cached entries.
    pub max_size: usize,
    /// How long an entry stays valid after insertion.
    pub ttl: Duration,
}

impl Default for CacheConfig {
    /// Defaults: 10,000 entries with a 5-minute time-to-live.
    fn default() -> Self {
        CacheConfig {
            ttl: Duration::from_secs(5 * 60),
            max_size: 10_000,
        }
    }
}
61
/// Thread-safe result cache with LRU eviction and TTL
///
/// ## Performance Characteristics
///
/// - **Get**: O(1) average, O(n) worst case for access order update
/// - **Insert**: O(1) average, O(n) worst case for eviction
/// - **Memory**: O(max_size * entry_size)
///
/// ## Thread Safety
///
/// Uses `Arc<RwLock<_>>` for interior mutability:
/// - Multiple concurrent readers
/// - Exclusive writer access
/// - Clone creates a new reference to same cache
pub struct ResultCache {
    // All clones of this handle share the same CacheInner behind the lock.
    inner: Arc<RwLock<CacheInner>>,
}
79
/// Internal cache state, always accessed through the `RwLock` in `ResultCache`.
struct CacheInner {
    /// Capacity and TTL settings, fixed at construction time.
    config: CacheConfig,
    /// Map from cache key to the stored result plus its insertion timestamp.
    entries: HashMap<String, CacheEntry>,
    /// LRU bookkeeping (oldest first, newest last); kept in sync with `entries`.
    access_order: Vec<String>,
    /// Running hit/miss counters; survive `clear()` but not `reset_stats()`.
    stats: CacheStats,
}
87
/// A single cache entry with metadata
struct CacheEntry {
    /// The cached scan outcome; cloned out on every cache hit.
    result: ScanResult,
    /// Insertion time, compared against the configured TTL on lookup.
    inserted_at: Instant,
}
93
/// Cache performance statistics
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
    /// Number of cache hits
    pub hits: u64,
    /// Number of cache misses
pub misses: u64,
}

impl CacheStats {
    /// Combined count of hits and misses.
    pub fn total_requests(&self) -> u64 {
        self.misses + self.hits
    }

    /// Fraction of requests served from cache, in `[0.0, 1.0]`.
    ///
    /// Defined as `0.0` when no requests have been recorded yet.
    pub fn hit_rate(&self) -> f64 {
        match self.total_requests() {
            0 => 0.0,
            total => self.hits as f64 / total as f64,
        }
    }
}
119
120impl ResultCache {
121    /// Create a new result cache with the given configuration
122    ///
123    /// # Example
124    ///
125    /// ```
126    /// use llm_shield_models::cache::{ResultCache, CacheConfig};
127    /// use std::time::Duration;
128    ///
129    /// let cache = ResultCache::new(CacheConfig {
130    ///     max_size: 1000,
131    ///     ttl: Duration::from_secs(300),
132    /// });
133    /// ```
134    pub fn new(config: CacheConfig) -> Self {
135        Self {
136            inner: Arc::new(RwLock::new(CacheInner {
137                config,
138                entries: HashMap::new(),
139                access_order: Vec::new(),
140                stats: CacheStats::default(),
141            })),
142        }
143    }
144
145    /// Get a cached result by key
146    ///
147    /// Returns `None` if:
148    /// - Key doesn't exist
149    /// - Entry has expired (and removes it)
150    ///
151    /// Updates LRU access order on cache hit.
152    pub fn get(&self, key: &str) -> Option<ScanResult> {
153        let mut inner = self.inner.write().unwrap();
154
155        // Check if key exists
156        if let Some(entry) = inner.entries.get(key) {
157            // Check if expired
158            if entry.inserted_at.elapsed() < inner.config.ttl {
159                // Clone the result before updating access order
160                let result = entry.result.clone();
161
162                // Cache hit - update stats and access order
163                inner.stats.hits += 1;
164
165                // Update LRU: move to end (most recently used)
166                inner.access_order.retain(|k| k != key);
167                inner.access_order.push(key.to_string());
168
169                return Some(result);
170            } else {
171                // Expired - remove it (lazy cleanup)
172                inner.entries.remove(key);
173                inner.access_order.retain(|k| k != key);
174            }
175        }
176
177        // Cache miss
178        inner.stats.misses += 1;
179        None
180    }
181
182    /// Insert or update a cache entry
183    ///
184    /// If the cache is at capacity, evicts the least recently used entry.
185    /// If the key already exists, updates it and refreshes the TTL.
186    pub fn insert(&self, key: String, result: ScanResult) {
187        let mut inner = self.inner.write().unwrap();
188
189        // Handle zero capacity edge case
190        if inner.config.max_size == 0 {
191            return;
192        }
193
194        // If key already exists, remove it from access order
195        if inner.entries.contains_key(&key) {
196            inner.access_order.retain(|k| k != &key);
197        } else if inner.entries.len() >= inner.config.max_size {
198            // At capacity and new key - evict oldest
199            if let Some(oldest_key) = inner.access_order.first().cloned() {
200                inner.entries.remove(&oldest_key);
201                inner.access_order.remove(0);
202            }
203        }
204
205        // Insert new entry
206        inner.entries.insert(
207            key.clone(),
208            CacheEntry {
209                result,
210                inserted_at: Instant::now(),
211            },
212        );
213
214        // Add to end of access order (most recently used)
215        inner.access_order.push(key);
216    }
217
218    /// Clear all entries from the cache
219    ///
220    /// This does not reset statistics.
221    pub fn clear(&self) {
222        let mut inner = self.inner.write().unwrap();
223        inner.entries.clear();
224        inner.access_order.clear();
225    }
226
227    /// Get the number of entries in the cache
228    ///
229    /// Note: This includes expired entries that haven't been lazily cleaned yet.
230    pub fn len(&self) -> usize {
231        self.inner.read().unwrap().entries.len()
232    }
233
234    /// Check if the cache is empty
235    pub fn is_empty(&self) -> bool {
236        self.len() == 0
237    }
238
239    /// Get cache statistics
240    pub fn stats(&self) -> CacheStats {
241        self.inner.read().unwrap().stats.clone()
242    }
243
244    /// Reset cache statistics
245    ///
246    /// This does not affect cached entries.
247    pub fn reset_stats(&self) {
248        let mut inner = self.inner.write().unwrap();
249        inner.stats = CacheStats::default();
250    }
251
252    /// Generate a deterministic hash key from input text
253    ///
254    /// Useful for caching scan results based on input content.
255    ///
256    /// # Example
257    ///
258    /// ```
259    /// use llm_shield_models::cache::ResultCache;
260    ///
261    /// let input = "some text to scan";
262    /// let key = ResultCache::hash_key(input);
263    /// ```
264    pub fn hash_key(input: &str) -> String {
265        let mut hasher = DefaultHasher::new();
266        input.hash(&mut hasher);
267        format!("{:x}", hasher.finish())
268    }
269}
270
271impl Clone for ResultCache {
272    /// Clone creates a new reference to the same underlying cache
273    ///
274    /// All clones share the same cache data and statistics.
275    fn clone(&self) -> Self {
276        Self {
277            inner: Arc::clone(&self.inner),
278        }
279    }
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Convenience: build a passing ScanResult for the given text.
    fn create_test_result(text: &str) -> ScanResult {
        ScanResult::pass(text.to_string())
    }

    #[test]
    fn test_cache_config_default() {
        let config = CacheConfig::default();
        assert_eq!(config.max_size, 10_000);
        assert_eq!(config.ttl, Duration::from_secs(300));
    }

    #[test]
    fn test_cache_stats_empty() {
        let stats = CacheStats::default();
        assert_eq!(stats.total_requests(), 0);
        assert_eq!(stats.hit_rate(), 0.0);
    }

    #[test]
    fn test_cache_stats_calculation() {
        let stats = CacheStats {
            hits: 7,
            misses: 3,
        };
        assert_eq!(stats.total_requests(), 10);
        assert!((stats.hit_rate() - 0.7).abs() < 0.001);
    }

    #[test]
    fn test_basic_insert_get() {
        let cache = ResultCache::new(CacheConfig {
            max_size: 10,
            ttl: Duration::from_secs(60),
        });

        let result = create_test_result("test");
        cache.insert("key1".to_string(), result.clone());

        assert_eq!(cache.get("key1"), Some(result));
    }

    #[test]
    fn test_cache_miss() {
        let cache = ResultCache::new(CacheConfig {
            max_size: 10,
            ttl: Duration::from_secs(60),
        });

        assert_eq!(cache.get("nonexistent"), None);
    }

    #[test]
    fn test_is_empty() {
        let cache = ResultCache::new(CacheConfig::default());
        assert!(cache.is_empty());

        cache.insert("key".to_string(), create_test_result("test"));
        assert!(!cache.is_empty());
    }

    #[test]
    fn test_lru_eviction() {
        let cache = ResultCache::new(CacheConfig {
            max_size: 2,
            ttl: Duration::from_secs(60),
        });

        cache.insert("a".to_string(), create_test_result("a"));
        cache.insert("b".to_string(), create_test_result("b"));

        // Touch "a" so "b" becomes the least recently used entry.
        assert!(cache.get("a").is_some());

        // Inserting a third key must evict "b", not "a".
        cache.insert("c".to_string(), create_test_result("c"));
        assert_eq!(cache.get("b"), None);
        assert!(cache.get("a").is_some());
        assert!(cache.get("c").is_some());
    }

    #[test]
    fn test_ttl_expiry_lazy_cleanup() {
        // Zero TTL: every entry is already expired when it is read back.
        let cache = ResultCache::new(CacheConfig {
            max_size: 10,
            ttl: Duration::from_secs(0),
        });

        cache.insert("k".to_string(), create_test_result("v"));
        assert_eq!(cache.len(), 1);

        // The read misses AND lazily removes the expired entry.
        assert_eq!(cache.get("k"), None);
        assert!(cache.is_empty());
        assert_eq!(cache.stats().misses, 1);
    }

    #[test]
    fn test_zero_capacity_stores_nothing() {
        let cache = ResultCache::new(CacheConfig {
            max_size: 0,
            ttl: Duration::from_secs(60),
        });

        cache.insert("k".to_string(), create_test_result("v"));
        assert!(cache.is_empty());
        assert_eq!(cache.get("k"), None);
    }

    #[test]
    fn test_update_existing_key() {
        let cache = ResultCache::new(CacheConfig {
            max_size: 2,
            ttl: Duration::from_secs(60),
        });

        cache.insert("k".to_string(), create_test_result("old"));
        cache.insert("k".to_string(), create_test_result("new"));

        // Overwrite must not grow the cache, and must return the new value.
        assert_eq!(cache.len(), 1);
        assert_eq!(cache.get("k"), Some(create_test_result("new")));
    }

    #[test]
    fn test_stats_tracking_and_reset() {
        let cache = ResultCache::new(CacheConfig {
            max_size: 10,
            ttl: Duration::from_secs(60),
        });

        cache.insert("k".to_string(), create_test_result("v"));
        cache.get("k"); // hit
        cache.get("absent"); // miss

        let stats = cache.stats();
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.misses, 1);

        // reset_stats clears counters but keeps entries.
        cache.reset_stats();
        assert_eq!(cache.stats().total_requests(), 0);
        assert!(!cache.is_empty());
    }

    #[test]
    fn test_clear_keeps_stats() {
        let cache = ResultCache::new(CacheConfig::default());
        cache.insert("k".to_string(), create_test_result("v"));
        cache.get("k"); // hit

        cache.clear();
        assert!(cache.is_empty());
        // clear() drops entries but not statistics.
        assert_eq!(cache.stats().hits, 1);
    }

    #[test]
    fn test_clone_shares_state() {
        let cache = ResultCache::new(CacheConfig::default());
        let alias = cache.clone();

        cache.insert("k".to_string(), create_test_result("v"));
        // The clone sees the insert, proving both handles share one cache.
        assert!(alias.get("k").is_some());
    }

    #[test]
    fn test_hash_key_deterministic() {
        let key1 = ResultCache::hash_key("test input");
        let key2 = ResultCache::hash_key("test input");
        assert_eq!(key1, key2);
    }

    #[test]
    fn test_hash_key_different_inputs() {
        let key1 = ResultCache::hash_key("input1");
        let key2 = ResultCache::hash_key("input2");
        assert_ne!(key1, key2);
    }
}