// llm_shield_models/cache.rs
1//! Result caching with LRU eviction and TTL
2//!
3//! ## Design Philosophy
4//!
5//! This cache implementation follows enterprise-grade patterns:
6//! - **Thread-Safe**: Uses Arc + RwLock for concurrent access
7//! - **LRU Eviction**: Least Recently Used items are evicted first
8//! - **TTL Support**: Entries expire after configured time-to-live
9//! - **Statistics**: Tracks hits, misses, and hit rates
10//! - **Lazy Cleanup**: Expired items cleaned on access (no background threads)
11//!
12//! ## Usage Example
13//!
14//! ```rust
15//! use llm_shield_models::cache::{ResultCache, CacheConfig};
16//! use llm_shield_core::ScanResult;
17//! use std::time::Duration;
18//!
19//! let cache = ResultCache::new(CacheConfig {
20//! max_size: 1000,
21//! ttl: Duration::from_secs(300),
22//! });
23//!
24//! // Insert a result
25//! let result = ScanResult::pass("safe text".to_string());
26//! cache.insert("key1".to_string(), result);
27//!
28//! // Retrieve it
29//! if let Some(cached_result) = cache.get("key1") {
30//! println!("Cache hit!");
31//! }
32//!
33//! // Check statistics
34//! let stats = cache.stats();
35//! println!("Hit rate: {:.2}%", stats.hit_rate() * 100.0);
36//! ```
37
38use llm_shield_core::ScanResult;
39use std::collections::{HashMap, hash_map::DefaultHasher};
40use std::hash::{Hash, Hasher};
41use std::sync::{Arc, RwLock};
42use std::time::{Duration, Instant};
43
/// Configuration for the result cache.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Maximum number of entries held at once; when an insert would exceed
    /// this, the least recently used entry is evicted first.
    pub max_size: usize,
    /// Time-to-live for cache entries; entries older than this are treated
    /// as absent and removed lazily on access.
    pub ttl: Duration,
}
52
53impl Default for CacheConfig {
54 fn default() -> Self {
55 Self {
56 max_size: 10_000,
57 ttl: Duration::from_secs(300), // 5 minutes
58 }
59 }
60}
61
/// Thread-safe result cache with LRU eviction and TTL
///
/// ## Performance Characteristics
///
/// - **Get**: O(1) average, O(n) worst case for access order update
/// - **Insert**: O(1) average, O(n) worst case for eviction
/// - **Memory**: O(max_size * entry_size)
///
/// ## Thread Safety
///
/// Uses `Arc<RwLock<_>>` for interior mutability:
/// - Multiple concurrent readers
/// - Exclusive writer access
/// - Clone creates a new reference to same cache
pub struct ResultCache {
    // Shared state behind a lock; cloning the cache clones this `Arc`,
    // so all handles observe the same entries and statistics.
    inner: Arc<RwLock<CacheInner>>,
}
79
/// Internal cache state, guarded by the `RwLock` inside `ResultCache`.
struct CacheInner {
    // Capacity and TTL settings, fixed at construction.
    config: CacheConfig,
    // Key -> cached result plus insertion timestamp.
    entries: HashMap<String, CacheEntry>,
    // LRU tracking (oldest first, newest last); kept in sync with `entries`.
    access_order: Vec<String>,
    // Running hit/miss counters.
    stats: CacheStats,
}
87
/// A single cache entry with metadata.
struct CacheEntry {
    // The cached scan result; cloned out on each hit.
    result: ScanResult,
    // Insertion time, compared against the configured TTL on access.
    inserted_at: Instant,
}
93
/// Cache performance statistics.
///
/// Counters only ever increase, except via `ResultCache::reset_stats`.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
    /// Number of lookups that found a live (non-expired) entry
    pub hits: u64,
    /// Number of lookups that found nothing, or only an expired entry
    pub misses: u64,
}
102
103impl CacheStats {
104 /// Total number of cache requests
105 pub fn total_requests(&self) -> u64 {
106 self.hits + self.misses
107 }
108
109 /// Hit rate as a value between 0.0 and 1.0
110 pub fn hit_rate(&self) -> f64 {
111 let total = self.total_requests();
112 if total == 0 {
113 0.0
114 } else {
115 self.hits as f64 / total as f64
116 }
117 }
118}
119
120impl ResultCache {
121 /// Create a new result cache with the given configuration
122 ///
123 /// # Example
124 ///
125 /// ```
126 /// use llm_shield_models::cache::{ResultCache, CacheConfig};
127 /// use std::time::Duration;
128 ///
129 /// let cache = ResultCache::new(CacheConfig {
130 /// max_size: 1000,
131 /// ttl: Duration::from_secs(300),
132 /// });
133 /// ```
134 pub fn new(config: CacheConfig) -> Self {
135 Self {
136 inner: Arc::new(RwLock::new(CacheInner {
137 config,
138 entries: HashMap::new(),
139 access_order: Vec::new(),
140 stats: CacheStats::default(),
141 })),
142 }
143 }
144
145 /// Get a cached result by key
146 ///
147 /// Returns `None` if:
148 /// - Key doesn't exist
149 /// - Entry has expired (and removes it)
150 ///
151 /// Updates LRU access order on cache hit.
152 pub fn get(&self, key: &str) -> Option<ScanResult> {
153 let mut inner = self.inner.write().unwrap();
154
155 // Check if key exists
156 if let Some(entry) = inner.entries.get(key) {
157 // Check if expired
158 if entry.inserted_at.elapsed() < inner.config.ttl {
159 // Clone the result before updating access order
160 let result = entry.result.clone();
161
162 // Cache hit - update stats and access order
163 inner.stats.hits += 1;
164
165 // Update LRU: move to end (most recently used)
166 inner.access_order.retain(|k| k != key);
167 inner.access_order.push(key.to_string());
168
169 return Some(result);
170 } else {
171 // Expired - remove it (lazy cleanup)
172 inner.entries.remove(key);
173 inner.access_order.retain(|k| k != key);
174 }
175 }
176
177 // Cache miss
178 inner.stats.misses += 1;
179 None
180 }
181
182 /// Insert or update a cache entry
183 ///
184 /// If the cache is at capacity, evicts the least recently used entry.
185 /// If the key already exists, updates it and refreshes the TTL.
186 pub fn insert(&self, key: String, result: ScanResult) {
187 let mut inner = self.inner.write().unwrap();
188
189 // Handle zero capacity edge case
190 if inner.config.max_size == 0 {
191 return;
192 }
193
194 // If key already exists, remove it from access order
195 if inner.entries.contains_key(&key) {
196 inner.access_order.retain(|k| k != &key);
197 } else if inner.entries.len() >= inner.config.max_size {
198 // At capacity and new key - evict oldest
199 if let Some(oldest_key) = inner.access_order.first().cloned() {
200 inner.entries.remove(&oldest_key);
201 inner.access_order.remove(0);
202 }
203 }
204
205 // Insert new entry
206 inner.entries.insert(
207 key.clone(),
208 CacheEntry {
209 result,
210 inserted_at: Instant::now(),
211 },
212 );
213
214 // Add to end of access order (most recently used)
215 inner.access_order.push(key);
216 }
217
218 /// Clear all entries from the cache
219 ///
220 /// This does not reset statistics.
221 pub fn clear(&self) {
222 let mut inner = self.inner.write().unwrap();
223 inner.entries.clear();
224 inner.access_order.clear();
225 }
226
227 /// Get the number of entries in the cache
228 ///
229 /// Note: This includes expired entries that haven't been lazily cleaned yet.
230 pub fn len(&self) -> usize {
231 self.inner.read().unwrap().entries.len()
232 }
233
234 /// Check if the cache is empty
235 pub fn is_empty(&self) -> bool {
236 self.len() == 0
237 }
238
239 /// Get cache statistics
240 pub fn stats(&self) -> CacheStats {
241 self.inner.read().unwrap().stats.clone()
242 }
243
244 /// Reset cache statistics
245 ///
246 /// This does not affect cached entries.
247 pub fn reset_stats(&self) {
248 let mut inner = self.inner.write().unwrap();
249 inner.stats = CacheStats::default();
250 }
251
252 /// Generate a deterministic hash key from input text
253 ///
254 /// Useful for caching scan results based on input content.
255 ///
256 /// # Example
257 ///
258 /// ```
259 /// use llm_shield_models::cache::ResultCache;
260 ///
261 /// let input = "some text to scan";
262 /// let key = ResultCache::hash_key(input);
263 /// ```
264 pub fn hash_key(input: &str) -> String {
265 let mut hasher = DefaultHasher::new();
266 input.hash(&mut hasher);
267 format!("{:x}", hasher.finish())
268 }
269}
270
271impl Clone for ResultCache {
272 /// Clone creates a new reference to the same underlying cache
273 ///
274 /// All clones share the same cache data and statistics.
275 fn clone(&self) -> Self {
276 Self {
277 inner: Arc::clone(&self.inner),
278 }
279 }
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a passing `ScanResult` wrapping the given text.
    fn create_test_result(text: &str) -> ScanResult {
        ScanResult::pass(text.to_string())
    }

    #[test]
    fn test_cache_config_default() {
        let CacheConfig { max_size, ttl } = CacheConfig::default();
        assert_eq!(10_000, max_size);
        assert_eq!(Duration::from_secs(300), ttl);
    }

    #[test]
    fn test_cache_stats_empty() {
        let stats = CacheStats::default();
        assert_eq!(0, stats.total_requests());
        assert_eq!(0.0, stats.hit_rate());
    }

    #[test]
    fn test_cache_stats_calculation() {
        let stats = CacheStats { hits: 7, misses: 3 };
        assert_eq!(10, stats.total_requests());
        assert!((stats.hit_rate() - 0.7).abs() < 1e-3);
    }

    #[test]
    fn test_basic_insert_get() {
        let cache = ResultCache::new(CacheConfig {
            max_size: 10,
            ttl: Duration::from_secs(60),
        });

        let expected = create_test_result("test");
        cache.insert("key1".to_string(), expected.clone());

        assert_eq!(Some(expected), cache.get("key1"));
    }

    #[test]
    fn test_cache_miss() {
        let cache = ResultCache::new(CacheConfig {
            max_size: 10,
            ttl: Duration::from_secs(60),
        });

        assert_eq!(None, cache.get("nonexistent"));
    }

    #[test]
    fn test_is_empty() {
        let cache = ResultCache::new(CacheConfig::default());
        assert!(cache.is_empty());

        cache.insert("key".to_string(), create_test_result("test"));
        assert!(!cache.is_empty());
    }

    #[test]
    fn test_hash_key_deterministic() {
        assert_eq!(
            ResultCache::hash_key("test input"),
            ResultCache::hash_key("test input"),
        );
    }

    #[test]
    fn test_hash_key_different_inputs() {
        assert_ne!(
            ResultCache::hash_key("input1"),
            ResultCache::hash_key("input2"),
        );
    }
}
359}