rexis_rag/caching/
result_cache.rs

1//! # Result Cache Implementation
2//!
3//! High-performance caching for search results with compression.
4
5use super::{
6    Cache, CacheEntryMetadata, CacheStats, CachedSearchResult, ResultCacheConfig, ResultCacheEntry,
7};
8use crate::RragResult;
9use std::collections::HashMap;
10
/// Result cache optimized for search results
///
/// Entries are keyed by a parameter hash string and live in one of two maps:
/// `storage` for regular entries and `compressed_storage` for large result
/// sets that were serialized to bytes.
pub struct ResultCache {
    /// Configuration (`max_size`, `ttl`, `eviction_policy` and
    /// `compress_large_results` are the fields read in this file)
    config: ResultCacheConfig,

    /// Main storage: uncompressed entries keyed by parameter hash
    storage: HashMap<String, ResultCacheEntry>,

    /// Compressed storage for large results, keyed by parameter hash.
    /// These values carry no expiry metadata.
    compressed_storage: HashMap<String, CompressedResults>,

    /// Parameter hash index for fast lookups.
    /// Maps a 4-byte fragment of a parameter hash to the hashes containing
    /// that fragment; it is written by the cache but not read in this file.
    param_index: HashMap<String, Vec<String>>,

    /// Cache statistics (this file only updates the eviction counter)
    stats: CacheStats,
}
28
/// Compressed search results
///
/// A serialized result set together with the bookkeeping needed to report
/// on it without deserializing the payload.
#[derive(Debug, Clone)]
pub struct CompressedResults {
    /// Compressed data (the serialized result set as stored)
    pub data: Vec<u8>,

    /// Original size: byte length of the serialized results before any
    /// compression was applied
    pub original_size: usize,

    /// Compression method used to produce `data`
    pub method: CompressionMethod,

    /// Number of results contained in the payload
    pub result_count: usize,
}
44
/// Compression methods for results
///
/// Tags a stored payload with the encoding that produced it. The type is a
/// plain fieldless enum, so it is cheap to copy, compare and use as a map or
/// set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CompressionMethod {
    /// Payload is stored as-is, without a compression pass
    None,
    /// Payload tagged as gzip-encoded
    Gzip,
    /// Payload tagged as Snappy-encoded
    Snappy,
    /// Payload tagged as Zstandard-encoded
    Zstd,
}
53
54impl ResultCache {
55    /// Create new result cache
56    pub fn new(config: ResultCacheConfig) -> RragResult<Self> {
57        Ok(Self {
58            config,
59            storage: HashMap::new(),
60            compressed_storage: HashMap::new(),
61            param_index: HashMap::new(),
62            stats: CacheStats::default(),
63        })
64    }
65
66    /// Get cached results by parameters
67    pub fn get_by_params(&self, params_hash: &str) -> Option<Vec<CachedSearchResult>> {
68        // Try direct lookup
69        if let Some(entry) = self.storage.get(params_hash) {
70            if !entry.metadata.is_expired() {
71                return Some(entry.results.clone());
72            }
73        }
74
75        // Try compressed storage
76        if let Some(compressed) = self.compressed_storage.get(params_hash) {
77            return Some(self.decompress_results(compressed));
78        }
79
80        None
81    }
82
83    /// Cache search results
84    pub fn cache_results(
85        &mut self,
86        params_hash: String,
87        results: Vec<CachedSearchResult>,
88        metadata: HashMap<String, String>,
89    ) -> RragResult<()> {
90        // Check capacity
91        if self.storage.len() >= self.config.max_size {
92            self.evict_entry()?;
93        }
94
95        // Check if results should be compressed
96        let should_compress = self.config.compress_large_results && results.len() > 100;
97
98        if should_compress {
99            let compressed = self.compress_results(&results);
100            self.compressed_storage
101                .insert(params_hash.clone(), compressed);
102        } else {
103            let mut entry_metadata = CacheEntryMetadata::new();
104            entry_metadata.ttl = Some(self.config.ttl);
105
106            let entry = ResultCacheEntry {
107                params_hash: params_hash.clone(),
108                results,
109                result_metadata: metadata,
110                metadata: entry_metadata,
111            };
112
113            self.storage.insert(params_hash.clone(), entry);
114        }
115
116        // Update parameter index
117        self.update_param_index(&params_hash);
118
119        Ok(())
120    }
121
122    /// Compress results
123    fn compress_results(&self, results: &[CachedSearchResult]) -> CompressedResults {
124        // Serialize results
125        let serialized = bincode::serialize(results).unwrap_or_default();
126        let original_size = serialized.len();
127
128        // For now, just store serialized data (real implementation would use compression)
129        CompressedResults {
130            data: serialized,
131            original_size,
132            method: CompressionMethod::None,
133            result_count: results.len(),
134        }
135    }
136
137    /// Decompress results
138    fn decompress_results(&self, compressed: &CompressedResults) -> Vec<CachedSearchResult> {
139        // Deserialize results
140        bincode::deserialize(&compressed.data).unwrap_or_default()
141    }
142
143    /// Update parameter index
144    fn update_param_index(&mut self, params_hash: &str) {
145        // Extract parameter components (simplified)
146        let components = self.extract_param_components(params_hash);
147
148        for component in components {
149            self.param_index
150                .entry(component)
151                .or_insert_with(Vec::new)
152                .push(params_hash.to_string());
153        }
154    }
155
156    /// Extract parameter components for indexing
157    fn extract_param_components(&self, params_hash: &str) -> Vec<String> {
158        // Simplified: split hash into chunks for indexing
159        let mut components = Vec::new();
160
161        if params_hash.len() >= 8 {
162            components.push(params_hash[0..4].to_string());
163            components.push(params_hash[4..8].to_string());
164        }
165
166        components
167    }
168
169    /// Invalidate cache entries by pattern
170    pub fn invalidate_pattern(&mut self, pattern: &str) {
171        let keys_to_remove: Vec<String> = self
172            .storage
173            .keys()
174            .filter(|k| k.contains(pattern))
175            .cloned()
176            .collect();
177
178        for key in keys_to_remove {
179            self.storage.remove(&key);
180            self.compressed_storage.remove(&key);
181        }
182    }
183
184    /// Evict entry based on policy
185    fn evict_entry(&mut self) -> RragResult<()> {
186        use super::EvictionPolicy;
187
188        match self.config.eviction_policy {
189            EvictionPolicy::TTL => self.evict_expired(),
190            EvictionPolicy::LRU => self.evict_lru(),
191            _ => self.evict_lru(),
192        }
193    }
194
195    /// Evict expired entries
196    fn evict_expired(&mut self) -> RragResult<()> {
197        let before_count = self.storage.len();
198        self.storage.retain(|_, entry| !entry.metadata.is_expired());
199
200        let evicted = before_count - self.storage.len();
201        self.stats.evictions += evicted as u64;
202
203        // If still over capacity, evict oldest
204        if self.storage.len() >= self.config.max_size {
205            self.evict_lru()?;
206        }
207
208        Ok(())
209    }
210
211    /// Evict least recently used entry
212    fn evict_lru(&mut self) -> RragResult<()> {
213        if let Some((key, _)) = self
214            .storage
215            .iter()
216            .min_by_key(|(_, entry)| entry.metadata.last_accessed)
217        {
218            let key = key.clone();
219            self.storage.remove(&key);
220            self.compressed_storage.remove(&key);
221            self.stats.evictions += 1;
222        }
223        Ok(())
224    }
225
226    /// Get cache insights
227    pub fn get_insights(&self) -> ResultCacheInsights {
228        let total_entries = self.storage.len() + self.compressed_storage.len();
229        let compressed_entries = self.compressed_storage.len();
230
231        let avg_results_per_entry = if !self.storage.is_empty() {
232            self.storage
233                .values()
234                .map(|e| e.results.len())
235                .sum::<usize>() as f32
236                / self.storage.len() as f32
237        } else {
238            0.0
239        };
240
241        let compression_ratio = if !self.compressed_storage.is_empty() {
242            let total_original: usize = self
243                .compressed_storage
244                .values()
245                .map(|c| c.original_size)
246                .sum();
247            let total_compressed: usize =
248                self.compressed_storage.values().map(|c| c.data.len()).sum();
249
250            if total_compressed > 0 {
251                total_original as f32 / total_compressed as f32
252            } else {
253                1.0
254            }
255        } else {
256            1.0
257        };
258
259        ResultCacheInsights {
260            total_entries,
261            compressed_entries,
262            avg_results_per_entry,
263            compression_ratio,
264            memory_usage: self.estimate_memory_usage(),
265        }
266    }
267
268    /// Estimate memory usage
269    fn estimate_memory_usage(&self) -> usize {
270        let storage_size: usize = self
271            .storage
272            .values()
273            .map(|e| {
274                std::mem::size_of::<ResultCacheEntry>()
275                    + e.results.len() * std::mem::size_of::<CachedSearchResult>()
276            })
277            .sum();
278
279        let compressed_size: usize = self
280            .compressed_storage
281            .values()
282            .map(|c| std::mem::size_of::<CompressedResults>() + c.data.len())
283            .sum();
284
285        storage_size + compressed_size
286    }
287}
288
289impl Cache<String, ResultCacheEntry> for ResultCache {
290    fn get(&self, key: &String) -> Option<ResultCacheEntry> {
291        self.storage.get(key).cloned()
292    }
293
294    fn put(&mut self, key: String, value: ResultCacheEntry) -> RragResult<()> {
295        if self.storage.len() >= self.config.max_size {
296            self.evict_entry()?;
297        }
298
299        self.storage.insert(key, value);
300        Ok(())
301    }
302
303    fn remove(&mut self, key: &String) -> Option<ResultCacheEntry> {
304        self.compressed_storage.remove(key);
305        self.storage.remove(key)
306    }
307
308    fn contains(&self, key: &String) -> bool {
309        self.storage.contains_key(key) || self.compressed_storage.contains_key(key)
310    }
311
312    fn clear(&mut self) {
313        self.storage.clear();
314        self.compressed_storage.clear();
315        self.param_index.clear();
316        self.stats = CacheStats::default();
317    }
318
319    fn size(&self) -> usize {
320        self.storage.len() + self.compressed_storage.len()
321    }
322
323    fn stats(&self) -> CacheStats {
324        self.stats.clone()
325    }
326}
327
/// Result cache insights
///
/// A point-in-time summary of the cache contents. Snapshots can be compared
/// with `==`; note that the floating-point fields follow IEEE comparison
/// rules, so a snapshot containing `NaN` is not equal to itself.
#[derive(Debug, Clone, PartialEq)]
pub struct ResultCacheInsights {
    /// Total cache entries (uncompressed plus compressed)
    pub total_entries: usize,

    /// Number of compressed entries
    pub compressed_entries: usize,

    /// Average results per entry
    pub avg_results_per_entry: f32,

    /// Compression ratio achieved: original bytes divided by stored bytes
    /// (`1.0` when no compressed data is held)
    pub compression_ratio: f32,

    /// Estimated memory usage in bytes
    pub memory_usage: usize,
}
346
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Configuration shared by every test: caching enabled, room for 100
    /// entries, one-hour TTL, TTL eviction, compression of large results on.
    fn create_test_config() -> ResultCacheConfig {
        ResultCacheConfig {
            enabled: true,
            max_size: 100,
            ttl: Duration::from_secs(3600),
            eviction_policy: super::super::EvictionPolicy::TTL,
            compress_large_results: true,
        }
    }

    /// Builds `count` synthetic results with descending scores and ranks
    /// equal to their position.
    fn create_test_results(count: usize) -> Vec<CachedSearchResult> {
        let mut results = Vec::with_capacity(count);
        for i in 0..count {
            results.push(CachedSearchResult {
                document_id: format!("doc{}", i),
                content: format!("content {}", i),
                score: 0.9 - (i as f32 * 0.01),
                rank: i,
                metadata: HashMap::new(),
            });
        }
        results
    }

    /// Fresh, empty cache built from the shared test configuration.
    fn empty_cache() -> ResultCache {
        ResultCache::new(create_test_config()).unwrap()
    }

    #[test]
    fn test_result_cache_creation() {
        let cache = empty_cache();

        assert_eq!(cache.size(), 0);
        assert_eq!(cache.param_index.len(), 0);
    }

    #[test]
    fn test_basic_caching() {
        let mut cache = empty_cache();
        let key = String::from("hash123");

        cache
            .cache_results(key.clone(), create_test_results(5), HashMap::new())
            .unwrap();

        let hit = cache.get_by_params(&key);
        assert!(hit.is_some());
        assert_eq!(hit.unwrap().len(), 5);
    }

    #[test]
    fn test_compression() {
        let mut cache = empty_cache();
        let key = String::from("hash_large");

        // 150 results is above the compression threshold.
        cache
            .cache_results(key.clone(), create_test_results(150), HashMap::new())
            .unwrap();

        // The entry must live in compressed storage only.
        assert!(cache.compressed_storage.contains_key(&key));
        assert!(!cache.storage.contains_key(&key));

        // And it must round-trip through retrieval.
        let hit = cache.get_by_params(&key);
        assert!(hit.is_some());
        assert_eq!(hit.unwrap().len(), 150);
    }

    #[test]
    fn test_invalidation() {
        let mut cache = empty_cache();
        let results = create_test_results(5);

        for key in &["user_123", "user_456", "product_789"] {
            cache
                .cache_results(key.to_string(), results.clone(), HashMap::new())
                .unwrap();
        }
        assert_eq!(cache.size(), 3);

        // Drop every user-related entry; only the product entry survives.
        cache.invalidate_pattern("user_");

        assert_eq!(cache.size(), 1);
        assert!(cache.get_by_params("product_789").is_some());
        assert!(cache.get_by_params("user_123").is_none());
    }

    #[test]
    fn test_insights() {
        let mut cache = empty_cache();

        // One regular entry and one large enough to be stored compressed.
        cache
            .cache_results("small".to_string(), create_test_results(10), HashMap::new())
            .unwrap();
        cache
            .cache_results("large".to_string(), create_test_results(150), HashMap::new())
            .unwrap();

        let insights = cache.get_insights();
        assert_eq!(insights.total_entries, 2);
        assert_eq!(insights.compressed_entries, 1);
        assert!(insights.memory_usage > 0);
    }
}