// src/cache.rs (hyperscan_tokio) - Database caching with proper key generation
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
use std::time::{Duration, Instant};

use dashmap::DashMap;
use parking_lot::Mutex;

use crate::{Database, Mode, Pattern, Platform, Result};
9
/// Cacheable pattern configuration.
///
/// Uniquely identifies a compiled database by the full set of pattern
/// definitions, the compile-mode bits, and a hash of the target platform.
/// Used as the lookup key in `DatabaseCache`.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct CacheKey {
    // Sorted by pattern id in `create_key`, so equality does not depend on
    // the order the caller supplied the patterns in.
    patterns: Vec<CachePattern>,
    // Raw bits of the compile `Mode`.
    mode: u32,
    // Hash of the optional `Platform` (tune + cpu_features); 0 when no
    // platform was given.
    platform_hash: u64,
}
17
/// Plain-data snapshot of a `Pattern`'s compile-relevant fields.
///
/// NOTE(review): this mirror exists so the cache key can derive
/// `Hash`/`Eq` — confirm `Pattern` itself lacks those impls.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
struct CachePattern {
    expression: String,
    id: u32,
    // Raw bits of the pattern's flags (bitflags-style type).
    flags: u32,
    min_offset: Option<u64>,
    max_offset: Option<u64>,
    min_length: Option<u64>,
    edit_distance: Option<u32>,
    hamming_distance: Option<u32>,
}
29
impl From<&Pattern> for CachePattern {
    /// Snapshot the hashable, compile-relevant fields of a `Pattern`.
    fn from(p: &Pattern) -> Self {
        Self {
            expression: p.expression.clone(),
            id: p.id,
            // Store the raw flag bits so the snapshot stays `Hash`/`Eq`.
            flags: p.flags.bits(),
            min_offset: p.min_offset,
            max_offset: p.max_offset,
            min_length: p.min_length,
            edit_distance: p.edit_distance,
            hamming_distance: p.hamming_distance,
        }
    }
}
44
/// Cache entry with the metadata needed for TTL expiry and LRU eviction.
#[derive(Clone)]
struct CacheEntry {
    // The compiled database, shared with callers via `Arc`.
    database: Arc<Database>,
    // Insertion time; compared against the cache TTL in `get`.
    created_at: Instant,
    // Times this entry has been returned (the insert counts as the first).
    access_count: u64,
    // Last hit time; used to pick the LRU victim during eviction.
    last_accessed: Instant,
    // Size reported by `Database::size()`; used for memory accounting.
    size_bytes: usize,
}
54
/// Thread-safe database cache with proper key generation.
///
/// Bounded both by entry count (`max_size`) and by the total size of the
/// cached compiled databases (`max_memory_bytes`), with optional
/// time-to-live expiry.
pub struct DatabaseCache {
    // Concurrent map of compiled databases keyed by `CacheKey`.
    cache: Arc<DashMap<CacheKey, CacheEntry>>,
    // Maximum number of entries before LRU eviction kicks in.
    max_size: usize,
    // Upper bound on the sum of cached database sizes, in bytes.
    max_memory_bytes: usize,
    // Optional time-to-live; `None` disables expiry.
    ttl: Option<Duration>,
    // Running total of cached bytes, guarded separately from the map.
    current_memory_usage: Arc<Mutex<usize>>,
}
63
/// Builder for `DatabaseCache`.
///
/// Fields left unset fall back to defaults in `build()`: 1000 entries,
/// a 512 MiB memory limit, and no TTL.
#[derive(Default)]
pub struct DatabaseCacheBuilder {
    max_size: Option<usize>,
    max_memory_bytes: Option<usize>,
    ttl: Option<Duration>,
}
71
72impl DatabaseCacheBuilder {
73    pub fn max_size(mut self, size: usize) -> Self {
74        self.max_size = Some(size);
75        self
76    }
77    
78    pub fn max_memory_bytes(mut self, bytes: usize) -> Self {
79        self.max_memory_bytes = Some(bytes);
80        self
81    }
82    
83    pub fn ttl(mut self, duration: Duration) -> Self {
84        self.ttl = Some(duration);
85        self
86    }
87    
88    pub fn build(self) -> DatabaseCache {
89        DatabaseCache {
90            cache: Arc::new(DashMap::new()),
91            max_size: self.max_size.unwrap_or(1000),
92            max_memory_bytes: self.max_memory_bytes.unwrap_or(512 * 1024 * 1024),
93            ttl: self.ttl,
94            current_memory_usage: Arc::new(Mutex::new(0)),
95        }
96    }
97}
98
99impl DatabaseCache {
100    pub fn new() -> Self {
101        Self {
102            cache: Arc::new(DashMap::new()),
103            max_size: 1000,
104            max_memory_bytes: 512 * 1024 * 1024, // 512MB default
105            ttl: Some(Duration::from_secs(3600)), // 1 hour default
106            current_memory_usage: Arc::new(Mutex::new(0)),
107        }
108    }
109    
110    /// Create a cache with custom settings
111    pub fn builder() -> DatabaseCacheBuilder {
112        DatabaseCacheBuilder::default()
113    }
114    
115    /// Create cache key from patterns and configuration
116    pub fn create_key(patterns: &[Pattern], mode: Mode, platform: Option<&Platform>) -> CacheKey {
117        let mut cache_patterns: Vec<CachePattern> = patterns
118            .iter()
119            .map(|p| p.into())
120            .collect();
121        
122        // Sort for consistent hashing
123        cache_patterns.sort_by(|a, b| a.id.cmp(&b.id));
124        
125        let platform_hash = platform
126            .map(|p| {
127                let mut hasher = DefaultHasher::new();
128                p.tune.hash(&mut hasher);
129                p.cpu_features.hash(&mut hasher);
130                hasher.finish()
131            })
132            .unwrap_or(0);
133        
134        CacheKey {
135            patterns: cache_patterns,
136            mode: mode.bits(),
137            platform_hash,
138        }
139    }
140    
141    /// Get or compile a database
142    pub async fn get_or_compile<F>(
143        &self,
144        key: CacheKey,
145        compile_fn: F,
146    ) -> Result<Arc<Database>>
147    where
148        F: FnOnce() -> Result<Database>,
149    {
150        // Check cache first
151        if let Some(db) = self.get(&key) {
152            return Ok(db);
153        }
154        
155        // Compile if not found
156        let db = compile_fn()?;
157        let db_arc = Arc::new(db);
158        
159        // Insert into cache
160        self.insert(key, db_arc.clone());
161        
162        Ok(db_arc)
163    }
164    
165    /// Get database by key
166    pub fn get(&self, key: &CacheKey) -> Option<Arc<Database>> {
167        let mut should_remove = false;
168        let result = self.cache.get_mut(key).and_then(|mut entry| {
169            // Check TTL
170            if let Some(ttl) = self.ttl {
171                if entry.created_at.elapsed() > ttl {
172                    should_remove = true;
173                    return None;
174                }
175            }
176            
177            // Update access metadata
178            entry.access_count += 1;
179            entry.last_accessed = Instant::now();
180            
181            Some(entry.database.clone())
182        });
183        
184        if should_remove {
185            if let Some((_, entry)) = self.cache.remove(key) {
186                *self.current_memory_usage.lock() -= entry.size_bytes;
187            }
188        }
189        
190        result
191    }
192    
193    /// Insert a compiled database
194    pub fn insert(&self, key: CacheKey, database: Arc<Database>) {
195        let size = database.size().unwrap_or(0);
196        
197        // Check memory limit
198        let mut current_usage = self.current_memory_usage.lock();
199        if *current_usage + size > self.max_memory_bytes {
200            drop(current_usage);
201            self.evict_until_space(size);
202        } else {
203            *current_usage += size;
204        }
205        
206        // Check item count limit
207        if self.cache.len() >= self.max_size {
208            self.evict_lru();
209        }
210        
211        let entry = CacheEntry {
212            database,
213            created_at: Instant::now(),
214            access_count: 1,
215            last_accessed: Instant::now(),
216            size_bytes: size,
217        };
218        
219        self.cache.insert(key, entry);
220    }
221    
222    /// Clear the cache
223    pub fn clear(&self) {
224        self.cache.clear();
225        *self.current_memory_usage.lock() = 0;
226    }
227    
228    /// Get cache statistics
229    pub fn stats(&self) -> CacheStats {
230        let mut total_hits = 0u64;
231        let mut total_size = 0usize;
232        
233        for entry in self.cache.iter() {
234            total_hits += entry.access_count;
235            total_size += entry.size_bytes;
236        }
237        
238        CacheStats {
239            entries: self.cache.len(),
240            capacity: self.max_size,
241            memory_usage_bytes: *self.current_memory_usage.lock(),
242            memory_limit_bytes: self.max_memory_bytes,
243            total_hits,
244        }
245    }
246    
247    /// Evict least recently used entries until we have space
248    fn evict_until_space(&self, needed_bytes: usize) {
249        let mut entries: Vec<_> = self.cache
250            .iter()
251            .map(|entry| (entry.key().clone(), entry.last_accessed, entry.size_bytes))
252            .collect();
253        
254        // Sort by last accessed time (oldest first)
255        entries.sort_by_key(|(_, last_accessed, _)| *last_accessed);
256        
257        let mut freed = 0;
258        let mut memory_usage = self.current_memory_usage.lock();
259        
260        for (key, _, size) in entries {
261            // Check if we still need to free more space
262            if freed >= needed_bytes && *memory_usage + needed_bytes <= self.max_memory_bytes {
263                break;
264            }
265            
266            if let Some((_, entry)) = self.cache.remove(&key) {
267                freed += entry.size_bytes;
268                *memory_usage = memory_usage.saturating_sub(entry.size_bytes);
269            }
270        }
271    }
272    
273    /// Evict least recently used entry
274    fn evict_lru(&self) {
275        if let Some((key, _)) = self.cache
276            .iter()
277            .min_by_key(|entry| entry.value().last_accessed)
278            .map(|entry| (entry.key().clone(), entry.value().clone()))
279        {
280            if let Some((_, entry)) = self.cache.remove(&key) {
281                *self.current_memory_usage.lock() -= entry.size_bytes;
282            }
283        }
284    }
285}
286
/// Enhanced cache statistics snapshot returned by `DatabaseCache::stats`.
#[derive(Debug, Clone)]
pub struct CacheStats {
    /// Number of entries currently cached.
    pub entries: usize,
    /// Configured maximum number of entries.
    pub capacity: usize,
    /// Tracked memory usage of cached databases, in bytes.
    pub memory_usage_bytes: usize,
    /// Configured memory limit, in bytes.
    pub memory_limit_bytes: usize,
    /// Sum of access counts across live entries (an insert counts as the
    /// first hit).
    pub total_hits: u64,
}
296
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{DatabaseBuilder, prelude::*};
    
    // Identical inputs must yield identical keys; a different compile mode
    // must change the key.
    #[test]
    fn test_cache_key_generation() {
        let patterns = vec![
            Pattern::new(r"\d+").id(1).build().unwrap(),
            Pattern::new(r"[a-z]+").id(2).flags(Flags::CASELESS).build().unwrap(),
        ];
        
        let key1 = DatabaseCache::create_key(&patterns, Mode::BLOCK, None);
        let key2 = DatabaseCache::create_key(&patterns, Mode::BLOCK, None);
        
        assert_eq!(key1, key2);
        
        // Different mode should produce different key
        let key3 = DatabaseCache::create_key(&patterns, Mode::STREAM, None);
        assert_ne!(key1, key3);
    }
    
    // Compile-on-miss, then a cache hit; the hit counter covers both the
    // insert (counted as the first hit) and the explicit `get`.
    #[tokio::test]
    async fn test_cache_functionality() {
        let cache = DatabaseCache::new();
        
        let patterns = vec![
            Pattern::new(r"\d+").id(1).build().unwrap(),
        ];
        
        let key = DatabaseCache::create_key(&patterns, Mode::BLOCK, None);
        
        // First access - compile
        let db1 = cache.get_or_compile(key.clone(), || {
            DatabaseBuilder::new()
                .add_pattern(patterns[0].clone())
                .build()
        }).await.unwrap();
        
        // Second access - should hit cache
        let db2 = cache.get(&key).unwrap();
        
        // Same underlying compiled database on both accesses.
        assert_eq!(db1.fingerprint(), db2.fingerprint());
        
        let stats = cache.stats();
        assert_eq!(stats.entries, 1);
        assert_eq!(stats.total_hits, 2); // Initial insert + one get
    }
    
    // An entry inserted with a 100 ms TTL must be gone — and its bytes
    // released from the accounting — after the TTL elapses.
    #[tokio::test]
    async fn test_ttl_expiration() {
        let cache = DatabaseCache::builder()
            .ttl(Duration::from_millis(100))
            .build();
        
        let patterns = vec![
            Pattern::new(r"\d+").id(1).build().unwrap(),
        ];
        
        let key = DatabaseCache::create_key(&patterns, Mode::BLOCK, None);
        
        // Insert into cache
        let db = cache.get_or_compile(key.clone(), || {
            DatabaseBuilder::new()
                .add_pattern(patterns[0].clone())
                .build()
        }).await.unwrap();
        
        // Should be in cache
        assert!(cache.get(&key).is_some());
        
        // Wait for TTL to expire
        tokio::time::sleep(Duration::from_millis(150)).await;
        
        // Should be expired and removed
        assert!(cache.get(&key).is_none());
        
        // Memory should be freed
        let stats = cache.stats();
        assert_eq!(stats.entries, 0);
        assert_eq!(stats.memory_usage_bytes, 0);
    }
    
    // Inserting well past a tiny memory budget should trigger eviction.
    // NOTE(review): with exact memory accounting, a single compiled
    // database larger than 1024 bytes would honestly push usage past the
    // limit — confirm typical compiled-database sizes before relying on
    // the `<= 1024` assertion.
    #[test]
    fn test_memory_limit() {
        let cache = DatabaseCache::builder()
            .max_memory_bytes(1024) // Very small limit
            .build();
        
        // Create multiple patterns
        for i in 0..10 {
            let patterns = vec![
                Pattern::new(&format!("pattern{}", i)).id(i).build().unwrap(),
            ];
            
            let key = DatabaseCache::create_key(&patterns, Mode::BLOCK, None);
            let db = DatabaseBuilder::new()
                .add_pattern(patterns[0].clone())
                .build()
                .unwrap();
            
            cache.insert(key, Arc::new(db));
        }
        
        // Should have evicted some entries to stay under memory limit
        let stats = cache.stats();
        assert!(stats.memory_usage_bytes <= 1024);
        assert!(stats.entries < 10);
    }
}