// tldr_cli/commands/daemon/salsa.rs
1//! Salsa-style incremental computation cache
2//!
3//! Implements query memoization with automatic invalidation based on input changes.
4//! Uses DashMap for thread-safe concurrent access.
5//!
6//! # Design
7//!
8//! - Query results are keyed by `(query_name, args_hash)`
9//! - Each entry tracks which inputs (files) it depends on
10//! - When an input changes, all dependent queries are invalidated
11//! - LRU eviction when cache exceeds max entries
12//!
13//! # Security Mitigations
14//!
15//! - TIGER-P2-01: Atomic writes with checksum validation for persistence
16//! - Cache size limits to prevent OOM
17
18use std::collections::hash_map::DefaultHasher;
19use std::collections::HashSet;
20use std::fs::{self, File};
21use std::hash::{Hash, Hasher};
22use std::io::{BufReader, BufWriter, Read, Write};
23use std::path::Path;
24use std::sync::atomic::{AtomicU64, Ordering};
25use std::sync::RwLock;
26use std::time::SystemTime;
27
28use dashmap::DashMap;
29use serde::{de::DeserializeOwned, Deserialize, Serialize};
30
31use super::error::{DaemonError, DaemonResult};
32use super::types::SalsaCacheStats;
33
34// =============================================================================
35// Constants
36// =============================================================================
37
/// Default maximum number of cache entries.
///
/// Exceeding this soft cap triggers LRU eviction in `maybe_evict`.
pub const DEFAULT_MAX_ENTRIES: usize = 10_000;

/// Default maximum cache size in bytes (512 MB).
///
/// Compared against the running `current_bytes` estimate, not actual RSS.
pub const DEFAULT_MAX_BYTES: usize = 512 * 1024 * 1024;

/// Magic bytes for cache file validation.
///
/// First four bytes of every persisted cache file; checked by `load_from_file`.
const CACHE_MAGIC: &[u8; 4] = b"TLDR";

/// Cache file version.
///
/// Single byte written after the magic; `load_from_file` rejects any other
/// value. Bump this whenever the on-disk layout changes.
const CACHE_VERSION: u8 = 1;
49
50// =============================================================================
51// Core Types
52// =============================================================================
53
/// Key for looking up cached query results.
///
/// Two lookups hit the same entry only when both the query name and the
/// argument hash match.
#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub struct QueryKey {
    /// Name of the query (e.g., "extract", "structure", "calls")
    pub query_name: String,
    /// Hash of the query arguments (typically produced by `hash_args`)
    pub args_hash: u64,
}
62
63impl QueryKey {
64    /// Create a new query key
65    pub fn new(query_name: impl Into<String>, args_hash: u64) -> Self {
66        Self {
67            query_name: query_name.into(),
68            args_hash,
69        }
70    }
71}
72
/// Cached query result with metadata for invalidation.
///
/// Timestamps serialize as whole seconds since the Unix epoch (see
/// `system_time_serde`), so sub-second precision is lost across a
/// save/load round trip.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheEntry {
    /// Serialized result value (JSON bytes)
    pub value: Vec<u8>,
    /// Revision number when this entry was created
    pub revision: u64,
    /// Hashes of inputs this query depends on (for invalidation tracking)
    pub input_hashes: Vec<u64>,
    /// When this entry was created
    #[serde(with = "system_time_serde")]
    pub created_at: SystemTime,
    /// Last access time (for LRU eviction)
    #[serde(with = "system_time_serde")]
    pub last_accessed: SystemTime,
}
89
90impl CacheEntry {
91    /// Create a new cache entry
92    pub fn new(value: Vec<u8>, revision: u64, input_hashes: Vec<u64>) -> Self {
93        let now = SystemTime::now();
94        Self {
95            value,
96            revision,
97            input_hashes,
98            created_at: now,
99            last_accessed: now,
100        }
101    }
102
103    /// Estimated heap bytes used by this entry (value + input_hashes + overhead).
104    pub fn estimated_bytes(&self) -> usize {
105        self.value.len()
106            + self.input_hashes.len() * std::mem::size_of::<u64>()
107            + std::mem::size_of::<Self>()
108    }
109}
110
/// Salsa-style query cache with automatic invalidation.
///
/// All maps are `DashMap`s and the counters are atomics, so the cache can be
/// shared across threads without an outer lock.
pub struct QueryCache {
    /// Cached query results: QueryKey -> CacheEntry
    entries: DashMap<QueryKey, CacheEntry>,
    /// Reverse index: input_hash -> Set of QueryKeys that depend on it
    /// (consulted by `invalidate_by_input` to find victims directly)
    dependents: DashMap<u64, HashSet<QueryKey>>,
    /// Global revision counter (incremented on any input change)
    revision: AtomicU64,
    /// Cache statistics (hits / misses / invalidations)
    stats: RwLock<SalsaCacheStats>,
    /// Maximum number of entries before eviction
    max_entries: usize,
    /// Maximum total bytes before eviction
    max_bytes: usize,
    /// Current total estimated bytes across all entries
    /// (kept in sync by insert/invalidate/clear via `estimated_bytes`)
    current_bytes: AtomicU64,
}
128
129// =============================================================================
130// QueryCache Implementation
131// =============================================================================
132
133impl QueryCache {
134    /// Create a new query cache with the given max entries limit
135    pub fn new(max_entries: usize) -> Self {
136        Self::with_limits(max_entries, DEFAULT_MAX_BYTES)
137    }
138
139    /// Create a cache with explicit entry and byte limits
140    pub fn with_limits(max_entries: usize, max_bytes: usize) -> Self {
141        Self {
142            entries: DashMap::new(),
143            dependents: DashMap::new(),
144            revision: AtomicU64::new(0),
145            stats: RwLock::new(SalsaCacheStats::default()),
146            max_entries,
147            max_bytes,
148            current_bytes: AtomicU64::new(0),
149        }
150    }
151
152    /// Create a cache with default settings
153    pub fn with_defaults() -> Self {
154        Self::new(DEFAULT_MAX_ENTRIES)
155    }
156
157    /// Get a cached value if it exists and is valid
158    ///
159    /// Returns `None` if:
160    /// - The key doesn't exist in cache
161    /// - Deserialization fails
162    pub fn get<T: DeserializeOwned>(&self, key: &QueryKey) -> Option<T> {
163        if let Some(mut entry) = self.entries.get_mut(key) {
164            // Update last accessed time
165            entry.last_accessed = SystemTime::now();
166
167            // Record hit
168            if let Ok(mut stats) = self.stats.write() {
169                stats.hits += 1;
170            }
171
172            // Try to deserialize the value
173            match serde_json::from_slice(&entry.value) {
174                Ok(value) => Some(value),
175                Err(_) => {
176                    // Corrupted entry - remove it
177                    drop(entry);
178                    self.entries.remove(key);
179                    None
180                }
181            }
182        } else {
183            // Record miss
184            if let Ok(mut stats) = self.stats.write() {
185                stats.misses += 1;
186            }
187            None
188        }
189    }
190
191    /// Insert a value into the cache
192    ///
193    /// The `input_hashes` are used for invalidation - when any of these
194    /// inputs change, this entry will be invalidated.
195    pub fn insert<T: Serialize>(&self, key: QueryKey, value: &T, input_hashes: Vec<u64>) {
196        // Serialize the value
197        let serialized = match serde_json::to_vec(value) {
198            Ok(v) => v,
199            Err(_) => return, // Can't serialize - skip caching
200        };
201
202        let revision = self.revision.load(Ordering::Acquire);
203        let entry = CacheEntry::new(serialized, revision, input_hashes.clone());
204
205        // Track dependencies for invalidation
206        for &hash in &input_hashes {
207            self.dependents
208                .entry(hash)
209                .or_default()
210                .insert(key.clone());
211        }
212
213        // Track bytes: subtract old entry if replacing
214        if let Some(old) = self.entries.get(&key) {
215            self.current_bytes
216                .fetch_sub(old.estimated_bytes() as u64, Ordering::Relaxed);
217        }
218
219        // Track bytes for new entry
220        self.current_bytes
221            .fetch_add(entry.estimated_bytes() as u64, Ordering::Relaxed);
222
223        // Insert the entry
224        self.entries.insert(key, entry);
225
226        // Evict if over entry count OR byte limit
227        self.maybe_evict();
228    }
229
230    /// Invalidate all cache entries that depend on the given input
231    ///
232    /// Returns the number of entries invalidated.
233    pub fn invalidate_by_input(&self, input_hash: u64) -> usize {
234        // Increment global revision
235        self.revision.fetch_add(1, Ordering::Release);
236
237        let mut invalidated = 0;
238
239        // Remove all entries that depend on this input
240        if let Some((_, keys)) = self.dependents.remove(&input_hash) {
241            for key in keys {
242                if let Some((_, entry)) = self.entries.remove(&key) {
243                    self.current_bytes
244                        .fetch_sub(entry.estimated_bytes() as u64, Ordering::Relaxed);
245                    invalidated += 1;
246                }
247            }
248        }
249
250        // Update stats
251        if let Ok(mut stats) = self.stats.write() {
252            stats.invalidations += invalidated as u64;
253        }
254
255        invalidated
256    }
257
258    /// Invalidate a cache entry by key
259    ///
260    /// Returns true if an entry was removed.
261    pub fn invalidate(&self, key: &QueryKey) -> bool {
262        if let Some((_, entry)) = self.entries.remove(key) {
263            // Track bytes removed
264            self.current_bytes
265                .fetch_sub(entry.estimated_bytes() as u64, Ordering::Relaxed);
266
267            // Clean up dependent tracking
268            for hash in entry.input_hashes {
269                if let Some(mut deps) = self.dependents.get_mut(&hash) {
270                    deps.remove(key);
271                }
272            }
273
274            if let Ok(mut stats) = self.stats.write() {
275                stats.invalidations += 1;
276            }
277
278            true
279        } else {
280            false
281        }
282    }
283
284    /// Get cache statistics
285    pub fn stats(&self) -> SalsaCacheStats {
286        self.stats.read().map(|s| s.clone()).unwrap_or_default()
287    }
288
289    /// Get current number of entries
290    pub fn len(&self) -> usize {
291        self.entries.len()
292    }
293
294    /// Check if cache is empty
295    pub fn is_empty(&self) -> bool {
296        self.entries.is_empty()
297    }
298
299    /// Get current revision number
300    pub fn revision(&self) -> u64 {
301        self.revision.load(Ordering::Acquire)
302    }
303
304    /// Clear all cache entries
305    pub fn clear(&self) {
306        self.entries.clear();
307        self.dependents.clear();
308        self.revision.store(0, Ordering::Release);
309        self.current_bytes.store(0, Ordering::Relaxed);
310
311        if let Ok(mut stats) = self.stats.write() {
312            *stats = SalsaCacheStats::default();
313        }
314    }
315
316    /// Total estimated bytes currently used by cached entries
317    pub fn total_bytes(&self) -> usize {
318        self.current_bytes.load(Ordering::Relaxed) as usize
319    }
320
321    /// Evict oldest entries if cache exceeds entry count or byte limit
322    fn maybe_evict(&self) {
323        let over_entries = self.entries.len() > self.max_entries;
324        let over_bytes = self.total_bytes() > self.max_bytes;
325
326        if !over_entries && !over_bytes {
327            return;
328        }
329
330        // Collect entries with their last access times and sizes
331        let mut entries_by_time: Vec<(QueryKey, SystemTime, usize)> = self
332            .entries
333            .iter()
334            .map(|e| {
335                (
336                    e.key().clone(),
337                    e.value().last_accessed,
338                    e.value().estimated_bytes(),
339                )
340            })
341            .collect();
342
343        // Sort by last accessed time (oldest first)
344        entries_by_time.sort_by(|a, b| a.1.cmp(&b.1));
345
346        // Evict oldest entries until we're under BOTH limits
347        for (key, _, _) in entries_by_time {
348            if self.entries.len() <= self.max_entries && self.total_bytes() <= self.max_bytes {
349                break;
350            }
351            self.invalidate(&key);
352        }
353    }
354
355    // =========================================================================
356    // Persistence
357    // =========================================================================
358
359    /// Save cache to a file with atomic write and checksum validation
360    ///
361    /// TIGER-P2-01: Uses write-to-temp + rename pattern for atomic writes.
362    pub fn save_to_file(&self, path: &Path) -> DaemonResult<()> {
363        // Collect entries for serialization
364        let entries: Vec<(QueryKey, CacheEntry)> = self
365            .entries
366            .iter()
367            .map(|e| (e.key().clone(), e.value().clone()))
368            .collect();
369
370        let dependents: Vec<(u64, Vec<QueryKey>)> = self
371            .dependents
372            .iter()
373            .map(|e| (*e.key(), e.value().iter().cloned().collect()))
374            .collect();
375
376        let stats = self.stats();
377        let revision = self.revision();
378
379        let cache_data = CacheFileData {
380            entries,
381            dependents,
382            stats,
383            revision,
384        };
385
386        // Serialize to JSON
387        let json = serde_json::to_vec(&cache_data)?;
388
389        // Calculate checksum
390        let checksum = calculate_checksum(&json);
391
392        // Write to temp file first (atomic write pattern)
393        let temp_path = path.with_extension("tmp");
394        {
395            let file = File::create(&temp_path)?;
396            let mut writer = BufWriter::new(file);
397
398            // Write header: magic + version + checksum
399            writer.write_all(CACHE_MAGIC)?;
400            writer.write_all(&[CACHE_VERSION])?;
401            writer.write_all(&checksum.to_le_bytes())?;
402            writer.write_all(&json)?;
403            writer.flush()?;
404        }
405
406        // Atomic rename
407        fs::rename(&temp_path, path)?;
408
409        Ok(())
410    }
411
412    /// Load cache from a file with checksum validation
413    pub fn load_from_file(path: &Path) -> DaemonResult<Self> {
414        let file = File::open(path)?;
415        let mut reader = BufReader::new(file);
416
417        // Read and validate header
418        let mut magic = [0u8; 4];
419        reader.read_exact(&mut magic)?;
420        if &magic != CACHE_MAGIC {
421            return Err(DaemonError::InvalidMessage(
422                "invalid cache file magic".to_string(),
423            ));
424        }
425
426        let mut version = [0u8; 1];
427        reader.read_exact(&mut version)?;
428        if version[0] != CACHE_VERSION {
429            return Err(DaemonError::InvalidMessage(format!(
430                "unsupported cache version: {}",
431                version[0]
432            )));
433        }
434
435        let mut checksum_bytes = [0u8; 8];
436        reader.read_exact(&mut checksum_bytes)?;
437        let stored_checksum = u64::from_le_bytes(checksum_bytes);
438
439        // Read remaining data
440        let mut data = Vec::new();
441        reader.read_to_end(&mut data)?;
442
443        // Validate checksum
444        let actual_checksum = calculate_checksum(&data);
445        if stored_checksum != actual_checksum {
446            return Err(DaemonError::InvalidMessage(
447                "cache file checksum mismatch".to_string(),
448            ));
449        }
450
451        // Deserialize
452        let cache_data: CacheFileData = serde_json::from_slice(&data)?;
453
454        // Reconstruct cache
455        let cache = Self::with_defaults();
456
457        let mut total_bytes: u64 = 0;
458        for (key, entry) in cache_data.entries {
459            total_bytes += entry.estimated_bytes() as u64;
460            cache.entries.insert(key, entry);
461        }
462        cache.current_bytes.store(total_bytes, Ordering::Relaxed);
463
464        for (hash, keys) in cache_data.dependents {
465            cache.dependents.insert(hash, keys.into_iter().collect());
466        }
467
468        cache.revision.store(cache_data.revision, Ordering::Release);
469
470        if let Ok(mut stats) = cache.stats.write() {
471            *stats = cache_data.stats;
472        }
473
474        Ok(cache)
475    }
476}
477
478impl Default for QueryCache {
479    fn default() -> Self {
480        Self::with_defaults()
481    }
482}
483
484// =============================================================================
485// Helper Types
486// =============================================================================
487
/// Serializable cache file data
///
/// JSON payload written after the binary header (magic, version, checksum)
/// by `save_to_file` and read back by `load_from_file`.
#[derive(Serialize, Deserialize)]
struct CacheFileData {
    // All cached entries as (key, entry) pairs
    entries: Vec<(QueryKey, CacheEntry)>,
    // Reverse dependency index as (input_hash, dependent keys) pairs
    dependents: Vec<(u64, Vec<QueryKey>)>,
    // Hit/miss/invalidation counters at save time
    stats: SalsaCacheStats,
    // Global revision counter at save time
    revision: u64,
}
496
/// Calculate a checksum for data validation.
///
/// Uses 64-bit FNV-1a, a fixed, well-specified hash, instead of the previous
/// `std` `DefaultHasher`: the standard hasher's output is explicitly not
/// guaranteed to be stable across Rust releases, which would make cache
/// files persisted by one toolchain appear corrupt under another. Cache
/// files written with the old checksum will fail validation once and be
/// rebuilt, which is acceptable for a cache.
fn calculate_checksum(data: &[u8]) -> u64 {
    // FNV-1a 64-bit parameters (Fowler–Noll–Vo reference values)
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    let mut hash = FNV_OFFSET_BASIS;
    for &byte in data {
        hash ^= u64::from(byte);
        hash = hash.wrapping_mul(FNV_PRIME);
    }
    hash
}
503
504/// Serde module for SystemTime
505mod system_time_serde {
506    use serde::{Deserialize, Deserializer, Serialize, Serializer};
507    use std::time::{Duration, SystemTime, UNIX_EPOCH};
508
509    pub fn serialize<S>(time: &SystemTime, serializer: S) -> Result<S::Ok, S::Error>
510    where
511        S: Serializer,
512    {
513        let duration = time.duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO);
514        duration.as_secs().serialize(serializer)
515    }
516
517    pub fn deserialize<'de, D>(deserializer: D) -> Result<SystemTime, D::Error>
518    where
519        D: Deserializer<'de>,
520    {
521        let secs = u64::deserialize(deserializer)?;
522        Ok(UNIX_EPOCH + Duration::from_secs(secs))
523    }
524}
525
526// =============================================================================
527// Utility Functions
528// =============================================================================
529
/// Hash arguments for cache key generation.
///
/// Generalized to `T: ?Sized` so unsized values (`str`, `Path`, slices) can
/// be hashed directly without an extra reference level; existing callers
/// passing sized types are unaffected.
///
/// Note: uses `DefaultHasher`, so the result is deterministic within a
/// process but not guaranteed stable across Rust releases — fine for
/// in-memory keys, never for persisted checksums.
pub fn hash_args<T: Hash + ?Sized>(args: &T) -> u64 {
    let mut hasher = DefaultHasher::new();
    args.hash(&mut hasher);
    hasher.finish()
}
536
/// Hash a file path for input tracking.
///
/// Deterministic within a process; used as the key of the `dependents`
/// reverse index, not for persisted checksums.
pub fn hash_path(path: &Path) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(path, &mut state);
    state.finish()
}
543
544// =============================================================================
545// Tests
546// =============================================================================
547
548#[cfg(test)]
549mod tests {
550    use super::*;
551    use tempfile::tempdir;
552
553    #[test]
554    fn test_query_cache_new() {
555        let cache = QueryCache::new(100);
556        assert_eq!(cache.max_entries, 100);
557        assert!(cache.is_empty());
558        assert_eq!(cache.revision(), 0);
559    }
560
561    #[test]
562    fn test_query_cache_insert_and_get() {
563        let cache = QueryCache::new(100);
564        let key = QueryKey::new("test", 12345);
565        let value = vec!["hello", "world"];
566
567        cache.insert(key.clone(), &value, vec![]);
568
569        let result: Option<Vec<String>> = cache.get(&key);
570        assert!(result.is_some());
571        assert_eq!(result.unwrap(), vec!["hello", "world"]);
572    }
573
574    #[test]
575    fn test_query_cache_miss() {
576        let cache = QueryCache::new(100);
577        let key = QueryKey::new("nonexistent", 99999);
578
579        let result: Option<String> = cache.get(&key);
580        assert!(result.is_none());
581
582        let stats = cache.stats();
583        assert_eq!(stats.misses, 1);
584        assert_eq!(stats.hits, 0);
585    }
586
587    #[test]
588    fn test_query_cache_hit_tracking() {
589        let cache = QueryCache::new(100);
590        let key = QueryKey::new("test", 12345);
591        cache.insert(key.clone(), &"value", vec![]);
592
593        // First get - hit
594        let _: Option<String> = cache.get(&key);
595        // Second get - hit
596        let _: Option<String> = cache.get(&key);
597
598        let stats = cache.stats();
599        assert_eq!(stats.hits, 2);
600    }
601
602    #[test]
603    fn test_query_cache_invalidate_by_input() {
604        let cache = QueryCache::new(100);
605        let input_hash = hash_path(Path::new("/test/file.rs"));
606
607        // Insert entries that depend on the input
608        let key1 = QueryKey::new("query1", 1);
609        let key2 = QueryKey::new("query2", 2);
610        let key3 = QueryKey::new("query3", 3); // No dependency
611
612        cache.insert(key1.clone(), &"value1", vec![input_hash]);
613        cache.insert(key2.clone(), &"value2", vec![input_hash]);
614        cache.insert(key3.clone(), &"value3", vec![]);
615
616        assert_eq!(cache.len(), 3);
617
618        // Invalidate by input
619        let invalidated = cache.invalidate_by_input(input_hash);
620        assert_eq!(invalidated, 2);
621        assert_eq!(cache.len(), 1);
622
623        // key3 should still be accessible
624        let result: Option<String> = cache.get(&key3);
625        assert!(result.is_some());
626
627        // key1 and key2 should be gone
628        let result: Option<String> = cache.get(&key1);
629        assert!(result.is_none());
630    }
631
632    #[test]
633    fn test_query_cache_invalidation_stats() {
634        let cache = QueryCache::new(100);
635        let key = QueryKey::new("test", 1);
636        cache.insert(key.clone(), &"value", vec![12345]);
637
638        cache.invalidate_by_input(12345);
639
640        let stats = cache.stats();
641        assert_eq!(stats.invalidations, 1);
642    }
643
644    #[test]
645    fn test_query_cache_clear() {
646        let cache = QueryCache::new(100);
647
648        // Insert some entries
649        cache.insert(QueryKey::new("q1", 1), &"v1", vec![]);
650        cache.insert(QueryKey::new("q2", 2), &"v2", vec![]);
651
652        assert_eq!(cache.len(), 2);
653
654        cache.clear();
655
656        assert!(cache.is_empty());
657        assert_eq!(cache.revision(), 0);
658    }
659
660    #[test]
661    fn test_query_cache_lru_eviction() {
662        let cache = QueryCache::new(3); // Max 3 entries
663
664        // Insert 4 entries
665        cache.insert(QueryKey::new("q1", 1), &"v1", vec![]);
666        std::thread::sleep(std::time::Duration::from_millis(10));
667        cache.insert(QueryKey::new("q2", 2), &"v2", vec![]);
668        std::thread::sleep(std::time::Duration::from_millis(10));
669        cache.insert(QueryKey::new("q3", 3), &"v3", vec![]);
670        std::thread::sleep(std::time::Duration::from_millis(10));
671
672        // Access q1 to make it recently used
673        let _: Option<String> = cache.get(&QueryKey::new("q1", 1));
674        std::thread::sleep(std::time::Duration::from_millis(10));
675
676        // Insert q4 - should evict q2 (oldest accessed)
677        cache.insert(QueryKey::new("q4", 4), &"v4", vec![]);
678
679        assert!(cache.len() <= 3);
680
681        // q1 should still exist (was accessed recently)
682        let result: Option<String> = cache.get(&QueryKey::new("q1", 1));
683        assert!(result.is_some());
684    }
685
686    #[test]
687    fn test_query_cache_persistence() {
688        let dir = tempdir().unwrap();
689        let cache_path = dir.path().join("test_cache.bin");
690
691        // Create and populate cache
692        let cache = QueryCache::new(100);
693        cache.insert(QueryKey::new("test", 12345), &"hello world", vec![1, 2, 3]);
694        cache.insert(QueryKey::new("test2", 67890), &vec![1, 2, 3], vec![]);
695
696        // Save to file
697        cache.save_to_file(&cache_path).unwrap();
698
699        // Load from file
700        let loaded = QueryCache::load_from_file(&cache_path).unwrap();
701
702        // Verify contents
703        assert_eq!(loaded.len(), 2);
704
705        let result: Option<String> = loaded.get(&QueryKey::new("test", 12345));
706        assert_eq!(result, Some("hello world".to_string()));
707
708        let result: Option<Vec<i32>> = loaded.get(&QueryKey::new("test2", 67890));
709        assert_eq!(result, Some(vec![1, 2, 3]));
710    }
711
712    #[test]
713    fn test_query_cache_persistence_checksum_validation() {
714        let dir = tempdir().unwrap();
715        let cache_path = dir.path().join("test_cache.bin");
716
717        // Create and save cache
718        let cache = QueryCache::new(100);
719        cache.insert(QueryKey::new("test", 1), &"value", vec![]);
720        cache.save_to_file(&cache_path).unwrap();
721
722        // Corrupt the file
723        let mut data = fs::read(&cache_path).unwrap();
724        if data.len() > 20 {
725            data[20] ^= 0xFF; // Flip some bits
726        }
727        fs::write(&cache_path, data).unwrap();
728
729        // Loading should fail due to checksum mismatch
730        let result = QueryCache::load_from_file(&cache_path);
731        assert!(result.is_err());
732    }
733
734    #[test]
735    fn test_hash_args() {
736        let args1 = ("query", "/path/to/file.rs", 42);
737        let args2 = ("query", "/path/to/file.rs", 42);
738        let args3 = ("query", "/path/to/other.rs", 42);
739
740        assert_eq!(hash_args(&args1), hash_args(&args2));
741        assert_ne!(hash_args(&args1), hash_args(&args3));
742    }
743
744    #[test]
745    fn test_hash_path() {
746        let path1 = Path::new("/foo/bar.rs");
747        let path2 = Path::new("/foo/bar.rs");
748        let path3 = Path::new("/foo/baz.rs");
749
750        assert_eq!(hash_path(path1), hash_path(path2));
751        assert_ne!(hash_path(path1), hash_path(path3));
752    }
753
754    #[test]
755    fn test_query_key_equality() {
756        let key1 = QueryKey::new("test", 12345);
757        let key2 = QueryKey::new("test", 12345);
758        let key3 = QueryKey::new("test", 99999);
759        let key4 = QueryKey::new("other", 12345);
760
761        assert_eq!(key1, key2);
762        assert_ne!(key1, key3);
763        assert_ne!(key1, key4);
764    }
765
766    #[test]
767    fn test_cache_entry_creation() {
768        let entry = CacheEntry::new(vec![1, 2, 3], 5, vec![100, 200]);
769
770        assert_eq!(entry.value, vec![1, 2, 3]);
771        assert_eq!(entry.revision, 5);
772        assert_eq!(entry.input_hashes, vec![100, 200]);
773        assert!(entry.created_at <= SystemTime::now());
774        assert!(entry.last_accessed <= SystemTime::now());
775    }
776
777    #[test]
778    fn test_stats_hit_rate_calculation() {
779        let cache = QueryCache::new(100);
780
781        // No queries yet
782        let stats = cache.stats();
783        assert_eq!(stats.hit_rate(), 0.0);
784
785        // Insert and query
786        cache.insert(QueryKey::new("test", 1), &"value", vec![]);
787        let _: Option<String> = cache.get(&QueryKey::new("test", 1)); // hit
788        let _: Option<String> = cache.get(&QueryKey::new("test", 2)); // miss
789        let _: Option<String> = cache.get(&QueryKey::new("test", 1)); // hit
790
791        let stats = cache.stats();
792        assert_eq!(stats.hits, 2);
793        assert_eq!(stats.misses, 1);
794        // hit_rate = 2 / 3 * 100 = 66.67
795        assert!((stats.hit_rate() - 66.67).abs() < 0.1);
796    }
797
798    #[test]
799    fn test_revision_increments_on_invalidation() {
800        let cache = QueryCache::new(100);
801        assert_eq!(cache.revision(), 0);
802
803        cache.invalidate_by_input(12345);
804        assert_eq!(cache.revision(), 1);
805
806        cache.invalidate_by_input(67890);
807        assert_eq!(cache.revision(), 2);
808    }
809
810    #[test]
811    fn test_multiple_entries_same_input() {
812        let cache = QueryCache::new(100);
813        let shared_input = 12345u64;
814
815        // Multiple queries depend on the same input
816        cache.insert(QueryKey::new("q1", 1), &"v1", vec![shared_input]);
817        cache.insert(QueryKey::new("q2", 2), &"v2", vec![shared_input]);
818        cache.insert(QueryKey::new("q3", 3), &"v3", vec![shared_input]);
819
820        assert_eq!(cache.len(), 3);
821
822        // Invalidating the shared input should remove all three
823        let count = cache.invalidate_by_input(shared_input);
824        assert_eq!(count, 3);
825        assert!(cache.is_empty());
826    }
827
828    #[test]
829    fn test_entry_with_multiple_inputs() {
830        let cache = QueryCache::new(100);
831        let input1 = 111u64;
832        let input2 = 222u64;
833
834        // Entry depends on multiple inputs
835        cache.insert(QueryKey::new("q1", 1), &"v1", vec![input1, input2]);
836
837        // Invalidating either input should remove the entry
838        assert_eq!(cache.len(), 1);
839        cache.invalidate_by_input(input1);
840        assert!(cache.is_empty());
841    }
842
843    // =========================================================================
844    // Memory-bounded cache tests
845    // =========================================================================
846
847    #[test]
848    fn test_total_bytes_tracking() {
849        let cache = QueryCache::new(100);
850        assert_eq!(cache.total_bytes(), 0);
851
852        // Insert a value and check bytes increased
853        cache.insert(QueryKey::new("q1", 1), &"hello", vec![]);
854        let bytes_after_one = cache.total_bytes();
855        assert!(bytes_after_one > 0, "total_bytes should increase after insert");
856
857        // Insert another and check it increased further
858        cache.insert(QueryKey::new("q2", 2), &"world", vec![]);
859        let bytes_after_two = cache.total_bytes();
860        assert!(
861            bytes_after_two > bytes_after_one,
862            "total_bytes should increase with more entries"
863        );
864
865        // Clear and check it resets
866        cache.clear();
867        assert_eq!(cache.total_bytes(), 0);
868    }
869
870    #[test]
871    fn test_bytes_decrease_on_invalidate() {
872        let cache = QueryCache::new(100);
873        cache.insert(QueryKey::new("q1", 1), &"value1", vec![]);
874        cache.insert(QueryKey::new("q2", 2), &"value2", vec![]);
875        let bytes_before = cache.total_bytes();
876
877        cache.invalidate(&QueryKey::new("q1", 1));
878        let bytes_after = cache.total_bytes();
879        assert!(
880            bytes_after < bytes_before,
881            "total_bytes should decrease after invalidation"
882        );
883    }
884
885    #[test]
886    fn test_bytes_decrease_on_invalidate_by_input() {
887        let cache = QueryCache::new(100);
888        let input_hash = 42u64;
889
890        cache.insert(QueryKey::new("q1", 1), &"value1", vec![input_hash]);
891        cache.insert(QueryKey::new("q2", 2), &"value2", vec![input_hash]);
892        let bytes_before = cache.total_bytes();
893        assert!(bytes_before > 0);
894
895        cache.invalidate_by_input(input_hash);
896        assert_eq!(
897            cache.total_bytes(),
898            0,
899            "total_bytes should be 0 after all entries invalidated"
900        );
901    }
902
903    #[test]
904    fn test_byte_limit_eviction() {
905        // Set a very small byte limit (1 KB)
906        let cache = QueryCache::with_limits(10_000, 1024);
907
908        // Insert entries until we exceed the byte limit
909        // Each entry with a 200-byte payload
910        let payload = "x".repeat(200);
911        for i in 0..20 {
912            cache.insert(QueryKey::new("q", i), &payload, vec![]);
913        }
914
915        // Cache should have evicted to stay under 1 KB
916        assert!(
917            cache.total_bytes() <= 1024,
918            "total_bytes ({}) should be <= 1024 after eviction",
919            cache.total_bytes()
920        );
921        assert!(
922            cache.len() < 20,
923            "entry count ({}) should be < 20 after byte-based eviction",
924            cache.len()
925        );
926    }
927
928    #[test]
929    fn test_large_entry_evicts_many_small() {
930        // 2 KB limit
931        let cache = QueryCache::with_limits(10_000, 2048);
932
933        // Insert 10 small entries (~50 bytes each)
934        for i in 0..10 {
935            cache.insert(QueryKey::new("small", i), &"tiny", vec![]);
936        }
937        let count_before = cache.len();
938        assert_eq!(count_before, 10);
939
940        // Insert one large entry (~1500 bytes)
941        let big_payload = "x".repeat(1500);
942        cache.insert(QueryKey::new("big", 0), &big_payload, vec![]);
943
944        // Should have evicted some small entries to make room
945        assert!(
946            cache.total_bytes() <= 2048,
947            "total_bytes ({}) should be <= 2048",
948            cache.total_bytes()
949        );
950        // The big entry should still be present (most recently inserted)
951        let result: Option<String> = cache.get(&QueryKey::new("big", 0));
952        assert!(result.is_some(), "large entry should survive eviction");
953    }
954
955    #[test]
956    fn test_byte_tracking_on_replace() {
957        let cache = QueryCache::new(100);
958
959        // Insert a small value
960        cache.insert(QueryKey::new("q1", 1), &"small", vec![]);
961        let bytes_small = cache.total_bytes();
962
963        // Replace with a large value
964        let big = "x".repeat(10_000);
965        cache.insert(QueryKey::new("q1", 1), &big, vec![]);
966        let bytes_big = cache.total_bytes();
967
968        assert!(
969            bytes_big > bytes_small,
970            "bytes should increase when replacing small with large"
971        );
972        assert_eq!(cache.len(), 1, "should still be one entry after replace");
973    }
974
975    #[test]
976    fn test_memory_bounded_cache_under_stress() {
977        // 100 KB limit
978        let cache = QueryCache::with_limits(10_000, 100 * 1024);
979
980        // Insert 1000 entries with varied sizes
981        for i in 0..1000u64 {
982            let size = ((i % 10) + 1) as usize * 100; // 100 to 1000 bytes
983            let payload = "x".repeat(size);
984            cache.insert(QueryKey::new("stress", i), &payload, vec![]);
985        }
986
987        // Cache must respect byte limit
988        assert!(
989            cache.total_bytes() <= 100 * 1024,
990            "total_bytes ({}) should be <= 102400 after stress test",
991            cache.total_bytes()
992        );
993
994        // Most recent entries should be accessible
995        let result: Option<String> = cache.get(&QueryKey::new("stress", 999));
996        assert!(result.is_some(), "most recent entry should be cached");
997    }
998
999    #[test]
1000    fn test_estimated_bytes_accuracy() {
1001        let small = CacheEntry::new(vec![1, 2, 3], 0, vec![]);
1002        let large = CacheEntry::new(vec![0u8; 10_000], 0, vec![1, 2, 3]);
1003
1004        assert!(small.estimated_bytes() < large.estimated_bytes());
1005        assert!(small.estimated_bytes() > 0);
1006        // Large entry should account for the 10K payload
1007        assert!(
1008            large.estimated_bytes() >= 10_000,
1009            "estimated_bytes ({}) should be >= payload size",
1010            large.estimated_bytes()
1011        );
1012    }
1013
1014    #[test]
1015    fn test_default_max_bytes() {
1016        let cache = QueryCache::with_defaults();
1017        assert_eq!(cache.max_bytes, DEFAULT_MAX_BYTES);
1018        assert_eq!(cache.max_bytes, 512 * 1024 * 1024); // 512 MB
1019    }
1020
1021    // =========================================================================
1022    // Property-based tests (proptest)
1023    // =========================================================================
1024
1025    mod proptest_cache {
1026        use super::*;
1027        use proptest::prelude::*;
1028
1029        /// Recompute total bytes by summing all entries — ground truth.
1030        fn recompute_bytes(cache: &QueryCache) -> usize {
1031            cache
1032                .entries
1033                .iter()
1034                .map(|e| e.value().estimated_bytes())
1035                .sum()
1036        }
1037
1038        /// Arbitrary cache operation
1039        #[derive(Debug, Clone)]
1040        enum CacheOp {
1041            Insert { key_id: u8, payload_len: usize, input_hash: u64 },
1042            InvalidateByInput(u64),
1043            InvalidateByKey(u8),
1044            Clear,
1045        }
1046
1047        fn arb_cache_op() -> impl Strategy<Value = CacheOp> {
1048            prop_oneof![
1049                (any::<u8>(), 0..2000usize, any::<u64>())
1050                    .prop_map(|(k, p, h)| CacheOp::Insert {
1051                        key_id: k,
1052                        payload_len: p,
1053                        input_hash: h % 16, // cluster hashes for overlap
1054                    }),
1055                (any::<u64>()).prop_map(|h| CacheOp::InvalidateByInput(h % 16)),
1056                (any::<u8>()).prop_map(CacheOp::InvalidateByKey),
1057                Just(CacheOp::Clear),
1058            ]
1059        }
1060
1061        proptest! {
1062            /// Invariant: tracked bytes == sum of all entry sizes after any
1063            /// sequence of insert/invalidate/clear operations.
1064            #[test]
1065            fn bytes_tracking_consistent(ops in prop::collection::vec(arb_cache_op(), 1..150)) {
1066                let cache = QueryCache::with_limits(500, 10_000_000);
1067
1068                for op in ops {
1069                    match op {
1070                        CacheOp::Insert { key_id, payload_len, input_hash } => {
1071                            let key = QueryKey::new("prop", key_id as u64);
1072                            let payload = vec![0u8; payload_len];
1073                            cache.insert(key, &payload, vec![input_hash]);
1074                        }
1075                        CacheOp::InvalidateByInput(hash) => {
1076                            cache.invalidate_by_input(hash);
1077                        }
1078                        CacheOp::InvalidateByKey(key_id) => {
1079                            let key = QueryKey::new("prop", key_id as u64);
1080                            cache.invalidate(&key);
1081                        }
1082                        CacheOp::Clear => {
1083                            cache.clear();
1084                        }
1085                    }
1086                }
1087
1088                let tracked = cache.total_bytes();
1089                let actual = recompute_bytes(&cache);
1090                prop_assert_eq!(tracked, actual,
1091                    "tracked bytes ({}) != recomputed bytes ({})", tracked, actual);
1092            }
1093
1094            /// Invariant: entry count never exceeds max_entries after operations.
1095            #[test]
1096            fn entry_count_bounded(ops in prop::collection::vec(arb_cache_op(), 1..200)) {
1097                let max = 50;
1098                let cache = QueryCache::with_limits(max, 10_000_000);
1099
1100                for op in ops {
1101                    match op {
1102                        CacheOp::Insert { key_id, payload_len, input_hash } => {
1103                            let key = QueryKey::new("prop", key_id as u64);
1104                            let payload = vec![0u8; payload_len];
1105                            cache.insert(key, &payload, vec![input_hash]);
1106                        }
1107                        CacheOp::InvalidateByInput(hash) => {
1108                            cache.invalidate_by_input(hash);
1109                        }
1110                        CacheOp::InvalidateByKey(key_id) => {
1111                            let key = QueryKey::new("prop", key_id as u64);
1112                            cache.invalidate(&key);
1113                        }
1114                        CacheOp::Clear => {
1115                            cache.clear();
1116                        }
1117                    }
1118                }
1119
1120                prop_assert!(cache.len() <= max,
1121                    "cache size {} exceeds max {}", cache.len(), max);
1122            }
1123
1124            /// Invariant: total bytes never exceeds max_bytes after operations.
1125            #[test]
1126            fn byte_limit_bounded(ops in prop::collection::vec(arb_cache_op(), 1..200)) {
1127                let max_bytes = 50_000;
1128                let cache = QueryCache::with_limits(500, max_bytes);
1129
1130                for op in ops {
1131                    match op {
1132                        CacheOp::Insert { key_id, payload_len, input_hash } => {
1133                            let key = QueryKey::new("prop", key_id as u64);
1134                            let payload = vec![0u8; payload_len];
1135                            cache.insert(key, &payload, vec![input_hash]);
1136                        }
1137                        CacheOp::InvalidateByInput(hash) => {
1138                            cache.invalidate_by_input(hash);
1139                        }
1140                        CacheOp::InvalidateByKey(key_id) => {
1141                            let key = QueryKey::new("prop", key_id as u64);
1142                            cache.invalidate(&key);
1143                        }
1144                        CacheOp::Clear => {
1145                            cache.clear();
1146                        }
1147                    }
1148                }
1149
1150                prop_assert!(cache.total_bytes() <= max_bytes,
1151                    "total bytes {} exceeds max {}", cache.total_bytes(), max_bytes);
1152            }
1153
1154            /// Invariant: after clear(), cache is empty and bytes are zero.
1155            #[test]
1156            fn clear_resets_everything(
1157                inserts in prop::collection::vec((any::<u8>(), 0..500usize), 1..50)
1158            ) {
1159                let cache = QueryCache::with_limits(500, 10_000_000);
1160
1161                for (key_id, payload_len) in inserts {
1162                    let key = QueryKey::new("prop", key_id as u64);
1163                    cache.insert(key, &vec![0u8; payload_len], vec![]);
1164                }
1165
1166                cache.clear();
1167
1168                prop_assert_eq!(cache.len(), 0);
1169                prop_assert_eq!(cache.total_bytes(), 0);
1170                prop_assert_eq!(recompute_bytes(&cache), 0);
1171            }
1172
1173            /// Invariant: inserting same key twice updates bytes correctly
1174            /// (no double-counting).
1175            #[test]
1176            fn replace_in_place_no_leak(
1177                sizes in prop::collection::vec(0..5000usize, 2..20)
1178            ) {
1179                let cache = QueryCache::with_limits(500, 10_000_000);
1180                let key = QueryKey::new("same", 42);
1181
1182                for size in &sizes {
1183                    cache.insert(key.clone(), &vec![0u8; *size], vec![]);
1184                }
1185
1186                // Only one entry should exist
1187                prop_assert_eq!(cache.len(), 1);
1188                // Tracked bytes should match actual
1189                let tracked = cache.total_bytes();
1190                let actual = recompute_bytes(&cache);
1191                prop_assert_eq!(tracked, actual);
1192            }
1193        }
1194    }
1195}