Skip to main content

tldr_cli/commands/daemon/
salsa.rs

1//! Salsa-style incremental computation cache
2//!
3//! Implements query memoization with automatic invalidation based on input changes.
4//! Uses DashMap for thread-safe concurrent access.
5//!
6//! # Design
7//!
8//! - Query results are keyed by `(query_name, args_hash)`
9//! - Each entry tracks which inputs (files) it depends on
10//! - When an input changes, all dependent queries are invalidated
11//! - LRU eviction when cache exceeds max entries
12//!
13//! # Security Mitigations
14//!
15//! - TIGER-P2-01: Atomic writes with checksum validation for persistence
16//! - Cache size limits to prevent OOM
17
18use std::collections::hash_map::DefaultHasher;
19use std::collections::HashSet;
20use std::fs::{self, File};
21use std::hash::{Hash, Hasher};
22use std::io::{BufReader, BufWriter, Read, Write};
23use std::path::Path;
24use std::sync::atomic::{AtomicU64, Ordering};
25use std::sync::RwLock;
26use std::time::SystemTime;
27
28use dashmap::DashMap;
29use serde::{de::DeserializeOwned, Deserialize, Serialize};
30
31use super::error::{DaemonError, DaemonResult};
32use super::types::SalsaCacheStats;
33
34// =============================================================================
35// Constants
36// =============================================================================
37
38/// Default maximum number of cache entries
39pub const DEFAULT_MAX_ENTRIES: usize = 10_000;
40
41/// Default maximum cache size in bytes (512 MB)
42pub const DEFAULT_MAX_BYTES: usize = 512 * 1024 * 1024;
43
44/// Magic bytes for cache file validation
45const CACHE_MAGIC: &[u8; 4] = b"TLDR";
46
47/// Cache file version
48const CACHE_VERSION: u8 = 1;
49
50// =============================================================================
51// Core Types
52// =============================================================================
53
54/// Key for looking up cached query results
55#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
56pub struct QueryKey {
57    /// Name of the query (e.g., "extract", "structure", "calls")
58    pub query_name: String,
59    /// Hash of the query arguments
60    pub args_hash: u64,
61}
62
63impl QueryKey {
64    /// Create a new query key
65    pub fn new(query_name: impl Into<String>, args_hash: u64) -> Self {
66        Self {
67            query_name: query_name.into(),
68            args_hash,
69        }
70    }
71}
72
73/// Cached query result with metadata for invalidation
74#[derive(Debug, Clone, Serialize, Deserialize)]
75pub struct CacheEntry {
76    /// Serialized result value (JSON bytes)
77    pub value: Vec<u8>,
78    /// Revision number when this entry was created
79    pub revision: u64,
80    /// Hashes of inputs this query depends on (for invalidation tracking)
81    pub input_hashes: Vec<u64>,
82    /// When this entry was created
83    #[serde(with = "system_time_serde")]
84    pub created_at: SystemTime,
85    /// Last access time (for LRU eviction)
86    #[serde(with = "system_time_serde")]
87    pub last_accessed: SystemTime,
88}
89
90impl CacheEntry {
91    /// Create a new cache entry
92    pub fn new(value: Vec<u8>, revision: u64, input_hashes: Vec<u64>) -> Self {
93        let now = SystemTime::now();
94        Self {
95            value,
96            revision,
97            input_hashes,
98            created_at: now,
99            last_accessed: now,
100        }
101    }
102
103    /// Estimated heap bytes used by this entry (value + input_hashes + overhead).
104    pub fn estimated_bytes(&self) -> usize {
105        self.value.len()
106            + self.input_hashes.len() * std::mem::size_of::<u64>()
107            + std::mem::size_of::<Self>()
108    }
109}
110
111/// Salsa-style query cache with automatic invalidation
112pub struct QueryCache {
113    /// Cached query results: QueryKey -> CacheEntry
114    entries: DashMap<QueryKey, CacheEntry>,
115    /// Reverse index: input_hash -> Set of QueryKeys that depend on it
116    dependents: DashMap<u64, HashSet<QueryKey>>,
117    /// Global revision counter (incremented on any input change)
118    revision: AtomicU64,
119    /// Cache statistics
120    stats: RwLock<SalsaCacheStats>,
121    /// Maximum number of entries before eviction
122    max_entries: usize,
123    /// Maximum total bytes before eviction
124    max_bytes: usize,
125    /// Current total estimated bytes across all entries
126    current_bytes: AtomicU64,
127}
128
129// =============================================================================
130// QueryCache Implementation
131// =============================================================================
132
133impl QueryCache {
134    /// Create a new query cache with the given max entries limit
135    pub fn new(max_entries: usize) -> Self {
136        Self::with_limits(max_entries, DEFAULT_MAX_BYTES)
137    }
138
139    /// Create a cache with explicit entry and byte limits
140    pub fn with_limits(max_entries: usize, max_bytes: usize) -> Self {
141        Self {
142            entries: DashMap::new(),
143            dependents: DashMap::new(),
144            revision: AtomicU64::new(0),
145            stats: RwLock::new(SalsaCacheStats::default()),
146            max_entries,
147            max_bytes,
148            current_bytes: AtomicU64::new(0),
149        }
150    }
151
152    /// Create a cache with default settings
153    pub fn with_defaults() -> Self {
154        Self::new(DEFAULT_MAX_ENTRIES)
155    }
156
157    /// Get a cached value if it exists and is valid
158    ///
159    /// Returns `None` if:
160    /// - The key doesn't exist in cache
161    /// - Deserialization fails
162    pub fn get<T: DeserializeOwned>(&self, key: &QueryKey) -> Option<T> {
163        if let Some(mut entry) = self.entries.get_mut(key) {
164            // Update last accessed time
165            entry.last_accessed = SystemTime::now();
166
167            // Record hit
168            if let Ok(mut stats) = self.stats.write() {
169                stats.hits += 1;
170            }
171
172            // Try to deserialize the value
173            match serde_json::from_slice(&entry.value) {
174                Ok(value) => Some(value),
175                Err(_) => {
176                    // Corrupted entry - remove it
177                    drop(entry);
178                    self.entries.remove(key);
179                    None
180                }
181            }
182        } else {
183            // Record miss
184            if let Ok(mut stats) = self.stats.write() {
185                stats.misses += 1;
186            }
187            None
188        }
189    }
190
191    /// Insert a value into the cache
192    ///
193    /// The `input_hashes` are used for invalidation - when any of these
194    /// inputs change, this entry will be invalidated.
195    pub fn insert<T: Serialize>(&self, key: QueryKey, value: &T, input_hashes: Vec<u64>) {
196        // Serialize the value
197        let serialized = match serde_json::to_vec(value) {
198            Ok(v) => v,
199            Err(_) => return, // Can't serialize - skip caching
200        };
201
202        let revision = self.revision.load(Ordering::Acquire);
203        let entry = CacheEntry::new(serialized, revision, input_hashes.clone());
204
205        // Track dependencies for invalidation
206        for &hash in &input_hashes {
207            self.dependents.entry(hash).or_default().insert(key.clone());
208        }
209
210        // Track bytes: subtract old entry if replacing
211        if let Some(old) = self.entries.get(&key) {
212            self.current_bytes
213                .fetch_sub(old.estimated_bytes() as u64, Ordering::Relaxed);
214        }
215
216        // Track bytes for new entry
217        self.current_bytes
218            .fetch_add(entry.estimated_bytes() as u64, Ordering::Relaxed);
219
220        // Insert the entry
221        self.entries.insert(key, entry);
222
223        // Evict if over entry count OR byte limit
224        self.maybe_evict();
225    }
226
227    /// Invalidate all cache entries that depend on the given input
228    ///
229    /// Returns the number of entries invalidated.
230    pub fn invalidate_by_input(&self, input_hash: u64) -> usize {
231        // Increment global revision
232        self.revision.fetch_add(1, Ordering::Release);
233
234        let mut invalidated = 0;
235
236        // Remove all entries that depend on this input
237        if let Some((_, keys)) = self.dependents.remove(&input_hash) {
238            for key in keys {
239                if let Some((_, entry)) = self.entries.remove(&key) {
240                    self.current_bytes
241                        .fetch_sub(entry.estimated_bytes() as u64, Ordering::Relaxed);
242                    invalidated += 1;
243                }
244            }
245        }
246
247        // Update stats
248        if let Ok(mut stats) = self.stats.write() {
249            stats.invalidations += invalidated as u64;
250        }
251
252        invalidated
253    }
254
255    /// Invalidate a cache entry by key
256    ///
257    /// Returns true if an entry was removed.
258    pub fn invalidate(&self, key: &QueryKey) -> bool {
259        if let Some((_, entry)) = self.entries.remove(key) {
260            // Track bytes removed
261            self.current_bytes
262                .fetch_sub(entry.estimated_bytes() as u64, Ordering::Relaxed);
263
264            // Clean up dependent tracking
265            for hash in entry.input_hashes {
266                if let Some(mut deps) = self.dependents.get_mut(&hash) {
267                    deps.remove(key);
268                }
269            }
270
271            if let Ok(mut stats) = self.stats.write() {
272                stats.invalidations += 1;
273            }
274
275            true
276        } else {
277            false
278        }
279    }
280
281    /// Get cache statistics
282    pub fn stats(&self) -> SalsaCacheStats {
283        self.stats.read().map(|s| s.clone()).unwrap_or_default()
284    }
285
286    /// Get current number of entries
287    pub fn len(&self) -> usize {
288        self.entries.len()
289    }
290
291    /// Check if cache is empty
292    pub fn is_empty(&self) -> bool {
293        self.entries.is_empty()
294    }
295
296    /// Get current revision number
297    pub fn revision(&self) -> u64 {
298        self.revision.load(Ordering::Acquire)
299    }
300
301    /// Clear all cache entries
302    pub fn clear(&self) {
303        self.entries.clear();
304        self.dependents.clear();
305        self.revision.store(0, Ordering::Release);
306        self.current_bytes.store(0, Ordering::Relaxed);
307
308        if let Ok(mut stats) = self.stats.write() {
309            *stats = SalsaCacheStats::default();
310        }
311    }
312
313    /// Total estimated bytes currently used by cached entries
314    pub fn total_bytes(&self) -> usize {
315        self.current_bytes.load(Ordering::Relaxed) as usize
316    }
317
318    /// Evict oldest entries if cache exceeds entry count or byte limit
319    fn maybe_evict(&self) {
320        let over_entries = self.entries.len() > self.max_entries;
321        let over_bytes = self.total_bytes() > self.max_bytes;
322
323        if !over_entries && !over_bytes {
324            return;
325        }
326
327        // Collect entries with their last access times and sizes
328        let mut entries_by_time: Vec<(QueryKey, SystemTime, usize)> = self
329            .entries
330            .iter()
331            .map(|e| {
332                (
333                    e.key().clone(),
334                    e.value().last_accessed,
335                    e.value().estimated_bytes(),
336                )
337            })
338            .collect();
339
340        // Sort by last accessed time (oldest first)
341        entries_by_time.sort_by(|a, b| a.1.cmp(&b.1));
342
343        // Evict oldest entries until we're under BOTH limits
344        for (key, _, _) in entries_by_time {
345            if self.entries.len() <= self.max_entries && self.total_bytes() <= self.max_bytes {
346                break;
347            }
348            self.invalidate(&key);
349        }
350    }
351
352    // =========================================================================
353    // Persistence
354    // =========================================================================
355
356    /// Save cache to a file with atomic write and checksum validation
357    ///
358    /// TIGER-P2-01: Uses write-to-temp + rename pattern for atomic writes.
359    pub fn save_to_file(&self, path: &Path) -> DaemonResult<()> {
360        // Collect entries for serialization
361        let entries: Vec<(QueryKey, CacheEntry)> = self
362            .entries
363            .iter()
364            .map(|e| (e.key().clone(), e.value().clone()))
365            .collect();
366
367        let dependents: Vec<(u64, Vec<QueryKey>)> = self
368            .dependents
369            .iter()
370            .map(|e| (*e.key(), e.value().iter().cloned().collect()))
371            .collect();
372
373        let stats = self.stats();
374        let revision = self.revision();
375
376        let cache_data = CacheFileData {
377            entries,
378            dependents,
379            stats,
380            revision,
381        };
382
383        // Serialize to JSON
384        let json = serde_json::to_vec(&cache_data)?;
385
386        // Calculate checksum
387        let checksum = calculate_checksum(&json);
388
389        // Write to temp file first (atomic write pattern)
390        let temp_path = path.with_extension("tmp");
391        {
392            let file = File::create(&temp_path)?;
393            let mut writer = BufWriter::new(file);
394
395            // Write header: magic + version + checksum
396            writer.write_all(CACHE_MAGIC)?;
397            writer.write_all(&[CACHE_VERSION])?;
398            writer.write_all(&checksum.to_le_bytes())?;
399            writer.write_all(&json)?;
400            writer.flush()?;
401        }
402
403        // Atomic rename
404        fs::rename(&temp_path, path)?;
405
406        Ok(())
407    }
408
409    /// Load cache from a file with checksum validation
410    pub fn load_from_file(path: &Path) -> DaemonResult<Self> {
411        let file = File::open(path)?;
412        let mut reader = BufReader::new(file);
413
414        // Read and validate header
415        let mut magic = [0u8; 4];
416        reader.read_exact(&mut magic)?;
417        if &magic != CACHE_MAGIC {
418            return Err(DaemonError::InvalidMessage(
419                "invalid cache file magic".to_string(),
420            ));
421        }
422
423        let mut version = [0u8; 1];
424        reader.read_exact(&mut version)?;
425        if version[0] != CACHE_VERSION {
426            return Err(DaemonError::InvalidMessage(format!(
427                "unsupported cache version: {}",
428                version[0]
429            )));
430        }
431
432        let mut checksum_bytes = [0u8; 8];
433        reader.read_exact(&mut checksum_bytes)?;
434        let stored_checksum = u64::from_le_bytes(checksum_bytes);
435
436        // Read remaining data
437        let mut data = Vec::new();
438        reader.read_to_end(&mut data)?;
439
440        // Validate checksum
441        let actual_checksum = calculate_checksum(&data);
442        if stored_checksum != actual_checksum {
443            return Err(DaemonError::InvalidMessage(
444                "cache file checksum mismatch".to_string(),
445            ));
446        }
447
448        // Deserialize
449        let cache_data: CacheFileData = serde_json::from_slice(&data)?;
450
451        // Reconstruct cache
452        let cache = Self::with_defaults();
453
454        let mut total_bytes: u64 = 0;
455        for (key, entry) in cache_data.entries {
456            total_bytes += entry.estimated_bytes() as u64;
457            cache.entries.insert(key, entry);
458        }
459        cache.current_bytes.store(total_bytes, Ordering::Relaxed);
460
461        for (hash, keys) in cache_data.dependents {
462            cache.dependents.insert(hash, keys.into_iter().collect());
463        }
464
465        cache.revision.store(cache_data.revision, Ordering::Release);
466
467        if let Ok(mut stats) = cache.stats.write() {
468            *stats = cache_data.stats;
469        }
470
471        Ok(cache)
472    }
473}
474
475impl Default for QueryCache {
476    fn default() -> Self {
477        Self::with_defaults()
478    }
479}
480
481// =============================================================================
482// Helper Types
483// =============================================================================
484
485/// Serializable cache file data
486#[derive(Serialize, Deserialize)]
487struct CacheFileData {
488    entries: Vec<(QueryKey, CacheEntry)>,
489    dependents: Vec<(u64, Vec<QueryKey>)>,
490    stats: SalsaCacheStats,
491    revision: u64,
492}
493
494/// Calculate a checksum for data validation
495fn calculate_checksum(data: &[u8]) -> u64 {
496    let mut hasher = DefaultHasher::new();
497    data.hash(&mut hasher);
498    hasher.finish()
499}
500
501/// Serde module for SystemTime
502mod system_time_serde {
503    use serde::{Deserialize, Deserializer, Serialize, Serializer};
504    use std::time::{Duration, SystemTime, UNIX_EPOCH};
505
506    pub fn serialize<S>(time: &SystemTime, serializer: S) -> Result<S::Ok, S::Error>
507    where
508        S: Serializer,
509    {
510        let duration = time.duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO);
511        duration.as_secs().serialize(serializer)
512    }
513
514    pub fn deserialize<'de, D>(deserializer: D) -> Result<SystemTime, D::Error>
515    where
516        D: Deserializer<'de>,
517    {
518        let secs = u64::deserialize(deserializer)?;
519        Ok(UNIX_EPOCH + Duration::from_secs(secs))
520    }
521}
522
523// =============================================================================
524// Utility Functions
525// =============================================================================
526
527/// Hash arguments for cache key generation
528pub fn hash_args<T: Hash>(args: &T) -> u64 {
529    let mut hasher = DefaultHasher::new();
530    args.hash(&mut hasher);
531    hasher.finish()
532}
533
534/// Hash a file path for input tracking
535pub fn hash_path(path: &Path) -> u64 {
536    let mut hasher = DefaultHasher::new();
537    path.hash(&mut hasher);
538    hasher.finish()
539}
540
541// =============================================================================
542// Tests
543// =============================================================================
544
545#[cfg(test)]
546mod tests {
547    use super::*;
548    use tempfile::tempdir;
549
550    #[test]
551    fn test_query_cache_new() {
552        let cache = QueryCache::new(100);
553        assert_eq!(cache.max_entries, 100);
554        assert!(cache.is_empty());
555        assert_eq!(cache.revision(), 0);
556    }
557
558    #[test]
559    fn test_query_cache_insert_and_get() {
560        let cache = QueryCache::new(100);
561        let key = QueryKey::new("test", 12345);
562        let value = vec!["hello", "world"];
563
564        cache.insert(key.clone(), &value, vec![]);
565
566        let result: Option<Vec<String>> = cache.get(&key);
567        assert!(result.is_some());
568        assert_eq!(result.unwrap(), vec!["hello", "world"]);
569    }
570
571    #[test]
572    fn test_query_cache_miss() {
573        let cache = QueryCache::new(100);
574        let key = QueryKey::new("nonexistent", 99999);
575
576        let result: Option<String> = cache.get(&key);
577        assert!(result.is_none());
578
579        let stats = cache.stats();
580        assert_eq!(stats.misses, 1);
581        assert_eq!(stats.hits, 0);
582    }
583
584    #[test]
585    fn test_query_cache_hit_tracking() {
586        let cache = QueryCache::new(100);
587        let key = QueryKey::new("test", 12345);
588        cache.insert(key.clone(), &"value", vec![]);
589
590        // First get - hit
591        let _: Option<String> = cache.get(&key);
592        // Second get - hit
593        let _: Option<String> = cache.get(&key);
594
595        let stats = cache.stats();
596        assert_eq!(stats.hits, 2);
597    }
598
599    #[test]
600    fn test_query_cache_invalidate_by_input() {
601        let cache = QueryCache::new(100);
602        let input_hash = hash_path(Path::new("/test/file.rs"));
603
604        // Insert entries that depend on the input
605        let key1 = QueryKey::new("query1", 1);
606        let key2 = QueryKey::new("query2", 2);
607        let key3 = QueryKey::new("query3", 3); // No dependency
608
609        cache.insert(key1.clone(), &"value1", vec![input_hash]);
610        cache.insert(key2.clone(), &"value2", vec![input_hash]);
611        cache.insert(key3.clone(), &"value3", vec![]);
612
613        assert_eq!(cache.len(), 3);
614
615        // Invalidate by input
616        let invalidated = cache.invalidate_by_input(input_hash);
617        assert_eq!(invalidated, 2);
618        assert_eq!(cache.len(), 1);
619
620        // key3 should still be accessible
621        let result: Option<String> = cache.get(&key3);
622        assert!(result.is_some());
623
624        // key1 and key2 should be gone
625        let result: Option<String> = cache.get(&key1);
626        assert!(result.is_none());
627    }
628
629    #[test]
630    fn test_query_cache_invalidation_stats() {
631        let cache = QueryCache::new(100);
632        let key = QueryKey::new("test", 1);
633        cache.insert(key.clone(), &"value", vec![12345]);
634
635        cache.invalidate_by_input(12345);
636
637        let stats = cache.stats();
638        assert_eq!(stats.invalidations, 1);
639    }
640
641    #[test]
642    fn test_query_cache_clear() {
643        let cache = QueryCache::new(100);
644
645        // Insert some entries
646        cache.insert(QueryKey::new("q1", 1), &"v1", vec![]);
647        cache.insert(QueryKey::new("q2", 2), &"v2", vec![]);
648
649        assert_eq!(cache.len(), 2);
650
651        cache.clear();
652
653        assert!(cache.is_empty());
654        assert_eq!(cache.revision(), 0);
655    }
656
657    #[test]
658    fn test_query_cache_lru_eviction() {
659        let cache = QueryCache::new(3); // Max 3 entries
660
661        // Insert 4 entries
662        cache.insert(QueryKey::new("q1", 1), &"v1", vec![]);
663        std::thread::sleep(std::time::Duration::from_millis(10));
664        cache.insert(QueryKey::new("q2", 2), &"v2", vec![]);
665        std::thread::sleep(std::time::Duration::from_millis(10));
666        cache.insert(QueryKey::new("q3", 3), &"v3", vec![]);
667        std::thread::sleep(std::time::Duration::from_millis(10));
668
669        // Access q1 to make it recently used
670        let _: Option<String> = cache.get(&QueryKey::new("q1", 1));
671        std::thread::sleep(std::time::Duration::from_millis(10));
672
673        // Insert q4 - should evict q2 (oldest accessed)
674        cache.insert(QueryKey::new("q4", 4), &"v4", vec![]);
675
676        assert!(cache.len() <= 3);
677
678        // q1 should still exist (was accessed recently)
679        let result: Option<String> = cache.get(&QueryKey::new("q1", 1));
680        assert!(result.is_some());
681    }
682
683    #[test]
684    fn test_query_cache_persistence() {
685        let dir = tempdir().unwrap();
686        let cache_path = dir.path().join("test_cache.bin");
687
688        // Create and populate cache
689        let cache = QueryCache::new(100);
690        cache.insert(QueryKey::new("test", 12345), &"hello world", vec![1, 2, 3]);
691        cache.insert(QueryKey::new("test2", 67890), &vec![1, 2, 3], vec![]);
692
693        // Save to file
694        cache.save_to_file(&cache_path).unwrap();
695
696        // Load from file
697        let loaded = QueryCache::load_from_file(&cache_path).unwrap();
698
699        // Verify contents
700        assert_eq!(loaded.len(), 2);
701
702        let result: Option<String> = loaded.get(&QueryKey::new("test", 12345));
703        assert_eq!(result, Some("hello world".to_string()));
704
705        let result: Option<Vec<i32>> = loaded.get(&QueryKey::new("test2", 67890));
706        assert_eq!(result, Some(vec![1, 2, 3]));
707    }
708
709    #[test]
710    fn test_query_cache_persistence_checksum_validation() {
711        let dir = tempdir().unwrap();
712        let cache_path = dir.path().join("test_cache.bin");
713
714        // Create and save cache
715        let cache = QueryCache::new(100);
716        cache.insert(QueryKey::new("test", 1), &"value", vec![]);
717        cache.save_to_file(&cache_path).unwrap();
718
719        // Corrupt the file
720        let mut data = fs::read(&cache_path).unwrap();
721        if data.len() > 20 {
722            data[20] ^= 0xFF; // Flip some bits
723        }
724        fs::write(&cache_path, data).unwrap();
725
726        // Loading should fail due to checksum mismatch
727        let result = QueryCache::load_from_file(&cache_path);
728        assert!(result.is_err());
729    }
730
731    #[test]
732    fn test_hash_args() {
733        let args1 = ("query", "/path/to/file.rs", 42);
734        let args2 = ("query", "/path/to/file.rs", 42);
735        let args3 = ("query", "/path/to/other.rs", 42);
736
737        assert_eq!(hash_args(&args1), hash_args(&args2));
738        assert_ne!(hash_args(&args1), hash_args(&args3));
739    }
740
741    #[test]
742    fn test_hash_path() {
743        let path1 = Path::new("/foo/bar.rs");
744        let path2 = Path::new("/foo/bar.rs");
745        let path3 = Path::new("/foo/baz.rs");
746
747        assert_eq!(hash_path(path1), hash_path(path2));
748        assert_ne!(hash_path(path1), hash_path(path3));
749    }
750
751    #[test]
752    fn test_query_key_equality() {
753        let key1 = QueryKey::new("test", 12345);
754        let key2 = QueryKey::new("test", 12345);
755        let key3 = QueryKey::new("test", 99999);
756        let key4 = QueryKey::new("other", 12345);
757
758        assert_eq!(key1, key2);
759        assert_ne!(key1, key3);
760        assert_ne!(key1, key4);
761    }
762
763    #[test]
764    fn test_cache_entry_creation() {
765        let entry = CacheEntry::new(vec![1, 2, 3], 5, vec![100, 200]);
766
767        assert_eq!(entry.value, vec![1, 2, 3]);
768        assert_eq!(entry.revision, 5);
769        assert_eq!(entry.input_hashes, vec![100, 200]);
770        assert!(entry.created_at <= SystemTime::now());
771        assert!(entry.last_accessed <= SystemTime::now());
772    }
773
774    #[test]
775    fn test_stats_hit_rate_calculation() {
776        let cache = QueryCache::new(100);
777
778        // No queries yet
779        let stats = cache.stats();
780        assert_eq!(stats.hit_rate(), 0.0);
781
782        // Insert and query
783        cache.insert(QueryKey::new("test", 1), &"value", vec![]);
784        let _: Option<String> = cache.get(&QueryKey::new("test", 1)); // hit
785        let _: Option<String> = cache.get(&QueryKey::new("test", 2)); // miss
786        let _: Option<String> = cache.get(&QueryKey::new("test", 1)); // hit
787
788        let stats = cache.stats();
789        assert_eq!(stats.hits, 2);
790        assert_eq!(stats.misses, 1);
791        // hit_rate = 2 / 3 * 100 = 66.67
792        assert!((stats.hit_rate() - 66.67).abs() < 0.1);
793    }
794
795    #[test]
796    fn test_revision_increments_on_invalidation() {
797        let cache = QueryCache::new(100);
798        assert_eq!(cache.revision(), 0);
799
800        cache.invalidate_by_input(12345);
801        assert_eq!(cache.revision(), 1);
802
803        cache.invalidate_by_input(67890);
804        assert_eq!(cache.revision(), 2);
805    }
806
807    #[test]
808    fn test_multiple_entries_same_input() {
809        let cache = QueryCache::new(100);
810        let shared_input = 12345u64;
811
812        // Multiple queries depend on the same input
813        cache.insert(QueryKey::new("q1", 1), &"v1", vec![shared_input]);
814        cache.insert(QueryKey::new("q2", 2), &"v2", vec![shared_input]);
815        cache.insert(QueryKey::new("q3", 3), &"v3", vec![shared_input]);
816
817        assert_eq!(cache.len(), 3);
818
819        // Invalidating the shared input should remove all three
820        let count = cache.invalidate_by_input(shared_input);
821        assert_eq!(count, 3);
822        assert!(cache.is_empty());
823    }
824
825    #[test]
826    fn test_entry_with_multiple_inputs() {
827        let cache = QueryCache::new(100);
828        let input1 = 111u64;
829        let input2 = 222u64;
830
831        // Entry depends on multiple inputs
832        cache.insert(QueryKey::new("q1", 1), &"v1", vec![input1, input2]);
833
834        // Invalidating either input should remove the entry
835        assert_eq!(cache.len(), 1);
836        cache.invalidate_by_input(input1);
837        assert!(cache.is_empty());
838    }
839
840    // =========================================================================
841    // Memory-bounded cache tests
842    // =========================================================================
843
844    #[test]
845    fn test_total_bytes_tracking() {
846        let cache = QueryCache::new(100);
847        assert_eq!(cache.total_bytes(), 0);
848
849        // Insert a value and check bytes increased
850        cache.insert(QueryKey::new("q1", 1), &"hello", vec![]);
851        let bytes_after_one = cache.total_bytes();
852        assert!(
853            bytes_after_one > 0,
854            "total_bytes should increase after insert"
855        );
856
857        // Insert another and check it increased further
858        cache.insert(QueryKey::new("q2", 2), &"world", vec![]);
859        let bytes_after_two = cache.total_bytes();
860        assert!(
861            bytes_after_two > bytes_after_one,
862            "total_bytes should increase with more entries"
863        );
864
865        // Clear and check it resets
866        cache.clear();
867        assert_eq!(cache.total_bytes(), 0);
868    }
869
870    #[test]
871    fn test_bytes_decrease_on_invalidate() {
872        let cache = QueryCache::new(100);
873        cache.insert(QueryKey::new("q1", 1), &"value1", vec![]);
874        cache.insert(QueryKey::new("q2", 2), &"value2", vec![]);
875        let bytes_before = cache.total_bytes();
876
877        cache.invalidate(&QueryKey::new("q1", 1));
878        let bytes_after = cache.total_bytes();
879        assert!(
880            bytes_after < bytes_before,
881            "total_bytes should decrease after invalidation"
882        );
883    }
884
885    #[test]
886    fn test_bytes_decrease_on_invalidate_by_input() {
887        let cache = QueryCache::new(100);
888        let input_hash = 42u64;
889
890        cache.insert(QueryKey::new("q1", 1), &"value1", vec![input_hash]);
891        cache.insert(QueryKey::new("q2", 2), &"value2", vec![input_hash]);
892        let bytes_before = cache.total_bytes();
893        assert!(bytes_before > 0);
894
895        cache.invalidate_by_input(input_hash);
896        assert_eq!(
897            cache.total_bytes(),
898            0,
899            "total_bytes should be 0 after all entries invalidated"
900        );
901    }
902
903    #[test]
904    fn test_byte_limit_eviction() {
905        // Set a very small byte limit (1 KB)
906        let cache = QueryCache::with_limits(10_000, 1024);
907
908        // Insert entries until we exceed the byte limit
909        // Each entry with a 200-byte payload
910        let payload = "x".repeat(200);
911        for i in 0..20 {
912            cache.insert(QueryKey::new("q", i), &payload, vec![]);
913        }
914
915        // Cache should have evicted to stay under 1 KB
916        assert!(
917            cache.total_bytes() <= 1024,
918            "total_bytes ({}) should be <= 1024 after eviction",
919            cache.total_bytes()
920        );
921        assert!(
922            cache.len() < 20,
923            "entry count ({}) should be < 20 after byte-based eviction",
924            cache.len()
925        );
926    }
927
928    #[test]
929    fn test_large_entry_evicts_many_small() {
930        // 2 KB limit
931        let cache = QueryCache::with_limits(10_000, 2048);
932
933        // Insert 10 small entries (~50 bytes each)
934        for i in 0..10 {
935            cache.insert(QueryKey::new("small", i), &"tiny", vec![]);
936        }
937        let count_before = cache.len();
938        assert_eq!(count_before, 10);
939
940        // Insert one large entry (~1500 bytes)
941        let big_payload = "x".repeat(1500);
942        cache.insert(QueryKey::new("big", 0), &big_payload, vec![]);
943
944        // Should have evicted some small entries to make room
945        assert!(
946            cache.total_bytes() <= 2048,
947            "total_bytes ({}) should be <= 2048",
948            cache.total_bytes()
949        );
950        // The big entry should still be present (most recently inserted)
951        let result: Option<String> = cache.get(&QueryKey::new("big", 0));
952        assert!(result.is_some(), "large entry should survive eviction");
953    }
954
955    #[test]
956    fn test_byte_tracking_on_replace() {
957        let cache = QueryCache::new(100);
958
959        // Insert a small value
960        cache.insert(QueryKey::new("q1", 1), &"small", vec![]);
961        let bytes_small = cache.total_bytes();
962
963        // Replace with a large value
964        let big = "x".repeat(10_000);
965        cache.insert(QueryKey::new("q1", 1), &big, vec![]);
966        let bytes_big = cache.total_bytes();
967
968        assert!(
969            bytes_big > bytes_small,
970            "bytes should increase when replacing small with large"
971        );
972        assert_eq!(cache.len(), 1, "should still be one entry after replace");
973    }
974
975    #[test]
976    fn test_memory_bounded_cache_under_stress() {
977        // 100 KB limit
978        let cache = QueryCache::with_limits(10_000, 100 * 1024);
979
980        // Insert 1000 entries with varied sizes
981        for i in 0..1000u64 {
982            let size = ((i % 10) + 1) as usize * 100; // 100 to 1000 bytes
983            let payload = "x".repeat(size);
984            cache.insert(QueryKey::new("stress", i), &payload, vec![]);
985        }
986
987        // Cache must respect byte limit
988        assert!(
989            cache.total_bytes() <= 100 * 1024,
990            "total_bytes ({}) should be <= 102400 after stress test",
991            cache.total_bytes()
992        );
993
994        // Most recent entries should be accessible
995        let result: Option<String> = cache.get(&QueryKey::new("stress", 999));
996        assert!(result.is_some(), "most recent entry should be cached");
997    }
998
999    #[test]
1000    fn test_estimated_bytes_accuracy() {
1001        let small = CacheEntry::new(vec![1, 2, 3], 0, vec![]);
1002        let large = CacheEntry::new(vec![0u8; 10_000], 0, vec![1, 2, 3]);
1003
1004        assert!(small.estimated_bytes() < large.estimated_bytes());
1005        assert!(small.estimated_bytes() > 0);
1006        // Large entry should account for the 10K payload
1007        assert!(
1008            large.estimated_bytes() >= 10_000,
1009            "estimated_bytes ({}) should be >= payload size",
1010            large.estimated_bytes()
1011        );
1012    }
1013
1014    #[test]
1015    fn test_default_max_bytes() {
1016        let cache = QueryCache::with_defaults();
1017        assert_eq!(cache.max_bytes, DEFAULT_MAX_BYTES);
1018        assert_eq!(cache.max_bytes, 512 * 1024 * 1024); // 512 MB
1019    }
1020
1021    // =========================================================================
1022    // Property-based tests (proptest)
1023    // =========================================================================
1024
1025    mod proptest_cache {
1026        use super::*;
1027        use proptest::prelude::*;
1028
1029        /// Recompute total bytes by summing all entries — ground truth.
1030        fn recompute_bytes(cache: &QueryCache) -> usize {
1031            cache
1032                .entries
1033                .iter()
1034                .map(|e| e.value().estimated_bytes())
1035                .sum()
1036        }
1037
1038        /// Arbitrary cache operation
1039        #[derive(Debug, Clone)]
1040        enum CacheOp {
1041            Insert {
1042                key_id: u8,
1043                payload_len: usize,
1044                input_hash: u64,
1045            },
1046            InvalidateByInput(u64),
1047            InvalidateByKey(u8),
1048            Clear,
1049        }
1050
1051        fn arb_cache_op() -> impl Strategy<Value = CacheOp> {
1052            prop_oneof![
1053                (any::<u8>(), 0..2000usize, any::<u64>()).prop_map(|(k, p, h)| CacheOp::Insert {
1054                    key_id: k,
1055                    payload_len: p,
1056                    input_hash: h % 16, // cluster hashes for overlap
1057                }),
1058                (any::<u64>()).prop_map(|h| CacheOp::InvalidateByInput(h % 16)),
1059                (any::<u8>()).prop_map(CacheOp::InvalidateByKey),
1060                Just(CacheOp::Clear),
1061            ]
1062        }
1063
1064        proptest! {
1065            /// Invariant: tracked bytes == sum of all entry sizes after any
1066            /// sequence of insert/invalidate/clear operations.
1067            #[test]
1068            fn bytes_tracking_consistent(ops in prop::collection::vec(arb_cache_op(), 1..150)) {
1069                let cache = QueryCache::with_limits(500, 10_000_000);
1070
1071                for op in ops {
1072                    match op {
1073                        CacheOp::Insert { key_id, payload_len, input_hash } => {
1074                            let key = QueryKey::new("prop", key_id as u64);
1075                            let payload = vec![0u8; payload_len];
1076                            cache.insert(key, &payload, vec![input_hash]);
1077                        }
1078                        CacheOp::InvalidateByInput(hash) => {
1079                            cache.invalidate_by_input(hash);
1080                        }
1081                        CacheOp::InvalidateByKey(key_id) => {
1082                            let key = QueryKey::new("prop", key_id as u64);
1083                            cache.invalidate(&key);
1084                        }
1085                        CacheOp::Clear => {
1086                            cache.clear();
1087                        }
1088                    }
1089                }
1090
1091                let tracked = cache.total_bytes();
1092                let actual = recompute_bytes(&cache);
1093                prop_assert_eq!(tracked, actual,
1094                    "tracked bytes ({}) != recomputed bytes ({})", tracked, actual);
1095            }
1096
1097            /// Invariant: entry count never exceeds max_entries after operations.
1098            #[test]
1099            fn entry_count_bounded(ops in prop::collection::vec(arb_cache_op(), 1..200)) {
1100                let max = 50;
1101                let cache = QueryCache::with_limits(max, 10_000_000);
1102
1103                for op in ops {
1104                    match op {
1105                        CacheOp::Insert { key_id, payload_len, input_hash } => {
1106                            let key = QueryKey::new("prop", key_id as u64);
1107                            let payload = vec![0u8; payload_len];
1108                            cache.insert(key, &payload, vec![input_hash]);
1109                        }
1110                        CacheOp::InvalidateByInput(hash) => {
1111                            cache.invalidate_by_input(hash);
1112                        }
1113                        CacheOp::InvalidateByKey(key_id) => {
1114                            let key = QueryKey::new("prop", key_id as u64);
1115                            cache.invalidate(&key);
1116                        }
1117                        CacheOp::Clear => {
1118                            cache.clear();
1119                        }
1120                    }
1121                }
1122
1123                prop_assert!(cache.len() <= max,
1124                    "cache size {} exceeds max {}", cache.len(), max);
1125            }
1126
1127            /// Invariant: total bytes never exceeds max_bytes after operations.
1128            #[test]
1129            fn byte_limit_bounded(ops in prop::collection::vec(arb_cache_op(), 1..200)) {
1130                let max_bytes = 50_000;
1131                let cache = QueryCache::with_limits(500, max_bytes);
1132
1133                for op in ops {
1134                    match op {
1135                        CacheOp::Insert { key_id, payload_len, input_hash } => {
1136                            let key = QueryKey::new("prop", key_id as u64);
1137                            let payload = vec![0u8; payload_len];
1138                            cache.insert(key, &payload, vec![input_hash]);
1139                        }
1140                        CacheOp::InvalidateByInput(hash) => {
1141                            cache.invalidate_by_input(hash);
1142                        }
1143                        CacheOp::InvalidateByKey(key_id) => {
1144                            let key = QueryKey::new("prop", key_id as u64);
1145                            cache.invalidate(&key);
1146                        }
1147                        CacheOp::Clear => {
1148                            cache.clear();
1149                        }
1150                    }
1151                }
1152
1153                prop_assert!(cache.total_bytes() <= max_bytes,
1154                    "total bytes {} exceeds max {}", cache.total_bytes(), max_bytes);
1155            }
1156
1157            /// Invariant: after clear(), cache is empty and bytes are zero.
1158            #[test]
1159            fn clear_resets_everything(
1160                inserts in prop::collection::vec((any::<u8>(), 0..500usize), 1..50)
1161            ) {
1162                let cache = QueryCache::with_limits(500, 10_000_000);
1163
1164                for (key_id, payload_len) in inserts {
1165                    let key = QueryKey::new("prop", key_id as u64);
1166                    cache.insert(key, &vec![0u8; payload_len], vec![]);
1167                }
1168
1169                cache.clear();
1170
1171                prop_assert_eq!(cache.len(), 0);
1172                prop_assert_eq!(cache.total_bytes(), 0);
1173                prop_assert_eq!(recompute_bytes(&cache), 0);
1174            }
1175
1176            /// Invariant: inserting same key twice updates bytes correctly
1177            /// (no double-counting).
1178            #[test]
1179            fn replace_in_place_no_leak(
1180                sizes in prop::collection::vec(0..5000usize, 2..20)
1181            ) {
1182                let cache = QueryCache::with_limits(500, 10_000_000);
1183                let key = QueryKey::new("same", 42);
1184
1185                for size in &sizes {
1186                    cache.insert(key.clone(), &vec![0u8; *size], vec![]);
1187                }
1188
1189                // Only one entry should exist
1190                prop_assert_eq!(cache.len(), 1);
1191                // Tracked bytes should match actual
1192                let tracked = cache.total_bytes();
1193                let actual = recompute_bytes(&cache);
1194                prop_assert_eq!(tracked, actual);
1195            }
1196        }
1197    }
1198}