sqz_engine/cache_manager.rs
1use std::collections::HashMap;
2use std::path::Path;
3use std::time::Duration;
4
5use sha2::{Digest, Sha256};
6
7use crate::delta_encoder::DeltaEncoder;
8use crate::error::Result;
9use crate::pipeline::{CompressionPipeline, SessionContext};
10use crate::preset::Preset;
11use crate::session_store::SessionStore;
12use crate::types::CompressedContent;
13
/// Outcome of a cache lookup in [`CacheManager`].
///
/// The cache has three possible outcomes:
/// - **Dedup**: exact match, returns a tiny `§ref:HASH§` token (~13 tokens)
/// - **Delta**: near-duplicate, returns a compact diff against the cached version
/// - **Fresh**: cache miss, returns the full compressed output
pub enum CacheResult {
    /// Previously seen content — returns a short inline reference (~13 tokens).
    Dedup {
        /// Inline token of the form `§ref:<hash_prefix>§` (16-hex-char prefix).
        inline_ref: String,
        /// Approximate token cost of the reference (always 13).
        token_cost: u32,
    },
    /// Near-duplicate of cached content — returns a compact delta.
    Delta {
        /// The delta text (header + changed lines).
        delta_text: String,
        /// Approximate token cost of the delta (~len/4, floored at 5).
        token_cost: u32,
        /// Similarity to the cached version (0.0–1.0).
        similarity: f64,
    },
    /// Content not seen before — full compression result.
    Fresh {
        /// The freshly compressed output produced by the pipeline.
        output: CompressedContent,
    },
}
40
/// Tracks when a dedup ref was last sent, so we can detect staleness.
///
/// Historically used for an in-memory per-process turn counter; now kept
/// only for interface compatibility (cleared on `notify_compaction`). Actual
/// staleness is computed from SQLite `accessed_at` timestamps so it works
/// across the shell-hook invocation model where each sqz process is short-
/// lived. See the comment on `is_ref_fresh` for details.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct RefEntry {
    /// The turn number when this ref was last sent to the LLM.
    /// No longer consulted for freshness decisions.
    last_sent_turn: u64,
}
54
/// SHA-256 content-hash deduplication cache backed by [`SessionStore`],
/// with delta encoding for near-duplicate content and compaction awareness.
///
/// # Freshness model
///
/// A dedup ref is considered fresh (safe to serve instead of the full
/// content) when the cache entry's `accessed_at` timestamp in SQLite is
/// within `max_ref_age` of now. When sqz is invoked from shell hooks each
/// invocation is a short-lived process, so the freshness check must be
/// persistent — in-memory state is gone the moment the process exits.
///
/// The previous turn-counter heuristic was in-memory only and therefore
/// never registered freshness across hook invocations, which silently
/// disabled the dedup feature in production. Issue found April 18 2026.
///
/// Default TTL: 30 minutes. Empirically matches a typical active coding
/// session before a context compaction. Use [`with_ref_age`] to tune.
pub struct CacheManager {
    /// Persistent SQLite-backed store for cache entries and metadata.
    store: SessionStore,
    /// Total cache-size budget; `evict_lru` trims down to this bound.
    max_size_bytes: u64,
    /// Encoder used to diff near-duplicate content against cached entries.
    delta_encoder: DeltaEncoder,
    /// Retained for notify_compaction's semantic ("forget all tracked refs"),
    /// but no longer consulted for freshness checks.
    #[allow(dead_code)]
    turn_counter: std::cell::Cell<u64>,
    /// Retained for notify_compaction; cleared on compaction events.
    #[allow(dead_code)]
    ref_tracker: std::cell::RefCell<HashMap<String, RefEntry>>,
    /// Maximum wall-clock age before a dedup ref is considered stale.
    /// After this duration we assume the LLM's context window has rolled
    /// over enough to have dropped the original content, so we re-send the
    /// full version instead of a dangling ref.
    max_ref_age: Duration,
    /// Records the instant at which the in-memory compaction flag was set.
    /// Any cache entry whose `accessed_at` predates this instant is stale.
    /// Reset by [`notify_compaction`].
    compaction_marker: std::cell::Cell<Option<chrono::DateTime<chrono::Utc>>>,
}
93
94impl CacheManager {
95 /// Create a new cache manager backed by the given session store.
96 ///
97 /// `max_size_bytes` controls when LRU eviction kicks in. A good default
98 /// is 512 MB (`512 * 1024 * 1024`). Dedup refs go stale after 30 minutes
99 /// of wall-clock time by default — use [`with_ref_age`] to tune.
100 pub fn new(store: SessionStore, max_size_bytes: u64) -> Self {
101 Self::with_ref_age_duration(store, max_size_bytes, Duration::from_secs(30 * 60))
102 }
103
104 /// Create a CacheManager with a custom ref staleness threshold measured
105 /// in turns. The turn count is converted to wall-clock time by assuming
106 /// ~1 minute per turn (a rough approximation; the real freshness check
107 /// uses SQLite timestamps). This constructor exists for backward
108 /// compatibility with tests that previously advanced a turn counter.
109 #[doc(hidden)]
110 pub fn with_ref_age(store: SessionStore, max_size_bytes: u64, max_ref_age_turns: u64) -> Self {
111 Self::with_ref_age_duration(
112 store,
113 max_size_bytes,
114 Duration::from_secs(max_ref_age_turns.saturating_mul(60)),
115 )
116 }
117
118 /// Create a CacheManager with an explicit wall-clock ref-age cap.
119 pub fn with_ref_age_duration(
120 store: SessionStore,
121 max_size_bytes: u64,
122 max_ref_age: Duration,
123 ) -> Self {
124 Self {
125 store,
126 max_size_bytes,
127 delta_encoder: DeltaEncoder::new(),
128 turn_counter: std::cell::Cell::new(0),
129 ref_tracker: std::cell::RefCell::new(HashMap::new()),
130 max_ref_age,
131 compaction_marker: std::cell::Cell::new(None),
132 }
133 }
134
135 /// Compute the SHA-256 hex digest of `bytes`.
136 fn sha256_hex(bytes: &[u8]) -> String {
137 let mut hasher = Sha256::new();
138 hasher.update(bytes);
139 format!("{:x}", hasher.finalize())
140 }
141
142 /// Advance the turn counter. Retained for API compatibility; not used
143 /// for freshness. The context_evictor still reads `current_turn` for
144 /// LRU scoring during `sqz compact`.
145 pub fn advance_turn(&self) {
146 self.turn_counter.set(self.turn_counter.get() + 1);
147 }
148
    /// Get the current in-process turn number.
    ///
    /// Not used for dedup freshness (that is timestamp-based); the
    /// context_evictor still reads this for LRU scoring.
    pub fn current_turn(&self) -> u64 {
        self.turn_counter.get()
    }
153
154 /// Notify the cache that a context compaction has occurred.
155 ///
156 /// Persists a compaction timestamp into the session store so any cache
157 /// entry whose `accessed_at` predates the marker is considered stale
158 /// by **every subsequent sqz process**, not just this one. The shell-
159 /// hook invocation model means this method is typically called from a
160 /// short-lived `sqz hook precompact` process, and the check runs in a
161 /// different `sqz compress` process milliseconds later.
162 ///
163 /// Call this when:
164 /// - The harness signals a compaction event (PreCompact hook)
165 /// - A session is resumed after being idle
166 /// - The user runs `sqz compact`
167 pub fn notify_compaction(&self) {
168 let now = chrono::Utc::now();
169 self.compaction_marker.set(Some(now));
170 self.ref_tracker.borrow_mut().clear();
171 // Persist the marker so other sqz processes see the invalidation.
172 // Silently swallow a write error: losing the marker means some
173 // refs may survive the compaction and show as dedup hits in the
174 // next few calls — annoying, not wrong (the agent still receives
175 // valid content; it just sees a short-ref it has to resolve).
176 let _ = self
177 .store
178 .set_metadata("last_compaction_at", &now.to_rfc3339());
179 }
180
181 /// Check if a dedup ref for the given hash is still fresh (likely still
182 /// in the LLM's context window).
183 ///
184 /// Uses the SQLite `accessed_at` timestamp rather than the in-memory
185 /// turn counter. This works across sqz process invocations: shell hooks
186 /// spawn a new sqz process per intercepted command, so any in-memory
187 /// counter would reset every time. The database survives.
188 ///
189 /// The compaction marker is read from SQLite on every check so that
190 /// a `sqz hook precompact` call from another process immediately
191 /// invalidates refs in the current process. Without the persistent
192 /// read, the invalidation would only affect the process that called
193 /// notify_compaction — which is never the same process that serves
194 /// dedup hits.
195 fn is_ref_fresh(&self, hash: &str) -> bool {
196 let accessed = match self.store.get_cache_entry_accessed_at(hash) {
197 Ok(Some(ts)) => ts,
198 _ => return false,
199 };
200 // In-memory compaction marker (set in this process).
201 if let Some(marker) = self.compaction_marker.get() {
202 if accessed < marker {
203 return false;
204 }
205 }
206 // Persistent compaction marker — set by `sqz hook precompact` in
207 // a different process. Without this read the in-memory marker is
208 // never consulted because each hook invocation is a fresh process.
209 if let Ok(Some(raw)) = self.store.get_metadata("last_compaction_at") {
210 if let Ok(marker) = raw.parse::<chrono::DateTime<chrono::Utc>>() {
211 if accessed < marker {
212 return false;
213 }
214 }
215 }
216 let age = (chrono::Utc::now() - accessed)
217 .to_std()
218 .unwrap_or(Duration::from_secs(0));
219 age < self.max_ref_age
220 }
221
    /// Record that a dedup ref was sent for the given hash. Updates the
    /// persistent `accessed_at` timestamp so subsequent freshness checks
    /// see this send.
    ///
    /// SQLite errors are deliberately swallowed — losing a touch only
    /// means the next call may treat the ref as stale and re-send the
    /// full content, which is strictly worse on tokens but never wrong.
    fn record_ref_sent(&self, hash: &str) {
        let _ = self.store.touch_cache_entry(hash);
    }
230
231 /// Look up `content` in the cache with compaction awareness.
232 ///
233 /// - On exact dedup with fresh ref: return `CacheResult::Dedup` (~13 tokens).
234 /// - On exact dedup with stale ref: re-compress and return `CacheResult::Fresh`
235 /// (the original content may have been compacted out of the LLM's context).
236 /// - On near-duplicate: return `CacheResult::Delta` with a compact diff.
237 /// - On cache miss: compress via `pipeline`, persist, return `CacheResult::Fresh`.
238 pub fn get_or_compress(
239 &self,
240 _path: &Path,
241 content: &[u8],
242 pipeline: &CompressionPipeline,
243 ) -> Result<CacheResult> {
244 let hash = Self::sha256_hex(content);
245
246 // Exact match — check if the ref is still fresh
247 // Exact match — probe without touching accessed_at, then check
248 // freshness. Touching on the probe would make every ref appear
249 // fresh immediately (the timestamp we just wrote is `now`).
250 let exists = self.store.cache_entry_exists(&hash)?;
251 if exists {
252 if self.is_ref_fresh(&hash) {
253 // Ref is fresh — the LLM likely still has the original in context
254 let hash_prefix = &hash[..16];
255 let inline_ref = format!("§ref:{hash_prefix}§");
256 // Update the sent timestamp
257 self.record_ref_sent(&hash);
258 return Ok(CacheResult::Dedup {
259 inline_ref,
260 token_cost: 13,
261 });
262 } else {
263 // Ref is stale — re-send the full compressed content.
264 // The original may have been compacted out of the LLM's context.
265 let text = String::from_utf8_lossy(content).into_owned();
266 let ctx = SessionContext {
267 session_id: "cache".to_string(),
268 };
269 let preset = Preset::default();
270 let compressed = pipeline.compress(&text, &ctx, &preset)?;
271 // Record that we re-sent this content
272 self.record_ref_sent(&hash);
273 return Ok(CacheResult::Fresh { output: compressed });
274 }
275 }
276
277 // Near-duplicate check: compare against recent cache entries
278 let text = String::from_utf8_lossy(content).into_owned();
279 if let Some(delta_result) = self.try_delta_encode(&text)? {
280 // Store the new content in cache for future exact matches
281 let ctx = SessionContext {
282 session_id: "cache".to_string(),
283 };
284 let preset = Preset::default();
285 let compressed = pipeline.compress(&text, &ctx, &preset)?;
286 self.store.save_cache_entry(&hash, &compressed)?;
287 self.record_ref_sent(&hash);
288
289 let token_cost = (delta_result.delta_text.len() / 4) as u32;
290 return Ok(CacheResult::Delta {
291 delta_text: delta_result.delta_text,
292 token_cost: token_cost.max(5),
293 similarity: delta_result.similarity,
294 });
295 }
296
297 let ctx = SessionContext {
298 session_id: "cache".to_string(),
299 };
300 let preset = Preset::default();
301 let compressed = pipeline.compress(&text, &ctx, &preset)?;
302 self.store.save_cache_entry(&hash, &compressed)?;
303 // Record that this content was sent at the current turn
304 self.record_ref_sent(&hash);
305
306 Ok(CacheResult::Fresh { output: compressed })
307 }
308
309 /// Try to delta-encode content against recent cache entries.
310 /// Returns Some(DeltaResult) if a near-duplicate was found.
311 fn try_delta_encode(
312 &self,
313 new_content: &str,
314 ) -> Result<Option<crate::delta_encoder::DeltaResult>> {
315 let entries = self.store.list_cache_entries_lru()?;
316
317 // Check the most recent entries (up to 10) for near-duplicates
318 let check_count = entries.len().min(10);
319 for (hash, _) in entries.iter().rev().take(check_count) {
320 if let Some(cached) = self.store.get_cache_entry(hash)? {
321 let hash_prefix = &hash[..hash.len().min(16)];
322 if let Ok(Some(delta)) =
323 self.delta_encoder
324 .encode(&cached.data, new_content, hash_prefix)
325 {
326 // Only use delta if it's actually smaller than the full content
327 if delta.delta_text.len() < new_content.len() {
328 return Ok(Some(delta));
329 }
330 }
331 }
332 }
333
334 Ok(None)
335 }
336
337 /// Check if `content` is already in the persistent cache (dedup lookup only).
338 ///
339 /// Returns `Some(inline_ref)` if cached AND the ref is still fresh,
340 /// `None` if the content is not cached or the ref is stale.
341 ///
342 /// Unlike [`get_or_compress`], this method does not touch `accessed_at`
343 /// until after the freshness check — otherwise every read would make
344 /// itself "fresh."
345 pub fn check_dedup(&self, content: &[u8]) -> Result<Option<String>> {
346 let hash = Self::sha256_hex(content);
347 // Probe existence without touching accessed_at.
348 let fresh = self.is_ref_fresh(&hash);
349 if fresh {
350 let hash_prefix = &hash[..16];
351 self.record_ref_sent(&hash);
352 Ok(Some(format!("§ref:{hash_prefix}§")))
353 } else {
354 // If the entry exists but is stale, don't return a dangling ref.
355 // If it doesn't exist at all, same result: no dedup.
356 Ok(None)
357 }
358 }
359
360 /// Store a compressed result in the persistent cache, keyed by the
361 /// SHA-256 hash of the original content.
362 ///
363 /// Also records the ref as sent at the current turn for compaction tracking.
364 pub fn store_compressed(
365 &self,
366 original_content: &[u8],
367 compressed: &CompressedContent,
368 ) -> Result<()> {
369 let hash = Self::sha256_hex(original_content);
370 self.store.save_cache_entry(&hash, compressed)?;
371 self.record_ref_sent(&hash);
372 Ok(())
373 }
374
375 /// Invalidate the cache entry for `path` if its current content is known.
376 ///
377 /// Reads the file at `path`, computes its hash, and removes the matching
378 /// entry from the store. If the file does not exist the call is a no-op.
379 pub fn invalidate(&self, path: &Path) -> Result<()> {
380 if !path.exists() {
381 return Ok(());
382 }
383 let bytes = std::fs::read(path)?;
384 let hash = Self::sha256_hex(&bytes);
385 self.store.delete_cache_entry(&hash)?;
386 Ok(())
387 }
388
389 /// Evict least-recently-used entries until total cache size is at or below
390 /// `max_size_bytes`.
391 ///
392 /// Returns the number of bytes freed.
393 pub fn evict_lru(&self) -> Result<u64> {
394 let entries = self.store.list_cache_entries_lru()?;
395
396 // Compute current total size.
397 let total: u64 = entries.iter().map(|(_, sz)| sz).sum();
398 if total <= self.max_size_bytes {
399 return Ok(0);
400 }
401
402 let mut freed: u64 = 0;
403 let mut remaining = total;
404
405 for (hash, size) in &entries {
406 if remaining <= self.max_size_bytes {
407 break;
408 }
409 self.store.delete_cache_entry(hash)?;
410 freed += size;
411 remaining -= size;
412 }
413
414 Ok(freed)
415 }
416}
417
418// ── Tests ─────────────────────────────────────────────────────────────────────
419
420#[cfg(test)]
421mod tests {
422 use super::*;
423 use crate::preset::{
424 BudgetConfig, CollapseArraysConfig, CompressionConfig, CondenseConfig,
425 CustomTransformsConfig, ModelConfig, PresetMeta, StripNullsConfig, TerseModeConfig,
426 ToolSelectionConfig, TruncateStringsConfig,
427 };
428 use crate::session_store::SessionStore;
429
430 fn in_memory_store() -> (SessionStore, tempfile::TempDir) {
431 let dir = tempfile::tempdir().unwrap();
432 let path = dir.path().join("test.db");
433 let store = SessionStore::open_or_create(&path).unwrap();
434 (store, dir)
435 }
436
437 fn test_preset() -> Preset {
438 Preset {
439 preset: PresetMeta {
440 name: "test".into(),
441 version: "1.0".into(),
442 description: String::new(),
443 },
444 compression: CompressionConfig {
445 stages: vec![],
446 keep_fields: None,
447 strip_fields: None,
448 condense: Some(CondenseConfig {
449 enabled: true,
450 max_repeated_lines: 3,
451 }),
452 git_diff_fold: None,
453 strip_nulls: Some(StripNullsConfig { enabled: true }),
454 flatten: None,
455 truncate_strings: Some(TruncateStringsConfig {
456 enabled: true,
457 max_length: 500,
458 }),
459 collapse_arrays: Some(CollapseArraysConfig {
460 enabled: true,
461 max_items: 5,
462 summary_template: "... and {remaining} more items".into(),
463 }),
464 custom_transforms: Some(CustomTransformsConfig { enabled: true }),
465 },
466 tool_selection: ToolSelectionConfig {
467 max_tools: 5,
468 similarity_threshold: 0.7,
469 default_tools: vec![],
470 },
471 budget: BudgetConfig {
472 warning_threshold: 0.70,
473 ceiling_threshold: 0.85,
474 default_window_size: 200_000,
475 agents: Default::default(),
476 },
477 terse_mode: TerseModeConfig {
478 enabled: false,
479 level: crate::preset::TerseLevel::Moderate,
480 },
481 model: ModelConfig {
482 family: "anthropic".into(),
483 primary: "claude-sonnet-4-20250514".into(),
484 local: String::new(),
485 complexity_threshold: 0.4,
486 pricing: None,
487 },
488 }
489 }
490
491 fn make_pipeline() -> CompressionPipeline {
492 CompressionPipeline::new(&test_preset())
493 }
494
495 #[test]
496 fn first_read_is_miss() {
497 let (store, _dir) = in_memory_store();
498 let cm = CacheManager::new(store, u64::MAX);
499 let pipeline = make_pipeline();
500 let content = b"hello world";
501 let result = cm
502 .get_or_compress(Path::new("file.txt"), content, &pipeline)
503 .unwrap();
504 assert!(matches!(result, CacheResult::Fresh { .. }));
505 }
506
507 #[test]
508 fn second_read_is_hit() {
509 let (store, _dir) = in_memory_store();
510 let cm = CacheManager::new(store, u64::MAX);
511 let pipeline = make_pipeline();
512 let content = b"hello world";
513 let path = Path::new("file.txt");
514
515 // First read — miss
516 cm.get_or_compress(path, content, &pipeline).unwrap();
517
518 // Second read — hit
519 let result = cm.get_or_compress(path, content, &pipeline).unwrap();
520 match result {
521 CacheResult::Dedup {
522 inline_ref,
523 token_cost,
524 } => {
525 assert!(inline_ref.starts_with("§ref:"));
526 assert!(inline_ref.ends_with('§'));
527 assert_eq!(token_cost, 13);
528 }
529 CacheResult::Fresh { .. } | CacheResult::Delta { .. } => panic!("expected cache hit"),
530 }
531 }
532
533 #[test]
534 fn different_content_is_miss() {
535 let (store, _dir) = in_memory_store();
536 let cm = CacheManager::new(store, u64::MAX);
537 let pipeline = make_pipeline();
538 let path = Path::new("file.txt");
539
540 cm.get_or_compress(path, b"content v1", &pipeline).unwrap();
541 let result = cm
542 .get_or_compress(path, b"content v2", &pipeline)
543 .unwrap();
544 assert!(matches!(result, CacheResult::Fresh { .. } | CacheResult::Delta { .. }));
545 }
546
547 #[test]
548 fn evict_lru_frees_bytes_when_over_limit() {
549 let (store, _dir) = in_memory_store();
550 // Very small limit so eviction triggers immediately.
551 let cm = CacheManager::new(store, 1);
552 let pipeline = make_pipeline();
553 let path = Path::new("f.txt");
554
555 // Populate cache with a few entries.
556 cm.get_or_compress(path, b"entry one", &pipeline).unwrap();
557 cm.get_or_compress(path, b"entry two", &pipeline).unwrap();
558 cm.get_or_compress(path, b"entry three", &pipeline).unwrap();
559
560 let freed = cm.evict_lru().unwrap();
561 assert!(freed > 0, "expected bytes to be freed");
562 }
563
564 #[test]
565 fn evict_lru_no_op_when_under_limit() {
566 let (store, _dir) = in_memory_store();
567 let cm = CacheManager::new(store, u64::MAX);
568 let pipeline = make_pipeline();
569
570 cm.get_or_compress(Path::new("f.txt"), b"data", &pipeline)
571 .unwrap();
572
573 let freed = cm.evict_lru().unwrap();
574 assert_eq!(freed, 0);
575 }
576
577 #[test]
578 fn invalidate_removes_entry() {
579 let dir = tempfile::tempdir().unwrap();
580 let file_path = dir.path().join("test.txt");
581 std::fs::write(&file_path, b"some content").unwrap();
582
583 let store_path = dir.path().join("store.db");
584 let store = SessionStore::open_or_create(&store_path).unwrap();
585 let cm = CacheManager::new(store, u64::MAX);
586 let pipeline = make_pipeline();
587
588 // Populate cache.
589 let content = std::fs::read(&file_path).unwrap();
590 cm.get_or_compress(&file_path, &content, &pipeline).unwrap();
591
592 // Verify it's a hit.
593 let hit = cm
594 .get_or_compress(&file_path, &content, &pipeline)
595 .unwrap();
596 assert!(matches!(hit, CacheResult::Dedup { .. }));
597
598 cm.invalidate(&file_path).unwrap();
599
600 let miss = cm
601 .get_or_compress(&file_path, &content, &pipeline)
602 .unwrap();
603 assert!(matches!(miss, CacheResult::Fresh { .. }));
604 }
605
606 #[test]
607 fn invalidate_nonexistent_path_is_noop() {
608 let (store, _dir) = in_memory_store();
609 let cm = CacheManager::new(store, u64::MAX);
610 // Should not error.
611 cm.invalidate(Path::new("/nonexistent/path/file.txt"))
612 .unwrap();
613 }
614
615 // ── Compaction / freshness tests ──────────────────────────────────────
616 //
617 // These tests used to exercise an in-memory turn counter. Freshness is
618 // now computed from SQLite `accessed_at` timestamps so dedup works
619 // across the shell-hook model (each hook invocation is a fresh
620 // process). The tests below use wall-clock durations instead.
621
622 #[test]
623 fn stale_ref_returns_fresh_instead_of_dedup() {
624 let (store, _dir) = in_memory_store();
625 // Set max_ref_age to 0 — every ref goes stale immediately.
626 let cm = CacheManager::with_ref_age_duration(store, u64::MAX, Duration::ZERO);
627 let pipeline = make_pipeline();
628 let content = b"hello world";
629 let path = Path::new("file.txt");
630
631 // First read — miss. accessed_at recorded.
632 cm.get_or_compress(path, content, &pipeline).unwrap();
633
634 // Second read — with TTL=0 the ref is already stale, should re-send.
635 let result = cm.get_or_compress(path, content, &pipeline).unwrap();
636 assert!(
637 matches!(result, CacheResult::Fresh { .. }),
638 "stale ref (TTL=0) should return Fresh, not Dedup"
639 );
640 }
641
642 #[test]
643 fn fresh_ref_returns_dedup() {
644 let (store, _dir) = in_memory_store();
645 // Generous TTL: one day. Refs stay fresh for the life of the test.
646 let cm = CacheManager::with_ref_age_duration(
647 store,
648 u64::MAX,
649 Duration::from_secs(86_400),
650 );
651 let pipeline = make_pipeline();
652 let content = b"hello world";
653 let path = Path::new("file.txt");
654
655 cm.get_or_compress(path, content, &pipeline).unwrap();
656 let result = cm.get_or_compress(path, content, &pipeline).unwrap();
657 assert!(
658 matches!(result, CacheResult::Dedup { .. }),
659 "fresh ref should dedup"
660 );
661 }
662
663 #[test]
664 fn notify_compaction_invalidates_all_refs() {
665 let (store, _dir) = in_memory_store();
666 let cm = CacheManager::with_ref_age_duration(
667 store,
668 u64::MAX,
669 Duration::from_secs(86_400),
670 );
671 let pipeline = make_pipeline();
672 let path = Path::new("file.txt");
673
674 // Populate cache — every subsequent read is a dedup hit.
675 cm.get_or_compress(path, b"content A", &pipeline).unwrap();
676 cm.get_or_compress(path, b"content B", &pipeline).unwrap();
677 assert!(matches!(
678 cm.get_or_compress(path, b"content A", &pipeline).unwrap(),
679 CacheResult::Dedup { .. }
680 ));
681 assert!(matches!(
682 cm.get_or_compress(path, b"content B", &pipeline).unwrap(),
683 CacheResult::Dedup { .. }
684 ));
685
686 // Simulate a context compaction. The compaction marker is set to
687 // `now`; any cache entry whose accessed_at predates this moment is
688 // treated as stale even though the TTL hasn't expired.
689 // Sleep 10ms to ensure `now` is strictly after the last touch.
690 std::thread::sleep(std::time::Duration::from_millis(10));
691 cm.notify_compaction();
692
693 // After compaction, refs predate the marker — re-send full content.
694 assert!(matches!(
695 cm.get_or_compress(path, b"content A", &pipeline).unwrap(),
696 CacheResult::Fresh { .. }
697 ));
698 assert!(matches!(
699 cm.get_or_compress(path, b"content B", &pipeline).unwrap(),
700 CacheResult::Fresh { .. }
701 ));
702 }
703
704 #[test]
705 fn ref_refreshed_after_resend() {
706 let (store, _dir) = in_memory_store();
707 // TTL of 10ms: a fresh send bumps accessed_at, so immediately after
708 // the re-send the ref is fresh again.
709 let cm = CacheManager::with_ref_age_duration(
710 store,
711 u64::MAX,
712 Duration::from_millis(10),
713 );
714 let pipeline = make_pipeline();
715 let content = b"hello world";
716 let path = Path::new("file.txt");
717
718 cm.get_or_compress(path, content, &pipeline).unwrap();
719 // Wait past the TTL so the entry is stale.
720 std::thread::sleep(std::time::Duration::from_millis(25));
721
722 // Stale — must re-send Fresh. The re-send bumps accessed_at.
723 let result = cm.get_or_compress(path, content, &pipeline).unwrap();
724 assert!(matches!(result, CacheResult::Fresh { .. }));
725
726 // Immediately read again — the freshly-updated accessed_at is
727 // within the 10ms TTL, so the ref is fresh.
728 let result = cm.get_or_compress(path, content, &pipeline).unwrap();
729 assert!(
730 matches!(result, CacheResult::Dedup { .. }),
731 "ref should be fresh after re-send"
732 );
733 }
734
735 #[test]
736 fn check_dedup_returns_none_for_stale_ref() {
737 let (store, _dir) = in_memory_store();
738 let cm = CacheManager::with_ref_age_duration(
739 store,
740 u64::MAX,
741 Duration::from_millis(10),
742 );
743 let pipeline = make_pipeline();
744 let content = b"test content";
745 let path = Path::new("file.txt");
746
747 cm.get_or_compress(path, content, &pipeline).unwrap();
748
749 // Immediately fresh.
750 assert!(cm.check_dedup(content).unwrap().is_some());
751
752 // Wait past TTL.
753 std::thread::sleep(std::time::Duration::from_millis(25));
754 assert!(
755 cm.check_dedup(content).unwrap().is_none(),
756 "stale ref should not be returned by check_dedup"
757 );
758 }
759
760 #[test]
761 fn advance_turn_increments_counter() {
762 // The counter is retained for context_evictor compatibility.
763 let (store, _dir) = in_memory_store();
764 let cm = CacheManager::new(store, u64::MAX);
765 assert_eq!(cm.current_turn(), 0);
766 cm.advance_turn();
767 assert_eq!(cm.current_turn(), 1);
768 cm.advance_turn();
769 assert_eq!(cm.current_turn(), 2);
770 }
771
772 #[test]
773 fn dedup_survives_cache_manager_restart() {
774 // Regression for the April 18 bug: the turn counter was in-memory
775 // only, so every new sqz process saw an empty ref tracker and the
776 // dedup feature silently produced Fresh results forever. With
777 // accessed_at-based freshness, a fresh CacheManager reading the
778 // same SQLite store picks up the dedup correctly.
779 let dir = tempfile::tempdir().unwrap();
780 let db_path = dir.path().join("cache.db");
781 let pipeline = make_pipeline();
782 let content = b"a substantial chunk of content to dedup";
783 let path = Path::new("x.txt");
784
785 // First "process": populate cache.
786 {
787 let store = SessionStore::open_or_create(&db_path).unwrap();
788 let cm = CacheManager::with_ref_age_duration(
789 store,
790 u64::MAX,
791 Duration::from_secs(3600),
792 );
793 let first = cm.get_or_compress(path, content, &pipeline).unwrap();
794 assert!(matches!(first, CacheResult::Fresh { .. }));
795 }
796
797 // Second "process": new CacheManager, same DB. Dedup must fire.
798 {
799 let store = SessionStore::open_or_create(&db_path).unwrap();
800 let cm = CacheManager::with_ref_age_duration(
801 store,
802 u64::MAX,
803 Duration::from_secs(3600),
804 );
805 let second = cm.get_or_compress(path, content, &pipeline).unwrap();
806 assert!(
807 matches!(second, CacheResult::Dedup { .. }),
808 "second-process read must dedup — this was broken before the April 18 fix"
809 );
810 }
811 }
812
813 #[test]
814 fn compaction_from_one_process_invalidates_refs_in_another() {
815 // Regression for the PreCompact hook wiring: the host harness
816 // (e.g. Claude Code) runs `sqz hook precompact` in a short-lived
817 // process to signal auto-compaction. The actual dedup serving runs
818 // in a DIFFERENT sqz process (the shell hook). notify_compaction
819 // must persist through SQLite so the second process sees it.
820 //
821 // Before the fix, compaction_marker was Cell<Option<DateTime>>
822 // in memory only — the precompact process set it, exited, the
823 // state was lost. Next shell-hook process started with a clean
824 // marker, served stale refs to the agent, and the agent saw a
825 // §ref:HASH§ pointing at content no longer in its context.
826 let dir = tempfile::tempdir().unwrap();
827 let db_path = dir.path().join("cache.db");
828 let pipeline = make_pipeline();
829 let content = b"content that needs stale-marking after compaction";
830 let path = Path::new("file.txt");
831 let ttl = Duration::from_secs(3600);
832
833 // Process A: populate the cache so the content is dedup-eligible.
834 {
835 let store = SessionStore::open_or_create(&db_path).unwrap();
836 let cm = CacheManager::with_ref_age_duration(store, u64::MAX, ttl);
837 cm.get_or_compress(path, content, &pipeline).unwrap();
838 }
839 // Sleep so the compaction marker is strictly after the touch.
840 std::thread::sleep(Duration::from_millis(10));
841
842 // Process B: simulates `sqz hook precompact`. Just calls
843 // notify_compaction and exits. No reads.
844 {
845 let store = SessionStore::open_or_create(&db_path).unwrap();
846 let cm = CacheManager::with_ref_age_duration(store, u64::MAX, ttl);
847 cm.notify_compaction();
848 }
849
850 // Process C: simulates the next `sqz compress` shell-hook call.
851 // Reads the same content. MUST re-send Fresh, not return a ref
852 // the agent can no longer resolve.
853 {
854 let store = SessionStore::open_or_create(&db_path).unwrap();
855 let cm = CacheManager::with_ref_age_duration(store, u64::MAX, ttl);
856 let result = cm.get_or_compress(path, content, &pipeline).unwrap();
857 assert!(
858 matches!(result, CacheResult::Fresh { .. }),
859 "post-compaction read from a fresh process must re-send Fresh; \
860 returning Dedup would be a dangling-ref bug"
861 );
862 }
863 }
864
865 use proptest::prelude::*;
866
867 // ── Property 8: Cache deduplication ──────────────────────────────────────
868 // **Validates: Requirements 8.1, 8.2, 18.1, 18.2**
869 //
870 // For any file content, reading the file twice through the CacheManager
871 // (with no content change between reads) SHALL return a cache hit on the
872 // second read with a reference token of approximately 13 tokens.
873
874 proptest! {
875 /// **Validates: Requirements 8.1, 8.2, 18.1, 18.2**
876 ///
877 /// For any file content, the second read through CacheManager SHALL be
878 /// a cache hit with tokens == 13.
879 #[test]
880 fn prop_cache_deduplication(
881 content in proptest::collection::vec(any::<u8>(), 1..=1000usize),
882 ) {
883 let (store, _dir) = in_memory_store();
884 let cm = CacheManager::new(store, u64::MAX);
885 let pipeline = make_pipeline();
886 let path = Path::new("file.txt");
887
888 // First read — must be a miss.
889 let first = cm.get_or_compress(path, &content, &pipeline).unwrap();
890 prop_assert!(
891 matches!(first, CacheResult::Fresh { .. }),
892 "first read should be a cache miss"
893 );
894
895 let second = cm.get_or_compress(path, &content, &pipeline).unwrap();
896 match second {
897 CacheResult::Dedup { inline_ref, token_cost } => {
898 prop_assert_eq!(
899 token_cost, 13,
900 "cache hit should report ~13 reference tokens"
901 );
902 prop_assert!(
903 inline_ref.starts_with("§ref:"),
904 "reference token should start with §ref:"
905 );
906 prop_assert!(
907 inline_ref.ends_with('§'),
908 "reference token should end with §"
909 );
910 }
911 CacheResult::Fresh { .. } | CacheResult::Delta { .. } => {
912 prop_assert!(false, "second read should be a cache hit, not a miss");
913 }
914 }
915 }
916 }
917
918 // ── Property 9: Cache invalidation on content change ─────────────────────
919 // **Validates: Requirements 8.3, 18.3**
920 //
921 // For any cached file, if the file content changes (producing a different
922 // SHA-256 hash), the CacheManager SHALL treat the next read as a cache miss
923 // and re-compress the updated content.
924
925 proptest! {
926 /// **Validates: Requirements 8.3, 18.3**
927 ///
928 /// For any two distinct byte sequences, the first read of each is a
929 /// cache miss — content change always triggers re-compression.
930 #[test]
931 fn prop_cache_invalidation_on_content_change(
932 content_a in proptest::collection::vec(any::<u8>(), 1..=500usize),
933 content_b in proptest::collection::vec(any::<u8>(), 1..=500usize),
934 ) {
935 // Only meaningful when the two contents differ (different hashes).
936 prop_assume!(content_a != content_b);
937
938 let (store, _dir) = in_memory_store();
939 let cm = CacheManager::new(store, u64::MAX);
940 let pipeline = make_pipeline();
941 let path = Path::new("file.txt");
942
943 // Cache content_a.
944 let r1 = cm.get_or_compress(path, &content_a, &pipeline).unwrap();
945 prop_assert!(
946 matches!(r1, CacheResult::Fresh { .. }),
947 "first read of content_a should be a miss"
948 );
949
950 let r2 = cm.get_or_compress(path, &content_a, &pipeline).unwrap();
951 prop_assert!(
952 matches!(r2, CacheResult::Dedup { .. }),
953 "second read of content_a should be a hit"
954 );
955
956 let r3 = cm.get_or_compress(path, &content_b, &pipeline).unwrap();
957 prop_assert!(
958 matches!(r3, CacheResult::Fresh { .. } | CacheResult::Delta { .. }),
959 "read with changed content should be a cache miss or delta"
960 );
961 }
962 }
963
964 // ── Property 10: Cache LRU eviction ──────────────────────────────────────
965 // **Validates: Requirements 8.5**
966 //
967 // For any cache state where total size exceeds the configured maximum, the
968 // CacheManager SHALL evict entries in LRU order until total size is at or
969 // below the limit.
970
971 proptest! {
972 /// **Validates: Requirements 8.5**
973 ///
974 /// After evict_lru, the total remaining cache size SHALL be at or below
975 /// max_size_bytes.
976 #[test]
977 fn prop_cache_lru_eviction(
978 // Generate 2-8 distinct content entries.
979 entries in proptest::collection::vec(
980 proptest::collection::vec(any::<u8>(), 10..=200usize),
981 2..=8usize,
982 ),
983 ) {
984 // Deduplicate entries so each has a unique hash.
985 let mut unique_entries: Vec<Vec<u8>> = Vec::new();
986 for e in &entries {
987 if !unique_entries.contains(e) {
988 unique_entries.push(e.clone());
989 }
990 }
991 prop_assume!(unique_entries.len() >= 2);
992
993 let (store, _dir) = in_memory_store();
994 // Use a very small limit (1 byte) to guarantee eviction is needed.
995 let cm = CacheManager::new(store, 1);
996 let pipeline = make_pipeline();
997 let path = Path::new("f.txt");
998
999 // Populate the cache.
1000 for entry in &unique_entries {
1001 cm.get_or_compress(path, entry, &pipeline).unwrap();
1002 }
1003
1004 // Evict LRU entries.
1005 let freed = cm.evict_lru().unwrap();
1006
1007 // Bytes freed must be > 0 since total > 1 byte.
1008 prop_assert!(freed > 0, "evict_lru should free bytes when over limit");
1009
1010 // After eviction, total remaining size must be <= max_size_bytes (1).
1011 // We verify by checking that evict_lru now returns 0 (nothing left to evict).
1012 let freed_again = cm.evict_lru().unwrap();
1013 prop_assert_eq!(
1014 freed_again, 0,
1015 "second evict_lru call should free 0 bytes (already at or below limit)"
1016 );
1017 }
1018 }
1019
1020 // ── Property 34: Cache persistence across sessions ────────────────────────
1021 // **Validates: Requirements 18.4**
1022 //
1023 // For any set of cache entries saved to the SessionStore, reloading the
1024 // store (opening the same database file) SHALL produce the same cache
1025 // entries, and a subsequent read with the same content hash SHALL return a
1026 // cache hit.
1027
1028 proptest! {
1029 /// **Validates: Requirements 18.4**
1030 ///
1031 /// Cache entries written in one CacheManager instance SHALL survive
1032 /// a store close/reopen. With the wall-clock freshness model
1033 /// (introduced April 18 2026), a subsequent CacheManager reading
1034 /// the same database SHALL see the entry as fresh (within TTL) and
1035 /// return a Dedup hit on the very first read — this is the whole
1036 /// point of the cross-process fix. Previous behavior (Fresh on
1037 /// first read after restart) was a bug that silently disabled the
1038 /// dedup feature in production.
1039 #[test]
1040 fn prop_cache_persistence_across_sessions(
1041 content in proptest::collection::vec(any::<u8>(), 1..=500usize),
1042 ) {
1043 use crate::session_store::SessionStore;
1044
1045 let dir = tempfile::tempdir().unwrap();
1046 let db_path = dir.path().join("cache.db");
1047 let path = Path::new("file.txt");
1048
1049 // Session 1: populate the cache.
1050 {
1051 let store = SessionStore::open_or_create(&db_path).unwrap();
1052 // Explicit long TTL so tests don't race with wall-clock drift.
1053 let cm = CacheManager::with_ref_age_duration(
1054 store,
1055 u64::MAX,
1056 Duration::from_secs(3600),
1057 );
1058 let pipeline = make_pipeline();
1059
1060 let r = cm.get_or_compress(path, &content, &pipeline).unwrap();
1061 prop_assert!(
1062 matches!(r, CacheResult::Fresh { .. }),
1063 "first-ever read should be a miss"
1064 );
1065 }
1066
1067 // Session 2: reopen the same database file.
1068 {
1069 let store = SessionStore::open_or_create(&db_path).unwrap();
1070 let cm = CacheManager::with_ref_age_duration(
1071 store,
1072 u64::MAX,
1073 Duration::from_secs(3600),
1074 );
1075 let pipeline = make_pipeline();
1076
1077 // First read in the new session MUST dedup. The entry was
1078 // just written (within TTL), so the wall-clock freshness
1079 // check finds it fresh. This is what makes sqz's dedup
1080 // actually work across shell-hook invocations.
1081 let r = cm.get_or_compress(path, &content, &pipeline).unwrap();
1082 match r {
1083 CacheResult::Dedup { token_cost, .. } => {
1084 prop_assert_eq!(
1085 token_cost, 13,
1086 "first read after restart must be a 13-token dedup ref"
1087 );
1088 }
1089 CacheResult::Fresh { .. } | CacheResult::Delta { .. } => {
1090 prop_assert!(
1091 false,
1092 "first read after restart must dedup — this was the \
1093 April 18 bug and its fix is the whole reason this \
1094 test exists"
1095 );
1096 }
1097 }
1098 }
1099 }
1100 }
1101}