sqz_engine/cache_manager.rs
1use std::collections::HashMap;
2use std::path::Path;
3use std::time::Duration;
4
5use sha2::{Digest, Sha256};
6
7use crate::delta_encoder::DeltaEncoder;
8use crate::error::Result;
9use crate::pipeline::{CompressionPipeline, SessionContext};
10use crate::preset::Preset;
11use crate::session_store::SessionStore;
12use crate::types::CompressedContent;
13
/// Outcome of a cache lookup in [`CacheManager`].
///
/// The cache has three possible outcomes:
/// - **Dedup**: exact match, returns a tiny `§ref:HASH§` token (~13 tokens)
/// - **Delta**: near-duplicate, returns a compact diff against the cached version
/// - **Fresh**: cache miss, returns the full compressed output
pub enum CacheResult {
    /// Previously seen content — returns a short inline reference (~13 tokens).
    Dedup {
        /// Inline token of the form `§ref:<hash_prefix>§` (16-hex-char prefix).
        inline_ref: String,
        /// Approximate token cost of the reference (always 13).
        token_cost: u32,
    },
    /// Near-duplicate of cached content — returns a compact delta.
    Delta {
        /// The delta text (header + changed lines).
        delta_text: String,
        /// Approximate token cost of the delta (~len/4, floored at 5).
        token_cost: u32,
        /// Similarity to the cached version (0.0–1.0).
        similarity: f64,
    },
    /// Content not seen before — full compression result.
    Fresh {
        /// The freshly compressed output produced by the pipeline.
        output: CompressedContent,
    },
}
40
/// Tracks when a dedup ref was last sent, so we can detect staleness.
///
/// Historically used for an in-memory per-process turn counter; now kept
/// only for interface compatibility (cleared on `notify_compaction`). Actual
/// staleness is computed from SQLite `accessed_at` timestamps so it works
/// across the shell-hook invocation model where each sqz process is short-
/// lived. See the comment on `is_ref_fresh` for details.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct RefEntry {
    /// The turn number when this ref was last sent to the LLM.
    /// No longer consulted for freshness decisions.
    last_sent_turn: u64,
}
54
/// SHA-256 content-hash deduplication cache backed by [`SessionStore`],
/// with delta encoding for near-duplicate content and compaction awareness.
///
/// # Freshness model
///
/// A dedup ref is considered fresh (safe to serve instead of the full
/// content) when the cache entry's `accessed_at` timestamp in SQLite is
/// within `max_ref_age` of now. When sqz is invoked from shell hooks each
/// invocation is a short-lived process, so the freshness check must be
/// persistent — in-memory state is gone the moment the process exits.
///
/// The previous turn-counter heuristic was in-memory only and therefore
/// never registered freshness across hook invocations, which silently
/// disabled the dedup feature in production. Issue found April 18 2026.
///
/// Default TTL: 30 minutes. Empirically matches a typical active coding
/// session before a context compaction. Use [`with_ref_age`] to tune.
pub struct CacheManager {
    /// Persistent SQLite-backed store for cache entries and metadata.
    store: SessionStore,
    /// Total cache-size budget; `evict_lru` trims down to this bound.
    max_size_bytes: u64,
    /// Encoder used to diff near-duplicate content against cached entries.
    delta_encoder: DeltaEncoder,
    /// Retained for notify_compaction's semantic ("forget all tracked refs"),
    /// but no longer consulted for freshness checks.
    #[allow(dead_code)]
    turn_counter: std::cell::Cell<u64>,
    /// Retained for notify_compaction; cleared on compaction events.
    #[allow(dead_code)]
    ref_tracker: std::cell::RefCell<HashMap<String, RefEntry>>,
    /// Maximum wall-clock age before a dedup ref is considered stale.
    /// After this duration we assume the LLM's context window has rolled
    /// over enough to have dropped the original content, so we re-send the
    /// full version instead of a dangling ref.
    max_ref_age: Duration,
    /// Records the instant at which the in-memory compaction flag was set.
    /// Any cache entry whose `accessed_at` predates this instant is stale.
    /// Reset by [`notify_compaction`].
    compaction_marker: std::cell::Cell<Option<chrono::DateTime<chrono::Utc>>>,
}
93
94impl CacheManager {
95 /// Create a new cache manager backed by the given session store.
96 ///
97 /// `max_size_bytes` controls when LRU eviction kicks in. A good default
98 /// is 512 MB (`512 * 1024 * 1024`). Dedup refs go stale after 30 minutes
99 /// of wall-clock time by default — use [`with_ref_age`] to tune.
100 pub fn new(store: SessionStore, max_size_bytes: u64) -> Self {
101 Self::with_ref_age_duration(store, max_size_bytes, Duration::from_secs(30 * 60))
102 }
103
104 /// Create a CacheManager with a custom ref staleness threshold measured
105 /// in turns. The turn count is converted to wall-clock time by assuming
106 /// ~1 minute per turn (a rough approximation; the real freshness check
107 /// uses SQLite timestamps). This constructor exists for backward
108 /// compatibility with tests that previously advanced a turn counter.
109 #[doc(hidden)]
110 pub fn with_ref_age(store: SessionStore, max_size_bytes: u64, max_ref_age_turns: u64) -> Self {
111 Self::with_ref_age_duration(
112 store,
113 max_size_bytes,
114 Duration::from_secs(max_ref_age_turns.saturating_mul(60)),
115 )
116 }
117
118 /// Create a CacheManager with an explicit wall-clock ref-age cap.
119 pub fn with_ref_age_duration(
120 store: SessionStore,
121 max_size_bytes: u64,
122 max_ref_age: Duration,
123 ) -> Self {
124 Self {
125 store,
126 max_size_bytes,
127 delta_encoder: DeltaEncoder::new(),
128 turn_counter: std::cell::Cell::new(0),
129 ref_tracker: std::cell::RefCell::new(HashMap::new()),
130 max_ref_age,
131 compaction_marker: std::cell::Cell::new(None),
132 }
133 }
134
135 /// Compute the SHA-256 hex digest of `bytes`.
136 fn sha256_hex(bytes: &[u8]) -> String {
137 let mut hasher = Sha256::new();
138 hasher.update(bytes);
139 format!("{:x}", hasher.finalize())
140 }
141
142 /// Advance the turn counter. Retained for API compatibility; not used
143 /// for freshness. The context_evictor still reads `current_turn` for
144 /// LRU scoring during `sqz compact`.
145 pub fn advance_turn(&self) {
146 self.turn_counter.set(self.turn_counter.get() + 1);
147 }
148
    /// Get the current in-process turn number.
    ///
    /// Not used for dedup freshness (that is timestamp-based); the
    /// context_evictor still reads this for LRU scoring.
    pub fn current_turn(&self) -> u64 {
        self.turn_counter.get()
    }
153
154 /// Notify the cache that a context compaction has occurred.
155 ///
156 /// Persists a compaction timestamp into the session store so any cache
157 /// entry whose `accessed_at` predates the marker is considered stale
158 /// by **every subsequent sqz process**, not just this one. The shell-
159 /// hook invocation model means this method is typically called from a
160 /// short-lived `sqz hook precompact` process, and the check runs in a
161 /// different `sqz compress` process milliseconds later.
162 ///
163 /// Call this when:
164 /// - The harness signals a compaction event (PreCompact hook)
165 /// - A session is resumed after being idle
166 /// - The user runs `sqz compact`
167 pub fn notify_compaction(&self) {
168 let now = chrono::Utc::now();
169 self.compaction_marker.set(Some(now));
170 self.ref_tracker.borrow_mut().clear();
171 // Persist the marker so other sqz processes see the invalidation.
172 // Silently swallow a write error: losing the marker means some
173 // refs may survive the compaction and show as dedup hits in the
174 // next few calls — annoying, not wrong (the agent still receives
175 // valid content; it just sees a short-ref it has to resolve).
176 let _ = self
177 .store
178 .set_metadata("last_compaction_at", &now.to_rfc3339());
179 }
180
181 /// Check if a dedup ref for the given hash is still fresh (likely still
182 /// in the LLM's context window).
183 ///
184 /// Uses the SQLite `accessed_at` timestamp rather than the in-memory
185 /// turn counter. This works across sqz process invocations: shell hooks
186 /// spawn a new sqz process per intercepted command, so any in-memory
187 /// counter would reset every time. The database survives.
188 ///
189 /// The compaction marker is read from SQLite on every check so that
190 /// a `sqz hook precompact` call from another process immediately
191 /// invalidates refs in the current process. Without the persistent
192 /// read, the invalidation would only affect the process that called
193 /// notify_compaction — which is never the same process that serves
194 /// dedup hits.
195 fn is_ref_fresh(&self, hash: &str) -> bool {
196 let accessed = match self.store.get_cache_entry_accessed_at(hash) {
197 Ok(Some(ts)) => ts,
198 _ => return false,
199 };
200 // In-memory compaction marker (set in this process).
201 if let Some(marker) = self.compaction_marker.get() {
202 if accessed < marker {
203 return false;
204 }
205 }
206 // Persistent compaction marker — set by `sqz hook precompact` in
207 // a different process. Without this read the in-memory marker is
208 // never consulted because each hook invocation is a fresh process.
209 if let Ok(Some(raw)) = self.store.get_metadata("last_compaction_at") {
210 if let Ok(marker) = raw.parse::<chrono::DateTime<chrono::Utc>>() {
211 if accessed < marker {
212 return false;
213 }
214 }
215 }
216 let age = (chrono::Utc::now() - accessed)
217 .to_std()
218 .unwrap_or(Duration::from_secs(0));
219 age < self.max_ref_age
220 }
221
    /// Record that a dedup ref was sent for the given hash. Updates the
    /// persistent `accessed_at` timestamp so subsequent freshness checks
    /// see this send.
    ///
    /// SQLite errors are deliberately swallowed — losing a touch only
    /// means the next call may treat the ref as stale and re-send the
    /// full content, which is strictly worse on tokens but never wrong.
    fn record_ref_sent(&self, hash: &str) {
        let _ = self.store.touch_cache_entry(hash);
    }
230
231 /// Look up `content` in the cache with compaction awareness.
232 ///
233 /// - On exact dedup with fresh ref: return `CacheResult::Dedup` (~13 tokens).
234 /// - On exact dedup with stale ref: re-compress and return `CacheResult::Fresh`
235 /// (the original content may have been compacted out of the LLM's context).
236 /// - On near-duplicate: return `CacheResult::Delta` with a compact diff.
237 /// - On cache miss: compress via `pipeline`, persist, return `CacheResult::Fresh`.
238 pub fn get_or_compress(
239 &self,
240 _path: &Path,
241 content: &[u8],
242 pipeline: &CompressionPipeline,
243 ) -> Result<CacheResult> {
244 let hash = Self::sha256_hex(content);
245
246 // Exact match — check if the ref is still fresh
247 // Exact match — probe without touching accessed_at, then check
248 // freshness. Touching on the probe would make every ref appear
249 // fresh immediately (the timestamp we just wrote is `now`).
250 let exists = self.store.cache_entry_exists(&hash)?;
251 if exists {
252 if self.is_ref_fresh(&hash) {
253 // Ref is fresh — the LLM likely still has the original in context
254 let hash_prefix = &hash[..16];
255 let inline_ref = format!("§ref:{hash_prefix}§");
256 // Update the sent timestamp
257 self.record_ref_sent(&hash);
258 return Ok(CacheResult::Dedup {
259 inline_ref,
260 token_cost: 13,
261 });
262 } else {
263 // Ref is stale — re-send the full compressed content.
264 // The original may have been compacted out of the LLM's context.
265 let text = String::from_utf8_lossy(content).into_owned();
266 let ctx = SessionContext {
267 session_id: "cache".to_string(),
268 };
269 let preset = Preset::default();
270 let compressed = pipeline.compress(&text, &ctx, &preset)?;
271 // Record that we re-sent this content
272 self.record_ref_sent(&hash);
273 return Ok(CacheResult::Fresh { output: compressed });
274 }
275 }
276
277 // Near-duplicate check: compare against recent cache entries
278 let text = String::from_utf8_lossy(content).into_owned();
279 if let Some(delta_result) = self.try_delta_encode(&text)? {
280 // Store the new content in cache for future exact matches
281 let ctx = SessionContext {
282 session_id: "cache".to_string(),
283 };
284 let preset = Preset::default();
285 let compressed = pipeline.compress(&text, &ctx, &preset)?;
286 self.store.save_cache_entry(&hash, &compressed)?;
287 self.record_ref_sent(&hash);
288
289 let token_cost = (delta_result.delta_text.len() / 4) as u32;
290 return Ok(CacheResult::Delta {
291 delta_text: delta_result.delta_text,
292 token_cost: token_cost.max(5),
293 similarity: delta_result.similarity,
294 });
295 }
296
297 let ctx = SessionContext {
298 session_id: "cache".to_string(),
299 };
300 let preset = Preset::default();
301 let compressed = pipeline.compress(&text, &ctx, &preset)?;
302 self.store.save_cache_entry(&hash, &compressed)?;
303 // Record that this content was sent at the current turn
304 self.record_ref_sent(&hash);
305
306 Ok(CacheResult::Fresh { output: compressed })
307 }
308
309 /// Try to delta-encode content against recent cache entries.
310 /// Returns Some(DeltaResult) if a near-duplicate was found.
311 fn try_delta_encode(
312 &self,
313 new_content: &str,
314 ) -> Result<Option<crate::delta_encoder::DeltaResult>> {
315 let entries = self.store.list_cache_entries_lru()?;
316
317 // Check the most recent entries (up to 10) for near-duplicates
318 let check_count = entries.len().min(10);
319 for (hash, _) in entries.iter().rev().take(check_count) {
320 if let Some(cached) = self.store.get_cache_entry(hash)? {
321 let hash_prefix = &hash[..hash.len().min(16)];
322 if let Ok(Some(delta)) =
323 self.delta_encoder
324 .encode(&cached.data, new_content, hash_prefix)
325 {
326 // Only use delta if it's actually smaller than the full content
327 if delta.delta_text.len() < new_content.len() {
328 return Ok(Some(delta));
329 }
330 }
331 }
332 }
333
334 Ok(None)
335 }
336
337 /// Check if `content` is already in the persistent cache (dedup lookup only).
338 ///
339 /// Returns `Some(inline_ref)` if cached AND the ref is still fresh,
340 /// `None` if the content is not cached or the ref is stale.
341 ///
342 /// Unlike [`get_or_compress`], this method does not touch `accessed_at`
343 /// until after the freshness check — otherwise every read would make
344 /// itself "fresh."
345 pub fn check_dedup(&self, content: &[u8]) -> Result<Option<String>> {
346 let hash = Self::sha256_hex(content);
347 // Probe existence without touching accessed_at.
348 let fresh = self.is_ref_fresh(&hash);
349 if fresh {
350 let hash_prefix = &hash[..16];
351 self.record_ref_sent(&hash);
352 Ok(Some(format!("§ref:{hash_prefix}§")))
353 } else {
354 // If the entry exists but is stale, don't return a dangling ref.
355 // If it doesn't exist at all, same result: no dedup.
356 Ok(None)
357 }
358 }
359
360 /// Store a compressed result in the persistent cache, keyed by the
361 /// SHA-256 hash of the original content.
362 ///
363 /// Also records the ref as sent at the current turn for compaction tracking.
364 pub fn store_compressed(
365 &self,
366 original_content: &[u8],
367 compressed: &CompressedContent,
368 ) -> Result<()> {
369 let hash = Self::sha256_hex(original_content);
370 self.store.save_cache_entry(&hash, compressed)?;
371 self.record_ref_sent(&hash);
372 Ok(())
373 }
374
375 /// Invalidate the cache entry for `path` if its current content is known.
376 ///
377 /// Reads the file at `path`, computes its hash, and removes the matching
378 /// entry from the store. If the file does not exist the call is a no-op.
379 pub fn invalidate(&self, path: &Path) -> Result<()> {
380 if !path.exists() {
381 return Ok(());
382 }
383 let bytes = std::fs::read(path)?;
384 let hash = Self::sha256_hex(&bytes);
385 self.store.delete_cache_entry(&hash)?;
386 Ok(())
387 }
388
389 /// Evict least-recently-used entries until total cache size is at or below
390 /// `max_size_bytes`.
391 ///
392 /// Returns the number of bytes freed.
393 pub fn evict_lru(&self) -> Result<u64> {
394 let entries = self.store.list_cache_entries_lru()?;
395
396 // Compute current total size.
397 let total: u64 = entries.iter().map(|(_, sz)| sz).sum();
398 if total <= self.max_size_bytes {
399 return Ok(0);
400 }
401
402 let mut freed: u64 = 0;
403 let mut remaining = total;
404
405 for (hash, size) in &entries {
406 if remaining <= self.max_size_bytes {
407 break;
408 }
409 self.store.delete_cache_entry(hash)?;
410 freed += size;
411 remaining -= size;
412 }
413
414 Ok(freed)
415 }
416}
417
418// ── Tests ─────────────────────────────────────────────────────────────────────
419
420#[cfg(test)]
421mod tests {
422 use super::*;
423 use crate::preset::{
424 BudgetConfig, CollapseArraysConfig, CompressionConfig, CondenseConfig,
425 CustomTransformsConfig, ModelConfig, PresetMeta, StripNullsConfig, TerseModeConfig,
426 ToolSelectionConfig, TruncateStringsConfig,
427 };
428 use crate::session_store::SessionStore;
429
430 fn in_memory_store() -> (SessionStore, tempfile::TempDir) {
431 let dir = tempfile::tempdir().unwrap();
432 let path = dir.path().join("test.db");
433 let store = SessionStore::open_or_create(&path).unwrap();
434 (store, dir)
435 }
436
437 fn test_preset() -> Preset {
438 Preset {
439 preset: PresetMeta {
440 name: "test".into(),
441 version: "1.0".into(),
442 description: String::new(),
443 },
444 compression: CompressionConfig {
445 stages: vec![],
446 keep_fields: None,
447 strip_fields: None,
448 condense: Some(CondenseConfig {
449 enabled: true,
450 max_repeated_lines: 3,
451 }),
452 git_diff_fold: None,
453 strip_nulls: Some(StripNullsConfig { enabled: true }),
454 flatten: None,
455 truncate_strings: Some(TruncateStringsConfig {
456 enabled: true,
457 max_length: 500,
458 }),
459 collapse_arrays: Some(CollapseArraysConfig {
460 enabled: true,
461 max_items: 5,
462 summary_template: "... and {remaining} more items".into(),
463 }),
464 custom_transforms: Some(CustomTransformsConfig { enabled: true }),
465 },
466 tool_selection: ToolSelectionConfig {
467 max_tools: 5,
468 similarity_threshold: 0.7,
469 default_tools: vec![],
470 },
471 budget: BudgetConfig {
472 warning_threshold: 0.70,
473 ceiling_threshold: 0.85,
474 default_window_size: 200_000,
475 agents: Default::default(),
476 },
477 terse_mode: TerseModeConfig {
478 enabled: false,
479 level: crate::preset::TerseLevel::Moderate,
480 },
481 model: ModelConfig {
482 family: "anthropic".into(),
483 primary: "claude-sonnet-4-20250514".into(),
484 local: String::new(),
485 complexity_threshold: 0.4,
486 pricing: None,
487 },
488 }
489 }
490
491 fn make_pipeline() -> CompressionPipeline {
492 CompressionPipeline::new(&test_preset())
493 }
494
495 #[test]
496 fn first_read_is_miss() {
497 let (store, _dir) = in_memory_store();
498 let cm = CacheManager::new(store, u64::MAX);
499 let pipeline = make_pipeline();
500 let content = b"hello world";
501 let result = cm
502 .get_or_compress(Path::new("file.txt"), content, &pipeline)
503 .unwrap();
504 assert!(matches!(result, CacheResult::Fresh { .. }));
505 }
506
507 #[test]
508 fn second_read_is_hit() {
509 let (store, _dir) = in_memory_store();
510 let cm = CacheManager::new(store, u64::MAX);
511 let pipeline = make_pipeline();
512 let content = b"hello world";
513 let path = Path::new("file.txt");
514
515 // First read — miss
516 cm.get_or_compress(path, content, &pipeline).unwrap();
517
518 // Second read — hit
519 let result = cm.get_or_compress(path, content, &pipeline).unwrap();
520 match result {
521 CacheResult::Dedup {
522 inline_ref,
523 token_cost,
524 } => {
525 assert!(inline_ref.starts_with("§ref:"));
526 assert!(inline_ref.ends_with('§'));
527 assert_eq!(token_cost, 13);
528 }
529 CacheResult::Fresh { .. } | CacheResult::Delta { .. } => panic!("expected cache hit"),
530 }
531 }
532
533 #[test]
534 fn different_content_is_miss() {
535 let (store, _dir) = in_memory_store();
536 let cm = CacheManager::new(store, u64::MAX);
537 let pipeline = make_pipeline();
538 let path = Path::new("file.txt");
539
540 cm.get_or_compress(path, b"content v1", &pipeline).unwrap();
541 let result = cm
542 .get_or_compress(path, b"content v2", &pipeline)
543 .unwrap();
544 assert!(matches!(result, CacheResult::Fresh { .. } | CacheResult::Delta { .. }));
545 }
546
547 #[test]
548 fn evict_lru_frees_bytes_when_over_limit() {
549 let (store, _dir) = in_memory_store();
550 // Very small limit so eviction triggers immediately.
551 let cm = CacheManager::new(store, 1);
552 let pipeline = make_pipeline();
553 let path = Path::new("f.txt");
554
555 // Populate cache with a few entries.
556 cm.get_or_compress(path, b"entry one", &pipeline).unwrap();
557 cm.get_or_compress(path, b"entry two", &pipeline).unwrap();
558 cm.get_or_compress(path, b"entry three", &pipeline).unwrap();
559
560 let freed = cm.evict_lru().unwrap();
561 assert!(freed > 0, "expected bytes to be freed");
562 }
563
564 #[test]
565 fn evict_lru_no_op_when_under_limit() {
566 let (store, _dir) = in_memory_store();
567 let cm = CacheManager::new(store, u64::MAX);
568 let pipeline = make_pipeline();
569
570 cm.get_or_compress(Path::new("f.txt"), b"data", &pipeline)
571 .unwrap();
572
573 let freed = cm.evict_lru().unwrap();
574 assert_eq!(freed, 0);
575 }
576
577 #[test]
578 fn invalidate_removes_entry() {
579 let dir = tempfile::tempdir().unwrap();
580 let file_path = dir.path().join("test.txt");
581 std::fs::write(&file_path, b"some content").unwrap();
582
583 let store_path = dir.path().join("store.db");
584 let store = SessionStore::open_or_create(&store_path).unwrap();
585 let cm = CacheManager::new(store, u64::MAX);
586 let pipeline = make_pipeline();
587
588 // Populate cache.
589 let content = std::fs::read(&file_path).unwrap();
590 cm.get_or_compress(&file_path, &content, &pipeline).unwrap();
591
592 // Verify it's a hit.
593 let hit = cm
594 .get_or_compress(&file_path, &content, &pipeline)
595 .unwrap();
596 assert!(matches!(hit, CacheResult::Dedup { .. }));
597
598 cm.invalidate(&file_path).unwrap();
599
600 let miss = cm
601 .get_or_compress(&file_path, &content, &pipeline)
602 .unwrap();
603 assert!(matches!(miss, CacheResult::Fresh { .. }));
604 }
605
606 #[test]
607 fn invalidate_nonexistent_path_is_noop() {
608 let (store, _dir) = in_memory_store();
609 let cm = CacheManager::new(store, u64::MAX);
610 // Should not error.
611 cm.invalidate(Path::new("/nonexistent/path/file.txt"))
612 .unwrap();
613 }
614
615 // ── Compaction / freshness tests ──────────────────────────────────────
616 //
617 // These tests used to exercise an in-memory turn counter. Freshness is
618 // now computed from SQLite `accessed_at` timestamps so dedup works
619 // across the shell-hook model (each hook invocation is a fresh
620 // process). The tests below use wall-clock durations instead.
621
622 #[test]
623 fn stale_ref_returns_fresh_instead_of_dedup() {
624 let (store, _dir) = in_memory_store();
625 // Set max_ref_age to 0 — every ref goes stale immediately.
626 let cm = CacheManager::with_ref_age_duration(store, u64::MAX, Duration::ZERO);
627 let pipeline = make_pipeline();
628 let content = b"hello world";
629 let path = Path::new("file.txt");
630
631 // First read — miss. accessed_at recorded.
632 cm.get_or_compress(path, content, &pipeline).unwrap();
633
634 // Second read — with TTL=0 the ref is already stale, should re-send.
635 let result = cm.get_or_compress(path, content, &pipeline).unwrap();
636 assert!(
637 matches!(result, CacheResult::Fresh { .. }),
638 "stale ref (TTL=0) should return Fresh, not Dedup"
639 );
640 }
641
642 #[test]
643 fn fresh_ref_returns_dedup() {
644 let (store, _dir) = in_memory_store();
645 // Generous TTL: one day. Refs stay fresh for the life of the test.
646 let cm = CacheManager::with_ref_age_duration(
647 store,
648 u64::MAX,
649 Duration::from_secs(86_400),
650 );
651 let pipeline = make_pipeline();
652 let content = b"hello world";
653 let path = Path::new("file.txt");
654
655 cm.get_or_compress(path, content, &pipeline).unwrap();
656 let result = cm.get_or_compress(path, content, &pipeline).unwrap();
657 assert!(
658 matches!(result, CacheResult::Dedup { .. }),
659 "fresh ref should dedup"
660 );
661 }
662
663 #[test]
664 fn notify_compaction_invalidates_all_refs() {
665 let (store, _dir) = in_memory_store();
666 let cm = CacheManager::with_ref_age_duration(
667 store,
668 u64::MAX,
669 Duration::from_secs(86_400),
670 );
671 let pipeline = make_pipeline();
672 let path = Path::new("file.txt");
673
674 // Populate cache — every subsequent read is a dedup hit.
675 cm.get_or_compress(path, b"content A", &pipeline).unwrap();
676 cm.get_or_compress(path, b"content B", &pipeline).unwrap();
677 assert!(matches!(
678 cm.get_or_compress(path, b"content A", &pipeline).unwrap(),
679 CacheResult::Dedup { .. }
680 ));
681 assert!(matches!(
682 cm.get_or_compress(path, b"content B", &pipeline).unwrap(),
683 CacheResult::Dedup { .. }
684 ));
685
686 // Simulate a context compaction. The compaction marker is set to
687 // `now`; any cache entry whose accessed_at predates this moment is
688 // treated as stale even though the TTL hasn't expired.
689 // Sleep 10ms to ensure `now` is strictly after the last touch.
690 std::thread::sleep(std::time::Duration::from_millis(10));
691 cm.notify_compaction();
692
693 // After compaction, refs predate the marker — re-send full content.
694 assert!(matches!(
695 cm.get_or_compress(path, b"content A", &pipeline).unwrap(),
696 CacheResult::Fresh { .. }
697 ));
698 assert!(matches!(
699 cm.get_or_compress(path, b"content B", &pipeline).unwrap(),
700 CacheResult::Fresh { .. }
701 ));
702 }
703
704 #[test]
705 fn ref_refreshed_after_resend() {
706 let (store, _dir) = in_memory_store();
707 // TTL of 10ms: a fresh send bumps accessed_at, so immediately after
708 // the re-send the ref is fresh again.
709 let cm = CacheManager::with_ref_age_duration(
710 store,
711 u64::MAX,
712 Duration::from_millis(10),
713 );
714 let pipeline = make_pipeline();
715 let content = b"hello world";
716 let path = Path::new("file.txt");
717
718 cm.get_or_compress(path, content, &pipeline).unwrap();
719 // Wait past the TTL so the entry is stale.
720 std::thread::sleep(std::time::Duration::from_millis(25));
721
722 // Stale — must re-send Fresh. The re-send bumps accessed_at.
723 let result = cm.get_or_compress(path, content, &pipeline).unwrap();
724 assert!(matches!(result, CacheResult::Fresh { .. }));
725
726 // Immediately read again — the freshly-updated accessed_at is
727 // within the 10ms TTL, so the ref is fresh.
728 let result = cm.get_or_compress(path, content, &pipeline).unwrap();
729 assert!(
730 matches!(result, CacheResult::Dedup { .. }),
731 "ref should be fresh after re-send"
732 );
733 }
734
735 #[test]
736 fn check_dedup_returns_none_for_stale_ref() {
737 let (store, _dir) = in_memory_store();
738 let cm = CacheManager::with_ref_age_duration(
739 store,
740 u64::MAX,
741 Duration::from_millis(10),
742 );
743 let pipeline = make_pipeline();
744 let content = b"test content";
745 let path = Path::new("file.txt");
746
747 cm.get_or_compress(path, content, &pipeline).unwrap();
748
749 // Immediately fresh.
750 assert!(cm.check_dedup(content).unwrap().is_some());
751
752 // Wait past TTL.
753 std::thread::sleep(std::time::Duration::from_millis(25));
754 assert!(
755 cm.check_dedup(content).unwrap().is_none(),
756 "stale ref should not be returned by check_dedup"
757 );
758 }
759
760 #[test]
761 fn advance_turn_increments_counter() {
762 // The counter is retained for context_evictor compatibility.
763 let (store, _dir) = in_memory_store();
764 let cm = CacheManager::new(store, u64::MAX);
765 assert_eq!(cm.current_turn(), 0);
766 cm.advance_turn();
767 assert_eq!(cm.current_turn(), 1);
768 cm.advance_turn();
769 assert_eq!(cm.current_turn(), 2);
770 }
771
772 #[test]
773 fn dedup_survives_cache_manager_restart() {
774 // Regression for the April 18 bug: the turn counter was in-memory
775 // only, so every new sqz process saw an empty ref tracker and the
776 // dedup feature silently produced Fresh results forever. With
777 // accessed_at-based freshness, a fresh CacheManager reading the
778 // same SQLite store picks up the dedup correctly.
779 let dir = tempfile::tempdir().unwrap();
780 let db_path = dir.path().join("cache.db");
781 let pipeline = make_pipeline();
782 let content = b"a substantial chunk of content to dedup";
783 let path = Path::new("x.txt");
784
785 // First "process": populate cache.
786 {
787 let store = SessionStore::open_or_create(&db_path).unwrap();
788 let cm = CacheManager::with_ref_age_duration(
789 store,
790 u64::MAX,
791 Duration::from_secs(3600),
792 );
793 let first = cm.get_or_compress(path, content, &pipeline).unwrap();
794 assert!(matches!(first, CacheResult::Fresh { .. }));
795 }
796
797 // Second "process": new CacheManager, same DB. Dedup must fire.
798 {
799 let store = SessionStore::open_or_create(&db_path).unwrap();
800 let cm = CacheManager::with_ref_age_duration(
801 store,
802 u64::MAX,
803 Duration::from_secs(3600),
804 );
805 let second = cm.get_or_compress(path, content, &pipeline).unwrap();
806 assert!(
807 matches!(second, CacheResult::Dedup { .. }),
808 "second-process read must dedup — this was broken before the April 18 fix"
809 );
810 }
811 }
812
813 #[test]
814 fn compaction_from_one_process_invalidates_refs_in_another() {
815 // Regression for the PreCompact hook wiring: the host harness
816 // (e.g. Claude Code) runs `sqz hook precompact` in a short-lived
817 // process to signal auto-compaction. The actual dedup serving runs
818 // in a DIFFERENT sqz process (the shell hook). notify_compaction
819 // must persist through SQLite so the second process sees it.
820 //
821 // Before the fix, compaction_marker was Cell<Option<DateTime>>
822 // in memory only — the precompact process set it, exited, the
823 // state was lost. Next shell-hook process started with a clean
824 // marker, served stale refs to the agent, and the agent saw a
825 // §ref:HASH§ pointing at content no longer in its context.
826 let dir = tempfile::tempdir().unwrap();
827 let db_path = dir.path().join("cache.db");
828 let pipeline = make_pipeline();
829 let content = b"content that needs stale-marking after compaction";
830 let path = Path::new("file.txt");
831 let ttl = Duration::from_secs(3600);
832
833 // Process A: populate the cache so the content is dedup-eligible.
834 {
835 let store = SessionStore::open_or_create(&db_path).unwrap();
836 let cm = CacheManager::with_ref_age_duration(store, u64::MAX, ttl);
837 cm.get_or_compress(path, content, &pipeline).unwrap();
838 }
839 // Sleep so the compaction marker is strictly after the touch.
840 std::thread::sleep(Duration::from_millis(10));
841
842 // Process B: simulates `sqz hook precompact`. Just calls
843 // notify_compaction and exits. No reads.
844 {
845 let store = SessionStore::open_or_create(&db_path).unwrap();
846 let cm = CacheManager::with_ref_age_duration(store, u64::MAX, ttl);
847 cm.notify_compaction();
848 }
849
850 // Process C: simulates the next `sqz compress` shell-hook call.
851 // Reads the same content. MUST re-send Fresh, not return a ref
852 // the agent can no longer resolve.
853 {
854 let store = SessionStore::open_or_create(&db_path).unwrap();
855 let cm = CacheManager::with_ref_age_duration(store, u64::MAX, ttl);
856 let result = cm.get_or_compress(path, content, &pipeline).unwrap();
857 assert!(
858 matches!(result, CacheResult::Fresh { .. }),
859 "post-compaction read from a fresh process must re-send Fresh; \
860 returning Dedup would be a dangling-ref bug"
861 );
862 }
863 }
864
865 use proptest::prelude::*;
866
867 // ── Property 8: Cache deduplication ──────────────────────────────────────
868 // **Validates: Requirements 8.1, 8.2, 18.1, 18.2**
869 //
870 // For any file content, reading the file twice through the CacheManager
871 // (with no content change between reads) SHALL return a cache hit on the
872 // second read with a reference token of approximately 13 tokens.
873
874 proptest! {
875 /// **Validates: Requirements 8.1, 8.2, 18.1, 18.2**
876 ///
877 /// For any file content, the second read through CacheManager SHALL be
878 /// a cache hit with tokens == 13.
879 #[test]
880 fn prop_cache_deduplication(
881 content in proptest::collection::vec(any::<u8>(), 1..=1000usize),
882 ) {
883 let (store, _dir) = in_memory_store();
884 let cm = CacheManager::new(store, u64::MAX);
885 let pipeline = make_pipeline();
886 let path = Path::new("file.txt");
887
888 // First read — must be a miss.
889 let first = cm.get_or_compress(path, &content, &pipeline).unwrap();
890 prop_assert!(
891 matches!(first, CacheResult::Fresh { .. }),
892 "first read should be a cache miss"
893 );
894
895 let second = cm.get_or_compress(path, &content, &pipeline).unwrap();
896 match second {
897 CacheResult::Dedup { inline_ref, token_cost } => {
898 prop_assert_eq!(
899 token_cost, 13,
900 "cache hit should report ~13 reference tokens"
901 );
902 prop_assert!(
903 inline_ref.starts_with("§ref:"),
904 "reference token should start with §ref:"
905 );
906 prop_assert!(
907 inline_ref.ends_with('§'),
908 "reference token should end with §"
909 );
910 }
911 CacheResult::Fresh { .. } | CacheResult::Delta { .. } => {
912 prop_assert!(false, "second read should be a cache hit, not a miss");
913 }
914 }
915 }
916 }
917
918 // ── Property 9: Cache invalidation on content change ─────────────────────
919 // **Validates: Requirements 8.3, 18.3**
920 //
921 // For any cached file, if the file content changes (producing a different
922 // SHA-256 hash), the CacheManager SHALL treat the next read as a cache miss
923 // and re-compress the updated content.
924
925 proptest! {
926 /// **Validates: Requirements 8.3, 18.3**
927 ///
928 /// For any two distinct byte sequences, the first read of each is a
929 /// cache miss — content change always triggers re-compression.
930 #[test]
931 fn prop_cache_invalidation_on_content_change(
932 content_a in proptest::collection::vec(any::<u8>(), 1..=500usize),
933 content_b in proptest::collection::vec(any::<u8>(), 1..=500usize),
934 ) {
935 // Only meaningful when the two contents differ (different hashes).
936 prop_assume!(content_a != content_b);
937
938 let (store, _dir) = in_memory_store();
939 let cm = CacheManager::new(store, u64::MAX);
940 let pipeline = make_pipeline();
941 let path = Path::new("file.txt");
942
943 // Cache content_a.
944 let r1 = cm.get_or_compress(path, &content_a, &pipeline).unwrap();
945 prop_assert!(
946 matches!(r1, CacheResult::Fresh { .. }),
947 "first read of content_a should be a miss"
948 );
949
950 let r2 = cm.get_or_compress(path, &content_a, &pipeline).unwrap();
951 prop_assert!(
952 matches!(r2, CacheResult::Dedup { .. }),
953 "second read of content_a should be a hit"
954 );
955
956 let r3 = cm.get_or_compress(path, &content_b, &pipeline).unwrap();
957 prop_assert!(
958 matches!(r3, CacheResult::Fresh { .. } | CacheResult::Delta { .. }),
959 "read with changed content should be a cache miss or delta"
960 );
961 }
962 }
963
964 // ── Property 10: Cache LRU eviction ──────────────────────────────────────
965 // **Validates: Requirements 8.5**
966 //
967 // For any cache state where total size exceeds the configured maximum, the
968 // CacheManager SHALL evict entries in LRU order until total size is at or
969 // below the limit.
970
971 proptest! {
972 /// **Validates: Requirements 8.5**
973 ///
974 /// After evict_lru, the total remaining cache size SHALL be at or below
975 /// max_size_bytes.
976 #[test]
977 fn prop_cache_lru_eviction(
978 // Generate 2-8 distinct content entries.
979 entries in proptest::collection::vec(
980 proptest::collection::vec(any::<u8>(), 10..=200usize),
981 2..=8usize,
982 ),
983 ) {
984 // Deduplicate entries so each has a unique hash.
985 let mut unique_entries: Vec<Vec<u8>> = Vec::new();
986 for e in &entries {
987 if !unique_entries.contains(e) {
988 unique_entries.push(e.clone());
989 }
990 }
991 prop_assume!(unique_entries.len() >= 2);
992
993 let (store, _dir) = in_memory_store();
994 // Use a very small limit (1 byte) to guarantee eviction is needed.
995 let cm = CacheManager::new(store, 1);
996 let pipeline = make_pipeline();
997 let path = Path::new("f.txt");
998
999 // Populate the cache.
1000 for entry in &unique_entries {
1001 cm.get_or_compress(path, entry, &pipeline).unwrap();
1002 }
1003
1004 // Evict LRU entries.
1005 let freed = cm.evict_lru().unwrap();
1006
1007 // Bytes freed must be > 0 since total > 1 byte.
1008 prop_assert!(freed > 0, "evict_lru should free bytes when over limit");
1009
1010 // After eviction, total remaining size must be <= max_size_bytes (1).
1011 // We verify by checking that evict_lru now returns 0 (nothing left to evict).
1012 let freed_again = cm.evict_lru().unwrap();
1013 prop_assert_eq!(
1014 freed_again, 0,
1015 "second evict_lru call should free 0 bytes (already at or below limit)"
1016 );
1017 }
1018 }
1019
1020 // ── Property 34: Cache persistence across sessions ────────────────────────
1021 // **Validates: Requirements 18.4**
1022 //
1023 // For any set of cache entries saved to the SessionStore, reloading the
1024 // store (opening the same database file) SHALL produce the same cache
1025 // entries, and a subsequent read with the same content hash SHALL return a
1026 // cache hit.
1027
1028 proptest! {
1029 /// **Validates: Requirements 18.4**
1030 ///
1031 /// Cache entries written in one CacheManager instance SHALL survive
1032 /// a store close/reopen. With the wall-clock freshness model
1033 /// (introduced April 18 2026), a subsequent CacheManager reading
1034 /// the same database SHALL see the entry as fresh (within TTL) and
1035 /// return a Dedup hit on the very first read — this is the whole
1036 /// point of the cross-process fix. Previous behavior (Fresh on
1037 /// first read after restart) was a bug that silently disabled the
1038 /// dedup feature in production.
1039 #[test]
1040 fn prop_cache_persistence_across_sessions(
1041 content in proptest::collection::vec(any::<u8>(), 1..=500usize),
1042 ) {
1043 use crate::session_store::SessionStore;
1044
1045 let dir = tempfile::tempdir().unwrap();
1046 let db_path = dir.path().join("cache.db");
1047 let path = Path::new("file.txt");
1048
1049 // Session 1: populate the cache.
1050 {
1051 let store = SessionStore::open_or_create(&db_path).unwrap();
1052 // Explicit long TTL so tests don't race with wall-clock drift.
1053 let cm = CacheManager::with_ref_age_duration(
1054 store,
1055 u64::MAX,
1056 Duration::from_secs(3600),
1057 );
1058 let pipeline = make_pipeline();
1059
1060 let r = cm.get_or_compress(path, &content, &pipeline).unwrap();
1061 prop_assert!(
1062 matches!(r, CacheResult::Fresh { .. }),
1063 "first-ever read should be a miss"
1064 );
1065 }
1066
1067 // Session 2: reopen the same database file.
1068 {
1069 let store = SessionStore::open_or_create(&db_path).unwrap();
1070 let cm = CacheManager::with_ref_age_duration(
1071 store,
1072 u64::MAX,
1073 Duration::from_secs(3600),
1074 );
1075 let pipeline = make_pipeline();
1076
1077 // First read in the new session MUST dedup. The entry was
1078 // just written (within TTL), so the wall-clock freshness
1079 // check finds it fresh. This is what makes sqz's dedup
1080 // actually work across shell-hook invocations.
1081 let r = cm.get_or_compress(path, &content, &pipeline).unwrap();
1082 match r {
1083 CacheResult::Dedup { token_cost, .. } => {
1084 prop_assert_eq!(
1085 token_cost, 13,
1086 "first read after restart must be a 13-token dedup ref"
1087 );
1088 }
1089 CacheResult::Fresh { .. } | CacheResult::Delta { .. } => {
1090 prop_assert!(
1091 false,
1092 "first read after restart must dedup — this was the \
1093 April 18 bug and its fix is the whole reason this \
1094 test exists"
1095 );
1096 }
1097 }
1098 }
1099 }
1100 }
1101}