1use std::collections::HashMap;
2use std::path::Path;
3
4use sha2::{Digest, Sha256};
5
6use crate::delta_encoder::DeltaEncoder;
7use crate::error::Result;
8use crate::pipeline::{CompressionPipeline, SessionContext};
9use crate::preset::Preset;
10use crate::session_store::SessionStore;
11use crate::types::CompressedContent;
12
13pub enum CacheResult {
15 Dedup {
17 inline_ref: String,
19 token_cost: u32,
21 },
22 Delta {
24 delta_text: String,
26 token_cost: u32,
28 similarity: f64,
30 },
31 Fresh { output: CompressedContent },
33}
34
/// Bookkeeping kept for every content hash whose payload or reference has
/// been handed out.
#[derive(Debug, Clone)]
struct RefEntry {
    /// Value of the turn counter at the moment the hash was last sent.
    last_sent_turn: u64,
}
41
42pub struct CacheManager {
51 store: SessionStore,
52 max_size_bytes: u64,
53 delta_encoder: DeltaEncoder,
54 turn_counter: std::cell::Cell<u64>,
56 ref_tracker: std::cell::RefCell<HashMap<String, RefEntry>>,
58 max_ref_age_turns: u64,
62}
63
64impl CacheManager {
65 pub fn new(store: SessionStore, max_size_bytes: u64) -> Self {
66 Self {
67 store,
68 max_size_bytes,
69 delta_encoder: DeltaEncoder::new(),
70 turn_counter: std::cell::Cell::new(0),
71 ref_tracker: std::cell::RefCell::new(HashMap::new()),
72 max_ref_age_turns: 20,
73 }
74 }
75
76 pub fn with_ref_age(store: SessionStore, max_size_bytes: u64, max_ref_age_turns: u64) -> Self {
78 Self {
79 store,
80 max_size_bytes,
81 delta_encoder: DeltaEncoder::new(),
82 turn_counter: std::cell::Cell::new(0),
83 ref_tracker: std::cell::RefCell::new(HashMap::new()),
84 max_ref_age_turns,
85 }
86 }
87
88 fn sha256_hex(bytes: &[u8]) -> String {
90 let mut hasher = Sha256::new();
91 hasher.update(bytes);
92 format!("{:x}", hasher.finalize())
93 }
94
95 pub fn advance_turn(&self) {
97 self.turn_counter.set(self.turn_counter.get() + 1);
98 }
99
100 pub fn current_turn(&self) -> u64 {
102 self.turn_counter.get()
103 }
104
105 pub fn notify_compaction(&self) {
116 self.ref_tracker.borrow_mut().clear();
117 }
118
119 fn is_ref_fresh(&self, hash: &str) -> bool {
122 let tracker = self.ref_tracker.borrow();
123 if let Some(entry) = tracker.get(hash) {
124 let age = self.turn_counter.get().saturating_sub(entry.last_sent_turn);
125 age < self.max_ref_age_turns
126 } else {
127 false
128 }
129 }
130
131 fn record_ref_sent(&self, hash: &str) {
133 self.ref_tracker.borrow_mut().insert(
134 hash.to_string(),
135 RefEntry {
136 last_sent_turn: self.turn_counter.get(),
137 },
138 );
139 }
140
141 pub fn get_or_compress(
149 &self,
150 _path: &Path,
151 content: &[u8],
152 pipeline: &CompressionPipeline,
153 ) -> Result<CacheResult> {
154 let hash = Self::sha256_hex(content);
155
156 if self.store.get_cache_entry(&hash)?.is_some() {
158 if self.is_ref_fresh(&hash) {
159 let hash_prefix = &hash[..16];
161 let inline_ref = format!("§ref:{hash_prefix}§");
162 self.record_ref_sent(&hash);
164 return Ok(CacheResult::Dedup {
165 inline_ref,
166 token_cost: 13,
167 });
168 } else {
169 let text = String::from_utf8_lossy(content).into_owned();
172 let ctx = SessionContext {
173 session_id: "cache".to_string(),
174 };
175 let preset = Preset::default();
176 let compressed = pipeline.compress(&text, &ctx, &preset)?;
177 self.record_ref_sent(&hash);
179 return Ok(CacheResult::Fresh { output: compressed });
180 }
181 }
182
183 let text = String::from_utf8_lossy(content).into_owned();
185 if let Some(delta_result) = self.try_delta_encode(&text)? {
186 let ctx = SessionContext {
188 session_id: "cache".to_string(),
189 };
190 let preset = Preset::default();
191 let compressed = pipeline.compress(&text, &ctx, &preset)?;
192 self.store.save_cache_entry(&hash, &compressed)?;
193 self.record_ref_sent(&hash);
194
195 let token_cost = (delta_result.delta_text.len() / 4) as u32;
196 return Ok(CacheResult::Delta {
197 delta_text: delta_result.delta_text,
198 token_cost: token_cost.max(5),
199 similarity: delta_result.similarity,
200 });
201 }
202
203 let ctx = SessionContext {
204 session_id: "cache".to_string(),
205 };
206 let preset = Preset::default();
207 let compressed = pipeline.compress(&text, &ctx, &preset)?;
208 self.store.save_cache_entry(&hash, &compressed)?;
209 self.record_ref_sent(&hash);
211
212 Ok(CacheResult::Fresh { output: compressed })
213 }
214
215 fn try_delta_encode(
218 &self,
219 new_content: &str,
220 ) -> Result<Option<crate::delta_encoder::DeltaResult>> {
221 let entries = self.store.list_cache_entries_lru()?;
222
223 let check_count = entries.len().min(10);
225 for (hash, _) in entries.iter().rev().take(check_count) {
226 if let Some(cached) = self.store.get_cache_entry(hash)? {
227 let hash_prefix = &hash[..hash.len().min(16)];
228 if let Ok(Some(delta)) =
229 self.delta_encoder
230 .encode(&cached.data, new_content, hash_prefix)
231 {
232 if delta.delta_text.len() < new_content.len() {
234 return Ok(Some(delta));
235 }
236 }
237 }
238 }
239
240 Ok(None)
241 }
242
243 pub fn check_dedup(&self, content: &[u8]) -> Result<Option<String>> {
248 let hash = Self::sha256_hex(content);
249 if self.store.get_cache_entry(&hash)?.is_some() {
250 if self.is_ref_fresh(&hash) {
251 let hash_prefix = &hash[..16];
252 self.record_ref_sent(&hash);
253 Ok(Some(format!("§ref:{hash_prefix}§")))
254 } else {
255 Ok(None)
257 }
258 } else {
259 Ok(None)
260 }
261 }
262
263 pub fn store_compressed(
268 &self,
269 original_content: &[u8],
270 compressed: &CompressedContent,
271 ) -> Result<()> {
272 let hash = Self::sha256_hex(original_content);
273 self.store.save_cache_entry(&hash, compressed)?;
274 self.record_ref_sent(&hash);
275 Ok(())
276 }
277
278 pub fn invalidate(&self, path: &Path) -> Result<()> {
283 if !path.exists() {
284 return Ok(());
285 }
286 let bytes = std::fs::read(path)?;
287 let hash = Self::sha256_hex(&bytes);
288 self.store.delete_cache_entry(&hash)?;
289 Ok(())
290 }
291
292 pub fn evict_lru(&self) -> Result<u64> {
297 let entries = self.store.list_cache_entries_lru()?;
298
299 let total: u64 = entries.iter().map(|(_, sz)| sz).sum();
301 if total <= self.max_size_bytes {
302 return Ok(0);
303 }
304
305 let mut freed: u64 = 0;
306 let mut remaining = total;
307
308 for (hash, size) in &entries {
309 if remaining <= self.max_size_bytes {
310 break;
311 }
312 self.store.delete_cache_entry(hash)?;
313 freed += size;
314 remaining -= size;
315 }
316
317 Ok(freed)
318 }
319}
320
#[cfg(test)]
mod tests {
    use super::*;
    use crate::preset::{
        BudgetConfig, CollapseArraysConfig, CompressionConfig, CondenseConfig,
        CustomTransformsConfig, ModelConfig, PresetMeta, StripNullsConfig, TerseModeConfig,
        ToolSelectionConfig, TruncateStringsConfig,
    };
    use crate::session_store::SessionStore;
    use proptest::prelude::*;

    /// Opens a store backed by a database file inside a fresh temporary
    /// directory. The directory guard is returned so it outlives the store.
    fn temp_store() -> (SessionStore, tempfile::TempDir) {
        let guard = tempfile::tempdir().unwrap();
        let db_file = guard.path().join("test.db");
        let store = SessionStore::open_or_create(&db_file).unwrap();
        (store, guard)
    }

    /// Manager with the default reference age. The directory guard comes
    /// first in the tuple so that it is dropped after the manager.
    fn manager(max_size_bytes: u64) -> (tempfile::TempDir, CacheManager) {
        let (store, guard) = temp_store();
        (guard, CacheManager::new(store, max_size_bytes))
    }

    /// Unbounded manager with a custom reference age limit.
    fn manager_with_ref_age(max_ref_age_turns: u64) -> (tempfile::TempDir, CacheManager) {
        let (store, guard) = temp_store();
        (
            guard,
            CacheManager::with_ref_age(store, u64::MAX, max_ref_age_turns),
        )
    }

    /// Advances the manager's turn counter `turns` times.
    fn advance(cache: &CacheManager, turns: usize) {
        for _ in 0..turns {
            cache.advance_turn();
        }
    }

    /// Preset used to build the pipeline under test.
    fn test_preset() -> Preset {
        let meta = PresetMeta {
            name: "test".into(),
            version: "1.0".into(),
            description: String::new(),
        };

        let compression = CompressionConfig {
            stages: vec![],
            keep_fields: None,
            strip_fields: None,
            condense: Some(CondenseConfig {
                enabled: true,
                max_repeated_lines: 3,
            }),
            git_diff_fold: None,
            strip_nulls: Some(StripNullsConfig { enabled: true }),
            flatten: None,
            truncate_strings: Some(TruncateStringsConfig {
                enabled: true,
                max_length: 500,
            }),
            collapse_arrays: Some(CollapseArraysConfig {
                enabled: true,
                max_items: 5,
                summary_template: "... and {remaining} more items".into(),
            }),
            custom_transforms: Some(CustomTransformsConfig { enabled: true }),
        };

        let tool_selection = ToolSelectionConfig {
            max_tools: 5,
            similarity_threshold: 0.7,
            default_tools: vec![],
        };

        let budget = BudgetConfig {
            warning_threshold: 0.70,
            ceiling_threshold: 0.85,
            default_window_size: 200_000,
            agents: Default::default(),
        };

        let terse_mode = TerseModeConfig {
            enabled: false,
            level: crate::preset::TerseLevel::Moderate,
        };

        let model = ModelConfig {
            family: "anthropic".into(),
            primary: "claude-sonnet-4-20250514".into(),
            local: String::new(),
            complexity_threshold: 0.4,
            pricing: None,
        };

        Preset {
            preset: meta,
            compression,
            tool_selection,
            budget,
            terse_mode,
            model,
        }
    }

    fn make_pipeline() -> CompressionPipeline {
        CompressionPipeline::new(&test_preset())
    }

    #[test]
    fn first_read_is_miss() {
        let (_dir, cache) = manager(u64::MAX);
        let pipeline = make_pipeline();

        let outcome = cache
            .get_or_compress(Path::new("file.txt"), b"hello world", &pipeline)
            .unwrap();

        assert!(matches!(outcome, CacheResult::Fresh { .. }));
    }

    #[test]
    fn second_read_is_hit() {
        let (_dir, cache) = manager(u64::MAX);
        let pipeline = make_pipeline();
        let body = b"hello world";
        let file = Path::new("file.txt");

        // Prime the cache, then read the same bytes again.
        cache.get_or_compress(file, body, &pipeline).unwrap();
        let outcome = cache.get_or_compress(file, body, &pipeline).unwrap();

        if let CacheResult::Dedup {
            inline_ref,
            token_cost,
        } = outcome
        {
            assert!(inline_ref.starts_with("§ref:"));
            assert!(inline_ref.ends_with('§'));
            assert_eq!(token_cost, 13);
        } else {
            panic!("expected cache hit");
        }
    }

    #[test]
    fn different_content_is_miss() {
        let (_dir, cache) = manager(u64::MAX);
        let pipeline = make_pipeline();
        let file = Path::new("file.txt");

        cache
            .get_or_compress(file, b"content v1", &pipeline)
            .unwrap();
        let outcome = cache
            .get_or_compress(file, b"content v2", &pipeline)
            .unwrap();

        assert!(matches!(
            outcome,
            CacheResult::Fresh { .. } | CacheResult::Delta { .. }
        ));
    }

    #[test]
    fn evict_lru_frees_bytes_when_over_limit() {
        // A one-byte limit guarantees the store is over budget.
        let (_dir, cache) = manager(1);
        let pipeline = make_pipeline();
        let file = Path::new("f.txt");

        let bodies: [&[u8]; 3] = [b"entry one", b"entry two", b"entry three"];
        for body in bodies.iter() {
            cache.get_or_compress(file, *body, &pipeline).unwrap();
        }

        let freed = cache.evict_lru().unwrap();
        assert!(freed > 0, "expected bytes to be freed");
    }

    #[test]
    fn evict_lru_no_op_when_under_limit() {
        let (_dir, cache) = manager(u64::MAX);
        let pipeline = make_pipeline();

        cache
            .get_or_compress(Path::new("f.txt"), b"data", &pipeline)
            .unwrap();

        assert_eq!(cache.evict_lru().unwrap(), 0);
    }

    #[test]
    fn invalidate_removes_entry() {
        let dir = tempfile::tempdir().unwrap();
        let target = dir.path().join("test.txt");
        std::fs::write(&target, b"some content").unwrap();

        let store = SessionStore::open_or_create(&dir.path().join("store.db")).unwrap();
        let cache = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();
        let bytes = std::fs::read(&target).unwrap();

        // First read populates the cache, second read must hit it.
        cache.get_or_compress(&target, &bytes, &pipeline).unwrap();
        let before = cache.get_or_compress(&target, &bytes, &pipeline).unwrap();
        assert!(matches!(before, CacheResult::Dedup { .. }));

        cache.invalidate(&target).unwrap();

        let after = cache.get_or_compress(&target, &bytes, &pipeline).unwrap();
        assert!(matches!(after, CacheResult::Fresh { .. }));
    }

    #[test]
    fn invalidate_nonexistent_path_is_noop() {
        let (_dir, cache) = manager(u64::MAX);
        let missing = Path::new("/nonexistent/path/file.txt");
        cache.invalidate(missing).unwrap();
    }

    #[test]
    fn stale_ref_returns_fresh_instead_of_dedup() {
        let (_dir, cache) = manager_with_ref_age(3);
        let pipeline = make_pipeline();
        let body = b"hello world";
        let file = Path::new("file.txt");

        cache.get_or_compress(file, body, &pipeline).unwrap();

        let same_turn = cache.get_or_compress(file, body, &pipeline).unwrap();
        assert!(
            matches!(same_turn, CacheResult::Dedup { .. }),
            "ref should be fresh at turn 0"
        );

        // Move past the three-turn age limit.
        advance(&cache, 4);

        let later = cache.get_or_compress(file, body, &pipeline).unwrap();
        assert!(
            matches!(later, CacheResult::Fresh { .. }),
            "stale ref should return Fresh, not Dedup"
        );
    }

    #[test]
    fn notify_compaction_invalidates_all_refs() {
        let (_dir, cache) = manager(u64::MAX);
        let pipeline = make_pipeline();
        let file = Path::new("file.txt");
        let bodies: [&[u8]; 2] = [b"content A", b"content B"];

        for body in bodies.iter() {
            cache.get_or_compress(file, *body, &pipeline).unwrap();
        }

        for body in bodies.iter() {
            let hit = cache.get_or_compress(file, *body, &pipeline).unwrap();
            assert!(matches!(hit, CacheResult::Dedup { .. }));
        }

        cache.notify_compaction();

        for body in bodies.iter() {
            let resent = cache.get_or_compress(file, *body, &pipeline).unwrap();
            assert!(matches!(resent, CacheResult::Fresh { .. }));
        }
    }

    #[test]
    fn ref_refreshed_after_resend() {
        let (_dir, cache) = manager_with_ref_age(3);
        let pipeline = make_pipeline();
        let body = b"hello world";
        let file = Path::new("file.txt");

        cache.get_or_compress(file, body, &pipeline).unwrap();
        advance(&cache, 4);

        // The stale reference forces a full re-send ...
        let resent = cache.get_or_compress(file, body, &pipeline).unwrap();
        assert!(matches!(resent, CacheResult::Fresh { .. }));

        // ... which makes the reference usable again.
        let hit = cache.get_or_compress(file, body, &pipeline).unwrap();
        assert!(
            matches!(hit, CacheResult::Dedup { .. }),
            "ref should be fresh after re-send"
        );
    }

    #[test]
    fn check_dedup_returns_none_for_stale_ref() {
        let (_dir, cache) = manager_with_ref_age(2);
        let pipeline = make_pipeline();
        let body = b"test content";

        cache
            .get_or_compress(Path::new("file.txt"), body, &pipeline)
            .unwrap();
        assert!(cache.check_dedup(body).unwrap().is_some());

        advance(&cache, 3);

        assert!(
            cache.check_dedup(body).unwrap().is_none(),
            "stale ref should not be returned by check_dedup"
        );
    }

    #[test]
    fn advance_turn_increments_counter() {
        let (_dir, cache) = manager(u64::MAX);
        for expected in 0..2u64 {
            assert_eq!(cache.current_turn(), expected);
            cache.advance_turn();
        }
        assert_eq!(cache.current_turn(), 2);
    }

    proptest! {
        // Reading identical bytes twice: a miss followed by a dedup hit.
        #[test]
        fn prop_cache_deduplication(
            content in proptest::collection::vec(any::<u8>(), 1..=1000usize),
        ) {
            let (_dir, cache) = manager(u64::MAX);
            let pipeline = make_pipeline();
            let file = Path::new("file.txt");

            let first = cache.get_or_compress(file, &content, &pipeline).unwrap();
            prop_assert!(
                matches!(first, CacheResult::Fresh { .. }),
                "first read should be a cache miss"
            );

            let second = cache.get_or_compress(file, &content, &pipeline).unwrap();
            if let CacheResult::Dedup { inline_ref, token_cost } = second {
                prop_assert_eq!(
                    token_cost, 13,
                    "cache hit should report ~13 reference tokens"
                );
                prop_assert!(
                    inline_ref.starts_with("§ref:"),
                    "reference token should start with §ref:"
                );
                prop_assert!(
                    inline_ref.ends_with('§'),
                    "reference token should end with §"
                );
            } else {
                prop_assert!(false, "second read should be a cache hit, not a miss");
            }
        }

        // Changing the bytes under the same path must not produce a dedup hit.
        #[test]
        fn prop_cache_invalidation_on_content_change(
            content_a in proptest::collection::vec(any::<u8>(), 1..=500usize),
            content_b in proptest::collection::vec(any::<u8>(), 1..=500usize),
        ) {
            prop_assume!(content_a != content_b);

            let (_dir, cache) = manager(u64::MAX);
            let pipeline = make_pipeline();
            let file = Path::new("file.txt");

            let miss = cache.get_or_compress(file, &content_a, &pipeline).unwrap();
            prop_assert!(
                matches!(miss, CacheResult::Fresh { .. }),
                "first read of content_a should be a miss"
            );

            let hit = cache.get_or_compress(file, &content_a, &pipeline).unwrap();
            prop_assert!(
                matches!(hit, CacheResult::Dedup { .. }),
                "second read of content_a should be a hit"
            );

            let changed = cache.get_or_compress(file, &content_b, &pipeline).unwrap();
            prop_assert!(
                matches!(changed, CacheResult::Fresh { .. } | CacheResult::Delta { .. }),
                "read with changed content should be a cache miss or delta"
            );
        }

        // With a one-byte budget, eviction frees something once and then
        // has nothing left to do.
        #[test]
        fn prop_cache_lru_eviction(
            entries in proptest::collection::vec(
                proptest::collection::vec(any::<u8>(), 10..=200usize),
                2..=8usize,
            ),
        ) {
            // Keep the first occurrence of each payload, in generation order.
            let mut distinct: Vec<Vec<u8>> = Vec::new();
            for candidate in entries.iter() {
                if distinct.iter().all(|seen| seen != candidate) {
                    distinct.push(candidate.clone());
                }
            }
            prop_assume!(distinct.len() >= 2);

            let (_dir, cache) = manager(1);
            let pipeline = make_pipeline();
            let file = Path::new("f.txt");

            for payload in distinct.iter() {
                cache.get_or_compress(file, payload, &pipeline).unwrap();
            }

            let freed = cache.evict_lru().unwrap();
            prop_assert!(freed > 0, "evict_lru should free bytes when over limit");

            let freed_again = cache.evict_lru().unwrap();
            prop_assert_eq!(
                freed_again, 0,
                "second evict_lru call should free 0 bytes (already at or below limit)"
            );
        }

        // Stored entries survive a reopen, but the reference tracker does
        // not, so the first read in a new session re-sends the content.
        #[test]
        fn prop_cache_persistence_across_sessions(
            content in proptest::collection::vec(any::<u8>(), 1..=500usize),
        ) {
            let dir = tempfile::tempdir().unwrap();
            let db_path = dir.path().join("cache.db");
            let file = Path::new("file.txt");

            {
                // Session one: populate the store.
                let store = SessionStore::open_or_create(&db_path).unwrap();
                let cache = CacheManager::new(store, u64::MAX);
                let pipeline = make_pipeline();

                let initial = cache.get_or_compress(file, &content, &pipeline).unwrap();
                prop_assert!(
                    matches!(initial, CacheResult::Fresh { .. }),
                    "first read should be a miss"
                );
            }
            {
                // Session two: same database, fresh manager.
                let store = SessionStore::open_or_create(&db_path).unwrap();
                let cache = CacheManager::new(store, u64::MAX);
                let pipeline = make_pipeline();

                let reopened = cache.get_or_compress(file, &content, &pipeline).unwrap();
                prop_assert!(
                    matches!(reopened, CacheResult::Fresh { .. }),
                    "first read after restart should re-send (ref tracker empty)"
                );

                let repeat = cache.get_or_compress(file, &content, &pipeline).unwrap();
                if let CacheResult::Dedup { token_cost, .. } = repeat {
                    prop_assert_eq!(
                        token_cost, 13,
                        "second read in same session should be dedup hit"
                    );
                } else {
                    prop_assert!(
                        false,
                        "second read should be a dedup hit after re-send"
                    );
                }
            }
        }
    }
}