1use std::path::Path;
2
3use sha2::{Digest, Sha256};
4
5use crate::error::Result;
6use crate::pipeline::{CompressionPipeline, SessionContext};
7use crate::preset::Preset;
8use crate::session_store::SessionStore;
9use crate::types::CompressedContent;
10
/// Outcome of a cache lookup in [`CacheManager::get_or_compress`].
pub enum CacheResult {
    /// The content hash was already present in the store: the caller can emit
    /// a short inline reference instead of the compressed content.
    Dedup {
        /// Reference marker of the form `§ref:<hash-prefix>§`.
        inline_ref: String,
        /// Approximate token cost of emitting the reference marker.
        token_cost: u32,
    },
    /// Cache miss: the content was freshly compressed (and stored).
    Fresh { output: CompressedContent },
}
23
/// Content-addressed cache of compressed content, keyed by the SHA-256 of the
/// original bytes and persisted in a [`SessionStore`].
pub struct CacheManager {
    /// Backing persistent store holding the cache entries.
    store: SessionStore,
    /// Soft size limit in bytes; enforced only by explicit [`CacheManager::evict_lru`] calls.
    max_size_bytes: u64,
}
29
30impl CacheManager {
31 pub fn new(store: SessionStore, max_size_bytes: u64) -> Self {
32 Self {
33 store,
34 max_size_bytes,
35 }
36 }
37
38 fn sha256_hex(bytes: &[u8]) -> String {
40 let mut hasher = Sha256::new();
41 hasher.update(bytes);
42 format!("{:x}", hasher.finalize())
43 }
44
45 pub fn get_or_compress(
51 &self,
52 _path: &Path,
53 content: &[u8],
54 pipeline: &CompressionPipeline,
55 ) -> Result<CacheResult> {
56 let hash = Self::sha256_hex(content);
57
58 if self.store.get_cache_entry(&hash)?.is_some() {
59 let hash_prefix = &hash[..16];
60 let inline_ref = format!("§ref:{hash_prefix}§");
61 return Ok(CacheResult::Dedup {
62 inline_ref,
63 token_cost: 13,
64 });
65 }
66
67 let text = String::from_utf8_lossy(content).into_owned();
68 let ctx = SessionContext {
69 session_id: "cache".to_string(),
70 };
71 let preset = Preset::default();
72 let compressed = pipeline.compress(&text, &ctx, &preset)?;
73 self.store.save_cache_entry(&hash, &compressed)?;
74
75 Ok(CacheResult::Fresh { output: compressed })
76 }
77
78 pub fn check_dedup(&self, content: &[u8]) -> Result<Option<String>> {
82 let hash = Self::sha256_hex(content);
83 if self.store.get_cache_entry(&hash)?.is_some() {
84 let hash_prefix = &hash[..16];
85 Ok(Some(format!("§ref:{hash_prefix}§")))
86 } else {
87 Ok(None)
88 }
89 }
90
91 pub fn store_compressed(
94 &self,
95 original_content: &[u8],
96 compressed: &CompressedContent,
97 ) -> Result<()> {
98 let hash = Self::sha256_hex(original_content);
99 self.store.save_cache_entry(&hash, compressed)?;
100 Ok(())
101 }
102
103 pub fn invalidate(&self, path: &Path) -> Result<()> {
108 if !path.exists() {
109 return Ok(());
110 }
111 let bytes = std::fs::read(path)?;
112 let hash = Self::sha256_hex(&bytes);
113 self.store.delete_cache_entry(&hash)?;
114 Ok(())
115 }
116
117 pub fn evict_lru(&self) -> Result<u64> {
122 let entries = self.store.list_cache_entries_lru()?;
123
124 let total: u64 = entries.iter().map(|(_, sz)| sz).sum();
126 if total <= self.max_size_bytes {
127 return Ok(0);
128 }
129
130 let mut freed: u64 = 0;
131 let mut remaining = total;
132
133 for (hash, size) in &entries {
134 if remaining <= self.max_size_bytes {
135 break;
136 }
137 self.store.delete_cache_entry(hash)?;
138 freed += size;
139 remaining -= size;
140 }
141
142 Ok(freed)
143 }
144}
145
#[cfg(test)]
mod tests {
    use super::*;
    use crate::preset::{
        BudgetConfig, CollapseArraysConfig, CompressionConfig, CondenseConfig,
        CustomTransformsConfig, ModelConfig, PresetMeta, StripNullsConfig, TerseModeConfig,
        ToolSelectionConfig, TruncateStringsConfig,
    };
    // NOTE(review): SessionStore is already in scope via `use super::*;` — this
    // explicit re-import is redundant but harmless.
    use crate::session_store::SessionStore;

    /// Opens a fresh store backed by a temp directory. The `TempDir` guard is
    /// returned alongside the store so the directory (and db file) outlive the
    /// store for the duration of the test.
    fn in_memory_store() -> (SessionStore, tempfile::TempDir) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("test.db");
        let store = SessionStore::open_or_create(&path).unwrap();
        (store, dir)
    }

    /// Fully-populated preset used to build the test pipeline: most stages
    /// enabled with small limits, tool selection / budget / model filled with
    /// representative values.
    fn test_preset() -> Preset {
        Preset {
            preset: PresetMeta {
                name: "test".into(),
                version: "1.0".into(),
                description: String::new(),
            },
            compression: CompressionConfig {
                stages: vec![],
                keep_fields: None,
                strip_fields: None,
                condense: Some(CondenseConfig {
                    enabled: true,
                    max_repeated_lines: 3,
                }),
                git_diff_fold: None,
                strip_nulls: Some(StripNullsConfig { enabled: true }),
                flatten: None,
                truncate_strings: Some(TruncateStringsConfig {
                    enabled: true,
                    max_length: 500,
                }),
                collapse_arrays: Some(CollapseArraysConfig {
                    enabled: true,
                    max_items: 5,
                    summary_template: "... and {remaining} more items".into(),
                }),
                custom_transforms: Some(CustomTransformsConfig { enabled: true }),
            },
            tool_selection: ToolSelectionConfig {
                max_tools: 5,
                similarity_threshold: 0.7,
                default_tools: vec![],
            },
            budget: BudgetConfig {
                warning_threshold: 0.70,
                ceiling_threshold: 0.85,
                default_window_size: 200_000,
                agents: Default::default(),
            },
            terse_mode: TerseModeConfig {
                enabled: false,
                level: crate::preset::TerseLevel::Moderate,
            },
            model: ModelConfig {
                family: "anthropic".into(),
                primary: "claude-sonnet-4-20250514".into(),
                local: String::new(),
                complexity_threshold: 0.4,
                pricing: None,
            },
        }
    }

    /// Pipeline built from `test_preset` — shared by all tests below.
    fn make_pipeline() -> CompressionPipeline {
        CompressionPipeline::new(&test_preset())
    }

    // Unseen content must go down the compression path.
    #[test]
    fn first_read_is_miss() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();
        let content = b"hello world";
        let result = cm
            .get_or_compress(Path::new("file.txt"), content, &pipeline)
            .unwrap();
        assert!(matches!(result, CacheResult::Fresh { .. }));
    }

    // A repeat read of identical bytes must dedup, with the expected
    // `§ref:…§` marker shape and fixed token cost.
    #[test]
    fn second_read_is_hit() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();
        let content = b"hello world";
        let path = Path::new("file.txt");

        cm.get_or_compress(path, content, &pipeline).unwrap();

        let result = cm.get_or_compress(path, content, &pipeline).unwrap();
        match result {
            CacheResult::Dedup {
                inline_ref,
                token_cost,
            } => {
                assert!(inline_ref.starts_with("§ref:"));
                assert!(inline_ref.ends_with('§'));
                assert_eq!(token_cost, 13);
            }
            CacheResult::Fresh { .. } => panic!("expected cache hit"),
        }
    }

    // The cache is keyed by content hash, not path: same path + different
    // bytes is still a miss.
    #[test]
    fn different_content_is_miss() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();
        let path = Path::new("file.txt");

        cm.get_or_compress(path, b"content v1", &pipeline).unwrap();
        let result = cm
            .get_or_compress(path, b"content v2", &pipeline)
            .unwrap();
        assert!(matches!(result, CacheResult::Fresh { .. }));
    }

    // With a 1-byte limit, any stored entries put the cache over budget,
    // so eviction must free something.
    #[test]
    fn evict_lru_frees_bytes_when_over_limit() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, 1);
        let pipeline = make_pipeline();
        let path = Path::new("f.txt");

        cm.get_or_compress(path, b"entry one", &pipeline).unwrap();
        cm.get_or_compress(path, b"entry two", &pipeline).unwrap();
        cm.get_or_compress(path, b"entry three", &pipeline).unwrap();

        let freed = cm.evict_lru().unwrap();
        assert!(freed > 0, "expected bytes to be freed");
    }

    // Under the (effectively unlimited) budget, eviction is a no-op.
    #[test]
    fn evict_lru_no_op_when_under_limit() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();

        cm.get_or_compress(Path::new("f.txt"), b"data", &pipeline)
            .unwrap();

        let freed = cm.evict_lru().unwrap();
        assert_eq!(freed, 0);
    }

    // invalidate() hashes the file's current on-disk bytes, so a real file is
    // needed here (unlike the other tests, which pass bytes directly).
    #[test]
    fn invalidate_removes_entry() {
        let dir = tempfile::tempdir().unwrap();
        let file_path = dir.path().join("test.txt");
        std::fs::write(&file_path, b"some content").unwrap();

        let store_path = dir.path().join("store.db");
        let store = SessionStore::open_or_create(&store_path).unwrap();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();

        let content = std::fs::read(&file_path).unwrap();
        cm.get_or_compress(&file_path, &content, &pipeline).unwrap();

        // Sanity-check the entry exists before invalidating.
        let hit = cm
            .get_or_compress(&file_path, &content, &pipeline)
            .unwrap();
        assert!(matches!(hit, CacheResult::Dedup { .. }));

        cm.invalidate(&file_path).unwrap();

        // After invalidation the same bytes must be recompressed.
        let miss = cm
            .get_or_compress(&file_path, &content, &pipeline)
            .unwrap();
        assert!(matches!(miss, CacheResult::Fresh { .. }));
    }

    // Missing files short-circuit to Ok(()) inside invalidate().
    #[test]
    fn invalidate_nonexistent_path_is_noop() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        cm.invalidate(Path::new("/nonexistent/path/file.txt"))
            .unwrap();
    }

    // Property-based tests below. NOTE(review): this `use` sits mid-module;
    // convention would place it with the other imports at the top of the mod.
    use proptest::prelude::*;

    // Property: for arbitrary content, first read is Fresh and second read is
    // a Dedup with a well-formed reference marker and cost 13.
    proptest! {
        #[test]
        fn prop_cache_deduplication(
            content in proptest::collection::vec(any::<u8>(), 1..=1000usize),
        ) {
            let (store, _dir) = in_memory_store();
            let cm = CacheManager::new(store, u64::MAX);
            let pipeline = make_pipeline();
            let path = Path::new("file.txt");

            let first = cm.get_or_compress(path, &content, &pipeline).unwrap();
            prop_assert!(
                matches!(first, CacheResult::Fresh { .. }),
                "first read should be a cache miss"
            );

            let second = cm.get_or_compress(path, &content, &pipeline).unwrap();
            match second {
                CacheResult::Dedup { inline_ref, token_cost } => {
                    prop_assert_eq!(
                        token_cost, 13,
                        "cache hit should report ~13 reference tokens"
                    );
                    prop_assert!(
                        inline_ref.starts_with("§ref:"),
                        "reference token should start with §ref:"
                    );
                    prop_assert!(
                        inline_ref.ends_with('§'),
                        "reference token should end with §"
                    );
                }
                CacheResult::Fresh { .. } => {
                    prop_assert!(false, "second read should be a cache hit, not a miss");
                }
            }
        }
    }

    // Property: changing the bytes always produces a miss, even after the
    // original bytes were cached (content-addressing, not path-addressing).
    proptest! {
        #[test]
        fn prop_cache_invalidation_on_content_change(
            content_a in proptest::collection::vec(any::<u8>(), 1..=500usize),
            content_b in proptest::collection::vec(any::<u8>(), 1..=500usize),
        ) {
            // Discard generated pairs where the two contents collide.
            prop_assume!(content_a != content_b);

            let (store, _dir) = in_memory_store();
            let cm = CacheManager::new(store, u64::MAX);
            let pipeline = make_pipeline();
            let path = Path::new("file.txt");

            let r1 = cm.get_or_compress(path, &content_a, &pipeline).unwrap();
            prop_assert!(
                matches!(r1, CacheResult::Fresh { .. }),
                "first read of content_a should be a miss"
            );

            let r2 = cm.get_or_compress(path, &content_a, &pipeline).unwrap();
            prop_assert!(
                matches!(r2, CacheResult::Dedup { .. }),
                "second read of content_a should be a hit"
            );

            let r3 = cm.get_or_compress(path, &content_b, &pipeline).unwrap();
            prop_assert!(
                matches!(r3, CacheResult::Fresh { .. }),
                "read with changed content should be a cache miss"
            );
        }
    }

    // Property: with a 1-byte budget, eviction frees bytes once and is then
    // idempotent (second call frees nothing).
    proptest! {
        #[test]
        fn prop_cache_lru_eviction(
            entries in proptest::collection::vec(
                proptest::collection::vec(any::<u8>(), 10..=200usize),
                2..=8usize,
            ),
        ) {
            // Deduplicate generated entries: identical byte vectors hash to the
            // same cache key and would collapse into one stored entry.
            let mut unique_entries: Vec<Vec<u8>> = Vec::new();
            for e in &entries {
                if !unique_entries.contains(e) {
                    unique_entries.push(e.clone());
                }
            }
            prop_assume!(unique_entries.len() >= 2);

            let (store, _dir) = in_memory_store();
            let cm = CacheManager::new(store, 1);
            let pipeline = make_pipeline();
            let path = Path::new("f.txt");

            for entry in &unique_entries {
                cm.get_or_compress(path, entry, &pipeline).unwrap();
            }

            let freed = cm.evict_lru().unwrap();

            prop_assert!(freed > 0, "evict_lru should free bytes when over limit");

            // NOTE(review): this expects a single pass to reach the budget —
            // after eviction the cache must be at or below max_size_bytes.
            let freed_again = cm.evict_lru().unwrap();
            prop_assert_eq!(
                freed_again, 0,
                "second evict_lru call should free 0 bytes (already at or below limit)"
            );
        }
    }

    // Property: cache entries survive closing and reopening the backing store
    // (persistence across sessions).
    proptest! {
        #[test]
        fn prop_cache_persistence_across_sessions(
            content in proptest::collection::vec(any::<u8>(), 1..=500usize),
        ) {
            // NOTE(review): redundant — SessionStore is already in scope at
            // module level.
            use crate::session_store::SessionStore;

            let dir = tempfile::tempdir().unwrap();
            let db_path = dir.path().join("cache.db");
            let path = Path::new("file.txt");

            // Session 1: populate the cache, then drop the store.
            {
                let store = SessionStore::open_or_create(&db_path).unwrap();
                let cm = CacheManager::new(store, u64::MAX);
                let pipeline = make_pipeline();

                let r = cm.get_or_compress(path, &content, &pipeline).unwrap();
                prop_assert!(
                    matches!(r, CacheResult::Fresh { .. }),
                    "first read should be a miss"
                );
            }
            // Session 2: reopen the same db file; the entry must still hit.
            {
                let store = SessionStore::open_or_create(&db_path).unwrap();
                let cm = CacheManager::new(store, u64::MAX);
                let pipeline = make_pipeline();

                let r = cm.get_or_compress(path, &content, &pipeline).unwrap();
                match r {
                    CacheResult::Dedup { token_cost, .. } => {
                        prop_assert_eq!(
                            token_cost, 13,
                            "persisted cache hit should report 13 tokens"
                        );
                    }
                    CacheResult::Fresh { .. } => {
                        prop_assert!(
                            false,
                            "cache entry should persist across store reopen"
                        );
                    }
                }
            }
        }
    }
}