1use std::path::Path;
2
3use sha2::{Digest, Sha256};
4
5use crate::error::Result;
6use crate::pipeline::{CompressionPipeline, SessionContext};
7use crate::preset::Preset;
8use crate::session_store::SessionStore;
9use crate::types::CompressedContent;
10
/// Outcome of a content-addressed cache lookup in `CacheManager::get_or_compress`.
pub enum CacheResult {
    /// The exact content bytes were already cached; the caller should emit a
    /// short inline reference instead of the compressed payload.
    Dedup {
        /// Reference marker of the form `§ref:<hash-prefix>§`.
        inline_ref: String,
        /// Approximate token cost of emitting the reference (13 in practice).
        token_cost: u32,
    },
    /// The content was not cached; it was compressed now and stored.
    Fresh { output: CompressedContent },
}
23
/// Content-addressed compression cache with size-bounded LRU eviction.
pub struct CacheManager {
    // Persistent backing store for compressed cache entries.
    store: SessionStore,
    // Size budget: `evict_lru` trims the cache down to at most this many bytes.
    max_size_bytes: u64,
}
29
30impl CacheManager {
31 pub fn new(store: SessionStore, max_size_bytes: u64) -> Self {
32 Self {
33 store,
34 max_size_bytes,
35 }
36 }
37
38 fn sha256_hex(bytes: &[u8]) -> String {
40 let mut hasher = Sha256::new();
41 hasher.update(bytes);
42 format!("{:x}", hasher.finalize())
43 }
44
45 pub fn get_or_compress(
51 &self,
52 _path: &Path,
53 content: &[u8],
54 pipeline: &CompressionPipeline,
55 ) -> Result<CacheResult> {
56 let hash = Self::sha256_hex(content);
57
58 if self.store.get_cache_entry(&hash)?.is_some() {
59 let hash_prefix = &hash[..16];
60 let inline_ref = format!("§ref:{hash_prefix}§");
61 return Ok(CacheResult::Dedup {
62 inline_ref,
63 token_cost: 13,
64 });
65 }
66
67 let text = String::from_utf8_lossy(content).into_owned();
68 let ctx = SessionContext {
69 session_id: "cache".to_string(),
70 };
71 let preset = Preset::default();
72 let compressed = pipeline.compress(&text, &ctx, &preset)?;
73 self.store.save_cache_entry(&hash, &compressed)?;
74
75 Ok(CacheResult::Fresh { output: compressed })
76 }
77
78 pub fn invalidate(&self, path: &Path) -> Result<()> {
83 if !path.exists() {
84 return Ok(());
85 }
86 let bytes = std::fs::read(path)?;
87 let hash = Self::sha256_hex(&bytes);
88 self.store.delete_cache_entry(&hash)?;
89 Ok(())
90 }
91
92 pub fn evict_lru(&self) -> Result<u64> {
97 let entries = self.store.list_cache_entries_lru()?;
98
99 let total: u64 = entries.iter().map(|(_, sz)| sz).sum();
101 if total <= self.max_size_bytes {
102 return Ok(0);
103 }
104
105 let mut freed: u64 = 0;
106 let mut remaining = total;
107
108 for (hash, size) in &entries {
109 if remaining <= self.max_size_bytes {
110 break;
111 }
112 self.store.delete_cache_entry(hash)?;
113 freed += size;
114 remaining -= size;
115 }
116
117 Ok(freed)
118 }
119}
120
#[cfg(test)]
mod tests {
    use super::*;
    use crate::preset::{
        BudgetConfig, CollapseArraysConfig, CompressionConfig, CondenseConfig,
        CustomTransformsConfig, ModelConfig, PresetMeta, StripNullsConfig, TerseModeConfig,
        ToolSelectionConfig, TruncateStringsConfig,
    };
    use crate::session_store::SessionStore;

    // Opens a fresh store backed by a database file in a new temp dir.
    // The TempDir guard is returned so the directory outlives the store.
    // (Name notwithstanding, the store is file-backed — presumably named for
    // its role as a throwaway test fixture.)
    fn in_memory_store() -> (SessionStore, tempfile::TempDir) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("test.db");
        let store = SessionStore::open_or_create(&path).unwrap();
        (store, dir)
    }

    // Builds a minimal but fully-populated preset so the pipeline can run.
    fn test_preset() -> Preset {
        Preset {
            preset: PresetMeta {
                name: "test".into(),
                version: "1.0".into(),
                description: String::new(),
            },
            compression: CompressionConfig {
                stages: vec![],
                keep_fields: None,
                strip_fields: None,
                condense: Some(CondenseConfig {
                    enabled: true,
                    max_repeated_lines: 3,
                }),
                git_diff_fold: None,
                strip_nulls: Some(StripNullsConfig { enabled: true }),
                flatten: None,
                truncate_strings: Some(TruncateStringsConfig {
                    enabled: true,
                    max_length: 500,
                }),
                collapse_arrays: Some(CollapseArraysConfig {
                    enabled: true,
                    max_items: 5,
                    summary_template: "... and {remaining} more items".into(),
                }),
                custom_transforms: Some(CustomTransformsConfig { enabled: true }),
            },
            tool_selection: ToolSelectionConfig {
                max_tools: 5,
                similarity_threshold: 0.7,
                default_tools: vec![],
            },
            budget: BudgetConfig {
                warning_threshold: 0.70,
                ceiling_threshold: 0.85,
                default_window_size: 200_000,
                agents: Default::default(),
            },
            terse_mode: TerseModeConfig {
                enabled: false,
                level: crate::preset::TerseLevel::Moderate,
            },
            model: ModelConfig {
                family: "anthropic".into(),
                primary: "claude-sonnet-4-20250514".into(),
                local: String::new(),
                complexity_threshold: 0.4,
                pricing: None,
            },
        }
    }

    // Convenience wrapper: a pipeline configured from the test preset.
    fn make_pipeline() -> CompressionPipeline {
        CompressionPipeline::new(&test_preset())
    }

    // Unseen content must go through compression (cache miss).
    #[test]
    fn first_read_is_miss() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();
        let content = b"hello world";
        let result = cm
            .get_or_compress(Path::new("file.txt"), content, &pipeline)
            .unwrap();
        assert!(matches!(result, CacheResult::Fresh { .. }));
    }

    // A repeated read of identical bytes must dedup into a §ref:…§ marker
    // with the fixed 13-token cost.
    #[test]
    fn second_read_is_hit() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();
        let content = b"hello world";
        let path = Path::new("file.txt");

        cm.get_or_compress(path, content, &pipeline).unwrap();

        let result = cm.get_or_compress(path, content, &pipeline).unwrap();
        match result {
            CacheResult::Dedup {
                inline_ref,
                token_cost,
            } => {
                assert!(inline_ref.starts_with("§ref:"));
                assert!(inline_ref.ends_with('§'));
                assert_eq!(token_cost, 13);
            }
            CacheResult::Fresh { .. } => panic!("expected cache hit"),
        }
    }

    // Same path but different bytes: identity is content-addressed, so this
    // must be a miss.
    #[test]
    fn different_content_is_miss() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();
        let path = Path::new("file.txt");

        cm.get_or_compress(path, b"content v1", &pipeline).unwrap();
        let result = cm
            .get_or_compress(path, b"content v2", &pipeline)
            .unwrap();
        assert!(matches!(result, CacheResult::Fresh { .. }));
    }

    // With a 1-byte budget, three stored entries must trigger eviction.
    #[test]
    fn evict_lru_frees_bytes_when_over_limit() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, 1);
        let pipeline = make_pipeline();
        let path = Path::new("f.txt");

        cm.get_or_compress(path, b"entry one", &pipeline).unwrap();
        cm.get_or_compress(path, b"entry two", &pipeline).unwrap();
        cm.get_or_compress(path, b"entry three", &pipeline).unwrap();

        let freed = cm.evict_lru().unwrap();
        assert!(freed > 0, "expected bytes to be freed");
    }

    // With an effectively unbounded budget, eviction must be a no-op.
    #[test]
    fn evict_lru_no_op_when_under_limit() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();

        cm.get_or_compress(Path::new("f.txt"), b"data", &pipeline)
            .unwrap();

        let freed = cm.evict_lru().unwrap();
        assert_eq!(freed, 0);
    }

    // invalidate() re-hashes the file on disk and deletes that entry, so a
    // subsequent read of the same content is a miss again.
    #[test]
    fn invalidate_removes_entry() {
        let dir = tempfile::tempdir().unwrap();
        let file_path = dir.path().join("test.txt");
        std::fs::write(&file_path, b"some content").unwrap();

        let store_path = dir.path().join("store.db");
        let store = SessionStore::open_or_create(&store_path).unwrap();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();

        let content = std::fs::read(&file_path).unwrap();
        cm.get_or_compress(&file_path, &content, &pipeline).unwrap();

        // Sanity check: entry is cached before invalidation.
        let hit = cm
            .get_or_compress(&file_path, &content, &pipeline)
            .unwrap();
        assert!(matches!(hit, CacheResult::Dedup { .. }));

        cm.invalidate(&file_path).unwrap();

        let miss = cm
            .get_or_compress(&file_path, &content, &pipeline)
            .unwrap();
        assert!(matches!(miss, CacheResult::Fresh { .. }));
    }

    // Invalidating a path that does not exist must succeed silently.
    #[test]
    fn invalidate_nonexistent_path_is_noop() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        cm.invalidate(Path::new("/nonexistent/path/file.txt"))
            .unwrap();
    }

    use proptest::prelude::*;

    // Property: for ANY byte content, the first read is a miss and the second
    // read is a dedup hit with a well-formed §ref:…§ marker costing 13 tokens.
    proptest! {
        #[test]
        fn prop_cache_deduplication(
            content in proptest::collection::vec(any::<u8>(), 1..=1000usize),
        ) {
            let (store, _dir) = in_memory_store();
            let cm = CacheManager::new(store, u64::MAX);
            let pipeline = make_pipeline();
            let path = Path::new("file.txt");

            let first = cm.get_or_compress(path, &content, &pipeline).unwrap();
            prop_assert!(
                matches!(first, CacheResult::Fresh { .. }),
                "first read should be a cache miss"
            );

            let second = cm.get_or_compress(path, &content, &pipeline).unwrap();
            match second {
                CacheResult::Dedup { inline_ref, token_cost } => {
                    prop_assert_eq!(
                        token_cost, 13,
                        "cache hit should report ~13 reference tokens"
                    );
                    prop_assert!(
                        inline_ref.starts_with("§ref:"),
                        "reference token should start with §ref:"
                    );
                    prop_assert!(
                        inline_ref.ends_with('§'),
                        "reference token should end with §"
                    );
                }
                CacheResult::Fresh { .. } => {
                    prop_assert!(false, "second read should be a cache hit, not a miss");
                }
            }
        }
    }

    // Property: for ANY two distinct contents, caching A then reading B is a
    // miss — changed content never reuses a stale entry.
    proptest! {
        #[test]
        fn prop_cache_invalidation_on_content_change(
            content_a in proptest::collection::vec(any::<u8>(), 1..=500usize),
            content_b in proptest::collection::vec(any::<u8>(), 1..=500usize),
        ) {
            // Identical inputs would make the final read a hit; skip them.
            prop_assume!(content_a != content_b);

            let (store, _dir) = in_memory_store();
            let cm = CacheManager::new(store, u64::MAX);
            let pipeline = make_pipeline();
            let path = Path::new("file.txt");

            let r1 = cm.get_or_compress(path, &content_a, &pipeline).unwrap();
            prop_assert!(
                matches!(r1, CacheResult::Fresh { .. }),
                "first read of content_a should be a miss"
            );

            let r2 = cm.get_or_compress(path, &content_a, &pipeline).unwrap();
            prop_assert!(
                matches!(r2, CacheResult::Dedup { .. }),
                "second read of content_a should be a hit"
            );

            let r3 = cm.get_or_compress(path, &content_b, &pipeline).unwrap();
            prop_assert!(
                matches!(r3, CacheResult::Fresh { .. }),
                "read with changed content should be a cache miss"
            );
        }
    }

    // Property: with a 1-byte budget, storing >= 2 distinct entries always
    // frees bytes on eviction, and a second eviction pass frees nothing.
    proptest! {
        #[test]
        fn prop_cache_lru_eviction(
            entries in proptest::collection::vec(
                proptest::collection::vec(any::<u8>(), 10..=200usize),
                2..=8usize,
            ),
        ) {
            // Deduplicate generated entries: identical bytes share one cache
            // slot, so only unique contents count toward the cache size.
            let mut unique_entries: Vec<Vec<u8>> = Vec::new();
            for e in &entries {
                if !unique_entries.contains(e) {
                    unique_entries.push(e.clone());
                }
            }
            prop_assume!(unique_entries.len() >= 2);

            let (store, _dir) = in_memory_store();
            let cm = CacheManager::new(store, 1);
            let pipeline = make_pipeline();
            let path = Path::new("f.txt");

            for entry in &unique_entries {
                cm.get_or_compress(path, entry, &pipeline).unwrap();
            }

            let freed = cm.evict_lru().unwrap();

            prop_assert!(freed > 0, "evict_lru should free bytes when over limit");

            let freed_again = cm.evict_lru().unwrap();
            prop_assert_eq!(
                freed_again, 0,
                "second evict_lru call should free 0 bytes (already at or below limit)"
            );
        }
    }

    // Property: entries survive closing and reopening the backing store —
    // a second session sees the first session's writes as dedup hits.
    proptest! {
        #[test]
        fn prop_cache_persistence_across_sessions(
            content in proptest::collection::vec(any::<u8>(), 1..=500usize),
        ) {
            use crate::session_store::SessionStore;

            let dir = tempfile::tempdir().unwrap();
            let db_path = dir.path().join("cache.db");
            let path = Path::new("file.txt");

            // Session 1: populate the cache, then drop the store.
            {
                let store = SessionStore::open_or_create(&db_path).unwrap();
                let cm = CacheManager::new(store, u64::MAX);
                let pipeline = make_pipeline();

                let r = cm.get_or_compress(path, &content, &pipeline).unwrap();
                prop_assert!(
                    matches!(r, CacheResult::Fresh { .. }),
                    "first read should be a miss"
                );
            }
            // Session 2: reopen the same database file and expect a hit.
            {
                let store = SessionStore::open_or_create(&db_path).unwrap();
                let cm = CacheManager::new(store, u64::MAX);
                let pipeline = make_pipeline();

                let r = cm.get_or_compress(path, &content, &pipeline).unwrap();
                match r {
                    CacheResult::Dedup { token_cost, .. } => {
                        prop_assert_eq!(
                            token_cost, 13,
                            "persisted cache hit should report 13 tokens"
                        );
                    }
                    CacheResult::Fresh { .. } => {
                        prop_assert!(
                            false,
                            "cache entry should persist across store reopen"
                        );
                    }
                }
            }
        }
    }
}