1use std::path::Path;
2
3use sha2::{Digest, Sha256};
4
5use crate::error::Result;
6use crate::pipeline::{CompressionPipeline, SessionContext};
7use crate::preset::Preset;
8use crate::session_store::SessionStore;
9use crate::types::CompressedContent;
10
/// Outcome of a cache lookup performed by `CacheManager::get_or_compress`.
pub enum CacheResult {
    /// The content hash was already present in the store; the caller should
    /// emit the short reference token instead of the full compressed payload.
    Hit {
        /// Token of the form `§ref:<16-hex-char-hash-prefix>§`.
        reference_token: String,
        /// Approximate token cost of the reference (a fixed 13 in this code).
        tokens: u32,
    },
    /// The content was not cached; it has been compressed and stored now.
    Miss { compressed: CompressedContent },
}
21
/// Content-addressed cache of compressed content, keyed by SHA-256 of the
/// raw bytes, with LRU eviction once a size budget is exceeded.
pub struct CacheManager {
    /// Persistent backing store for cache entries.
    store: SessionStore,
    /// Soft total-size limit enforced by `evict_lru`.
    max_size_bytes: u64,
}
27
28impl CacheManager {
29 pub fn new(store: SessionStore, max_size_bytes: u64) -> Self {
30 Self {
31 store,
32 max_size_bytes,
33 }
34 }
35
36 fn sha256_hex(bytes: &[u8]) -> String {
38 let mut hasher = Sha256::new();
39 hasher.update(bytes);
40 format!("{:x}", hasher.finalize())
41 }
42
43 pub fn get_or_compress(
49 &self,
50 _path: &Path,
51 content: &[u8],
52 pipeline: &CompressionPipeline,
53 ) -> Result<CacheResult> {
54 let hash = Self::sha256_hex(content);
55
56 if self.store.get_cache_entry(&hash)?.is_some() {
57 let hash_prefix = &hash[..16];
60 let reference_token = format!("§ref:{hash_prefix}§");
61 return Ok(CacheResult::Hit {
62 reference_token,
63 tokens: 13,
64 });
65 }
66
67 let text = String::from_utf8_lossy(content).into_owned();
69 let ctx = SessionContext {
70 session_id: "cache".to_string(),
71 };
72 let preset = Preset::default();
73 let compressed = pipeline.compress(&text, &ctx, &preset)?;
74 self.store.save_cache_entry(&hash, &compressed)?;
75
76 Ok(CacheResult::Miss { compressed })
77 }
78
79 pub fn invalidate(&self, path: &Path) -> Result<()> {
84 if !path.exists() {
85 return Ok(());
86 }
87 let bytes = std::fs::read(path)?;
88 let hash = Self::sha256_hex(&bytes);
89 self.store.delete_cache_entry(&hash)?;
90 Ok(())
91 }
92
93 pub fn evict_lru(&self) -> Result<u64> {
98 let entries = self.store.list_cache_entries_lru()?;
99
100 let total: u64 = entries.iter().map(|(_, sz)| sz).sum();
102 if total <= self.max_size_bytes {
103 return Ok(0);
104 }
105
106 let mut freed: u64 = 0;
107 let mut remaining = total;
108
109 for (hash, size) in &entries {
110 if remaining <= self.max_size_bytes {
111 break;
112 }
113 self.store.delete_cache_entry(hash)?;
114 freed += size;
115 remaining -= size;
116 }
117
118 Ok(freed)
119 }
120}
121
#[cfg(test)]
mod tests {
    use super::*;
    use crate::preset::{
        BudgetConfig, CollapseArraysConfig, CompressionConfig, CondenseConfig,
        CustomTransformsConfig, ModelConfig, PresetMeta, StripNullsConfig, TerseModeConfig,
        ToolSelectionConfig, TruncateStringsConfig,
    };
    use crate::session_store::SessionStore;

    // Opens a file-backed store in a fresh temp dir. The TempDir is returned
    // so callers keep it alive; dropping it deletes the backing file.
    fn in_memory_store() -> (SessionStore, tempfile::TempDir) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("test.db");
        let store = SessionStore::open_or_create(&path).unwrap();
        (store, dir)
    }

    // Fully-populated preset fixture with an empty stage list; individual
    // transform configs are enabled with small, deterministic limits.
    fn test_preset() -> Preset {
        Preset {
            preset: PresetMeta {
                name: "test".into(),
                version: "1.0".into(),
                description: String::new(),
            },
            compression: CompressionConfig {
                stages: vec![],
                keep_fields: None,
                strip_fields: None,
                condense: Some(CondenseConfig {
                    enabled: true,
                    max_repeated_lines: 3,
                }),
                strip_nulls: Some(StripNullsConfig { enabled: true }),
                flatten: None,
                truncate_strings: Some(TruncateStringsConfig {
                    enabled: true,
                    max_length: 500,
                }),
                collapse_arrays: Some(CollapseArraysConfig {
                    enabled: true,
                    max_items: 5,
                    summary_template: "... and {remaining} more items".into(),
                }),
                custom_transforms: Some(CustomTransformsConfig { enabled: true }),
            },
            tool_selection: ToolSelectionConfig {
                max_tools: 5,
                similarity_threshold: 0.7,
                default_tools: vec![],
            },
            budget: BudgetConfig {
                warning_threshold: 0.70,
                ceiling_threshold: 0.85,
                default_window_size: 200_000,
                agents: Default::default(),
            },
            terse_mode: TerseModeConfig {
                enabled: false,
                level: crate::preset::TerseLevel::Moderate,
            },
            model: ModelConfig {
                family: "anthropic".into(),
                primary: "claude-sonnet-4-20250514".into(),
                local: String::new(),
                complexity_threshold: 0.4,
                pricing: None,
            },
        }
    }

    // Pipeline built from the fixture preset above.
    fn make_pipeline() -> CompressionPipeline {
        CompressionPipeline::new(&test_preset())
    }

    // Unseen content must produce a Miss.
    #[test]
    fn first_read_is_miss() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();
        let content = b"hello world";
        let result = cm
            .get_or_compress(Path::new("file.txt"), content, &pipeline)
            .unwrap();
        assert!(matches!(result, CacheResult::Miss { .. }));
    }

    // Re-reading identical bytes must produce a Hit with a well-formed
    // §ref:…§ token and the fixed 13-token cost.
    #[test]
    fn second_read_is_hit() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();
        let content = b"hello world";
        let path = Path::new("file.txt");

        cm.get_or_compress(path, content, &pipeline).unwrap();

        let result = cm.get_or_compress(path, content, &pipeline).unwrap();
        match result {
            CacheResult::Hit {
                reference_token,
                tokens,
            } => {
                assert!(reference_token.starts_with("§ref:"));
                assert!(reference_token.ends_with('§'));
                assert_eq!(tokens, 13);
            }
            CacheResult::Miss { .. } => panic!("expected cache hit"),
        }
    }

    // Same path but different bytes: cache identity is content, not path,
    // so changed content must be a Miss.
    #[test]
    fn different_content_is_miss() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();
        let path = Path::new("file.txt");

        cm.get_or_compress(path, b"content v1", &pipeline).unwrap();
        let result = cm
            .get_or_compress(path, b"content v2", &pipeline)
            .unwrap();
        assert!(matches!(result, CacheResult::Miss { .. }));
    }

    // With a 1-byte budget, three distinct entries must trigger eviction.
    #[test]
    fn evict_lru_frees_bytes_when_over_limit() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, 1);
        let pipeline = make_pipeline();
        let path = Path::new("f.txt");

        cm.get_or_compress(path, b"entry one", &pipeline).unwrap();
        cm.get_or_compress(path, b"entry two", &pipeline).unwrap();
        cm.get_or_compress(path, b"entry three", &pipeline).unwrap();

        let freed = cm.evict_lru().unwrap();
        assert!(freed > 0, "expected bytes to be freed");
    }

    // Under an effectively unlimited budget, eviction must be a no-op.
    #[test]
    fn evict_lru_no_op_when_under_limit() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();

        cm.get_or_compress(Path::new("f.txt"), b"data", &pipeline)
            .unwrap();

        let freed = cm.evict_lru().unwrap();
        assert_eq!(freed, 0);
    }

    // invalidate() on an existing file removes its entry: Hit before,
    // Miss after. Uses a real file on disk because invalidate re-reads it.
    #[test]
    fn invalidate_removes_entry() {
        let dir = tempfile::tempdir().unwrap();
        let file_path = dir.path().join("test.txt");
        std::fs::write(&file_path, b"some content").unwrap();

        let store_path = dir.path().join("store.db");
        let store = SessionStore::open_or_create(&store_path).unwrap();
        let cm = CacheManager::new(store, u64::MAX);
        let pipeline = make_pipeline();

        let content = std::fs::read(&file_path).unwrap();
        cm.get_or_compress(&file_path, &content, &pipeline).unwrap();

        let hit = cm
            .get_or_compress(&file_path, &content, &pipeline)
            .unwrap();
        assert!(matches!(hit, CacheResult::Hit { .. }));

        cm.invalidate(&file_path).unwrap();

        let miss = cm
            .get_or_compress(&file_path, &content, &pipeline)
            .unwrap();
        assert!(matches!(miss, CacheResult::Miss { .. }));
    }

    // invalidate() on a missing path must return Ok without touching store.
    #[test]
    fn invalidate_nonexistent_path_is_noop() {
        let (store, _dir) = in_memory_store();
        let cm = CacheManager::new(store, u64::MAX);
        cm.invalidate(Path::new("/nonexistent/path/file.txt"))
            .unwrap();
    }

    use proptest::prelude::*;

    // Property: for arbitrary byte content, the first read is always a Miss
    // and the second read of the same bytes is always a well-formed Hit.
    proptest! {
        #[test]
        fn prop_cache_deduplication(
            content in proptest::collection::vec(any::<u8>(), 1..=1000usize),
        ) {
            let (store, _dir) = in_memory_store();
            let cm = CacheManager::new(store, u64::MAX);
            let pipeline = make_pipeline();
            let path = Path::new("file.txt");

            let first = cm.get_or_compress(path, &content, &pipeline).unwrap();
            prop_assert!(
                matches!(first, CacheResult::Miss { .. }),
                "first read should be a cache miss"
            );

            let second = cm.get_or_compress(path, &content, &pipeline).unwrap();
            match second {
                CacheResult::Hit { reference_token, tokens } => {
                    prop_assert_eq!(
                        tokens, 13,
                        "cache hit should report ~13 reference tokens"
                    );
                    prop_assert!(
                        reference_token.starts_with("§ref:"),
                        "reference token should start with §ref:"
                    );
                    prop_assert!(
                        reference_token.ends_with('§'),
                        "reference token should end with §"
                    );
                }
                CacheResult::Miss { .. } => {
                    prop_assert!(false, "second read should be a cache hit, not a miss");
                }
            }
        }
    }

    // Property: caching keys on content, so any differing byte sequence at
    // the same path must miss even after the original content is cached.
    proptest! {
        #[test]
        fn prop_cache_invalidation_on_content_change(
            content_a in proptest::collection::vec(any::<u8>(), 1..=500usize),
            content_b in proptest::collection::vec(any::<u8>(), 1..=500usize),
        ) {
            prop_assume!(content_a != content_b);

            let (store, _dir) = in_memory_store();
            let cm = CacheManager::new(store, u64::MAX);
            let pipeline = make_pipeline();
            let path = Path::new("file.txt");

            let r1 = cm.get_or_compress(path, &content_a, &pipeline).unwrap();
            prop_assert!(
                matches!(r1, CacheResult::Miss { .. }),
                "first read of content_a should be a miss"
            );

            let r2 = cm.get_or_compress(path, &content_a, &pipeline).unwrap();
            prop_assert!(
                matches!(r2, CacheResult::Hit { .. }),
                "second read of content_a should be a hit"
            );

            let r3 = cm.get_or_compress(path, &content_b, &pipeline).unwrap();
            prop_assert!(
                matches!(r3, CacheResult::Miss { .. }),
                "read with changed content should be a cache miss"
            );
        }
    }

    // Property: with a 1-byte budget, eviction after >=2 distinct entries
    // frees something, and a second eviction pass frees nothing more.
    proptest! {
        #[test]
        fn prop_cache_lru_eviction(
            entries in proptest::collection::vec(
                proptest::collection::vec(any::<u8>(), 10..=200usize),
                2..=8usize,
            ),
        ) {
            // Deduplicate generated entries; identical bytes share a cache
            // entry, so duplicates would not add size.
            let mut unique_entries: Vec<Vec<u8>> = Vec::new();
            for e in &entries {
                if !unique_entries.contains(e) {
                    unique_entries.push(e.clone());
                }
            }
            prop_assume!(unique_entries.len() >= 2);

            let (store, _dir) = in_memory_store();
            let cm = CacheManager::new(store, 1);
            let pipeline = make_pipeline();
            let path = Path::new("f.txt");

            for entry in &unique_entries {
                cm.get_or_compress(path, entry, &pipeline).unwrap();
            }

            let freed = cm.evict_lru().unwrap();

            prop_assert!(freed > 0, "evict_lru should free bytes when over limit");

            let freed_again = cm.evict_lru().unwrap();
            prop_assert_eq!(
                freed_again, 0,
                "second evict_lru call should free 0 bytes (already at or below limit)"
            );
        }
    }

    // Property: entries survive a store close/reopen cycle — a fresh
    // CacheManager over the same db file must hit on previously cached bytes.
    proptest! {
        #[test]
        fn prop_cache_persistence_across_sessions(
            content in proptest::collection::vec(any::<u8>(), 1..=500usize),
        ) {
            use crate::session_store::SessionStore;

            let dir = tempfile::tempdir().unwrap();
            let db_path = dir.path().join("cache.db");
            let path = Path::new("file.txt");

            // First "session": populate the cache, then drop the store.
            {
                let store = SessionStore::open_or_create(&db_path).unwrap();
                let cm = CacheManager::new(store, u64::MAX);
                let pipeline = make_pipeline();

                let r = cm.get_or_compress(path, &content, &pipeline).unwrap();
                prop_assert!(
                    matches!(r, CacheResult::Miss { .. }),
                    "first read should be a miss"
                );
            }
            // Second "session": reopen the same db and expect a hit.
            {
                let store = SessionStore::open_or_create(&db_path).unwrap();
                let cm = CacheManager::new(store, u64::MAX);
                let pipeline = make_pipeline();

                let r = cm.get_or_compress(path, &content, &pipeline).unwrap();
                match r {
                    CacheResult::Hit { tokens, .. } => {
                        prop_assert_eq!(
                            tokens, 13,
                            "persisted cache hit should report 13 tokens"
                        );
                    }
                    CacheResult::Miss { .. } => {
                        prop_assert!(
                            false,
                            "cache entry should persist across store reopen"
                        );
                    }
                }
            }
        }
    }
}