1use crate::store::{FileRootSerde, Key, Payload, Store, StoreError};
20use std::collections::HashMap;
21use std::path::{Path, PathBuf};
22use std::sync::RwLock;
23
/// Errors returned by the fallible [`LiveCache`] operations.
#[derive(Debug, thiserror::Error)]
pub enum CacheError {
    /// A failure reported by the backing store; `#[from]` lets `?` convert a
    /// [`StoreError`] into this variant.
    #[error("store: {0}")]
    Store(#[from] StoreError),
    /// An I/O failure; `#[from]` lets `?` convert a [`std::io::Error`] into this variant.
    #[error("io: {0}")]
    Io(#[from] std::io::Error),
}
31
/// A file a cache entry depends on, together with the content hash it had
/// when the entry was recorded.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileRoot {
    /// Path of the file. During revalidation it is joined onto the cache's
    /// workspace base, so an absolute path is used as-is.
    pub path: PathBuf,
    /// Hash the file is expected to have; revalidation compares it with the
    /// string produced by [`hash_file`].
    pub expected_hash: String,
}
41
/// In-memory registry record for one cache entry, built from the metadata
/// stored alongside the entry's payload.
#[derive(Debug, Clone)]
struct EntryMeta {
    /// Tool kind label supplied when the entry was persisted (e.g. `"read"`).
    tool_kind: String,
    /// Files whose content hashes are checked by `lookup_revalidate`.
    file_roots: Vec<FileRoot>,
    /// String form (`Key.0`) of the entries this entry depends on; walked by
    /// `invalidate_upstream` to find dependents.
    upstream_keys: Vec<String>,
}
52
/// Cache front-end that pairs a persistent [`Store`] with an in-memory
/// registry of per-entry metadata used for revalidation and invalidation.
pub struct LiveCache {
    /// Backing store that holds payloads and their metadata.
    store: Box<dyn Store>,
    /// Metadata for known entries, keyed by the string inside each [`Key`].
    registry: RwLock<HashMap<String, EntryMeta>>,
    /// Directory that recorded file-root paths are joined onto.
    workspace_base: PathBuf,
}
65
/// Result of a cache lookup.
#[derive(Debug, Clone, PartialEq)]
pub enum LookupOutcome {
    /// The store holds a payload for the key.
    Hit(Payload),
    /// The store holds nothing for the key.
    Miss,
    /// Returned by `lookup_revalidate` when a recorded file root is unreadable
    /// or no longer hashes to its expected value.
    Invalidated,
}
79
80impl LiveCache {
81 pub fn new<S: Store + 'static>(store: S) -> Self {
82 let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
83 Self::from_box_with_workspace(Box::new(store), base)
84 }
85
86 pub fn with_workspace<S: Store + 'static>(
87 store: S,
88 workspace_base: impl Into<PathBuf>,
89 ) -> Self {
90 Self::from_box_with_workspace(Box::new(store), workspace_base.into())
91 }
92
93 pub fn from_box(store: Box<dyn Store>) -> Self {
94 let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
95 Self::from_box_with_workspace(store, base)
96 }
97
98 pub fn from_box_with_workspace(store: Box<dyn Store>, workspace_base: PathBuf) -> Self {
99 let mut reg = HashMap::new();
108 if let Ok(items) = store.iter_meta() {
109 for (key, meta) in items {
110 let file_roots = meta
111 .file_roots
112 .into_iter()
113 .map(|f| FileRoot {
114 path: PathBuf::from(f.path),
115 expected_hash: f.expected_hash,
116 })
117 .collect();
118 reg.insert(
119 key.0,
120 EntryMeta {
121 tool_kind: meta.tool_kind,
122 file_roots,
123 upstream_keys: meta.upstream_keys,
124 },
125 );
126 }
127 }
128 Self {
129 store,
130 registry: RwLock::new(reg),
131 workspace_base,
132 }
133 }
134
135 pub fn store(&self) -> &dyn Store {
136 self.store.as_ref()
137 }
138
139 pub fn workspace_base(&self) -> &Path {
140 &self.workspace_base
141 }
142
143 pub fn entry_count(&self) -> usize {
144 self.registry.read().expect("registry poisoned").len()
145 }
146
147 pub fn lookup(&self, key: &Key) -> Result<LookupOutcome, CacheError> {
152 let in_reg = self
153 .registry
154 .read()
155 .expect("registry poisoned")
156 .contains_key(&key.0);
157 match self.store.lookup(key)? {
158 Some(p) => {
159 if !in_reg {
167 self.populate_registry_from_meta(key, &p);
168 }
169 Ok(LookupOutcome::Hit(p))
170 }
171 None => {
172 if in_reg {
173 self.registry.write().unwrap().remove(&key.0);
180 }
181 Ok(LookupOutcome::Miss)
182 }
183 }
184 }
185
186 pub fn lookup_revalidate(&self, key: &Key) -> Result<LookupOutcome, CacheError> {
196 let cached_meta = {
200 let reg = self.registry.read().expect("registry poisoned");
201 reg.get(&key.0).cloned()
202 };
203
204 if let Some(meta) = &cached_meta {
207 match revalidate_file_roots(&self.workspace_base, &meta.file_roots) {
208 RevalidationOutcome::Ok => {}
209 RevalidationOutcome::Invalidated => {
210 self.registry.write().unwrap().remove(&key.0);
211 return Ok(LookupOutcome::Invalidated);
212 }
213 }
214 }
215
216 match self.store.lookup(key)? {
217 Some(p) => {
218 if cached_meta.is_none() {
227 let local_roots: Vec<FileRoot> = p
228 .meta
229 .file_roots
230 .iter()
231 .map(|f| FileRoot {
232 path: PathBuf::from(&f.path),
233 expected_hash: f.expected_hash.clone(),
234 })
235 .collect();
236 match revalidate_file_roots(&self.workspace_base, &local_roots) {
237 RevalidationOutcome::Ok => {
238 self.populate_registry_from_meta(key, &p);
239 }
240 RevalidationOutcome::Invalidated => {
241 return Ok(LookupOutcome::Invalidated);
242 }
243 }
244 }
245 Ok(LookupOutcome::Hit(p))
246 }
247 None => {
248 if cached_meta.is_some() {
249 self.registry.write().unwrap().remove(&key.0);
250 }
251 Ok(LookupOutcome::Miss)
252 }
253 }
254 }
255
256 fn populate_registry_from_meta(&self, key: &Key, p: &Payload) {
257 let file_roots = p
258 .meta
259 .file_roots
260 .iter()
261 .map(|f| FileRoot {
262 path: PathBuf::from(&f.path),
263 expected_hash: f.expected_hash.clone(),
264 })
265 .collect();
266 self.registry.write().unwrap().insert(
267 key.0.clone(),
268 EntryMeta {
269 tool_kind: p.meta.tool_kind.clone(),
270 file_roots,
271 upstream_keys: p.meta.upstream_keys.clone(),
272 },
273 );
274 }
275
276 pub fn persist(
280 &self,
281 key: &Key,
282 bytes: &[u8],
283 tool_kind: &str,
284 file_roots: Vec<FileRoot>,
285 ) -> Result<(), CacheError> {
286 self.persist_with_upstreams(key, bytes, tool_kind, file_roots, Vec::new())
287 }
288
289 pub fn persist_with_upstreams(
294 &self,
295 key: &Key,
296 bytes: &[u8],
297 tool_kind: &str,
298 file_roots: Vec<FileRoot>,
299 upstream_keys: Vec<Key>,
300 ) -> Result<(), CacheError> {
301 let serde_roots: Vec<FileRootSerde> = file_roots
302 .iter()
303 .map(|r| FileRootSerde {
304 path: r.path.display().to_string(),
305 expected_hash: r.expected_hash.clone(),
306 })
307 .collect();
308 let upstream_strings: Vec<String> = upstream_keys.iter().map(|k| k.0.clone()).collect();
309 self.store.persist_with_upstreams(
310 key,
311 bytes,
312 tool_kind,
313 serde_roots,
314 upstream_strings.clone(),
315 )?;
316 self.registry.write().unwrap().insert(
317 key.0.clone(),
318 EntryMeta {
319 tool_kind: tool_kind.to_string(),
320 file_roots,
321 upstream_keys: upstream_strings,
322 },
323 );
324 Ok(())
325 }
326
327 pub fn mark_dirty(&self, key: &Key) {
335 self.registry.write().unwrap().remove(&key.0);
336 let _ = self.store.remove(key);
341 }
342
343 pub fn invalidate_upstream(&self, upstream_key: &Key) -> usize {
356 let mut reg = self.registry.write().unwrap();
357 let mut dirty: std::collections::HashSet<String> =
361 std::collections::HashSet::from([upstream_key.0.clone()]);
362 loop {
363 let before = dirty.len();
364 for (k, meta) in reg.iter() {
365 if dirty.contains(k) {
366 continue;
367 }
368 if meta.upstream_keys.iter().any(|u| dirty.contains(u)) {
369 dirty.insert(k.clone());
370 }
371 }
372 if dirty.len() == before {
373 break;
374 }
375 }
376 let mut dropped = 0;
379 for k in &dirty {
380 if k == &upstream_key.0 {
381 continue;
382 }
383 if reg.remove(k).is_some() {
384 dropped += 1;
385 let _ = self.store.remove(&Key(k.clone()));
386 }
387 }
388 dropped
389 }
390
391 pub fn invalidate_path(&self, path: &Path) -> usize {
396 let target = match path.canonicalize() {
397 Ok(p) => p,
398 Err(_) => path.to_path_buf(),
399 };
400 let to_drop: Vec<String> = {
401 let reg = self.registry.read().unwrap();
402 reg.iter()
403 .filter_map(|(k, meta)| {
404 let touches = meta.file_roots.iter().any(|r| {
405 let resolved = resolve_root_path(&self.workspace_base, &r.path);
406 resolved
407 .canonicalize()
408 .map(|c| c == target)
409 .unwrap_or_else(|_| resolved == path || r.path == path)
410 });
411 if touches {
412 Some(k.clone())
413 } else {
414 None
415 }
416 })
417 .collect()
418 };
419 let n = to_drop.len();
420 for k in to_drop {
421 let key = Key(k);
422 self.invalidate_upstream(&key);
425 self.registry.write().unwrap().remove(&key.0);
426 let _ = self.store.remove(&key);
427 }
428 n
429 }
430
431 pub fn known_kinds(&self) -> Vec<String> {
432 let reg = self.registry.read().unwrap();
433 let mut kinds: Vec<String> = reg.values().map(|m| m.tool_kind.clone()).collect();
434 kinds.sort();
435 kinds.dedup();
436 kinds
437 }
438}
439
/// Verdict of checking a set of file roots against the files on disk.
enum RevalidationOutcome {
    /// Every root was readable and hashed to its expected value.
    Ok,
    /// At least one root was unreadable or hashed to a different value.
    Invalidated,
}
447
448fn revalidate_file_roots(workspace_base: &Path, roots: &[FileRoot]) -> RevalidationOutcome {
449 for root in roots {
450 let resolved = resolve_root_path(workspace_base, &root.path);
451 let current = match hash_file(&resolved) {
452 Ok(h) => h,
453 Err(_) => return RevalidationOutcome::Invalidated,
454 };
455 if current != root.expected_hash {
456 return RevalidationOutcome::Invalidated;
457 }
458 }
459 RevalidationOutcome::Ok
460}
461
/// Resolves a recorded root path against the workspace base.
///
/// A relative `recorded` path is appended to `workspace_base`; an absolute
/// one replaces it, so absolute roots are returned unchanged.
fn resolve_root_path(workspace_base: &Path, recorded: &Path) -> PathBuf {
    let mut resolved = workspace_base.to_path_buf();
    resolved.push(recorded);
    resolved
}
472
473pub fn hash_file(path: &Path) -> std::io::Result<String> {
474 let mut hasher = blake3::Hasher::new();
475 let mut f = std::fs::File::open(path)?;
476 let mut buf = [0u8; 1 << 16];
477 loop {
478 let n = std::io::Read::read(&mut f, &mut buf)?;
479 if n == 0 {
480 break;
481 }
482 hasher.update(&buf[..n]);
483 }
484 Ok(hasher.finalize().to_hex().to_string())
485}
486
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Opens a `FileStore` under `<dir>/store` and wraps it in a `LiveCache`.
    fn cache(dir: &TempDir) -> LiveCache {
        let store = crate::store::FileStore::open(dir.path().join("store")).unwrap();
        LiveCache::new(store)
    }

    /// Writes `content` to `<dir>/<name>` and returns the file's path.
    fn write_file(dir: &TempDir, name: &str, content: &[u8]) -> PathBuf {
        let p = dir.path().join(name);
        std::fs::write(&p, content).unwrap();
        p
    }

    /// Builds a `FileRoot` for `p` whose expected hash is the file's current hash.
    fn root_for(p: &Path) -> FileRoot {
        FileRoot {
            path: p.to_path_buf(),
            expected_hash: hash_file(p).unwrap(),
        }
    }

    /// A key misses before it is persisted and hits with the stored bytes afterwards.
    #[test]
    fn miss_then_persist_then_hit() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "a.txt", b"alpha");
        let key = Key::from_bytes(b"read|a.txt|alpha");

        assert_eq!(cache.lookup(&key).unwrap(), LookupOutcome::Miss);

        cache
            .persist(&key, b"alpha-formatted", "read", vec![root_for(&p)])
            .unwrap();

        match cache.lookup(&key).unwrap() {
            LookupOutcome::Hit(payload) => {
                assert_eq!(payload.bytes, b"alpha-formatted");
                assert_eq!(payload.meta.tool_kind, "read");
            }
            other => panic!("expected Hit, got {other:?}"),
        }
    }

    /// Revalidation hits while the root file is unchanged.
    #[test]
    fn revalidate_unchanged_returns_hit() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "b.txt", b"beta");
        let key = Key::from_bytes(b"read|b.txt|beta");
        cache
            .persist(&key, b"beta-formatted", "read", vec![root_for(&p)])
            .unwrap();
        match cache.lookup_revalidate(&key).unwrap() {
            LookupOutcome::Hit(_) => {}
            other => panic!("expected Hit, got {other:?}"),
        }
    }

    /// Rewriting the root file makes revalidation report `Invalidated` and
    /// drops the registry entry.
    #[test]
    fn revalidate_modified_invalidates() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "c.txt", b"charlie");
        let key = Key::from_bytes(b"read|c.txt|charlie");
        cache
            .persist(&key, b"charlie-formatted", "read", vec![root_for(&p)])
            .unwrap();

        // Change the file after the entry was recorded.
        std::fs::write(&p, b"DELTA").unwrap();

        match cache.lookup_revalidate(&key).unwrap() {
            LookupOutcome::Invalidated => {}
            other => panic!("expected Invalidated, got {other:?}"),
        }
        assert_eq!(cache.entry_count(), 0);
    }

    /// Deleting the root file makes revalidation report `Invalidated`.
    #[test]
    fn revalidate_deleted_invalidates() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "d.txt", b"delta");
        let key = Key::from_bytes(b"read|d.txt|delta");
        cache
            .persist(&key, b"delta-formatted", "read", vec![root_for(&p)])
            .unwrap();

        std::fs::remove_file(&p).unwrap();

        match cache.lookup_revalidate(&key).unwrap() {
            LookupOutcome::Invalidated => {}
            other => panic!("expected Invalidated, got {other:?}"),
        }
    }

    /// `mark_dirty` removes the entry from the registry and from later lookups.
    #[test]
    fn mark_dirty_drops_entry() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "e.txt", b"echo");
        let key = Key::from_bytes(b"read|e.txt|echo");
        cache
            .persist(&key, b"echo-formatted", "read", vec![root_for(&p)])
            .unwrap();
        assert_eq!(cache.entry_count(), 1);
        cache.mark_dirty(&key);
        assert_eq!(cache.entry_count(), 0);
        assert_eq!(cache.lookup(&key).unwrap(), LookupOutcome::Miss);
    }

    /// `invalidate_path` drops only the entries rooted at the given file.
    #[test]
    fn invalidate_path_drops_matching_entries() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p1 = write_file(&dir, "f1.txt", b"foxtrot");
        let p2 = write_file(&dir, "f2.txt", b"foxtrot2");
        let k1 = Key::from_bytes(b"read|f1");
        let k2 = Key::from_bytes(b"read|f2");
        cache
            .persist(&k1, b"f1-out", "read", vec![root_for(&p1)])
            .unwrap();
        cache
            .persist(&k2, b"f2-out", "read", vec![root_for(&p2)])
            .unwrap();
        assert_eq!(cache.entry_count(), 2);
        let n = cache.invalidate_path(&p1);
        assert_eq!(n, 1);
        assert_eq!(cache.entry_count(), 1);
        match cache.lookup(&k2).unwrap() {
            LookupOutcome::Hit(_) => {}
            other => panic!("k2 should still hit, got {other:?}"),
        }
        match cache.lookup(&k1).unwrap() {
            LookupOutcome::Miss => {}
            other => panic!("k1 should miss, got {other:?}"),
        }
    }

    /// With two roots, changing either one invalidates the entry.
    #[test]
    fn multi_root_revalidation() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p1 = write_file(&dir, "g1.txt", b"golf1");
        let p2 = write_file(&dir, "g2.txt", b"golf2");
        let key = Key::from_bytes(b"grep|pattern|g1+g2");
        cache
            .persist(
                &key,
                b"merged-output",
                "grep",
                vec![root_for(&p1), root_for(&p2)],
            )
            .unwrap();

        match cache.lookup_revalidate(&key).unwrap() {
            LookupOutcome::Hit(_) => {}
            other => panic!("expected Hit, got {other:?}"),
        }
        // Modify only the second root.
        std::fs::write(&p2, b"changed").unwrap();
        match cache.lookup_revalidate(&key).unwrap() {
            LookupOutcome::Invalidated => {}
            other => panic!("expected Invalidated, got {other:?}"),
        }
    }

    /// Invalidating an upstream key drops every entry that lists it as an upstream.
    #[test]
    fn upstream_invalidation_drops_dependents() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "src.txt", b"alpha");
        let read_key = Key::from_bytes(b"read|src");
        cache
            .persist(&read_key, b"alpha-formatted", "read", vec![root_for(&p)])
            .unwrap();
        let llm1 = Key::from_bytes(b"llm|first-prompt");
        let llm2 = Key::from_bytes(b"llm|second-prompt");
        cache
            .persist_with_upstreams(
                &llm1,
                b"completion-1",
                "llm_call",
                vec![],
                vec![read_key.clone()],
            )
            .unwrap();
        cache
            .persist_with_upstreams(
                &llm2,
                b"completion-2",
                "llm_call",
                vec![],
                vec![read_key.clone()],
            )
            .unwrap();
        assert_eq!(cache.entry_count(), 3);

        let dropped = cache.invalidate_upstream(&read_key);
        assert_eq!(dropped, 2);
        assert_eq!(cache.lookup(&llm1).unwrap(), LookupOutcome::Miss);
        assert_eq!(cache.lookup(&llm2).unwrap(), LookupOutcome::Miss);
    }

    /// `invalidate_path` also drops entries that depend on the matched entry.
    #[test]
    fn invalidate_path_cascades_to_dependent_llm_calls() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "input.txt", b"hello");
        let read_key = Key::from_bytes(b"read|input");
        cache
            .persist(&read_key, b"hello-formatted", "read", vec![root_for(&p)])
            .unwrap();
        let llm = Key::from_bytes(b"llm|sees-read");
        cache
            .persist_with_upstreams(
                &llm,
                b"completion",
                "llm_call",
                vec![],
                vec![read_key.clone()],
            )
            .unwrap();
        assert_eq!(cache.entry_count(), 2);

        std::fs::write(&p, b"changed").unwrap();
        let n = cache.invalidate_path(&p);
        // Only the read entry matches the path directly; the dependent is
        // dropped by the cascade and is not counted in `n`.
        assert_eq!(n, 1, "the read entry was the direct path match");
        assert_eq!(cache.lookup(&llm).unwrap(), LookupOutcome::Miss);
        assert_eq!(cache.entry_count(), 0);
    }

    /// Upstream invalidation follows a chain a <- b <- c across more than one hop.
    #[test]
    fn transitive_invalidation_walks_multi_hop_chain() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let key_a = Key::from_bytes(b"a");
        let key_b = Key::from_bytes(b"b");
        let key_c = Key::from_bytes(b"c");
        let p = write_file(&dir, "f.txt", b"x");
        cache
            .persist(&key_a, b"a-bytes", "read", vec![root_for(&p)])
            .unwrap();
        cache
            .persist_with_upstreams(&key_b, b"b-bytes", "llm_call", vec![], vec![key_a.clone()])
            .unwrap();
        cache
            .persist_with_upstreams(&key_c, b"c-bytes", "llm_call", vec![], vec![key_b.clone()])
            .unwrap();

        let dropped = cache.invalidate_upstream(&key_a);
        assert_eq!(dropped, 2);
        assert_eq!(cache.lookup(&key_b).unwrap(), LookupOutcome::Miss);
        assert_eq!(cache.lookup(&key_c).unwrap(), LookupOutcome::Miss);
    }

    /// Upstream edges written by one cache instance are visible to a new
    /// instance opened over the same store directory.
    #[test]
    fn upstream_keys_persist_across_rehydration() {
        let dir = TempDir::new().unwrap();
        let p = write_file(&dir, "g.txt", b"data");
        let read_key = Key::from_bytes(b"read|g");
        let llm_key = Key::from_bytes(b"llm|g-consumer");

        // The first cache instance lives only inside this scope.
        {
            let cache = cache(&dir);
            cache
                .persist(&read_key, b"data-formatted", "read", vec![root_for(&p)])
                .unwrap();
            cache
                .persist_with_upstreams(
                    &llm_key,
                    b"completion",
                    "llm_call",
                    vec![],
                    vec![read_key.clone()],
                )
                .unwrap();
        }

        let store_root = dir.path().join("store");
        let store2 = crate::store::FileStore::open(store_root).unwrap();
        let cache2 = LiveCache::new(store2);
        assert_eq!(cache2.entry_count(), 2);
        let dropped = cache2.invalidate_upstream(&read_key);
        assert_eq!(dropped, 1, "rehydrated edge must support cascade");
    }

    /// A new cache instance over an existing store directory starts with the
    /// stored entries in its registry and can serve them.
    #[test]
    fn fresh_cache_rehydrates_from_store_on_disk() {
        let dir = TempDir::new().unwrap();
        let p = write_file(&dir, "rehydrate.txt", b"persist me");
        let key = Key::from_bytes(b"read|rehydrate|persist me");

        // The first cache instance lives only inside this scope.
        {
            let cache = cache(&dir);
            cache
                .persist(&key, b"served-once", "read", vec![root_for(&p)])
                .unwrap();
            assert_eq!(cache.entry_count(), 1);
        }
        let store_root = dir.path().join("store");
        let store2 = crate::store::FileStore::open(store_root).unwrap();
        let cache2 = LiveCache::new(store2);
        assert_eq!(cache2.entry_count(), 1);
        match cache2.lookup_revalidate(&key).unwrap() {
            LookupOutcome::Hit(payload) => assert_eq!(payload.bytes, b"served-once"),
            other => panic!("expected Hit after rehydrate, got {other:?}"),
        }
    }

    /// A hit returns exactly the bytes that were persisted, whitespace included.
    #[test]
    fn hit_returns_byte_identical_payload() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "h.txt", b"hotel");
        let key = Key::from_bytes(b"read|h");
        let original = b" 1\thotel-formatted-with-line-numbers\n 2\tetc\n";
        cache
            .persist(&key, original, "read", vec![root_for(&p)])
            .unwrap();
        match cache.lookup_revalidate(&key).unwrap() {
            LookupOutcome::Hit(p) => assert_eq!(p.bytes, original),
            other => panic!("expected Hit, got {other:?}"),
        }
    }
}
832}