Skip to main content

verdant_runtime/
cache.rs

1//! `LiveCache` — the M1 cache surface that the MCP server consumes.
2//!
3//! Keying philosophy: every cache entry is keyed by a deterministic blake3
4//! hash of the tool's *inputs*, where "inputs" includes any file content
5//! the tool's output is a function of. The store payload is the exact
6//! formatted bytes the MCP tool fed back to the model on the first
7//! execution. A subsequent identical call hits when (a) the input hash
8//! matches AND (b) every recorded file root revalidates clean against the
9//! current filesystem. If either fails, the entry is treated as invalid
10//! and the registered metadata is removed so a stale entry does not
11//! linger. M1 keeps the registry in memory; M2 will persist it.
12//!
13//! The cache surface is deliberately tool-agnostic: callers compute the
14//! input bytes (we provide canonicalization helpers in `key`), invoke
15//! `lookup` or `lookup_revalidate`, and on miss they execute the real
16//! tool and call `persist`. The cache does not run tools itself; that
17//! lives one layer up in `verdant-mcp`.
18
19use crate::store::{FileRootSerde, Key, Payload, Store, StoreError};
20use std::collections::HashMap;
21use std::path::{Path, PathBuf};
22use std::sync::RwLock;
23
/// Errors returned by the fallible `LiveCache` operations in this module.
#[derive(Debug, thiserror::Error)]
pub enum CacheError {
    /// The backing store reported a failure. `#[from]` lets `?` convert a
    /// `StoreError` into this variant.
    #[error("store: {0}")]
    Store(#[from] StoreError),
    /// An I/O operation failed. `#[from]` lets `?` convert a
    /// `std::io::Error` into this variant.
    #[error("io: {0}")]
    Io(#[from] std::io::Error),
}
31
/// One file dependency of a cache entry. The tool computed its output as a
/// function of (path, contents at expected_hash). On every revalidating
/// lookup the file is hashed again and the digest must still equal
/// `expected_hash`; if it does not (or the file cannot be read), the entry
/// is reported as invalidated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileRoot {
    /// Location of the file. Joined onto the cache's workspace base before
    /// hashing, so a relative path resolves inside the workspace while an
    /// absolute path is used as-is.
    pub path: PathBuf,
    /// Hex digest the file is expected to have, as produced by `hash_file`.
    pub expected_hash: String,
}
41
/// In-memory registry record kept for each known cache key.
#[derive(Debug, Clone)]
struct EntryMeta {
    /// Label of the tool that produced the entry (e.g. "read", "grep",
    /// "llm_call" in this file's tests). Reported by `known_kinds`.
    tool_kind: String,
    /// Files whose contents the entry depends on; checked by
    /// `lookup_revalidate` and matched by `invalidate_path`.
    file_roots: Vec<FileRoot>,
    /// Upstream cache keys this entry depends on. Empty for leaf
    /// tool-call entries; populated for LlmCall entries with the
    /// tool-call keys whose payloads appeared in the prompt's
    /// `tool_result` blocks.
    upstream_keys: Vec<String>,
}
52
/// Cache front-end pairing a payload store with an in-memory registry of
/// per-key metadata (tool kind, file roots, upstream keys).
pub struct LiveCache {
    /// Backing store that holds the persisted payloads.
    store: Box<dyn Store>,
    /// Key string -> metadata for every entry this instance knows about.
    /// Guarded by an `RwLock` because all methods take `&self`.
    registry: RwLock<HashMap<String, EntryMeta>>,
    /// Workspace base used to resolve workspace-relative `FileRoot::path`
    /// during revalidation. `FileRoot` paths are stored relative so a
    /// cache entry persisted on Alice's machine at `/home/alice/repo/`
    /// is reusable on Bob's machine at `/home/bob/work/repo/` without
    /// changing the cache key. `LiveCache::new` defaults the base to
    /// the process cwd at construction time; binaries that know the
    /// real project root should call `LiveCache::with_workspace`.
    workspace_base: PathBuf,
}
65
/// Result of `LiveCache::lookup` / `LiveCache::lookup_revalidate`.
#[derive(Debug, Clone, PartialEq)]
pub enum LookupOutcome {
    /// Cache hit. The payload is byte-for-byte the same as the original
    /// execution and (for revalidating lookups) every file root has been
    /// confirmed unchanged.
    Hit(Payload),
    /// No entry for this key.
    Miss,
    /// Entry existed but a file root no longer hashes to its recorded
    /// value, or could not be read. If the registry held the entry, it has
    /// been removed from the registry.
    // NOTE(review): an earlier version of this comment promised that
    // subsequent lookups behave as `Miss`. `lookup_revalidate` does not
    // remove the payload from the store, so that presumably only holds
    // when the store stops returning it — confirm.
    Invalidated,
}
79
80impl LiveCache {
81    pub fn new<S: Store + 'static>(store: S) -> Self {
82        let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
83        Self::from_box_with_workspace(Box::new(store), base)
84    }
85
86    pub fn with_workspace<S: Store + 'static>(
87        store: S,
88        workspace_base: impl Into<PathBuf>,
89    ) -> Self {
90        Self::from_box_with_workspace(Box::new(store), workspace_base.into())
91    }
92
93    pub fn from_box(store: Box<dyn Store>) -> Self {
94        let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
95        Self::from_box_with_workspace(store, base)
96    }
97
98    pub fn from_box_with_workspace(store: Box<dyn Store>, workspace_base: PathBuf) -> Self {
99        // Rehydrate the in-memory registry from on-disk meta files so a
100        // freshly constructed cache (e.g., a brand-new MCP server
101        // process started by Claude Code on each `claude -p` invocation)
102        // can serve previously-persisted entries instead of treating
103        // every key as Miss. Without this, M1's cross-session cache
104        // doesn't actually exist — every Run-2 lookup would miss,
105        // overwrite the same payload file, and the cache would provide
106        // zero savings between sessions.
107        let mut reg = HashMap::new();
108        if let Ok(items) = store.iter_meta() {
109            for (key, meta) in items {
110                let file_roots = meta
111                    .file_roots
112                    .into_iter()
113                    .map(|f| FileRoot {
114                        path: PathBuf::from(f.path),
115                        expected_hash: f.expected_hash,
116                    })
117                    .collect();
118                reg.insert(
119                    key.0,
120                    EntryMeta {
121                        tool_kind: meta.tool_kind,
122                        file_roots,
123                        upstream_keys: meta.upstream_keys,
124                    },
125                );
126            }
127        }
128        Self {
129            store,
130            registry: RwLock::new(reg),
131            workspace_base,
132        }
133    }
134
135    pub fn store(&self) -> &dyn Store {
136        self.store.as_ref()
137    }
138
139    pub fn workspace_base(&self) -> &Path {
140        &self.workspace_base
141    }
142
143    pub fn entry_count(&self) -> usize {
144        self.registry.read().expect("registry poisoned").len()
145    }
146
147    /// Bare lookup with no file revalidation. Used by tools whose output
148    /// has no filesystem dependency (rare in M1 — even Bash depends on
149    /// the cwd's contents in practice). Most callers want
150    /// `lookup_revalidate`.
151    pub fn lookup(&self, key: &Key) -> Result<LookupOutcome, CacheError> {
152        let in_reg = self
153            .registry
154            .read()
155            .expect("registry poisoned")
156            .contains_key(&key.0);
157        match self.store.lookup(key)? {
158            Some(p) => {
159                // Backends like `RemoteStore` cannot pre-populate the
160                // registry through `iter_meta` because there is no bulk
161                // listing over the wire; the registry stays empty and
162                // entries are discovered one round-trip at a time. Seed
163                // the registry from the payload meta so subsequent
164                // operations (invalidate_path, invalidate_upstream,
165                // entry_count) see the entry without another round-trip.
166                if !in_reg {
167                    self.populate_registry_from_meta(key, &p);
168                }
169                Ok(LookupOutcome::Hit(p))
170            }
171            None => {
172                if in_reg {
173                    // Registry says we have it but the store does not —
174                    // happens if the store was truncated externally
175                    // between persist and lookup, or if a `_shared`
176                    // entry the registry knows about was server-side
177                    // invalidated. Drop the orphan so subsequent
178                    // lookups return Miss directly.
179                    self.registry.write().unwrap().remove(&key.0);
180                }
181                Ok(LookupOutcome::Miss)
182            }
183        }
184    }
185
186    /// Lookup with revalidation: re-blake3 every recorded file root and
187    /// require the hash to still match the value captured on persist. On
188    /// any mismatch the entry is removed from the registry and `Invalidated`
189    /// is returned so the caller knows to re-execute the real tool.
190    ///
191    /// This is the primary lookup path for `read`, `glob`, and `grep`
192    /// tools whose output is a pure function of named file contents. Bash
193    /// typically cannot use this path because the set of files Bash
194    /// reads is not known a priori.
195    pub fn lookup_revalidate(&self, key: &Key) -> Result<LookupOutcome, CacheError> {
196        // Snapshot the metadata under a read lock, drop the lock before
197        // doing any I/O so we don't hold it across blake3 of large files,
198        // then upgrade to a write lock only if invalidation is required.
199        let cached_meta = {
200            let reg = self.registry.read().expect("registry poisoned");
201            reg.get(&key.0).cloned()
202        };
203
204        // Fast path: registry already knows the roots. Revalidate them
205        // first to avoid a round-trip to a slow store on a dirty entry.
206        if let Some(meta) = &cached_meta {
207            match revalidate_file_roots(&self.workspace_base, &meta.file_roots) {
208                RevalidationOutcome::Ok => {}
209                RevalidationOutcome::Invalidated => {
210                    self.registry.write().unwrap().remove(&key.0);
211                    return Ok(LookupOutcome::Invalidated);
212                }
213            }
214        }
215
216        match self.store.lookup(key)? {
217            Some(p) => {
218                // If we did not have the entry in the registry, the
219                // store's payload meta carries the file roots the
220                // entry was persisted with. Revalidate against the
221                // local filesystem before trusting it — this is the
222                // cross-machine drift check that RemoteStore-backed
223                // caches rely on, because the server only knows its
224                // own filesystem and cannot detect that Bob's local
225                // checkout has diverged from Alice's.
226                if cached_meta.is_none() {
227                    let local_roots: Vec<FileRoot> = p
228                        .meta
229                        .file_roots
230                        .iter()
231                        .map(|f| FileRoot {
232                            path: PathBuf::from(&f.path),
233                            expected_hash: f.expected_hash.clone(),
234                        })
235                        .collect();
236                    match revalidate_file_roots(&self.workspace_base, &local_roots) {
237                        RevalidationOutcome::Ok => {
238                            self.populate_registry_from_meta(key, &p);
239                        }
240                        RevalidationOutcome::Invalidated => {
241                            return Ok(LookupOutcome::Invalidated);
242                        }
243                    }
244                }
245                Ok(LookupOutcome::Hit(p))
246            }
247            None => {
248                if cached_meta.is_some() {
249                    self.registry.write().unwrap().remove(&key.0);
250                }
251                Ok(LookupOutcome::Miss)
252            }
253        }
254    }
255
256    fn populate_registry_from_meta(&self, key: &Key, p: &Payload) {
257        let file_roots = p
258            .meta
259            .file_roots
260            .iter()
261            .map(|f| FileRoot {
262                path: PathBuf::from(&f.path),
263                expected_hash: f.expected_hash.clone(),
264            })
265            .collect();
266        self.registry.write().unwrap().insert(
267            key.0.clone(),
268            EntryMeta {
269                tool_kind: p.meta.tool_kind.clone(),
270                file_roots,
271                upstream_keys: p.meta.upstream_keys.clone(),
272            },
273        );
274    }
275
276    /// Record a fresh tool execution. Caller has already produced the
277    /// formatted output bytes the model will see; we persist them under
278    /// `key` and register the file roots for future revalidation.
279    pub fn persist(
280        &self,
281        key: &Key,
282        bytes: &[u8],
283        tool_kind: &str,
284        file_roots: Vec<FileRoot>,
285    ) -> Result<(), CacheError> {
286        self.persist_with_upstreams(key, bytes, tool_kind, file_roots, Vec::new())
287    }
288
289    /// Persist an entry whose validity depends on the listed upstream
290    /// cache keys. The proxy's LlmCall path uses this so a tool-cache
291    /// invalidation can walk the upstream edge and drop dependent
292    /// completions.
293    pub fn persist_with_upstreams(
294        &self,
295        key: &Key,
296        bytes: &[u8],
297        tool_kind: &str,
298        file_roots: Vec<FileRoot>,
299        upstream_keys: Vec<Key>,
300    ) -> Result<(), CacheError> {
301        let serde_roots: Vec<FileRootSerde> = file_roots
302            .iter()
303            .map(|r| FileRootSerde {
304                path: r.path.display().to_string(),
305                expected_hash: r.expected_hash.clone(),
306            })
307            .collect();
308        let upstream_strings: Vec<String> = upstream_keys.iter().map(|k| k.0.clone()).collect();
309        self.store.persist_with_upstreams(
310            key,
311            bytes,
312            tool_kind,
313            serde_roots,
314            upstream_strings.clone(),
315        )?;
316        self.registry.write().unwrap().insert(
317            key.0.clone(),
318            EntryMeta {
319                tool_kind: tool_kind.to_string(),
320                file_roots,
321                upstream_keys: upstream_strings,
322            },
323        );
324        Ok(())
325    }
326
327    /// Drop the registry entry for `key`. The store payload remains on
328    /// disk (M1 is append-only; M2 adds eviction) but subsequent lookups
329    /// will Miss because the registry is the authoritative gate.
330    ///
331    /// Used by `write` and `edit` MCP tools to invalidate every cached
332    /// node whose path matches the written path; the verdant-mcp layer
333    /// computes the affected key set and calls this for each.
334    pub fn mark_dirty(&self, key: &Key) {
335        self.registry.write().unwrap().remove(&key.0);
336        // Best-effort store cleanup; if it fails (concurrent writer,
337        // permissions glitch) we silently leave the bytes on disk
338        // because the registry is the authoritative gate and a stale
339        // payload that no entry references is harmless.
340        let _ = self.store.remove(key);
341    }
342
343    /// Drop every cache entry that depends on `upstream_key` either
344    /// directly (its `upstream_keys` includes that hex) or transitively
345    /// (its dependency closure does). Returns the number of entries
346    /// dropped.
347    ///
348    /// This is the cross-layer dirty propagation path that ties the
349    /// M3 LlmCall cache to M1's tool cache: when a `read` entry is
350    /// invalidated by `invalidate_path`, the proxy calls this with
351    /// the read's key and every LlmCall whose prompt consumed that
352    /// read drops out of the cache. Without this hop, an edited file
353    /// would silently feed the model the old bytes via a stale cached
354    /// completion.
355    pub fn invalidate_upstream(&self, upstream_key: &Key) -> usize {
356        let mut reg = self.registry.write().unwrap();
357        // Compute the transitive closure of dependents. Each iteration
358        // collects entries whose declared upstream set intersects the
359        // current dirty set; we repeat until no new keys are added.
360        let mut dirty: std::collections::HashSet<String> =
361            std::collections::HashSet::from([upstream_key.0.clone()]);
362        loop {
363            let before = dirty.len();
364            for (k, meta) in reg.iter() {
365                if dirty.contains(k) {
366                    continue;
367                }
368                if meta.upstream_keys.iter().any(|u| dirty.contains(u)) {
369                    dirty.insert(k.clone());
370                }
371            }
372            if dirty.len() == before {
373                break;
374            }
375        }
376        // The originating key itself may or may not be in the registry;
377        // we only count entries we actually drop.
378        let mut dropped = 0;
379        for k in &dirty {
380            if k == &upstream_key.0 {
381                continue;
382            }
383            if reg.remove(k).is_some() {
384                dropped += 1;
385                let _ = self.store.remove(&Key(k.clone()));
386            }
387        }
388        dropped
389    }
390
391    /// Drop every registry entry whose recorded file roots include `path`.
392    /// O(n) in the registry size; M1 keeps registries small (a few hundred
393    /// entries per session) so this is fine. M2's persistent index will
394    /// add a path → keys reverse map.
395    pub fn invalidate_path(&self, path: &Path) -> usize {
396        let target = match path.canonicalize() {
397            Ok(p) => p,
398            Err(_) => path.to_path_buf(),
399        };
400        let to_drop: Vec<String> = {
401            let reg = self.registry.read().unwrap();
402            reg.iter()
403                .filter_map(|(k, meta)| {
404                    let touches = meta.file_roots.iter().any(|r| {
405                        let resolved = resolve_root_path(&self.workspace_base, &r.path);
406                        resolved
407                            .canonicalize()
408                            .map(|c| c == target)
409                            .unwrap_or_else(|_| resolved == path || r.path == path)
410                    });
411                    if touches {
412                        Some(k.clone())
413                    } else {
414                        None
415                    }
416                })
417                .collect()
418        };
419        let n = to_drop.len();
420        for k in to_drop {
421            let key = Key(k);
422            // Cascade up the dependency edge so any LlmCall whose
423            // prompt consumed this tool result also drops.
424            self.invalidate_upstream(&key);
425            self.registry.write().unwrap().remove(&key.0);
426            let _ = self.store.remove(&key);
427        }
428        n
429    }
430
431    pub fn known_kinds(&self) -> Vec<String> {
432        let reg = self.registry.read().unwrap();
433        let mut kinds: Vec<String> = reg.values().map(|m| m.tool_kind.clone()).collect();
434        kinds.sort();
435        kinds.dedup();
436        kinds
437    }
438}
439
/// Verdict of `revalidate_file_roots` for one set of file roots.
enum RevalidationOutcome {
    /// Every root hashed to its recorded `expected_hash`.
    Ok,
    /// At least one root could not be hashed or its hash differs.
    Invalidated,
}
447
448fn revalidate_file_roots(workspace_base: &Path, roots: &[FileRoot]) -> RevalidationOutcome {
449    for root in roots {
450        let resolved = resolve_root_path(workspace_base, &root.path);
451        let current = match hash_file(&resolved) {
452            Ok(h) => h,
453            Err(_) => return RevalidationOutcome::Invalidated,
454        };
455        if current != root.expected_hash {
456            return RevalidationOutcome::Invalidated;
457        }
458    }
459    RevalidationOutcome::Ok
460}
461
/// Resolve a recorded `FileRoot::path` against a workspace base.
///
/// A relative `recorded` path is appended to `workspace_base`. An
/// absolute `recorded` path replaces the base entirely, so legacy
/// entries persisted with absolute paths keep resolving to the same
/// file. New entries persist workspace-relative paths, which lets
/// cross-user `_shared` entries work because every consumer resolves
/// against its own workspace.
fn resolve_root_path(workspace_base: &Path, recorded: &Path) -> PathBuf {
    let mut resolved = workspace_base.to_path_buf();
    // `push` appends a relative path and replaces on an absolute one.
    resolved.push(recorded);
    resolved
}
472
473pub fn hash_file(path: &Path) -> std::io::Result<String> {
474    let mut hasher = blake3::Hasher::new();
475    let mut f = std::fs::File::open(path)?;
476    let mut buf = [0u8; 1 << 16];
477    loop {
478        let n = std::io::Read::read(&mut f, &mut buf)?;
479        if n == 0 {
480            break;
481        }
482        hasher.update(&buf[..n]);
483    }
484    Ok(hasher.finalize().to_hex().to_string())
485}
486
487#[cfg(test)]
488mod tests {
489    use super::*;
490    use tempfile::TempDir;
491
492    fn cache(dir: &TempDir) -> LiveCache {
493        let store = crate::store::FileStore::open(dir.path().join("store")).unwrap();
494        LiveCache::new(store)
495    }
496
497    fn write_file(dir: &TempDir, name: &str, content: &[u8]) -> PathBuf {
498        let p = dir.path().join(name);
499        std::fs::write(&p, content).unwrap();
500        p
501    }
502
503    fn root_for(p: &Path) -> FileRoot {
504        FileRoot {
505            path: p.to_path_buf(),
506            expected_hash: hash_file(p).unwrap(),
507        }
508    }
509
510    #[test]
511    fn miss_then_persist_then_hit() {
512        let dir = TempDir::new().unwrap();
513        let cache = cache(&dir);
514        let p = write_file(&dir, "a.txt", b"alpha");
515        let key = Key::from_bytes(b"read|a.txt|alpha");
516
517        assert_eq!(cache.lookup(&key).unwrap(), LookupOutcome::Miss);
518
519        cache
520            .persist(&key, b"alpha-formatted", "read", vec![root_for(&p)])
521            .unwrap();
522
523        match cache.lookup(&key).unwrap() {
524            LookupOutcome::Hit(payload) => {
525                assert_eq!(payload.bytes, b"alpha-formatted");
526                assert_eq!(payload.meta.tool_kind, "read");
527            }
528            other => panic!("expected Hit, got {other:?}"),
529        }
530    }
531
532    #[test]
533    fn revalidate_unchanged_returns_hit() {
534        let dir = TempDir::new().unwrap();
535        let cache = cache(&dir);
536        let p = write_file(&dir, "b.txt", b"beta");
537        let key = Key::from_bytes(b"read|b.txt|beta");
538        cache
539            .persist(&key, b"beta-formatted", "read", vec![root_for(&p)])
540            .unwrap();
541        match cache.lookup_revalidate(&key).unwrap() {
542            LookupOutcome::Hit(_) => {}
543            other => panic!("expected Hit, got {other:?}"),
544        }
545    }
546
547    #[test]
548    fn revalidate_modified_invalidates() {
549        let dir = TempDir::new().unwrap();
550        let cache = cache(&dir);
551        let p = write_file(&dir, "c.txt", b"charlie");
552        let key = Key::from_bytes(b"read|c.txt|charlie");
553        cache
554            .persist(&key, b"charlie-formatted", "read", vec![root_for(&p)])
555            .unwrap();
556
557        std::fs::write(&p, b"DELTA").unwrap();
558
559        match cache.lookup_revalidate(&key).unwrap() {
560            LookupOutcome::Invalidated => {}
561            other => panic!("expected Invalidated, got {other:?}"),
562        }
563        assert_eq!(cache.entry_count(), 0);
564    }
565
566    #[test]
567    fn revalidate_deleted_invalidates() {
568        let dir = TempDir::new().unwrap();
569        let cache = cache(&dir);
570        let p = write_file(&dir, "d.txt", b"delta");
571        let key = Key::from_bytes(b"read|d.txt|delta");
572        cache
573            .persist(&key, b"delta-formatted", "read", vec![root_for(&p)])
574            .unwrap();
575
576        std::fs::remove_file(&p).unwrap();
577
578        match cache.lookup_revalidate(&key).unwrap() {
579            LookupOutcome::Invalidated => {}
580            other => panic!("expected Invalidated, got {other:?}"),
581        }
582    }
583
584    #[test]
585    fn mark_dirty_drops_entry() {
586        let dir = TempDir::new().unwrap();
587        let cache = cache(&dir);
588        let p = write_file(&dir, "e.txt", b"echo");
589        let key = Key::from_bytes(b"read|e.txt|echo");
590        cache
591            .persist(&key, b"echo-formatted", "read", vec![root_for(&p)])
592            .unwrap();
593        assert_eq!(cache.entry_count(), 1);
594        cache.mark_dirty(&key);
595        assert_eq!(cache.entry_count(), 0);
596        assert_eq!(cache.lookup(&key).unwrap(), LookupOutcome::Miss);
597    }
598
599    #[test]
600    fn invalidate_path_drops_matching_entries() {
601        let dir = TempDir::new().unwrap();
602        let cache = cache(&dir);
603        let p1 = write_file(&dir, "f1.txt", b"foxtrot");
604        let p2 = write_file(&dir, "f2.txt", b"foxtrot2");
605        let k1 = Key::from_bytes(b"read|f1");
606        let k2 = Key::from_bytes(b"read|f2");
607        cache
608            .persist(&k1, b"f1-out", "read", vec![root_for(&p1)])
609            .unwrap();
610        cache
611            .persist(&k2, b"f2-out", "read", vec![root_for(&p2)])
612            .unwrap();
613        assert_eq!(cache.entry_count(), 2);
614        let n = cache.invalidate_path(&p1);
615        assert_eq!(n, 1);
616        assert_eq!(cache.entry_count(), 1);
617        // k1 invalidated, k2 still present
618        match cache.lookup(&k2).unwrap() {
619            LookupOutcome::Hit(_) => {}
620            other => panic!("k2 should still hit, got {other:?}"),
621        }
622        match cache.lookup(&k1).unwrap() {
623            LookupOutcome::Miss => {}
624            other => panic!("k1 should miss, got {other:?}"),
625        }
626    }
627
628    #[test]
629    fn multi_root_revalidation() {
630        let dir = TempDir::new().unwrap();
631        let cache = cache(&dir);
632        let p1 = write_file(&dir, "g1.txt", b"golf1");
633        let p2 = write_file(&dir, "g2.txt", b"golf2");
634        let key = Key::from_bytes(b"grep|pattern|g1+g2");
635        cache
636            .persist(
637                &key,
638                b"merged-output",
639                "grep",
640                vec![root_for(&p1), root_for(&p2)],
641            )
642            .unwrap();
643
644        // First revalidation: clean
645        match cache.lookup_revalidate(&key).unwrap() {
646            LookupOutcome::Hit(_) => {}
647            other => panic!("expected Hit, got {other:?}"),
648        }
649        // Modify only the second root: must invalidate
650        std::fs::write(&p2, b"changed").unwrap();
651        match cache.lookup_revalidate(&key).unwrap() {
652            LookupOutcome::Invalidated => {}
653            other => panic!("expected Invalidated, got {other:?}"),
654        }
655    }
656
657    #[test]
658    fn upstream_invalidation_drops_dependents() {
659        let dir = TempDir::new().unwrap();
660        let cache = cache(&dir);
661        let p = write_file(&dir, "src.txt", b"alpha");
662        let read_key = Key::from_bytes(b"read|src");
663        cache
664            .persist(&read_key, b"alpha-formatted", "read", vec![root_for(&p)])
665            .unwrap();
666        // Two LlmCalls both depend on the read result.
667        let llm1 = Key::from_bytes(b"llm|first-prompt");
668        let llm2 = Key::from_bytes(b"llm|second-prompt");
669        cache
670            .persist_with_upstreams(
671                &llm1,
672                b"completion-1",
673                "llm_call",
674                vec![],
675                vec![read_key.clone()],
676            )
677            .unwrap();
678        cache
679            .persist_with_upstreams(
680                &llm2,
681                b"completion-2",
682                "llm_call",
683                vec![],
684                vec![read_key.clone()],
685            )
686            .unwrap();
687        assert_eq!(cache.entry_count(), 3);
688
689        // Invalidating the read key must cascade to both LlmCalls.
690        let dropped = cache.invalidate_upstream(&read_key);
691        assert_eq!(dropped, 2);
692        assert_eq!(cache.lookup(&llm1).unwrap(), LookupOutcome::Miss);
693        assert_eq!(cache.lookup(&llm2).unwrap(), LookupOutcome::Miss);
694    }
695
696    #[test]
697    fn invalidate_path_cascades_to_dependent_llm_calls() {
698        let dir = TempDir::new().unwrap();
699        let cache = cache(&dir);
700        let p = write_file(&dir, "input.txt", b"hello");
701        let read_key = Key::from_bytes(b"read|input");
702        cache
703            .persist(&read_key, b"hello-formatted", "read", vec![root_for(&p)])
704            .unwrap();
705        let llm = Key::from_bytes(b"llm|sees-read");
706        cache
707            .persist_with_upstreams(
708                &llm,
709                b"completion",
710                "llm_call",
711                vec![],
712                vec![read_key.clone()],
713            )
714            .unwrap();
715        assert_eq!(cache.entry_count(), 2);
716
717        // Modify the file and invalidate by path.
718        std::fs::write(&p, b"changed").unwrap();
719        let n = cache.invalidate_path(&p);
720        assert_eq!(n, 1, "the read entry was the direct path match");
721        // The LlmCall must also be gone via the cascade.
722        assert_eq!(cache.lookup(&llm).unwrap(), LookupOutcome::Miss);
723        assert_eq!(cache.entry_count(), 0);
724    }
725
726    #[test]
727    fn transitive_invalidation_walks_multi_hop_chain() {
728        // A -> B -> C: invalidating A drops B and C.
729        let dir = TempDir::new().unwrap();
730        let cache = cache(&dir);
731        let key_a = Key::from_bytes(b"a");
732        let key_b = Key::from_bytes(b"b");
733        let key_c = Key::from_bytes(b"c");
734        let p = write_file(&dir, "f.txt", b"x");
735        cache
736            .persist(&key_a, b"a-bytes", "read", vec![root_for(&p)])
737            .unwrap();
738        cache
739            .persist_with_upstreams(&key_b, b"b-bytes", "llm_call", vec![], vec![key_a.clone()])
740            .unwrap();
741        cache
742            .persist_with_upstreams(&key_c, b"c-bytes", "llm_call", vec![], vec![key_b.clone()])
743            .unwrap();
744
745        let dropped = cache.invalidate_upstream(&key_a);
746        assert_eq!(dropped, 2);
747        assert_eq!(cache.lookup(&key_b).unwrap(), LookupOutcome::Miss);
748        assert_eq!(cache.lookup(&key_c).unwrap(), LookupOutcome::Miss);
749    }
750
751    #[test]
752    fn upstream_keys_persist_across_rehydration() {
753        let dir = TempDir::new().unwrap();
754        let p = write_file(&dir, "g.txt", b"data");
755        let read_key = Key::from_bytes(b"read|g");
756        let llm_key = Key::from_bytes(b"llm|g-consumer");
757
758        {
759            let cache = cache(&dir);
760            cache
761                .persist(&read_key, b"data-formatted", "read", vec![root_for(&p)])
762                .unwrap();
763            cache
764                .persist_with_upstreams(
765                    &llm_key,
766                    b"completion",
767                    "llm_call",
768                    vec![],
769                    vec![read_key.clone()],
770                )
771                .unwrap();
772        }
773
774        // Fresh cache pointed at the same store: the upstream edge
775        // must come back so a subsequent invalidation cascades.
776        let store_root = dir.path().join("store");
777        let store2 = crate::store::FileStore::open(store_root).unwrap();
778        let cache2 = LiveCache::new(store2);
779        assert_eq!(cache2.entry_count(), 2);
780        let dropped = cache2.invalidate_upstream(&read_key);
781        assert_eq!(dropped, 1, "rehydrated edge must support cascade");
782    }
783
784    #[test]
785    fn fresh_cache_rehydrates_from_store_on_disk() {
786        // M1's whole point: a process restart must not invalidate the
787        // cache. Persist via one cache instance, drop it, build a fresh
788        // cache pointed at the same store directory, and confirm the
789        // entry is still served as a Hit.
790        let dir = TempDir::new().unwrap();
791        let p = write_file(&dir, "rehydrate.txt", b"persist me");
792        let key = Key::from_bytes(b"read|rehydrate|persist me");
793
794        {
795            let cache = cache(&dir);
796            cache
797                .persist(&key, b"served-once", "read", vec![root_for(&p)])
798                .unwrap();
799            assert_eq!(cache.entry_count(), 1);
800        } // drop cache; in-memory registry destroyed.
801
802        let store_root = dir.path().join("store");
803        let store2 = crate::store::FileStore::open(store_root).unwrap();
804        let cache2 = LiveCache::new(store2);
805        // Without rehydration this would be 0 and the next lookup would
806        // miss, defeating the entire cross-session caching story.
807        assert_eq!(cache2.entry_count(), 1);
808        match cache2.lookup_revalidate(&key).unwrap() {
809            LookupOutcome::Hit(payload) => assert_eq!(payload.bytes, b"served-once"),
810            other => panic!("expected Hit after rehydrate, got {other:?}"),
811        }
812    }
813
814    #[test]
815    fn hit_returns_byte_identical_payload() {
816        // Critical correctness test: cache must hand back the exact bytes
817        // it persisted, not a re-formatted view. A divergence here would
818        // silently corrupt the model's view of the world.
819        let dir = TempDir::new().unwrap();
820        let cache = cache(&dir);
821        let p = write_file(&dir, "h.txt", b"hotel");
822        let key = Key::from_bytes(b"read|h");
823        let original = b"  1\thotel-formatted-with-line-numbers\n  2\tetc\n";
824        cache
825            .persist(&key, original, "read", vec![root_for(&p)])
826            .unwrap();
827        match cache.lookup_revalidate(&key).unwrap() {
828            LookupOutcome::Hit(p) => assert_eq!(p.bytes, original),
829            other => panic!("expected Hit, got {other:?}"),
830        }
831    }
832}