Skip to main content

verdant_runtime/
cache.rs

1//! `LiveCache` — the M1 cache surface that the MCP server consumes.
2//!
3//! Keying philosophy: every cache entry is keyed by a deterministic blake3
4//! hash of the tool's *inputs*, where "inputs" includes any file content
5//! the tool's output is a function of. The store payload is the exact
6//! formatted bytes the MCP tool fed back to the model on the first
7//! execution. A subsequent identical call hits when (a) the input hash
8//! matches AND (b) every recorded file root revalidates clean against the
9//! current filesystem. If either fails, the entry is treated as invalid
10//! and the registered metadata is removed so a stale entry does not
11//! linger. M1 keeps the registry in memory; M2 will persist it.
12//!
13//! The cache surface is deliberately tool-agnostic: callers compute the
14//! input bytes (we provide canonicalization helpers in `key`), invoke
15//! `lookup` or `lookup_revalidate`, and on miss they execute the real
16//! tool and call `persist`. The cache does not run tools itself; that
17//! lives one layer up in `verdant-mcp`.
18
19use crate::store::{FileRootSerde, Key, Payload, Store, StoreError};
20use std::collections::HashMap;
21use std::path::{Path, PathBuf};
22use std::sync::RwLock;
23
/// Errors returned by `LiveCache` operations.
///
/// Both variants carry `#[from]`, so `?` converts the underlying error
/// into a `CacheError` automatically.
#[derive(Debug, thiserror::Error)]
pub enum CacheError {
    /// Failure reported by the backing `Store`.
    #[error("store: {0}")]
    Store(#[from] StoreError),
    /// I/O failure.
    #[error("io: {0}")]
    Io(#[from] std::io::Error),
}
31
/// One file dependency of a cache entry. The tool computed its output as a
/// function of (path, contents at expected_hash). On a revalidating lookup
/// the file is re-hashed with blake3 and the digest must still equal
/// `expected_hash`; if it does not, the entry is invalidated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileRoot {
    /// Path of the file. It is joined onto the cache's workspace base
    /// before hashing, so it may be workspace-relative or absolute.
    pub path: PathBuf,
    /// Digest the file must still hash to; compared against the output
    /// of `hash_file` (blake3, hex-encoded) during revalidation.
    pub expected_hash: String,
}
41
/// In-memory registry record describing one cache entry: which tool
/// produced it and what it depends on.
#[derive(Debug, Clone)]
struct EntryMeta {
    /// Tool kind label supplied when the entry was persisted
    /// (reported, deduplicated, by `known_kinds`).
    tool_kind: String,
    /// Files whose content the entry's output depends on.
    file_roots: Vec<FileRoot>,
    /// Upstream cache keys this entry depends on. Empty for leaf
    /// tool-call entries; populated for LlmCall entries with the
    /// tool-call keys whose payloads appeared in the prompt's
    /// `tool_result` blocks.
    upstream_keys: Vec<String>,
}
52
/// Cache front-end: a payload `Store` plus an in-memory registry of
/// per-entry metadata used for revalidation and invalidation.
pub struct LiveCache {
    /// Backing store holding the persisted payloads.
    store: Box<dyn Store>,
    /// Entry metadata keyed by the key's string form (`Key.0`). Lock
    /// poisoning is tolerated everywhere by taking the inner guard.
    registry: RwLock<HashMap<String, EntryMeta>>,
    /// Workspace base used to resolve workspace-relative `FileRoot::path`
    /// during revalidation. `FileRoot` paths are stored relative so a
    /// cache entry persisted on Alice's machine at `/home/alice/repo/`
    /// is reusable on Bob's machine at `/home/bob/work/repo/` without
    /// changing the cache key. `LiveCache::new` defaults the base to
    /// the process cwd at construction time; binaries that know the
    /// real project root should call `LiveCache::with_workspace`.
    workspace_base: PathBuf,
}
65
/// Result of a cache lookup.
#[derive(Debug, Clone, PartialEq)]
pub enum LookupOutcome {
    /// Cache hit. Carries the payload returned by the store; for
    /// revalidating lookups every recorded file root has also been
    /// re-hashed and confirmed unchanged.
    Hit(Payload),
    /// No entry for this key.
    Miss,
    /// Entry existed but a file root could not be hashed or no longer
    /// matches its recorded digest. `lookup_revalidate` drops the
    /// registry entry when the registry already knew the key. It does
    /// not remove the stored payload, so a later revalidating lookup may
    /// re-check the roots recorded in the payload's metadata.
    Invalidated,
}
79
80impl LiveCache {
81    pub fn new<S: Store + 'static>(store: S) -> Self {
82        let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
83        Self::from_box_with_workspace(Box::new(store), base)
84    }
85
86    pub fn with_workspace<S: Store + 'static>(
87        store: S,
88        workspace_base: impl Into<PathBuf>,
89    ) -> Self {
90        Self::from_box_with_workspace(Box::new(store), workspace_base.into())
91    }
92
93    pub fn from_box(store: Box<dyn Store>) -> Self {
94        let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
95        Self::from_box_with_workspace(store, base)
96    }
97
98    pub fn from_box_with_workspace(store: Box<dyn Store>, workspace_base: PathBuf) -> Self {
99        // Rehydrate the in-memory registry from on-disk meta files so a
100        // freshly constructed cache (e.g., a brand-new MCP server
101        // process started by Claude Code on each `claude -p` invocation)
102        // can serve previously-persisted entries instead of treating
103        // every key as Miss. Without this, M1's cross-session cache
104        // doesn't actually exist — every Run-2 lookup would miss,
105        // overwrite the same payload file, and the cache would provide
106        // zero savings between sessions.
107        let mut reg = HashMap::new();
108        if let Ok(items) = store.iter_meta() {
109            for (key, meta) in items {
110                let file_roots = meta
111                    .file_roots
112                    .into_iter()
113                    .map(|f| FileRoot {
114                        path: PathBuf::from(f.path),
115                        expected_hash: f.expected_hash,
116                    })
117                    .collect();
118                reg.insert(
119                    key.0,
120                    EntryMeta {
121                        tool_kind: meta.tool_kind,
122                        file_roots,
123                        upstream_keys: meta.upstream_keys,
124                    },
125                );
126            }
127        }
128        Self {
129            store,
130            registry: RwLock::new(reg),
131            workspace_base,
132        }
133    }
134
135    pub fn store(&self) -> &dyn Store {
136        self.store.as_ref()
137    }
138
139    pub fn workspace_base(&self) -> &Path {
140        &self.workspace_base
141    }
142
143    pub fn entry_count(&self) -> usize {
144        self.registry
145            .read()
146            .unwrap_or_else(|e| e.into_inner())
147            .len()
148    }
149
150    /// Bare lookup with no file revalidation. Used by tools whose output
151    /// has no filesystem dependency (rare in M1 — even Bash depends on
152    /// the cwd's contents in practice). Most callers want
153    /// `lookup_revalidate`.
154    pub fn lookup(&self, key: &Key) -> Result<LookupOutcome, CacheError> {
155        let in_reg = self
156            .registry
157            .read()
158            .unwrap_or_else(|e| e.into_inner())
159            .contains_key(&key.0);
160        match self.store.lookup(key)? {
161            Some(p) => {
162                // Backends like `RemoteStore` cannot pre-populate the
163                // registry through `iter_meta` because there is no bulk
164                // listing over the wire; the registry stays empty and
165                // entries are discovered one round-trip at a time. Seed
166                // the registry from the payload meta so subsequent
167                // operations (invalidate_path, invalidate_upstream,
168                // entry_count) see the entry without another round-trip.
169                if !in_reg {
170                    self.populate_registry_from_meta(key, &p);
171                }
172                Ok(LookupOutcome::Hit(p))
173            }
174            None => {
175                if in_reg {
176                    // Registry says we have it but the store does not —
177                    // happens if the store was truncated externally
178                    // between persist and lookup, or if a `_shared`
179                    // entry the registry knows about was server-side
180                    // invalidated. Drop the orphan so subsequent
181                    // lookups return Miss directly.
182                    self.registry
183                        .write()
184                        .unwrap_or_else(|e| e.into_inner())
185                        .remove(&key.0);
186                }
187                Ok(LookupOutcome::Miss)
188            }
189        }
190    }
191
192    /// Lookup with revalidation: re-blake3 every recorded file root and
193    /// require the hash to still match the value captured on persist. On
194    /// any mismatch the entry is removed from the registry and `Invalidated`
195    /// is returned so the caller knows to re-execute the real tool.
196    ///
197    /// This is the primary lookup path for `read`, `glob`, and `grep`
198    /// tools whose output is a pure function of named file contents. Bash
199    /// typically cannot use this path because the set of files Bash
200    /// reads is not known a priori.
201    pub fn lookup_revalidate(&self, key: &Key) -> Result<LookupOutcome, CacheError> {
202        // Snapshot the metadata under a read lock, drop the lock before
203        // doing any I/O so we don't hold it across blake3 of large files,
204        // then upgrade to a write lock only if invalidation is required.
205        let cached_meta = {
206            let reg = self.registry.read().unwrap_or_else(|e| e.into_inner());
207            reg.get(&key.0).cloned()
208        };
209
210        // Fast path: registry already knows the roots. Revalidate them
211        // first to avoid a round-trip to a slow store on a dirty entry.
212        if let Some(meta) = &cached_meta {
213            match revalidate_file_roots(&self.workspace_base, &meta.file_roots) {
214                RevalidationOutcome::Ok => {}
215                RevalidationOutcome::Invalidated => {
216                    self.registry
217                        .write()
218                        .unwrap_or_else(|e| e.into_inner())
219                        .remove(&key.0);
220                    return Ok(LookupOutcome::Invalidated);
221                }
222            }
223        }
224
225        match self.store.lookup(key)? {
226            Some(p) => {
227                // If we did not have the entry in the registry, the
228                // store's payload meta carries the file roots the
229                // entry was persisted with. Revalidate against the
230                // local filesystem before trusting it — this is the
231                // cross-machine drift check that RemoteStore-backed
232                // caches rely on, because the server only knows its
233                // own filesystem and cannot detect that Bob's local
234                // checkout has diverged from Alice's.
235                if cached_meta.is_none() {
236                    let local_roots: Vec<FileRoot> = p
237                        .meta
238                        .file_roots
239                        .iter()
240                        .map(|f| FileRoot {
241                            path: PathBuf::from(&f.path),
242                            expected_hash: f.expected_hash.clone(),
243                        })
244                        .collect();
245                    match revalidate_file_roots(&self.workspace_base, &local_roots) {
246                        RevalidationOutcome::Ok => {
247                            self.populate_registry_from_meta(key, &p);
248                        }
249                        RevalidationOutcome::Invalidated => {
250                            return Ok(LookupOutcome::Invalidated);
251                        }
252                    }
253                }
254                Ok(LookupOutcome::Hit(p))
255            }
256            None => {
257                if cached_meta.is_some() {
258                    self.registry
259                        .write()
260                        .unwrap_or_else(|e| e.into_inner())
261                        .remove(&key.0);
262                }
263                Ok(LookupOutcome::Miss)
264            }
265        }
266    }
267
268    fn populate_registry_from_meta(&self, key: &Key, p: &Payload) {
269        let file_roots = p
270            .meta
271            .file_roots
272            .iter()
273            .map(|f| FileRoot {
274                path: PathBuf::from(&f.path),
275                expected_hash: f.expected_hash.clone(),
276            })
277            .collect();
278        self.registry
279            .write()
280            .unwrap_or_else(|e| e.into_inner())
281            .insert(
282                key.0.clone(),
283                EntryMeta {
284                    tool_kind: p.meta.tool_kind.clone(),
285                    file_roots,
286                    upstream_keys: p.meta.upstream_keys.clone(),
287                },
288            );
289    }
290
291    /// Record a fresh tool execution. Caller has already produced the
292    /// formatted output bytes the model will see; we persist them under
293    /// `key` and register the file roots for future revalidation.
294    pub fn persist(
295        &self,
296        key: &Key,
297        bytes: &[u8],
298        tool_kind: &str,
299        file_roots: Vec<FileRoot>,
300    ) -> Result<(), CacheError> {
301        self.persist_with_upstreams(key, bytes, tool_kind, file_roots, Vec::new())
302    }
303
304    /// Persist an entry whose validity depends on the listed upstream
305    /// cache keys. The proxy's LlmCall path uses this so a tool-cache
306    /// invalidation can walk the upstream edge and drop dependent
307    /// completions.
308    pub fn persist_with_upstreams(
309        &self,
310        key: &Key,
311        bytes: &[u8],
312        tool_kind: &str,
313        file_roots: Vec<FileRoot>,
314        upstream_keys: Vec<Key>,
315    ) -> Result<(), CacheError> {
316        let serde_roots: Vec<FileRootSerde> = file_roots
317            .iter()
318            .map(|r| FileRootSerde {
319                path: r.path.display().to_string(),
320                expected_hash: r.expected_hash.clone(),
321            })
322            .collect();
323        let upstream_strings: Vec<String> = upstream_keys.iter().map(|k| k.0.clone()).collect();
324        self.store.persist_with_upstreams(
325            key,
326            bytes,
327            tool_kind,
328            serde_roots,
329            upstream_strings.clone(),
330        )?;
331        self.registry
332            .write()
333            .unwrap_or_else(|e| e.into_inner())
334            .insert(
335                key.0.clone(),
336                EntryMeta {
337                    tool_kind: tool_kind.to_string(),
338                    file_roots,
339                    upstream_keys: upstream_strings,
340                },
341            );
342        Ok(())
343    }
344
345    /// Drop the registry entry for `key`. The store payload remains on
346    /// disk (M1 is append-only; M2 adds eviction) but subsequent lookups
347    /// will Miss because the registry is the authoritative gate.
348    ///
349    /// Used by `write` and `edit` MCP tools to invalidate every cached
350    /// node whose path matches the written path; the verdant-mcp layer
351    /// computes the affected key set and calls this for each.
352    pub fn mark_dirty(&self, key: &Key) {
353        self.registry
354            .write()
355            .unwrap_or_else(|e| e.into_inner())
356            .remove(&key.0);
357        // Best-effort store cleanup; if it fails (concurrent writer,
358        // permissions glitch) we silently leave the bytes on disk
359        // because the registry is the authoritative gate and a stale
360        // payload that no entry references is harmless.
361        let _ = self.store.remove(key);
362    }
363
364    /// Drop every cache entry that depends on `upstream_key` either
365    /// directly (its `upstream_keys` includes that hex) or transitively
366    /// (its dependency closure does). Returns the number of entries
367    /// dropped.
368    ///
369    /// This is the cross-layer dirty propagation path that ties the
370    /// M3 LlmCall cache to M1's tool cache: when a `read` entry is
371    /// invalidated by `invalidate_path`, the proxy calls this with
372    /// the read's key and every LlmCall whose prompt consumed that
373    /// read drops out of the cache. Without this hop, an edited file
374    /// would silently feed the model the old bytes via a stale cached
375    /// completion.
376    pub fn invalidate_upstream(&self, upstream_key: &Key) -> usize {
377        let mut reg = self.registry.write().unwrap_or_else(|e| e.into_inner());
378        // Compute the transitive closure of dependents. Each iteration
379        // collects entries whose declared upstream set intersects the
380        // current dirty set; we repeat until no new keys are added.
381        let mut dirty: std::collections::HashSet<String> =
382            std::collections::HashSet::from([upstream_key.0.clone()]);
383        loop {
384            let before = dirty.len();
385            for (k, meta) in reg.iter() {
386                if dirty.contains(k) {
387                    continue;
388                }
389                if meta.upstream_keys.iter().any(|u| dirty.contains(u)) {
390                    dirty.insert(k.clone());
391                }
392            }
393            if dirty.len() == before {
394                break;
395            }
396        }
397        // The originating key itself may or may not be in the registry;
398        // we only count entries we actually drop.
399        let mut dropped = 0;
400        for k in &dirty {
401            if k == &upstream_key.0 {
402                continue;
403            }
404            if reg.remove(k).is_some() {
405                dropped += 1;
406                let _ = self.store.remove(&Key(k.clone()));
407            }
408        }
409        dropped
410    }
411
412    /// Drop every registry entry whose recorded file roots include `path`.
413    /// O(n) in the registry size; M1 keeps registries small (a few hundred
414    /// entries per session) so this is fine. M2's persistent index will
415    /// add a path → keys reverse map.
416    pub fn invalidate_path(&self, path: &Path) -> usize {
417        let target = match path.canonicalize() {
418            Ok(p) => p,
419            Err(_) => path.to_path_buf(),
420        };
421        // Compare a lowercased form so an edit recorded under one casing
422        // still invalidates an entry recorded under another. On a
423        // case-insensitive filesystem `Src/Foo.rs` and `src/foo.rs` are
424        // one file; a missed invalidation leaves a stale hit, while a
425        // spurious extra invalidation only costs a recompute, so the
426        // conservative lowercased comparison is applied unconditionally.
427        let target_ci = lower_path(&target);
428        let path_ci = lower_path(path);
429        let to_drop: Vec<String> = {
430            let reg = self.registry.read().unwrap_or_else(|e| e.into_inner());
431            reg.iter()
432                .filter_map(|(k, meta)| {
433                    let touches = meta.file_roots.iter().any(|r| {
434                        let resolved = resolve_root_path(&self.workspace_base, &r.path);
435                        let resolved_ci = lower_path(&resolved);
436                        match resolved.canonicalize() {
437                            Ok(c) => lower_path(&c) == target_ci,
438                            Err(_) => resolved_ci == path_ci || lower_path(&r.path) == path_ci,
439                        }
440                    });
441                    if touches {
442                        Some(k.clone())
443                    } else {
444                        None
445                    }
446                })
447                .collect()
448        };
449        let n = to_drop.len();
450        for k in to_drop {
451            let key = Key(k);
452            // Cascade up the dependency edge so any LlmCall whose
453            // prompt consumed this tool result also drops.
454            self.invalidate_upstream(&key);
455            self.registry
456                .write()
457                .unwrap_or_else(|e| e.into_inner())
458                .remove(&key.0);
459            let _ = self.store.remove(&key);
460        }
461        n
462    }
463
464    pub fn known_kinds(&self) -> Vec<String> {
465        let reg = self.registry.read().unwrap_or_else(|e| e.into_inner());
466        let mut kinds: Vec<String> = reg.values().map(|m| m.tool_kind.clone()).collect();
467        kinds.sort();
468        kinds.dedup();
469        kinds
470    }
471}
472
/// Result of re-checking a set of file roots against the filesystem.
enum RevalidationOutcome {
    /// Every root hashed to its recorded `expected_hash`.
    Ok,
    /// At least one root could not be hashed or hashed to a different
    /// digest.
    Invalidated,
}
480
481fn revalidate_file_roots(workspace_base: &Path, roots: &[FileRoot]) -> RevalidationOutcome {
482    for root in roots {
483        let resolved = resolve_root_path(workspace_base, &root.path);
484        let current = match hash_file(&resolved) {
485            Ok(h) => h,
486            Err(_) => return RevalidationOutcome::Invalidated,
487        };
488        if current != root.expected_hash {
489            return RevalidationOutcome::Invalidated;
490        }
491    }
492    RevalidationOutcome::Ok
493}
494
/// Resolve a recorded `FileRoot::path` against the workspace base.
///
/// A relative `recorded` path is appended to `workspace_base`. An
/// absolute one replaces the base entirely (standard `join`/`push`
/// semantics), which keeps legacy entries persisted with absolute paths
/// working. New entries persist workspace-relative paths, so cross-user
/// `_shared` entries resolve correctly because every consumer joins
/// against its own workspace.
fn resolve_root_path(workspace_base: &Path, recorded: &Path) -> PathBuf {
    let mut resolved = workspace_base.to_path_buf();
    resolved.push(recorded);
    resolved
}
505
/// Case-insensitive comparison key for a path: its lossy string form,
/// lowercased. Used when matching paths for invalidation.
fn lower_path(p: &Path) -> String {
    let text = p.as_os_str().to_string_lossy();
    text.to_lowercase()
}
511
/// Default ceiling for content hashing. A file larger than this is
/// reported `FileHash::Oversized` and is therefore uncacheable, because
/// (see `FileHash`) a size/mtime fingerprint is not a safe substitute
/// for a content hash.
const HASH_MAX_BYTES: u64 = 100 * 1024 * 1024;

/// The content-hash ceiling in bytes, read from `$VERDANT_HASH_MAX_BYTES`
/// if set and parseable as a `u64`, otherwise `HASH_MAX_BYTES`.
///
/// Surrounding whitespace in the variable (e.g. a trailing newline or
/// space from an env file) is ignored, so ` 4096 ` is honored rather
/// than silently falling back to the default.
pub fn hash_max_bytes() -> u64 {
    std::env::var("VERDANT_HASH_MAX_BYTES")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .unwrap_or(HASH_MAX_BYTES)
}
526
/// Outcome of fingerprinting a file for cache keying.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileHash {
    /// blake3 content digest, hex-encoded.
    Content(String),
    /// File is larger than the content-hash ceiling. It carries no
    /// digest on purpose: a size/mtime fingerprint would collide two
    /// different files with equal size and mtime onto one key, so a
    /// tool whose output depends on an oversized file must not cache.
    Oversized,
}

impl FileHash {
    /// The hex digest for `Content`, or `None` for `Oversized`.
    pub fn content(&self) -> Option<&str> {
        match self {
            Self::Oversized => None,
            Self::Content(digest) => Some(digest.as_str()),
        }
    }
}
547
548/// blake3 hex digest of the file at `path`, streamed so memory stays
549/// bounded regardless of file size. Always content-hashes; callers that
550/// must not cache oversized files use `hash_file_with_limit` instead.
551pub fn hash_file(path: &Path) -> std::io::Result<String> {
552    let mut hasher = blake3::Hasher::new();
553    let mut f = std::fs::File::open(path)?;
554    let mut buf = [0u8; 1 << 16];
555    loop {
556        let n = std::io::Read::read(&mut f, &mut buf)?;
557        if n == 0 {
558            break;
559        }
560        hasher.update(&buf[..n]);
561    }
562    Ok(hasher.finalize().to_hex().to_string())
563}
564
565/// Fingerprint `path` for cache keying. A file at or below `max` bytes is
566/// content-hashed; a larger file is reported `Oversized` so the caller
567/// declines to cache rather than keying on a collision-prone fingerprint.
568pub fn hash_file_with_limit(path: &Path, max: u64) -> std::io::Result<FileHash> {
569    if std::fs::metadata(path)?.len() > max {
570        return Ok(FileHash::Oversized);
571    }
572    Ok(FileHash::Content(hash_file(path)?))
573}
574
575#[cfg(test)]
576mod tests {
577    use super::*;
578    use tempfile::TempDir;
579
580    fn cache(dir: &TempDir) -> LiveCache {
581        let store = crate::store::FileStore::open(dir.path().join("store")).unwrap();
582        LiveCache::new(store)
583    }
584
585    fn write_file(dir: &TempDir, name: &str, content: &[u8]) -> PathBuf {
586        let p = dir.path().join(name);
587        std::fs::write(&p, content).unwrap();
588        p
589    }
590
591    fn root_for(p: &Path) -> FileRoot {
592        FileRoot {
593            path: p.to_path_buf(),
594            expected_hash: hash_file(p).unwrap(),
595        }
596    }
597
598    #[test]
599    fn miss_then_persist_then_hit() {
600        let dir = TempDir::new().unwrap();
601        let cache = cache(&dir);
602        let p = write_file(&dir, "a.txt", b"alpha");
603        let key = Key::from_bytes(b"read|a.txt|alpha");
604
605        assert_eq!(cache.lookup(&key).unwrap(), LookupOutcome::Miss);
606
607        cache
608            .persist(&key, b"alpha-formatted", "read", vec![root_for(&p)])
609            .unwrap();
610
611        match cache.lookup(&key).unwrap() {
612            LookupOutcome::Hit(payload) => {
613                assert_eq!(payload.bytes, b"alpha-formatted");
614                assert_eq!(payload.meta.tool_kind, "read");
615            }
616            other => panic!("expected Hit, got {other:?}"),
617        }
618    }
619
620    #[test]
621    fn revalidate_unchanged_returns_hit() {
622        let dir = TempDir::new().unwrap();
623        let cache = cache(&dir);
624        let p = write_file(&dir, "b.txt", b"beta");
625        let key = Key::from_bytes(b"read|b.txt|beta");
626        cache
627            .persist(&key, b"beta-formatted", "read", vec![root_for(&p)])
628            .unwrap();
629        match cache.lookup_revalidate(&key).unwrap() {
630            LookupOutcome::Hit(_) => {}
631            other => panic!("expected Hit, got {other:?}"),
632        }
633    }
634
635    #[test]
636    fn revalidate_modified_invalidates() {
637        let dir = TempDir::new().unwrap();
638        let cache = cache(&dir);
639        let p = write_file(&dir, "c.txt", b"charlie");
640        let key = Key::from_bytes(b"read|c.txt|charlie");
641        cache
642            .persist(&key, b"charlie-formatted", "read", vec![root_for(&p)])
643            .unwrap();
644
645        std::fs::write(&p, b"DELTA").unwrap();
646
647        match cache.lookup_revalidate(&key).unwrap() {
648            LookupOutcome::Invalidated => {}
649            other => panic!("expected Invalidated, got {other:?}"),
650        }
651        assert_eq!(cache.entry_count(), 0);
652    }
653
654    #[test]
655    fn revalidate_deleted_invalidates() {
656        let dir = TempDir::new().unwrap();
657        let cache = cache(&dir);
658        let p = write_file(&dir, "d.txt", b"delta");
659        let key = Key::from_bytes(b"read|d.txt|delta");
660        cache
661            .persist(&key, b"delta-formatted", "read", vec![root_for(&p)])
662            .unwrap();
663
664        std::fs::remove_file(&p).unwrap();
665
666        match cache.lookup_revalidate(&key).unwrap() {
667            LookupOutcome::Invalidated => {}
668            other => panic!("expected Invalidated, got {other:?}"),
669        }
670    }
671
672    #[test]
673    fn mark_dirty_drops_entry() {
674        let dir = TempDir::new().unwrap();
675        let cache = cache(&dir);
676        let p = write_file(&dir, "e.txt", b"echo");
677        let key = Key::from_bytes(b"read|e.txt|echo");
678        cache
679            .persist(&key, b"echo-formatted", "read", vec![root_for(&p)])
680            .unwrap();
681        assert_eq!(cache.entry_count(), 1);
682        cache.mark_dirty(&key);
683        assert_eq!(cache.entry_count(), 0);
684        assert_eq!(cache.lookup(&key).unwrap(), LookupOutcome::Miss);
685    }
686
687    #[test]
688    fn invalidate_path_drops_matching_entries() {
689        let dir = TempDir::new().unwrap();
690        let cache = cache(&dir);
691        let p1 = write_file(&dir, "f1.txt", b"foxtrot");
692        let p2 = write_file(&dir, "f2.txt", b"foxtrot2");
693        let k1 = Key::from_bytes(b"read|f1");
694        let k2 = Key::from_bytes(b"read|f2");
695        cache
696            .persist(&k1, b"f1-out", "read", vec![root_for(&p1)])
697            .unwrap();
698        cache
699            .persist(&k2, b"f2-out", "read", vec![root_for(&p2)])
700            .unwrap();
701        assert_eq!(cache.entry_count(), 2);
702        let n = cache.invalidate_path(&p1);
703        assert_eq!(n, 1);
704        assert_eq!(cache.entry_count(), 1);
705        // k1 invalidated, k2 still present
706        match cache.lookup(&k2).unwrap() {
707            LookupOutcome::Hit(_) => {}
708            other => panic!("k2 should still hit, got {other:?}"),
709        }
710        match cache.lookup(&k1).unwrap() {
711            LookupOutcome::Miss => {}
712            other => panic!("k1 should miss, got {other:?}"),
713        }
714    }
715
716    #[test]
717    fn invalidate_path_matches_case_insensitively() {
718        // On a case-insensitive filesystem `Src/Foo.rs` and `src/foo.rs`
719        // name the same file; a path edit reported under one casing must
720        // still invalidate an entry whose file root was recorded under
721        // another. A missed invalidation leaves a stale hit.
722        let dir = TempDir::new().unwrap();
723        let cache = cache(&dir);
724        let p = write_file(&dir, "CaseFile.txt", b"contents");
725        let key = Key::from_bytes(b"read|casefile");
726        cache
727            .persist(&key, b"formatted", "read", vec![root_for(&p)])
728            .unwrap();
729        assert_eq!(cache.entry_count(), 1);
730
731        let differently_cased = dir.path().join("casefile.txt");
732        let n = cache.invalidate_path(&differently_cased);
733        assert_eq!(n, 1, "case-differing path must still invalidate the entry");
734        assert_eq!(cache.entry_count(), 0);
735    }
736
737    #[test]
738    fn multi_root_revalidation() {
739        let dir = TempDir::new().unwrap();
740        let cache = cache(&dir);
741        let p1 = write_file(&dir, "g1.txt", b"golf1");
742        let p2 = write_file(&dir, "g2.txt", b"golf2");
743        let key = Key::from_bytes(b"grep|pattern|g1+g2");
744        cache
745            .persist(
746                &key,
747                b"merged-output",
748                "grep",
749                vec![root_for(&p1), root_for(&p2)],
750            )
751            .unwrap();
752
753        // First revalidation: clean
754        match cache.lookup_revalidate(&key).unwrap() {
755            LookupOutcome::Hit(_) => {}
756            other => panic!("expected Hit, got {other:?}"),
757        }
758        // Modify only the second root: must invalidate
759        std::fs::write(&p2, b"changed").unwrap();
760        match cache.lookup_revalidate(&key).unwrap() {
761            LookupOutcome::Invalidated => {}
762            other => panic!("expected Invalidated, got {other:?}"),
763        }
764    }
765
766    #[test]
767    fn upstream_invalidation_drops_dependents() {
768        let dir = TempDir::new().unwrap();
769        let cache = cache(&dir);
770        let p = write_file(&dir, "src.txt", b"alpha");
771        let read_key = Key::from_bytes(b"read|src");
772        cache
773            .persist(&read_key, b"alpha-formatted", "read", vec![root_for(&p)])
774            .unwrap();
775        // Two LlmCalls both depend on the read result.
776        let llm1 = Key::from_bytes(b"llm|first-prompt");
777        let llm2 = Key::from_bytes(b"llm|second-prompt");
778        cache
779            .persist_with_upstreams(
780                &llm1,
781                b"completion-1",
782                "llm_call",
783                vec![],
784                vec![read_key.clone()],
785            )
786            .unwrap();
787        cache
788            .persist_with_upstreams(
789                &llm2,
790                b"completion-2",
791                "llm_call",
792                vec![],
793                vec![read_key.clone()],
794            )
795            .unwrap();
796        assert_eq!(cache.entry_count(), 3);
797
798        // Invalidating the read key must cascade to both LlmCalls.
799        let dropped = cache.invalidate_upstream(&read_key);
800        assert_eq!(dropped, 2);
801        assert_eq!(cache.lookup(&llm1).unwrap(), LookupOutcome::Miss);
802        assert_eq!(cache.lookup(&llm2).unwrap(), LookupOutcome::Miss);
803    }
804
805    #[test]
806    fn invalidate_path_cascades_to_dependent_llm_calls() {
807        let dir = TempDir::new().unwrap();
808        let cache = cache(&dir);
809        let p = write_file(&dir, "input.txt", b"hello");
810        let read_key = Key::from_bytes(b"read|input");
811        cache
812            .persist(&read_key, b"hello-formatted", "read", vec![root_for(&p)])
813            .unwrap();
814        let llm = Key::from_bytes(b"llm|sees-read");
815        cache
816            .persist_with_upstreams(
817                &llm,
818                b"completion",
819                "llm_call",
820                vec![],
821                vec![read_key.clone()],
822            )
823            .unwrap();
824        assert_eq!(cache.entry_count(), 2);
825
826        // Modify the file and invalidate by path.
827        std::fs::write(&p, b"changed").unwrap();
828        let n = cache.invalidate_path(&p);
829        assert_eq!(n, 1, "the read entry was the direct path match");
830        // The LlmCall must also be gone via the cascade.
831        assert_eq!(cache.lookup(&llm).unwrap(), LookupOutcome::Miss);
832        assert_eq!(cache.entry_count(), 0);
833    }
834
835    #[test]
836    fn transitive_invalidation_walks_multi_hop_chain() {
837        // A -> B -> C: invalidating A drops B and C.
838        let dir = TempDir::new().unwrap();
839        let cache = cache(&dir);
840        let key_a = Key::from_bytes(b"a");
841        let key_b = Key::from_bytes(b"b");
842        let key_c = Key::from_bytes(b"c");
843        let p = write_file(&dir, "f.txt", b"x");
844        cache
845            .persist(&key_a, b"a-bytes", "read", vec![root_for(&p)])
846            .unwrap();
847        cache
848            .persist_with_upstreams(&key_b, b"b-bytes", "llm_call", vec![], vec![key_a.clone()])
849            .unwrap();
850        cache
851            .persist_with_upstreams(&key_c, b"c-bytes", "llm_call", vec![], vec![key_b.clone()])
852            .unwrap();
853
854        let dropped = cache.invalidate_upstream(&key_a);
855        assert_eq!(dropped, 2);
856        assert_eq!(cache.lookup(&key_b).unwrap(), LookupOutcome::Miss);
857        assert_eq!(cache.lookup(&key_c).unwrap(), LookupOutcome::Miss);
858    }
859
860    #[test]
861    fn upstream_keys_persist_across_rehydration() {
862        let dir = TempDir::new().unwrap();
863        let p = write_file(&dir, "g.txt", b"data");
864        let read_key = Key::from_bytes(b"read|g");
865        let llm_key = Key::from_bytes(b"llm|g-consumer");
866
867        {
868            let cache = cache(&dir);
869            cache
870                .persist(&read_key, b"data-formatted", "read", vec![root_for(&p)])
871                .unwrap();
872            cache
873                .persist_with_upstreams(
874                    &llm_key,
875                    b"completion",
876                    "llm_call",
877                    vec![],
878                    vec![read_key.clone()],
879                )
880                .unwrap();
881        }
882
883        // Fresh cache pointed at the same store: the upstream edge
884        // must come back so a subsequent invalidation cascades.
885        let store_root = dir.path().join("store");
886        let store2 = crate::store::FileStore::open(store_root).unwrap();
887        let cache2 = LiveCache::new(store2);
888        assert_eq!(cache2.entry_count(), 2);
889        let dropped = cache2.invalidate_upstream(&read_key);
890        assert_eq!(dropped, 1, "rehydrated edge must support cascade");
891    }
892
893    #[test]
894    fn fresh_cache_rehydrates_from_store_on_disk() {
895        // M1's whole point: a process restart must not invalidate the
896        // cache. Persist via one cache instance, drop it, build a fresh
897        // cache pointed at the same store directory, and confirm the
898        // entry is still served as a Hit.
899        let dir = TempDir::new().unwrap();
900        let p = write_file(&dir, "rehydrate.txt", b"persist me");
901        let key = Key::from_bytes(b"read|rehydrate|persist me");
902
903        {
904            let cache = cache(&dir);
905            cache
906                .persist(&key, b"served-once", "read", vec![root_for(&p)])
907                .unwrap();
908            assert_eq!(cache.entry_count(), 1);
909        } // drop cache; in-memory registry destroyed.
910
911        let store_root = dir.path().join("store");
912        let store2 = crate::store::FileStore::open(store_root).unwrap();
913        let cache2 = LiveCache::new(store2);
914        // Without rehydration this would be 0 and the next lookup would
915        // miss, defeating the entire cross-session caching story.
916        assert_eq!(cache2.entry_count(), 1);
917        match cache2.lookup_revalidate(&key).unwrap() {
918            LookupOutcome::Hit(payload) => assert_eq!(payload.bytes, b"served-once"),
919            other => panic!("expected Hit after rehydrate, got {other:?}"),
920        }
921    }
922
923    #[test]
924    fn hit_returns_byte_identical_payload() {
925        // Critical correctness test: cache must hand back the exact bytes
926        // it persisted, not a re-formatted view. A divergence here would
927        // silently corrupt the model's view of the world.
928        let dir = TempDir::new().unwrap();
929        let cache = cache(&dir);
930        let p = write_file(&dir, "h.txt", b"hotel");
931        let key = Key::from_bytes(b"read|h");
932        let original = b"  1\thotel-formatted-with-line-numbers\n  2\tetc\n";
933        cache
934            .persist(&key, original, "read", vec![root_for(&p)])
935            .unwrap();
936        match cache.lookup_revalidate(&key).unwrap() {
937            LookupOutcome::Hit(p) => assert_eq!(p.bytes, original),
938            other => panic!("expected Hit, got {other:?}"),
939        }
940    }
941
942    #[test]
943    fn hash_file_with_limit_content_hashes_within_limit() {
944        let dir = TempDir::new().unwrap();
945        let p = write_file(&dir, "small.bin", b"comfortably within the limit");
946        match hash_file_with_limit(&p, 1024).unwrap() {
947            FileHash::Content(h) => assert_eq!(h, hash_file(&p).unwrap()),
948            FileHash::Oversized => panic!("a file within the limit must content-hash"),
949        }
950    }
951
952    #[test]
953    fn hash_file_with_limit_reports_oversized_above_limit() {
954        let dir = TempDir::new().unwrap();
955        let p = write_file(&dir, "big.bin", &[7u8; 4096]);
956        assert_eq!(hash_file_with_limit(&p, 64).unwrap(), FileHash::Oversized);
957    }
958
959    #[test]
960    fn oversized_files_yield_no_keyable_digest() {
961        // The removed metadata fallback hashed (path, size, mtime), so two
962        // distinct oversized files with equal size and mtime collided onto
963        // one key. FileHash::Oversized carries no digest, so distinct
964        // oversized files cannot be keyed against each other at all.
965        let dir = TempDir::new().unwrap();
966        let a = write_file(&dir, "a.bin", &[1u8; 4096]);
967        let b = write_file(&dir, "b.bin", &[2u8; 4096]);
968        let ha = hash_file_with_limit(&a, 64).unwrap();
969        let hb = hash_file_with_limit(&b, 64).unwrap();
970        assert_eq!(ha, FileHash::Oversized);
971        assert_eq!(hb, FileHash::Oversized);
972        assert!(ha.content().is_none() && hb.content().is_none());
973    }
974}