Skip to main content

verdant_runtime/
cache.rs

1//! `LiveCache` — the M1 cache surface that the MCP server consumes.
2//!
3//! Keying philosophy: every cache entry is keyed by a deterministic blake3
4//! hash of the tool's *inputs*, where "inputs" includes any file content
5//! the tool's output is a function of. The store payload is the exact
6//! formatted bytes the MCP tool fed back to the model on the first
7//! execution. A subsequent identical call hits when (a) the input hash
8//! matches AND (b) every recorded file root revalidates clean against the
9//! current filesystem. If either fails, the entry is treated as invalid
10//! and the registered metadata is removed so a stale entry does not
11//! linger. M1 keeps the registry in memory; M2 will persist it.
12//!
13//! The cache surface is deliberately tool-agnostic: callers compute the
14//! input bytes (we provide canonicalization helpers in `key`), invoke
15//! `lookup` or `lookup_revalidate`, and on miss they execute the real
16//! tool and call `persist`. The cache does not run tools itself; that
17//! lives one layer up in `verdant-mcp`.
18
19use crate::store::{FileRootSerde, Key, Payload, Store, StoreError};
20use std::collections::HashMap;
21use std::os::unix::fs::MetadataExt;
22use std::path::{Path, PathBuf};
23use std::sync::RwLock;
24
/// Errors returned by the fallible `LiveCache` operations.
#[derive(Debug, thiserror::Error)]
pub enum CacheError {
    /// Failure reported by the backing store. Converted automatically from
    /// `StoreError` (via `#[from]`), so `?` works on store calls.
    #[error("store: {0}")]
    Store(#[from] StoreError),
    /// I/O failure. Converted automatically from `std::io::Error` (via
    /// `#[from]`).
    #[error("io: {0}")]
    Io(#[from] std::io::Error),
}
32
/// One file dependency of a cache entry. The tool computed its output as a
/// function of (path, contents at expected_hash). Revalidating lookups
/// recompute the file's fingerprint and require it to still equal
/// `expected_hash`; if it does not (or the file cannot be read), the entry
/// is invalidated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileRoot {
    /// Recorded location of the file. It is joined onto the cache's
    /// workspace base before being read during revalidation.
    pub path: PathBuf,
    /// Fingerprint captured at persist time. A value starting with `stat:`
    /// is revalidated as a size+mtime fingerprint; any other value is
    /// revalidated as a blake3 content digest.
    pub expected_hash: String,
}
42
/// In-memory registry record kept for one cached entry.
#[derive(Debug, Clone)]
struct EntryMeta {
    /// Label of the tool that produced the entry (the tests use `"read"`).
    tool_kind: String,
    /// Files whose fingerprints must still match for the entry to be served
    /// by a revalidating lookup.
    file_roots: Vec<FileRoot>,
}
48
/// Tool-agnostic cache front end: a backing `Store` that holds payloads and
/// their persisted metadata, plus an in-memory registry of per-entry
/// metadata guarded by an `RwLock`.
pub struct LiveCache {
    /// Backing store for payload bytes and persisted entry metadata.
    store: Box<dyn Store>,
    /// Per-process map from key string (`Key.0`) to entry metadata. Every
    /// accessor tolerates lock poisoning by recovering the inner guard.
    registry: RwLock<HashMap<String, EntryMeta>>,
    /// Workspace base used to resolve workspace-relative `FileRoot::path`
    /// during revalidation. `FileRoot` paths are stored relative so a
    /// cache entry persisted on Alice's machine at `/home/alice/repo/`
    /// is reusable on Bob's machine at `/home/bob/work/repo/` without
    /// changing the cache key. `LiveCache::new` defaults the base to
    /// the process cwd at construction time; binaries that know the
    /// real project root should call `LiveCache::with_workspace`.
    workspace_base: PathBuf,
}
61
/// Result of a cache lookup.
#[derive(Debug, Clone, PartialEq)]
pub enum LookupOutcome {
    /// Cache hit. Carries the payload returned by the store; for
    /// revalidating lookups every recorded file root has additionally been
    /// confirmed unchanged.
    Hit(Payload),
    /// No entry for this key in the store.
    Miss,
    /// Entry existed but a file root has changed or could not be read. If
    /// the entry was present in the in-memory registry it has been removed
    /// from it.
    /// NOTE(review): `lookup_revalidate` does not remove the payload from the
    /// store, so a later lookup may find and revalidate it again — confirm
    /// whether that is intended.
    Invalidated,
}
75
76impl LiveCache {
77    pub fn new<S: Store + 'static>(store: S) -> Self {
78        let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
79        Self::from_box_with_workspace(Box::new(store), base)
80    }
81
82    pub fn with_workspace<S: Store + 'static>(
83        store: S,
84        workspace_base: impl Into<PathBuf>,
85    ) -> Self {
86        Self::from_box_with_workspace(Box::new(store), workspace_base.into())
87    }
88
89    pub fn from_box(store: Box<dyn Store>) -> Self {
90        let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
91        Self::from_box_with_workspace(store, base)
92    }
93
94    pub fn from_box_with_workspace(store: Box<dyn Store>, workspace_base: PathBuf) -> Self {
95        // Rehydrate the in-memory registry from on-disk meta files so a
96        // freshly constructed cache (e.g., a brand-new MCP server
97        // process started by Claude Code on each `claude -p` invocation)
98        // can serve previously-persisted entries instead of treating
99        // every key as Miss. Without this, M1's cross-session cache
100        // doesn't actually exist — every Run-2 lookup would miss,
101        // overwrite the same payload file, and the cache would provide
102        // zero savings between sessions.
103        let mut reg = HashMap::new();
104        if let Ok(items) = store.iter_meta() {
105            for (key, meta) in items {
106                let file_roots = meta
107                    .file_roots
108                    .into_iter()
109                    .map(|f| FileRoot {
110                        path: PathBuf::from(f.path),
111                        expected_hash: f.expected_hash,
112                    })
113                    .collect();
114                reg.insert(
115                    key.0,
116                    EntryMeta {
117                        tool_kind: meta.tool_kind,
118                        file_roots,
119                    },
120                );
121            }
122        }
123        Self {
124            store,
125            registry: RwLock::new(reg),
126            workspace_base,
127        }
128    }
129
130    pub fn store(&self) -> &dyn Store {
131        self.store.as_ref()
132    }
133
134    pub fn workspace_base(&self) -> &Path {
135        &self.workspace_base
136    }
137
138    pub fn entry_count(&self) -> usize {
139        self.registry
140            .read()
141            .unwrap_or_else(|e| e.into_inner())
142            .len()
143    }
144
145    /// Whether an entry for `key` is present, checking the in-memory registry
146    /// first and falling back to the store. Does not fetch or revalidate the
147    /// payload, so it is a cheap presence probe (used by provenance telemetry
148    /// to count how many tool-result edges in a prompt resolve to a known node).
149    pub fn contains(&self, key: &Key) -> bool {
150        if self
151            .registry
152            .read()
153            .unwrap_or_else(|e| e.into_inner())
154            .contains_key(&key.0)
155        {
156            return true;
157        }
158        self.store.contains(key)
159    }
160
161    /// Bare lookup with no file revalidation. Used by tools whose output
162    /// has no filesystem dependency (rare in M1 — even Bash depends on
163    /// the cwd's contents in practice). Most callers want
164    /// `lookup_revalidate`.
165    pub fn lookup(&self, key: &Key) -> Result<LookupOutcome, CacheError> {
166        let in_reg = self
167            .registry
168            .read()
169            .unwrap_or_else(|e| e.into_inner())
170            .contains_key(&key.0);
171        match self.store.lookup(key)? {
172            Some(p) => {
173                // Backends like `RemoteStore` cannot pre-populate the
174                // registry through `iter_meta` because there is no bulk
175                // listing over the wire; the registry stays empty and
176                // entries are discovered one round-trip at a time. Seed
177                // the registry from the payload meta so subsequent
178                // operations (invalidate_path, invalidate_upstream,
179                // entry_count) see the entry without another round-trip.
180                if !in_reg {
181                    self.populate_registry_from_meta(key, &p);
182                }
183                Ok(LookupOutcome::Hit(p))
184            }
185            None => {
186                if in_reg {
187                    // Registry says we have it but the store does not —
188                    // happens if the store was truncated externally
189                    // between persist and lookup, or if a `_shared`
190                    // entry the registry knows about was server-side
191                    // invalidated. Drop the orphan so subsequent
192                    // lookups return Miss directly.
193                    self.registry
194                        .write()
195                        .unwrap_or_else(|e| e.into_inner())
196                        .remove(&key.0);
197                }
198                Ok(LookupOutcome::Miss)
199            }
200        }
201    }
202
    /// Lookup with revalidation: recompute the fingerprint of every recorded
    /// file root (blake3 content digest, or size+mtime for `stat:`-prefixed
    /// roots) and require it to still match the value captured on persist.
    /// On any mismatch, or if a root cannot be read, `Invalidated` is
    /// returned so the caller knows to re-execute the real tool.
    ///
    /// Outcomes:
    /// - `Hit(payload)`: the store has the entry and every root revalidated.
    /// - `Invalidated`: a root changed. If the entry was in the in-memory
    ///   registry it is removed from it; the store payload is not removed
    ///   by this method.
    /// - `Miss`: the store has no entry; a stale registry record, if any, is
    ///   dropped.
    ///
    /// # Errors
    /// Returns `CacheError::Store` when the store lookup itself fails.
    ///
    /// This is the primary lookup path for `read`, `glob`, and `grep`
    /// tools whose output is a pure function of named file contents. Bash
    /// typically cannot use this path because the set of files Bash
    /// reads is not known a priori.
    pub fn lookup_revalidate(&self, key: &Key) -> Result<LookupOutcome, CacheError> {
        // Snapshot the metadata under a read lock and release the lock
        // before doing any I/O so it is not held while large files are
        // hashed; a separate write lock is taken later only if the registry
        // has to be modified.
        let cached_meta = {
            let reg = self.registry.read().unwrap_or_else(|e| e.into_inner());
            reg.get(&key.0).cloned()
        };

        // Fast path: registry already knows the roots. Revalidate them
        // first to avoid a round-trip to a slow store on a dirty entry.
        if let Some(meta) = &cached_meta {
            match revalidate_file_roots(&self.workspace_base, &meta.file_roots) {
                RevalidationOutcome::Ok => {}
                RevalidationOutcome::Invalidated => {
                    // Only the registry record is dropped here; the store
                    // is left untouched.
                    self.registry
                        .write()
                        .unwrap_or_else(|e| e.into_inner())
                        .remove(&key.0);
                    return Ok(LookupOutcome::Invalidated);
                }
            }
        }

        match self.store.lookup(key)? {
            Some(p) => {
                // If we did not have the entry in the registry, the
                // store's payload meta carries the file roots the
                // entry was persisted with. Revalidate against the
                // local filesystem before trusting it — this is the
                // cross-machine drift check that RemoteStore-backed
                // caches rely on, because the server only knows its
                // own filesystem and cannot detect that Bob's local
                // checkout has diverged from Alice's.
                if cached_meta.is_none() {
                    let local_roots: Vec<FileRoot> = p
                        .meta
                        .file_roots
                        .iter()
                        .map(|f| FileRoot {
                            path: PathBuf::from(&f.path),
                            expected_hash: f.expected_hash.clone(),
                        })
                        .collect();
                    match revalidate_file_roots(&self.workspace_base, &local_roots) {
                        RevalidationOutcome::Ok => {
                            // Remember the roots so the next call takes the
                            // registry fast path above.
                            self.populate_registry_from_meta(key, &p);
                        }
                        RevalidationOutcome::Invalidated => {
                            // Nothing was registered, so there is nothing
                            // to remove from the registry.
                            return Ok(LookupOutcome::Invalidated);
                        }
                    }
                }
                Ok(LookupOutcome::Hit(p))
            }
            None => {
                // The registry knew the key but the store does not: drop the
                // orphaned registry record.
                if cached_meta.is_some() {
                    self.registry
                        .write()
                        .unwrap_or_else(|e| e.into_inner())
                        .remove(&key.0);
                }
                Ok(LookupOutcome::Miss)
            }
        }
    }
278
279    fn populate_registry_from_meta(&self, key: &Key, p: &Payload) {
280        let file_roots = p
281            .meta
282            .file_roots
283            .iter()
284            .map(|f| FileRoot {
285                path: PathBuf::from(&f.path),
286                expected_hash: f.expected_hash.clone(),
287            })
288            .collect();
289        self.registry
290            .write()
291            .unwrap_or_else(|e| e.into_inner())
292            .insert(
293                key.0.clone(),
294                EntryMeta {
295                    tool_kind: p.meta.tool_kind.clone(),
296                    file_roots,
297                },
298            );
299    }
300
301    /// Record a fresh tool execution. Caller has already produced the
302    /// formatted output bytes the model will see; we persist them under
303    /// `key` and register the file roots for future revalidation.
304    pub fn persist(
305        &self,
306        key: &Key,
307        bytes: &[u8],
308        tool_kind: &str,
309        file_roots: Vec<FileRoot>,
310    ) -> Result<(), CacheError> {
311        self.persist_with_upstreams(key, bytes, tool_kind, file_roots, Vec::new())
312    }
313
314    /// Persist an entry whose validity depends on the listed upstream
315    /// cache keys. The proxy's LlmCall path uses this so a tool-cache
316    /// invalidation can walk the upstream edge and drop dependent
317    /// completions.
318    pub fn persist_with_upstreams(
319        &self,
320        key: &Key,
321        bytes: &[u8],
322        tool_kind: &str,
323        file_roots: Vec<FileRoot>,
324        upstream_keys: Vec<Key>,
325    ) -> Result<(), CacheError> {
326        let serde_roots: Vec<FileRootSerde> = file_roots
327            .iter()
328            .map(|r| FileRootSerde {
329                path: r.path.display().to_string(),
330                expected_hash: r.expected_hash.clone(),
331            })
332            .collect();
333        let upstream_strings: Vec<String> = upstream_keys.iter().map(|k| k.0.clone()).collect();
334        self.store
335            .persist_with_upstreams(key, bytes, tool_kind, serde_roots, upstream_strings)?;
336        self.registry
337            .write()
338            .unwrap_or_else(|e| e.into_inner())
339            .insert(
340                key.0.clone(),
341                EntryMeta {
342                    tool_kind: tool_kind.to_string(),
343                    file_roots,
344                },
345            );
346        Ok(())
347    }
348
349    /// Drop the registry entry for `key`. The store payload remains on
350    /// disk (M1 is append-only; M2 adds eviction) but subsequent lookups
351    /// will Miss because the registry is the authoritative gate.
352    ///
353    /// Used by `write` and `edit` MCP tools to invalidate every cached
354    /// node whose path matches the written path; the verdant-mcp layer
355    /// computes the affected key set and calls this for each.
356    pub fn mark_dirty(&self, key: &Key) {
357        self.registry
358            .write()
359            .unwrap_or_else(|e| e.into_inner())
360            .remove(&key.0);
361        // Best-effort store cleanup; if it fails (concurrent writer,
362        // permissions glitch) we silently leave the bytes on disk
363        // because the registry is the authoritative gate and a stale
364        // payload that no entry references is harmless.
365        let _ = self.store.remove(key);
366    }
367
    /// Drop every cache entry that depends on `upstream_key` either
    /// directly (its `upstream_keys` includes that key string) or
    /// transitively (its dependency closure does). The entry for
    /// `upstream_key` itself is NOT removed by this method.
    ///
    /// Returns the number of dependent entries whose removal from the store
    /// succeeded. Returns 0 without touching anything when the store's
    /// metadata cannot be listed.
    ///
    /// This is the cross-layer dirty propagation path that ties the
    /// M3 LlmCall cache to M1's tool cache: when a `read` entry is
    /// invalidated by `invalidate_path`, the proxy calls this with
    /// the read's key and every LlmCall whose prompt consumed that
    /// read drops out of the cache. Without this hop, an edited file
    /// would silently feed the model the old bytes via a stale cached
    /// completion.
    pub fn invalidate_upstream(&self, upstream_key: &Key) -> usize {
        // Walk the PERSISTED metadata, not just the in-memory registry, so the
        // cascade crosses process boundaries: a tool process invalidating a
        // file must drop the LLM-call entries a separate proxy process persisted
        // into the shared store. The registry is only a per-process perf cache;
        // `lookup` already trusts the store as the source of truth.
        let metas = match self.store.iter_meta() {
            Ok(m) => m,
            Err(_) => return 0,
        };
        // `dirty` starts with the upstream key and grows to its dependency
        // closure.
        let mut dirty: std::collections::HashSet<String> =
            std::collections::HashSet::from([upstream_key.0.clone()]);
        // Fixed-point iteration: rescan all metadata until a full pass adds
        // no new key, so dependency chains of any depth are followed.
        loop {
            let before = dirty.len();
            for (k, meta) in &metas {
                if dirty.contains(&k.0) {
                    continue;
                }
                if meta.upstream_keys.iter().any(|u| dirty.contains(u)) {
                    dirty.insert(k.0.clone());
                }
            }
            if dirty.len() == before {
                break;
            }
        }
        // NOTE(review): the registry write lock is held across the store
        // removals below, unlike `lookup_revalidate`, which releases its
        // lock before doing I/O — confirm this is acceptable for slow stores.
        let mut reg = self.registry.write().unwrap_or_else(|e| e.into_inner());
        let mut dropped = 0;
        for k in &dirty {
            // The seed key is the caller's responsibility; only dependents
            // are removed here.
            if k == &upstream_key.0 {
                continue;
            }
            reg.remove(k);
            // Count only entries the store confirms it removed.
            if self.store.remove(&Key(k.clone())).is_ok() {
                dropped += 1;
            }
        }
        dropped
    }
419
420    /// Drop every cached entry whose recorded file roots include `path`, then
421    /// cascade up the dependency edges. Scans the persisted store metadata so
422    /// the effect is visible across processes sharing the store. O(n) in store
423    /// size; the code anticipates a future path -> keys index.
424    pub fn invalidate_path(&self, path: &Path) -> usize {
425        let target = match path.canonicalize() {
426            Ok(p) => p,
427            Err(_) => path.to_path_buf(),
428        };
429        // Compare a lowercased form so an edit recorded under one casing
430        // still invalidates an entry recorded under another. On a
431        // case-insensitive filesystem `Src/Foo.rs` and `src/foo.rs` are
432        // one file; a missed invalidation leaves a stale hit, while a
433        // spurious extra invalidation only costs a recompute, so the
434        // conservative lowercased comparison is applied unconditionally.
435        let target_ci = lower_path(&target);
436        let path_ci = lower_path(path);
437        let metas = match self.store.iter_meta() {
438            Ok(m) => m,
439            Err(_) => return 0,
440        };
441        let to_drop: Vec<String> = metas
442            .iter()
443            .filter_map(|(k, meta)| {
444                let touches = meta.file_roots.iter().any(|r| {
445                    let recorded = PathBuf::from(&r.path);
446                    let resolved = resolve_root_path(&self.workspace_base, &recorded);
447                    let resolved_ci = lower_path(&resolved);
448                    match resolved.canonicalize() {
449                        Ok(c) => lower_path(&c) == target_ci,
450                        Err(_) => resolved_ci == path_ci || lower_path(&recorded) == path_ci,
451                    }
452                });
453                if touches {
454                    Some(k.0.clone())
455                } else {
456                    None
457                }
458            })
459            .collect();
460        let n = to_drop.len();
461        for k in to_drop {
462            let key = Key(k);
463            // Cascade up the dependency edge so any LlmCall whose prompt
464            // consumed this tool result also drops.
465            self.invalidate_upstream(&key);
466            self.registry
467                .write()
468                .unwrap_or_else(|e| e.into_inner())
469                .remove(&key.0);
470            let _ = self.store.remove(&key);
471        }
472        n
473    }
474
475    pub fn known_kinds(&self) -> Vec<String> {
476        let reg = self.registry.read().unwrap_or_else(|e| e.into_inner());
477        let mut kinds: Vec<String> = reg.values().map(|m| m.tool_kind.clone()).collect();
478        kinds.sort();
479        kinds.dedup();
480        kinds
481    }
482}
483
/// Result of checking a set of file roots against the current filesystem
/// (see `revalidate_file_roots`).
enum RevalidationOutcome {
    /// Every root's recomputed fingerprint equals its recorded one.
    Ok,
    /// At least one root is missing, unreadable, or has a different
    /// fingerprint than the one recorded.
    Invalidated,
}
491
492fn revalidate_file_roots(workspace_base: &Path, roots: &[FileRoot]) -> RevalidationOutcome {
493    let debug = std::env::var_os("VERDANT_DEBUG_INVALIDATION").is_some();
494    for root in roots {
495        let resolved = resolve_root_path(workspace_base, &root.path);
496        // The recorded fingerprint dictates how to revalidate: a `stat:` prefix
497        // is a size+mtime fingerprint for a large/stable input (never re-read),
498        // anything else is a blake3 content digest (re-hashed, fully sound).
499        let current = if root.expected_hash.starts_with(STAT_PREFIX) {
500            match stat_fingerprint(&resolved) {
501                Ok(s) => s,
502                Err(_) => {
503                    if debug {
504                        eprintln!(
505                            "verdant: invalidated by missing/unreadable {}",
506                            resolved.display()
507                        );
508                    }
509                    return RevalidationOutcome::Invalidated;
510                }
511            }
512        } else {
513            match hash_file(&resolved) {
514                Ok(h) => h,
515                Err(_) => {
516                    if debug {
517                        eprintln!(
518                            "verdant: invalidated by missing/unreadable {}",
519                            resolved.display()
520                        );
521                    }
522                    return RevalidationOutcome::Invalidated;
523                }
524            }
525        };
526        if current != root.expected_hash {
527            if debug {
528                eprintln!("verdant: invalidated by changed {}", resolved.display());
529            }
530            return RevalidationOutcome::Invalidated;
531        }
532    }
533    RevalidationOutcome::Ok
534}
535
/// Resolve a recorded `FileRoot::path` against the workspace base.
///
/// Pushing an absolute path onto a `PathBuf` replaces the existing
/// contents, so an entry persisted with an absolute path resolves to that
/// path unchanged, while a workspace-relative path is appended to the base.
/// That lets every consumer of a shared entry resolve it against its own
/// workspace.
fn resolve_root_path(workspace_base: &Path, recorded: &Path) -> PathBuf {
    let mut resolved = workspace_base.to_path_buf();
    resolved.push(recorded);
    resolved
}
546
/// Case-insensitive comparison key for a path: its lossy string form
/// converted to lowercase.
fn lower_path(p: &Path) -> String {
    let text = p.to_string_lossy();
    text.to_lowercase()
}
552
/// Default ceiling for content hashing, in bytes (100 MiB). Used by
/// `hash_max_bytes` when `$VERDANT_HASH_MAX_BYTES` is unset or unparseable.
/// Callers that pass the ceiling to `hash_file_with_limit` get
/// `FileHash::Oversized` for any larger file and are expected not to cache
/// it, because (see `FileHash`) a size/mtime fingerprint is not a safe
/// substitute for a content hash.
const HASH_MAX_BYTES: u64 = 100 * 1024 * 1024;

/// The content-hash ceiling in bytes: the value of
/// `$VERDANT_HASH_MAX_BYTES` when it is set and parses as a `u64`,
/// otherwise `HASH_MAX_BYTES`.
///
/// Surrounding whitespace in the variable is ignored, so a value such as
/// `" 2048\n"` is honoured instead of silently falling back to the default.
pub fn hash_max_bytes() -> u64 {
    std::env::var("VERDANT_HASH_MAX_BYTES")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .unwrap_or(HASH_MAX_BYTES)
}
567
/// What fingerprinting a file for cache keying produced.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileHash {
    /// Hex-encoded blake3 digest of the file's contents.
    Content(String),
    /// The file exceeds the content-hash ceiling. No digest is carried on
    /// purpose: a size/mtime fingerprint would map two different files with
    /// equal size and mtime onto one key, so a tool whose output depends on
    /// an oversized file must not cache.
    Oversized,
}

impl FileHash {
    /// The content digest, or `None` for an oversized file.
    pub fn content(&self) -> Option<&str> {
        match *self {
            FileHash::Content(ref digest) => Some(digest.as_str()),
            FileHash::Oversized => None,
        }
    }
}
588
589/// blake3 hex digest of the file at `path`, streamed so memory stays
590/// bounded regardless of file size. Always content-hashes; callers that
591/// must not cache oversized files use `hash_file_with_limit` instead.
592pub fn hash_file(path: &Path) -> std::io::Result<String> {
593    let mut hasher = blake3::Hasher::new();
594    let mut f = std::fs::File::open(path)?;
595    let mut buf = [0u8; 1 << 16];
596    loop {
597        let n = std::io::Read::read(&mut f, &mut buf)?;
598        if n == 0 {
599            break;
600        }
601        hasher.update(&buf[..n]);
602    }
603    Ok(hasher.finalize().to_hex().to_string())
604}
605
606/// Fingerprint `path` for cache keying. A file at or below `max` bytes is
607/// content-hashed; a larger file is reported `Oversized` so the caller
608/// declines to cache rather than keying on a collision-prone fingerprint.
609pub fn hash_file_with_limit(path: &Path, max: u64) -> std::io::Result<FileHash> {
610    if std::fs::metadata(path)?.len() > max {
611        return Ok(FileHash::Oversized);
612    }
613    Ok(FileHash::Content(hash_file(path)?))
614}
615
616/// Content-addressed identity of a tool result. The MCP tool layer registers a
617/// node under this key carrying the file roots the result depended on, and the
618/// proxy records the same key as an upstream edge of any LLM call whose prompt
619/// embedded that result, so editing one of those files cascades file -> this
620/// node -> the dependent completions. Both layers must hash identically, so the
621/// function lives here in the shared runtime crate.
622pub fn tool_result_key(content: &[u8]) -> Key {
623    let mut framed = Vec::with_capacity(content.len() + 12);
624    framed.extend_from_slice(b"tool_result\0");
625    framed.extend_from_slice(content);
626    Key::from_bytes(&framed)
627}
628
/// Prefix that marks a recorded fingerprint as a size+mtime fingerprint
/// rather than a blake3 content digest.
pub const STAT_PREFIX: &str = "stat:";

/// Cheap size+mtime fingerprint of the file at `path`, formatted as
/// `stat:<len>:<mtime seconds>:<mtime nanoseconds>`. Intended for
/// large/stable inputs (compilers, system libraries) that would be too
/// expensive to content-hash on every validation.
///
/// Soundness assumption: identical (size, nanosecond mtime) implies
/// identical content, the same assumption build tools make for their own
/// incrementality.
///
/// # Errors
/// Returns the I/O error from reading the file's metadata.
pub fn stat_fingerprint(path: &Path) -> std::io::Result<String> {
    std::fs::metadata(path).map(|meta| {
        let (size, secs, nanos) = (meta.len(), meta.mtime(), meta.mtime_nsec());
        format!("{STAT_PREFIX}{size}:{secs}:{nanos}")
    })
}
645
646/// Fingerprint a read-set file: content-hash it if it is at or below
647/// `content_max` (sound), otherwise fall back to a size+mtime fingerprint so a
648/// huge stable input does not block caching or cost a full re-read on every
649/// validation. The returned string is what `revalidate_file_roots` recomputes
650/// and compares; its form (bare hex vs `stat:` prefix) tells revalidation which
651/// mode to use.
652pub fn fingerprint_file(path: &Path, content_max: u64) -> std::io::Result<String> {
653    if std::fs::metadata(path)?.len() > content_max {
654        stat_fingerprint(path)
655    } else {
656        hash_file(path)
657    }
658}
659
660#[cfg(test)]
661mod tests {
662    use super::*;
663    use tempfile::TempDir;
664
665    fn cache(dir: &TempDir) -> LiveCache {
666        let store = crate::store::FileStore::open(dir.path().join("store")).unwrap();
667        LiveCache::new(store)
668    }
669
670    fn write_file(dir: &TempDir, name: &str, content: &[u8]) -> PathBuf {
671        let p = dir.path().join(name);
672        std::fs::write(&p, content).unwrap();
673        p
674    }
675
676    fn root_for(p: &Path) -> FileRoot {
677        FileRoot {
678            path: p.to_path_buf(),
679            expected_hash: hash_file(p).unwrap(),
680        }
681    }
682
683    #[test]
684    fn miss_then_persist_then_hit() {
685        let dir = TempDir::new().unwrap();
686        let cache = cache(&dir);
687        let p = write_file(&dir, "a.txt", b"alpha");
688        let key = Key::from_bytes(b"read|a.txt|alpha");
689
690        assert_eq!(cache.lookup(&key).unwrap(), LookupOutcome::Miss);
691
692        cache
693            .persist(&key, b"alpha-formatted", "read", vec![root_for(&p)])
694            .unwrap();
695
696        match cache.lookup(&key).unwrap() {
697            LookupOutcome::Hit(payload) => {
698                assert_eq!(payload.bytes, b"alpha-formatted");
699                assert_eq!(payload.meta.tool_kind, "read");
700            }
701            other => panic!("expected Hit, got {other:?}"),
702        }
703    }
704
705    #[test]
706    fn revalidate_unchanged_returns_hit() {
707        let dir = TempDir::new().unwrap();
708        let cache = cache(&dir);
709        let p = write_file(&dir, "b.txt", b"beta");
710        let key = Key::from_bytes(b"read|b.txt|beta");
711        cache
712            .persist(&key, b"beta-formatted", "read", vec![root_for(&p)])
713            .unwrap();
714        match cache.lookup_revalidate(&key).unwrap() {
715            LookupOutcome::Hit(_) => {}
716            other => panic!("expected Hit, got {other:?}"),
717        }
718    }
719
720    #[test]
721    fn revalidate_modified_invalidates() {
722        let dir = TempDir::new().unwrap();
723        let cache = cache(&dir);
724        let p = write_file(&dir, "c.txt", b"charlie");
725        let key = Key::from_bytes(b"read|c.txt|charlie");
726        cache
727            .persist(&key, b"charlie-formatted", "read", vec![root_for(&p)])
728            .unwrap();
729
730        std::fs::write(&p, b"DELTA").unwrap();
731
732        match cache.lookup_revalidate(&key).unwrap() {
733            LookupOutcome::Invalidated => {}
734            other => panic!("expected Invalidated, got {other:?}"),
735        }
736        assert_eq!(cache.entry_count(), 0);
737    }
738
739    #[test]
740    fn revalidate_deleted_invalidates() {
741        let dir = TempDir::new().unwrap();
742        let cache = cache(&dir);
743        let p = write_file(&dir, "d.txt", b"delta");
744        let key = Key::from_bytes(b"read|d.txt|delta");
745        cache
746            .persist(&key, b"delta-formatted", "read", vec![root_for(&p)])
747            .unwrap();
748
749        std::fs::remove_file(&p).unwrap();
750
751        match cache.lookup_revalidate(&key).unwrap() {
752            LookupOutcome::Invalidated => {}
753            other => panic!("expected Invalidated, got {other:?}"),
754        }
755    }
756
757    #[test]
758    fn mark_dirty_drops_entry() {
759        let dir = TempDir::new().unwrap();
760        let cache = cache(&dir);
761        let p = write_file(&dir, "e.txt", b"echo");
762        let key = Key::from_bytes(b"read|e.txt|echo");
763        cache
764            .persist(&key, b"echo-formatted", "read", vec![root_for(&p)])
765            .unwrap();
766        assert_eq!(cache.entry_count(), 1);
767        cache.mark_dirty(&key);
768        assert_eq!(cache.entry_count(), 0);
769        assert_eq!(cache.lookup(&key).unwrap(), LookupOutcome::Miss);
770    }
771
772    #[test]
773    fn invalidate_path_drops_matching_entries() {
774        let dir = TempDir::new().unwrap();
775        let cache = cache(&dir);
776        let p1 = write_file(&dir, "f1.txt", b"foxtrot");
777        let p2 = write_file(&dir, "f2.txt", b"foxtrot2");
778        let k1 = Key::from_bytes(b"read|f1");
779        let k2 = Key::from_bytes(b"read|f2");
780        cache
781            .persist(&k1, b"f1-out", "read", vec![root_for(&p1)])
782            .unwrap();
783        cache
784            .persist(&k2, b"f2-out", "read", vec![root_for(&p2)])
785            .unwrap();
786        assert_eq!(cache.entry_count(), 2);
787        let n = cache.invalidate_path(&p1);
788        assert_eq!(n, 1);
789        assert_eq!(cache.entry_count(), 1);
790        // k1 invalidated, k2 still present
791        match cache.lookup(&k2).unwrap() {
792            LookupOutcome::Hit(_) => {}
793            other => panic!("k2 should still hit, got {other:?}"),
794        }
795        match cache.lookup(&k1).unwrap() {
796            LookupOutcome::Miss => {}
797            other => panic!("k1 should miss, got {other:?}"),
798        }
799    }
800
801    #[test]
802    fn invalidate_path_matches_case_insensitively() {
803        // On a case-insensitive filesystem `Src/Foo.rs` and `src/foo.rs`
804        // name the same file; a path edit reported under one casing must
805        // still invalidate an entry whose file root was recorded under
806        // another. A missed invalidation leaves a stale hit.
807        let dir = TempDir::new().unwrap();
808        let cache = cache(&dir);
809        let p = write_file(&dir, "CaseFile.txt", b"contents");
810        let key = Key::from_bytes(b"read|casefile");
811        cache
812            .persist(&key, b"formatted", "read", vec![root_for(&p)])
813            .unwrap();
814        assert_eq!(cache.entry_count(), 1);
815
816        let differently_cased = dir.path().join("casefile.txt");
817        let n = cache.invalidate_path(&differently_cased);
818        assert_eq!(n, 1, "case-differing path must still invalidate the entry");
819        assert_eq!(cache.entry_count(), 0);
820    }
821
822    #[test]
823    fn multi_root_revalidation() {
824        let dir = TempDir::new().unwrap();
825        let cache = cache(&dir);
826        let p1 = write_file(&dir, "g1.txt", b"golf1");
827        let p2 = write_file(&dir, "g2.txt", b"golf2");
828        let key = Key::from_bytes(b"grep|pattern|g1+g2");
829        cache
830            .persist(
831                &key,
832                b"merged-output",
833                "grep",
834                vec![root_for(&p1), root_for(&p2)],
835            )
836            .unwrap();
837
838        // First revalidation: clean
839        match cache.lookup_revalidate(&key).unwrap() {
840            LookupOutcome::Hit(_) => {}
841            other => panic!("expected Hit, got {other:?}"),
842        }
843        // Modify only the second root: must invalidate
844        std::fs::write(&p2, b"changed").unwrap();
845        match cache.lookup_revalidate(&key).unwrap() {
846            LookupOutcome::Invalidated => {}
847            other => panic!("expected Invalidated, got {other:?}"),
848        }
849    }
850
851    #[test]
852    fn upstream_invalidation_drops_dependents() {
853        let dir = TempDir::new().unwrap();
854        let cache = cache(&dir);
855        let p = write_file(&dir, "src.txt", b"alpha");
856        let read_key = Key::from_bytes(b"read|src");
857        cache
858            .persist(&read_key, b"alpha-formatted", "read", vec![root_for(&p)])
859            .unwrap();
860        // Two LlmCalls both depend on the read result.
861        let llm1 = Key::from_bytes(b"llm|first-prompt");
862        let llm2 = Key::from_bytes(b"llm|second-prompt");
863        cache
864            .persist_with_upstreams(
865                &llm1,
866                b"completion-1",
867                "llm_call",
868                vec![],
869                vec![read_key.clone()],
870            )
871            .unwrap();
872        cache
873            .persist_with_upstreams(
874                &llm2,
875                b"completion-2",
876                "llm_call",
877                vec![],
878                vec![read_key.clone()],
879            )
880            .unwrap();
881        assert_eq!(cache.entry_count(), 3);
882
883        // Invalidating the read key must cascade to both LlmCalls.
884        let dropped = cache.invalidate_upstream(&read_key);
885        assert_eq!(dropped, 2);
886        assert_eq!(cache.lookup(&llm1).unwrap(), LookupOutcome::Miss);
887        assert_eq!(cache.lookup(&llm2).unwrap(), LookupOutcome::Miss);
888    }
889
890    #[test]
891    fn invalidate_path_cascades_to_dependent_llm_calls() {
892        let dir = TempDir::new().unwrap();
893        let cache = cache(&dir);
894        let p = write_file(&dir, "input.txt", b"hello");
895        let read_key = Key::from_bytes(b"read|input");
896        cache
897            .persist(&read_key, b"hello-formatted", "read", vec![root_for(&p)])
898            .unwrap();
899        let llm = Key::from_bytes(b"llm|sees-read");
900        cache
901            .persist_with_upstreams(
902                &llm,
903                b"completion",
904                "llm_call",
905                vec![],
906                vec![read_key.clone()],
907            )
908            .unwrap();
909        assert_eq!(cache.entry_count(), 2);
910
911        // Modify the file and invalidate by path.
912        std::fs::write(&p, b"changed").unwrap();
913        let n = cache.invalidate_path(&p);
914        assert_eq!(n, 1, "the read entry was the direct path match");
915        // The LlmCall must also be gone via the cascade.
916        assert_eq!(cache.lookup(&llm).unwrap(), LookupOutcome::Miss);
917        assert_eq!(cache.entry_count(), 0);
918    }
919
920    #[test]
921    fn transitive_invalidation_walks_multi_hop_chain() {
922        // A -> B -> C: invalidating A drops B and C.
923        let dir = TempDir::new().unwrap();
924        let cache = cache(&dir);
925        let key_a = Key::from_bytes(b"a");
926        let key_b = Key::from_bytes(b"b");
927        let key_c = Key::from_bytes(b"c");
928        let p = write_file(&dir, "f.txt", b"x");
929        cache
930            .persist(&key_a, b"a-bytes", "read", vec![root_for(&p)])
931            .unwrap();
932        cache
933            .persist_with_upstreams(&key_b, b"b-bytes", "llm_call", vec![], vec![key_a.clone()])
934            .unwrap();
935        cache
936            .persist_with_upstreams(&key_c, b"c-bytes", "llm_call", vec![], vec![key_b.clone()])
937            .unwrap();
938
939        let dropped = cache.invalidate_upstream(&key_a);
940        assert_eq!(dropped, 2);
941        assert_eq!(cache.lookup(&key_b).unwrap(), LookupOutcome::Miss);
942        assert_eq!(cache.lookup(&key_c).unwrap(), LookupOutcome::Miss);
943    }
944
945    #[test]
946    fn upstream_keys_persist_across_rehydration() {
947        let dir = TempDir::new().unwrap();
948        let p = write_file(&dir, "g.txt", b"data");
949        let read_key = Key::from_bytes(b"read|g");
950        let llm_key = Key::from_bytes(b"llm|g-consumer");
951
952        {
953            let cache = cache(&dir);
954            cache
955                .persist(&read_key, b"data-formatted", "read", vec![root_for(&p)])
956                .unwrap();
957            cache
958                .persist_with_upstreams(
959                    &llm_key,
960                    b"completion",
961                    "llm_call",
962                    vec![],
963                    vec![read_key.clone()],
964                )
965                .unwrap();
966        }
967
968        // Fresh cache pointed at the same store: the upstream edge
969        // must come back so a subsequent invalidation cascades.
970        let store_root = dir.path().join("store");
971        let store2 = crate::store::FileStore::open(store_root).unwrap();
972        let cache2 = LiveCache::new(store2);
973        assert_eq!(cache2.entry_count(), 2);
974        let dropped = cache2.invalidate_upstream(&read_key);
975        assert_eq!(dropped, 1, "rehydrated edge must support cascade");
976    }
977
978    #[test]
979    fn cross_instance_file_edit_cascades_tool_and_llm() {
980        // The production scenario: a tool process and a proxy process share one
981        // store. Editing a file in a THIRD instance must drop both the tool
982        // result node (file dependency) and the LLM completion that consumed it,
983        // visible to a fresh instance. Proves cross-process incremental agent
984        // reasoning via the shared store, not an in-memory registry.
985        let dir = TempDir::new().unwrap();
986        let f = write_file(&dir, "dep.txt", b"v1");
987        let content = b"TOOL: contents of dep.txt";
988        let tkey = tool_result_key(content);
989        let llm_key = Key::from_bytes(b"llm|consumed-the-tool-result");
990
991        {
992            let producer = cache(&dir);
993            producer
994                .persist(&tkey, content, "tool_result", vec![root_for(&f)])
995                .unwrap();
996            producer
997                .persist_with_upstreams(
998                    &llm_key,
999                    b"completion-bytes",
1000                    "llm_call",
1001                    vec![],
1002                    vec![tkey.clone()],
1003                )
1004                .unwrap();
1005        }
1006
1007        {
1008            let editor = cache(&dir);
1009            std::fs::write(&f, b"v2-changed").unwrap();
1010            let n = editor.invalidate_path(&f);
1011            assert!(
1012                n >= 1,
1013                "the tool node depending on the file must be dropped"
1014            );
1015        }
1016
1017        let reader = cache(&dir);
1018        assert!(
1019            matches!(reader.lookup(&tkey).unwrap(), LookupOutcome::Miss),
1020            "tool result node must be gone cross-instance"
1021        );
1022        assert!(
1023            matches!(reader.lookup(&llm_key).unwrap(), LookupOutcome::Miss),
1024            "the dependent LLM completion must be gone cross-instance"
1025        );
1026    }
1027
1028    #[test]
1029    fn fresh_cache_rehydrates_from_store_on_disk() {
1030        // M1's whole point: a process restart must not invalidate the
1031        // cache. Persist via one cache instance, drop it, build a fresh
1032        // cache pointed at the same store directory, and confirm the
1033        // entry is still served as a Hit.
1034        let dir = TempDir::new().unwrap();
1035        let p = write_file(&dir, "rehydrate.txt", b"persist me");
1036        let key = Key::from_bytes(b"read|rehydrate|persist me");
1037
1038        {
1039            let cache = cache(&dir);
1040            cache
1041                .persist(&key, b"served-once", "read", vec![root_for(&p)])
1042                .unwrap();
1043            assert_eq!(cache.entry_count(), 1);
1044        } // drop cache; in-memory registry destroyed.
1045
1046        let store_root = dir.path().join("store");
1047        let store2 = crate::store::FileStore::open(store_root).unwrap();
1048        let cache2 = LiveCache::new(store2);
1049        // Without rehydration this would be 0 and the next lookup would
1050        // miss, defeating the entire cross-session caching story.
1051        assert_eq!(cache2.entry_count(), 1);
1052        match cache2.lookup_revalidate(&key).unwrap() {
1053            LookupOutcome::Hit(payload) => assert_eq!(payload.bytes, b"served-once"),
1054            other => panic!("expected Hit after rehydrate, got {other:?}"),
1055        }
1056    }
1057
1058    #[test]
1059    fn hit_returns_byte_identical_payload() {
1060        // Critical correctness test: cache must hand back the exact bytes
1061        // it persisted, not a re-formatted view. A divergence here would
1062        // silently corrupt the model's view of the world.
1063        let dir = TempDir::new().unwrap();
1064        let cache = cache(&dir);
1065        let p = write_file(&dir, "h.txt", b"hotel");
1066        let key = Key::from_bytes(b"read|h");
1067        let original = b"  1\thotel-formatted-with-line-numbers\n  2\tetc\n";
1068        cache
1069            .persist(&key, original, "read", vec![root_for(&p)])
1070            .unwrap();
1071        match cache.lookup_revalidate(&key).unwrap() {
1072            LookupOutcome::Hit(p) => assert_eq!(p.bytes, original),
1073            other => panic!("expected Hit, got {other:?}"),
1074        }
1075    }
1076
1077    #[test]
1078    fn hash_file_with_limit_content_hashes_within_limit() {
1079        let dir = TempDir::new().unwrap();
1080        let p = write_file(&dir, "small.bin", b"comfortably within the limit");
1081        match hash_file_with_limit(&p, 1024).unwrap() {
1082            FileHash::Content(h) => assert_eq!(h, hash_file(&p).unwrap()),
1083            FileHash::Oversized => panic!("a file within the limit must content-hash"),
1084        }
1085    }
1086
1087    #[test]
1088    fn hash_file_with_limit_reports_oversized_above_limit() {
1089        let dir = TempDir::new().unwrap();
1090        let p = write_file(&dir, "big.bin", &[7u8; 4096]);
1091        assert_eq!(hash_file_with_limit(&p, 64).unwrap(), FileHash::Oversized);
1092    }
1093
1094    #[test]
1095    fn oversized_files_yield_no_keyable_digest() {
1096        // The removed metadata fallback hashed (path, size, mtime), so two
1097        // distinct oversized files with equal size and mtime collided onto
1098        // one key. FileHash::Oversized carries no digest, so distinct
1099        // oversized files cannot be keyed against each other at all.
1100        let dir = TempDir::new().unwrap();
1101        let a = write_file(&dir, "a.bin", &[1u8; 4096]);
1102        let b = write_file(&dir, "b.bin", &[2u8; 4096]);
1103        let ha = hash_file_with_limit(&a, 64).unwrap();
1104        let hb = hash_file_with_limit(&b, 64).unwrap();
1105        assert_eq!(ha, FileHash::Oversized);
1106        assert_eq!(hb, FileHash::Oversized);
1107        assert!(ha.content().is_none() && hb.content().is_none());
1108    }
1109}