verdant-cache-runtime 0.3.1

//! `LiveCache` — the M1 cache surface that the MCP server consumes.
//!
//! Keying philosophy: every cache entry is keyed by a deterministic blake3
//! hash of the tool's *inputs*, where "inputs" includes any file content
//! the tool's output is a function of. The store payload is the exact
//! formatted bytes the MCP tool fed back to the model on the first
//! execution. A subsequent identical call hits when (a) the input hash
//! matches AND (b) every recorded file root revalidates clean against the
//! current filesystem. If either fails, the entry is treated as invalid
//! and the registered metadata is removed so a stale entry does not
//! linger. M1 keeps the registry in memory; M2 will persist it.
//!
//! The cache surface is deliberately tool-agnostic: callers compute the
//! input bytes (we provide canonicalization helpers in `key`), invoke
//! `lookup` or `lookup_revalidate`, and on miss they execute the real
//! tool and call `persist`. The cache does not run tools itself; that
//! lives one layer up in `verdant-mcp`.

use crate::store::{FileRootSerde, Key, Payload, Store, StoreError};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::RwLock;

/// Errors surfaced by `LiveCache` operations.
#[derive(Debug, thiserror::Error)]
pub enum CacheError {
    /// The backing store reported a failure. Converted automatically from
    /// `StoreError` (via `#[from]`), so `?` works on store calls.
    #[error("store: {0}")]
    Store(#[from] StoreError),
    /// An I/O operation failed. Converted automatically from
    /// `std::io::Error` (via `#[from]`).
    #[error("io: {0}")]
    Io(#[from] std::io::Error),
}

/// One file dependency of a cache entry. The tool computed its output as a
/// function of (path, contents at expected_hash). On every revalidating
/// lookup we re-blake3 the file and require the hash to still match; if it
/// does not, or the file can no longer be hashed at all, the entry is
/// invalidated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileRoot {
    /// Location of the file. It is joined onto the cache's workspace base
    /// before hashing, so it may be workspace-relative; an absolute path
    /// replaces the base in that join and is used as-is.
    pub path: PathBuf,
    /// Hex digest the file is expected to hash to. Revalidation compares
    /// the output of `hash_file` against this string for equality.
    pub expected_hash: String,
}

/// In-memory registry record for one cache entry. It mirrors the metadata
/// persisted alongside the payload so revalidation, path invalidation and
/// upstream cascades can be decided from the registry alone.
#[derive(Debug, Clone)]
struct EntryMeta {
    /// Caller-supplied label for the tool that produced the entry;
    /// surfaced (deduplicated) by `LiveCache::known_kinds`.
    tool_kind: String,
    /// Files the entry's output depends on. Re-hashed by
    /// `lookup_revalidate` and matched against by `invalidate_path`.
    file_roots: Vec<FileRoot>,
    /// Upstream cache keys this entry depends on. Empty for leaf
    /// tool-call entries; populated for LlmCall entries with the
    /// tool-call keys whose payloads appeared in the prompt's
    /// `tool_result` blocks.
    upstream_keys: Vec<String>,
}

/// Cache front-end over a `Store`. Payload bytes live in the store, while
/// an in-memory registry mirrors the metadata of every entry this process
/// knows about so invalidation decisions do not need a store round-trip.
pub struct LiveCache {
    /// Backing store holding payload bytes and their persisted metadata.
    store: Box<dyn Store>,
    /// Entry metadata keyed by the cache key's string form (`Key.0`).
    /// Every access in this file recovers from lock poisoning by taking
    /// the inner guard instead of panicking.
    registry: RwLock<HashMap<String, EntryMeta>>,
    /// Workspace base used to resolve workspace-relative `FileRoot::path`
    /// during revalidation. `FileRoot` paths are stored relative so a
    /// cache entry persisted on Alice's machine at `/home/alice/repo/`
    /// is reusable on Bob's machine at `/home/bob/work/repo/` without
    /// changing the cache key. `LiveCache::new` defaults the base to
    /// the process cwd at construction time; binaries that know the
    /// real project root should call `LiveCache::with_workspace`.
    workspace_base: PathBuf,
}

/// Result of a cache lookup.
#[derive(Debug, Clone, PartialEq)]
pub enum LookupOutcome {
    /// Cache hit. The payload is byte-for-byte the same as the original
    /// execution and (for revalidating lookups) every file root has been
    /// confirmed unchanged.
    Hit(Payload),
    /// The store holds no payload for this key.
    Miss,
    /// Entry existed but a file root has changed or could no longer be
    /// hashed. Any registry record for the key has been removed. The
    /// lookup itself does not remove the store payload, so a later
    /// revalidating lookup that still finds the payload re-checks the
    /// file roots recorded in it.
    Invalidated,
}

impl LiveCache {
    /// Build a cache over `store`, using the process's current directory
    /// as the workspace base. If the current directory cannot be
    /// determined, the relative path `"."` is used instead.
    pub fn new<S: Store + 'static>(store: S) -> Self {
        let cwd = match std::env::current_dir() {
            Ok(dir) => dir,
            Err(_) => PathBuf::from("."),
        };
        let boxed: Box<dyn Store> = Box::new(store);
        Self::from_box_with_workspace(boxed, cwd)
    }

    /// Build a cache over `store` whose workspace-relative file roots are
    /// resolved against `workspace_base` rather than the process cwd.
    pub fn with_workspace<S: Store + 'static>(
        store: S,
        workspace_base: impl Into<PathBuf>,
    ) -> Self {
        let boxed: Box<dyn Store> = Box::new(store);
        let base: PathBuf = workspace_base.into();
        Self::from_box_with_workspace(boxed, base)
    }

    /// Like `new`, but for a store that is already boxed. The workspace
    /// base is the process's current directory, or `"."` when that cannot
    /// be determined.
    pub fn from_box(store: Box<dyn Store>) -> Self {
        let cwd = match std::env::current_dir() {
            Ok(dir) => dir,
            Err(_) => PathBuf::from("."),
        };
        Self::from_box_with_workspace(store, cwd)
    }

    pub fn from_box_with_workspace(store: Box<dyn Store>, workspace_base: PathBuf) -> Self {
        // Rehydrate the in-memory registry from on-disk meta files so a
        // freshly constructed cache (e.g., a brand-new MCP server
        // process started by Claude Code on each `claude -p` invocation)
        // can serve previously-persisted entries instead of treating
        // every key as Miss. Without this, M1's cross-session cache
        // doesn't actually exist — every Run-2 lookup would miss,
        // overwrite the same payload file, and the cache would provide
        // zero savings between sessions.
        let mut reg = HashMap::new();
        if let Ok(items) = store.iter_meta() {
            for (key, meta) in items {
                let file_roots = meta
                    .file_roots
                    .into_iter()
                    .map(|f| FileRoot {
                        path: PathBuf::from(f.path),
                        expected_hash: f.expected_hash,
                    })
                    .collect();
                reg.insert(
                    key.0,
                    EntryMeta {
                        tool_kind: meta.tool_kind,
                        file_roots,
                        upstream_keys: meta.upstream_keys,
                    },
                );
            }
        }
        Self {
            store,
            registry: RwLock::new(reg),
            workspace_base,
        }
    }

    /// Borrow the backing store.
    pub fn store(&self) -> &dyn Store {
        &*self.store
    }

    /// The directory that workspace-relative file roots are resolved
    /// against.
    pub fn workspace_base(&self) -> &Path {
        self.workspace_base.as_path()
    }

    /// Number of entries currently tracked by the in-memory registry.
    pub fn entry_count(&self) -> usize {
        let registry = self
            .registry
            .read()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        registry.len()
    }

    /// Plain lookup that performs no file revalidation. Intended for
    /// tools whose output has no filesystem dependency (rare in M1 — even
    /// Bash depends on the cwd's contents in practice); most callers want
    /// `lookup_revalidate`.
    ///
    /// The store is always consulted. As a side effect the registry is
    /// reconciled with what the store reports: a payload found for an
    /// unknown key is registered, and a known key with no payload is
    /// dropped.
    pub fn lookup(&self, key: &Key) -> Result<LookupOutcome, CacheError> {
        let known = {
            let registry = self
                .registry
                .read()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            registry.contains_key(&key.0)
        };

        if let Some(payload) = self.store.lookup(key)? {
            // Backends like `RemoteStore` cannot pre-populate the registry
            // through `iter_meta` (no bulk listing over the wire), so
            // entries are discovered one round-trip at a time. Seeding the
            // registry from the payload meta lets later operations
            // (invalidate_path, invalidate_upstream, entry_count) see the
            // entry without another round-trip.
            if !known {
                self.populate_registry_from_meta(key, &payload);
            }
            return Ok(LookupOutcome::Hit(payload));
        }

        if known {
            // The registry remembers an entry the store no longer has —
            // e.g. the store was truncated externally between persist and
            // lookup, or a `_shared` entry was invalidated server-side.
            // Forget the orphan so the registry stops advertising it.
            let mut registry = self
                .registry
                .write()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            registry.remove(&key.0);
        }
        Ok(LookupOutcome::Miss)
    }

    /// Revalidating lookup: every recorded file root is re-hashed and must
    /// still match the digest captured at persist time. If any root has
    /// changed (or cannot be hashed), the registry record is dropped and
    /// `Invalidated` is returned so the caller re-executes the real tool.
    ///
    /// This is the primary lookup path for `read`, `glob`, and `grep`
    /// tools whose output is a pure function of named file contents. Bash
    /// typically cannot use it because the set of files Bash reads is not
    /// known a priori.
    pub fn lookup_revalidate(&self, key: &Key) -> Result<LookupOutcome, CacheError> {
        // Copy the metadata out under a short-lived read lock so no lock
        // is held while large files are being hashed.
        let known_meta = self
            .registry
            .read()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
            .get(&key.0)
            .cloned();

        // Fast path: the registry already knows the roots, so a dirty
        // entry can be rejected before paying for a store round-trip.
        if let Some(meta) = known_meta.as_ref() {
            let verdict = revalidate_file_roots(&self.workspace_base, &meta.file_roots);
            if matches!(verdict, RevalidationOutcome::Invalidated) {
                let mut registry = self
                    .registry
                    .write()
                    .unwrap_or_else(std::sync::PoisonError::into_inner);
                registry.remove(&key.0);
                return Ok(LookupOutcome::Invalidated);
            }
        }

        let payload = match self.store.lookup(key)? {
            Some(found) => found,
            None => {
                // Known to the registry but gone from the store: forget it.
                if known_meta.is_some() {
                    let mut registry = self
                        .registry
                        .write()
                        .unwrap_or_else(std::sync::PoisonError::into_inner);
                    registry.remove(&key.0);
                }
                return Ok(LookupOutcome::Miss);
            }
        };

        if known_meta.is_none() {
            // The entry was unknown locally, so the only record of its
            // file roots is the payload's own meta. Check those against
            // the local filesystem before trusting the payload — this is
            // the cross-machine drift check that RemoteStore-backed caches
            // rely on, because the server only knows its own filesystem
            // and cannot tell that Bob's checkout diverged from Alice's.
            let recorded: Vec<FileRoot> = payload
                .meta
                .file_roots
                .iter()
                .map(|stored| FileRoot {
                    path: PathBuf::from(&stored.path),
                    expected_hash: stored.expected_hash.clone(),
                })
                .collect();
            match revalidate_file_roots(&self.workspace_base, &recorded) {
                RevalidationOutcome::Invalidated => return Ok(LookupOutcome::Invalidated),
                RevalidationOutcome::Ok => self.populate_registry_from_meta(key, &payload),
            }
        }

        Ok(LookupOutcome::Hit(payload))
    }

    fn populate_registry_from_meta(&self, key: &Key, p: &Payload) {
        let file_roots = p
            .meta
            .file_roots
            .iter()
            .map(|f| FileRoot {
                path: PathBuf::from(&f.path),
                expected_hash: f.expected_hash.clone(),
            })
            .collect();
        self.registry
            .write()
            .unwrap_or_else(|e| e.into_inner())
            .insert(
                key.0.clone(),
                EntryMeta {
                    tool_kind: p.meta.tool_kind.clone(),
                    file_roots,
                    upstream_keys: p.meta.upstream_keys.clone(),
                },
            );
    }

    /// Store the result of a fresh tool execution. `bytes` is the
    /// formatted output the model will see; it is persisted under `key`
    /// and `file_roots` are registered for future revalidation. The entry
    /// is recorded with no upstream dependencies.
    pub fn persist(
        &self,
        key: &Key,
        bytes: &[u8],
        tool_kind: &str,
        file_roots: Vec<FileRoot>,
    ) -> Result<(), CacheError> {
        let no_upstreams: Vec<Key> = Vec::new();
        self.persist_with_upstreams(key, bytes, tool_kind, file_roots, no_upstreams)
    }

    /// Store an entry whose validity also depends on the given upstream
    /// cache keys. The proxy's LlmCall path uses this so that invalidating
    /// a tool-cache entry can follow the upstream edge and drop the
    /// completions that consumed it.
    ///
    /// The store is written first; the registry is updated only after the
    /// store write succeeded, so a store error leaves the registry
    /// untouched.
    pub fn persist_with_upstreams(
        &self,
        key: &Key,
        bytes: &[u8],
        tool_kind: &str,
        file_roots: Vec<FileRoot>,
        upstream_keys: Vec<Key>,
    ) -> Result<(), CacheError> {
        // Serializable mirror of the roots for the store's metadata.
        let mut stored_roots: Vec<FileRootSerde> = Vec::with_capacity(file_roots.len());
        for root in &file_roots {
            stored_roots.push(FileRootSerde {
                path: root.path.display().to_string(),
                expected_hash: root.expected_hash.clone(),
            });
        }
        let upstream_hex: Vec<String> = upstream_keys.into_iter().map(|k| k.0).collect();

        self.store.persist_with_upstreams(
            key,
            bytes,
            tool_kind,
            stored_roots,
            upstream_hex.clone(),
        )?;

        let entry = EntryMeta {
            tool_kind: tool_kind.to_owned(),
            file_roots,
            upstream_keys: upstream_hex,
        };
        let mut registry = self
            .registry
            .write()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        registry.insert(key.0.clone(), entry);
        Ok(())
    }

    /// Forget `key`: drop its registry record, then make a best-effort
    /// attempt to remove the payload from the store as well.
    ///
    /// Used by `write` and `edit` MCP tools to invalidate every cached
    /// node whose path matches the written path; the verdant-mcp layer
    /// computes the affected key set and calls this for each.
    pub fn mark_dirty(&self, key: &Key) {
        {
            let mut registry = self
                .registry
                .write()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            registry.remove(&key.0);
        }
        // Store cleanup is best-effort: a failure (concurrent writer,
        // permissions glitch) is deliberately ignored and the bytes stay
        // where they are.
        // NOTE(review): `lookup` consults the store even for keys missing
        // from the registry, so a payload that survives a failed removal
        // could still be served later — confirm this is acceptable.
        let _ = self.store.remove(key);
    }

    /// Drop every cache entry that depends on `upstream_key` either
    /// directly (its `upstream_keys` includes that hex) or transitively
    /// (its dependency closure does). Returns the number of entries
    /// dropped. The entry for `upstream_key` itself is neither dropped
    /// nor counted.
    ///
    /// This is the cross-layer dirty propagation path that ties the
    /// M3 LlmCall cache to M1's tool cache: when a `read` entry is
    /// invalidated by `invalidate_path`, the proxy calls this with
    /// the read's key and every LlmCall whose prompt consumed that
    /// read drops out of the cache. Without this hop, an edited file
    /// would silently feed the model the old bytes via a stale cached
    /// completion.
    ///
    /// The closure is computed and removed from the registry under one
    /// write lock, so the registry change is atomic. The lock is released
    /// before any store removal so store I/O never blocks other registry
    /// users. Store removals are best-effort; failures are ignored.
    pub fn invalidate_upstream(&self, upstream_key: &Key) -> usize {
        let dropped_keys: Vec<String> = {
            let mut reg = self.registry.write().unwrap_or_else(|e| e.into_inner());

            // Fixed-point expansion of the dependents: each pass adds the
            // entries whose declared upstream set intersects the current
            // dirty set; stop once a pass adds nothing new.
            let mut dirty: std::collections::HashSet<String> =
                std::collections::HashSet::from([upstream_key.0.clone()]);
            loop {
                let before = dirty.len();
                for (k, meta) in reg.iter() {
                    if !dirty.contains(k) && meta.upstream_keys.iter().any(|u| dirty.contains(u)) {
                        dirty.insert(k.clone());
                    }
                }
                if dirty.len() == before {
                    break;
                }
            }

            // The originating key may or may not be registered; either way
            // it is left alone, and only entries actually removed count.
            let mut removed = Vec::new();
            for k in dirty {
                if k != upstream_key.0 && reg.remove(&k).is_some() {
                    removed.push(k);
                }
            }
            removed
        };

        let dropped = dropped_keys.len();
        for k in dropped_keys {
            let _ = self.store.remove(&Key(k));
        }
        dropped
    }

    /// Drop every registry entry whose recorded file roots include `path`.
    /// O(n) in the registry size; M1 keeps registries small (a few hundred
    /// entries per session) so this is fine. M2's persistent index will
    /// add a path → keys reverse map.
    pub fn invalidate_path(&self, path: &Path) -> usize {
        let target = match path.canonicalize() {
            Ok(p) => p,
            Err(_) => path.to_path_buf(),
        };
        // Compare a lowercased form so an edit recorded under one casing
        // still invalidates an entry recorded under another. On a
        // case-insensitive filesystem `Src/Foo.rs` and `src/foo.rs` are
        // one file; a missed invalidation leaves a stale hit, while a
        // spurious extra invalidation only costs a recompute, so the
        // conservative lowercased comparison is applied unconditionally.
        let target_ci = lower_path(&target);
        let path_ci = lower_path(path);
        let to_drop: Vec<String> = {
            let reg = self.registry.read().unwrap_or_else(|e| e.into_inner());
            reg.iter()
                .filter_map(|(k, meta)| {
                    let touches = meta.file_roots.iter().any(|r| {
                        let resolved = resolve_root_path(&self.workspace_base, &r.path);
                        let resolved_ci = lower_path(&resolved);
                        match resolved.canonicalize() {
                            Ok(c) => lower_path(&c) == target_ci,
                            Err(_) => resolved_ci == path_ci || lower_path(&r.path) == path_ci,
                        }
                    });
                    if touches {
                        Some(k.clone())
                    } else {
                        None
                    }
                })
                .collect()
        };
        let n = to_drop.len();
        for k in to_drop {
            let key = Key(k);
            // Cascade up the dependency edge so any LlmCall whose
            // prompt consumed this tool result also drops.
            self.invalidate_upstream(&key);
            self.registry
                .write()
                .unwrap_or_else(|e| e.into_inner())
                .remove(&key.0);
            let _ = self.store.remove(&key);
        }
        n
    }

    /// Distinct tool kinds present in the registry, in ascending order.
    pub fn known_kinds(&self) -> Vec<String> {
        let registry = self
            .registry
            .read()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        // An ordered set yields the kinds sorted and de-duplicated.
        let distinct: std::collections::BTreeSet<&str> =
            registry.values().map(|meta| meta.tool_kind.as_str()).collect();
        let kinds: Vec<String> = distinct.into_iter().map(str::to_owned).collect();
        kinds
    }
}

/// Result of re-hashing an entry's file roots against the filesystem.
enum RevalidationOutcome {
    /// Every root hashed to its recorded `expected_hash` (trivially true
    /// when there are no roots).
    Ok,
    /// At least one root could not be hashed or hashed to a different
    /// digest.
    Invalidated,
}

/// Re-hash each of `roots` (resolved against `workspace_base`) and compare
/// with its recorded digest. Checking stops at the first root that cannot
/// be hashed or whose digest differs.
fn revalidate_file_roots(workspace_base: &Path, roots: &[FileRoot]) -> RevalidationOutcome {
    let all_clean = roots.iter().all(|root| {
        let on_disk = resolve_root_path(workspace_base, &root.path);
        matches!(hash_file(&on_disk), Ok(digest) if digest == root.expected_hash)
    });
    if all_clean {
        RevalidationOutcome::Ok
    } else {
        RevalidationOutcome::Invalidated
    }
}

/// Resolve a recorded `FileRoot::path` against a workspace base.
///
/// Relative paths are appended to the base, which is what lets
/// cross-user `_shared` entries work: every consumer resolves against
/// its own workspace. An absolute path replaces the base entirely, so
/// legacy entries persisted with absolute paths (single-machine M4)
/// still resolve to themselves.
fn resolve_root_path(workspace_base: &Path, recorded: &Path) -> PathBuf {
    let mut resolved = workspace_base.to_path_buf();
    resolved.push(recorded);
    resolved
}

/// Case-insensitive comparison key for a path: its lossy string form,
/// lowercased. Used when matching paths for invalidation.
fn lower_path(p: &Path) -> String {
    let text = p.to_string_lossy();
    text.to_lowercase()
}

/// Default ceiling for content hashing. A file larger than this is
/// reported `FileHash::Oversized` and is therefore uncacheable, because
/// (see `FileHash`) a size/mtime fingerprint is not a safe substitute
/// for a content hash.
const HASH_MAX_BYTES: u64 = 100 * 1024 * 1024;

/// The content-hash ceiling in bytes, read from `$VERDANT_HASH_MAX_BYTES`
/// if set and parseable as a `u64`, otherwise `HASH_MAX_BYTES`.
///
/// Surrounding whitespace in the variable is ignored, so a value such as
/// `"4096\n"` is honoured rather than silently falling back to the
/// default. An unset, non-UTF-8 or non-numeric value yields the default.
pub fn hash_max_bytes() -> u64 {
    std::env::var("VERDANT_HASH_MAX_BYTES")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .unwrap_or(HASH_MAX_BYTES)
}

/// How a file was fingerprinted for cache keying.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileHash {
    /// Hex-encoded blake3 digest of the file's contents.
    Content(String),
    /// The file exceeds the content-hash ceiling. Deliberately carries no
    /// digest: a size/mtime fingerprint would map two different files
    /// with equal size and mtime onto one key, so a tool whose output
    /// depends on an oversized file must not cache.
    Oversized,
}

impl FileHash {
    /// The content digest, or `None` for an oversized file.
    pub fn content(&self) -> Option<&str> {
        if let FileHash::Content(digest) = self {
            Some(digest.as_str())
        } else {
            None
        }
    }
}

/// blake3 hex digest of the file at `path`, streamed through a fixed
/// 64 KiB buffer so memory stays bounded regardless of file size. Always
/// content-hashes; callers that must not cache oversized files use
/// `hash_file_with_limit` instead.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or a
/// read fails. Reads that fail with `ErrorKind::Interrupted` are retried
/// rather than reported, since that error is transient and would
/// otherwise look like a changed or unreadable file to callers.
pub fn hash_file(path: &Path) -> std::io::Result<String> {
    use std::io::Read;

    let mut hasher = blake3::Hasher::new();
    let mut f = std::fs::File::open(path)?;
    let mut buf = [0u8; 1 << 16];
    loop {
        match f.read(&mut buf) {
            // End of file.
            Ok(0) => break,
            Ok(n) => {
                hasher.update(&buf[..n]);
            }
            // A signal interrupted the read before any data arrived;
            // nothing was consumed, so simply try again.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(hasher.finalize().to_hex().to_string())
}

/// Fingerprint `path` for cache keying. A file whose size is at most
/// `max` bytes is content-hashed; anything larger is reported as
/// `Oversized` so the caller declines to cache instead of keying on a
/// collision-prone fingerprint.
///
/// # Errors
///
/// Returns the I/O error from reading the file's metadata or from
/// hashing its contents.
pub fn hash_file_with_limit(path: &Path, max: u64) -> std::io::Result<FileHash> {
    let size = std::fs::metadata(path)?.len();
    if size <= max {
        hash_file(path).map(FileHash::Content)
    } else {
        Ok(FileHash::Oversized)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    fn cache(dir: &TempDir) -> LiveCache {
        let store = crate::store::FileStore::open(dir.path().join("store")).unwrap();
        LiveCache::new(store)
    }

    fn write_file(dir: &TempDir, name: &str, content: &[u8]) -> PathBuf {
        let p = dir.path().join(name);
        std::fs::write(&p, content).unwrap();
        p
    }

    fn root_for(p: &Path) -> FileRoot {
        FileRoot {
            path: p.to_path_buf(),
            expected_hash: hash_file(p).unwrap(),
        }
    }

    #[test]
    fn miss_then_persist_then_hit() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "a.txt", b"alpha");
        let key = Key::from_bytes(b"read|a.txt|alpha");

        assert_eq!(cache.lookup(&key).unwrap(), LookupOutcome::Miss);

        cache
            .persist(&key, b"alpha-formatted", "read", vec![root_for(&p)])
            .unwrap();

        match cache.lookup(&key).unwrap() {
            LookupOutcome::Hit(payload) => {
                assert_eq!(payload.bytes, b"alpha-formatted");
                assert_eq!(payload.meta.tool_kind, "read");
            }
            other => panic!("expected Hit, got {other:?}"),
        }
    }

    #[test]
    fn revalidate_unchanged_returns_hit() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "b.txt", b"beta");
        let key = Key::from_bytes(b"read|b.txt|beta");
        cache
            .persist(&key, b"beta-formatted", "read", vec![root_for(&p)])
            .unwrap();
        match cache.lookup_revalidate(&key).unwrap() {
            LookupOutcome::Hit(_) => {}
            other => panic!("expected Hit, got {other:?}"),
        }
    }

    #[test]
    fn revalidate_modified_invalidates() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "c.txt", b"charlie");
        let key = Key::from_bytes(b"read|c.txt|charlie");
        cache
            .persist(&key, b"charlie-formatted", "read", vec![root_for(&p)])
            .unwrap();

        std::fs::write(&p, b"DELTA").unwrap();

        match cache.lookup_revalidate(&key).unwrap() {
            LookupOutcome::Invalidated => {}
            other => panic!("expected Invalidated, got {other:?}"),
        }
        assert_eq!(cache.entry_count(), 0);
    }

    #[test]
    fn revalidate_deleted_invalidates() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "d.txt", b"delta");
        let key = Key::from_bytes(b"read|d.txt|delta");
        cache
            .persist(&key, b"delta-formatted", "read", vec![root_for(&p)])
            .unwrap();

        std::fs::remove_file(&p).unwrap();

        match cache.lookup_revalidate(&key).unwrap() {
            LookupOutcome::Invalidated => {}
            other => panic!("expected Invalidated, got {other:?}"),
        }
    }

    #[test]
    fn mark_dirty_drops_entry() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "e.txt", b"echo");
        let key = Key::from_bytes(b"read|e.txt|echo");
        cache
            .persist(&key, b"echo-formatted", "read", vec![root_for(&p)])
            .unwrap();
        assert_eq!(cache.entry_count(), 1);
        cache.mark_dirty(&key);
        assert_eq!(cache.entry_count(), 0);
        assert_eq!(cache.lookup(&key).unwrap(), LookupOutcome::Miss);
    }

    #[test]
    fn invalidate_path_drops_matching_entries() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p1 = write_file(&dir, "f1.txt", b"foxtrot");
        let p2 = write_file(&dir, "f2.txt", b"foxtrot2");
        let k1 = Key::from_bytes(b"read|f1");
        let k2 = Key::from_bytes(b"read|f2");
        cache
            .persist(&k1, b"f1-out", "read", vec![root_for(&p1)])
            .unwrap();
        cache
            .persist(&k2, b"f2-out", "read", vec![root_for(&p2)])
            .unwrap();
        assert_eq!(cache.entry_count(), 2);
        let n = cache.invalidate_path(&p1);
        assert_eq!(n, 1);
        assert_eq!(cache.entry_count(), 1);
        // k1 invalidated, k2 still present
        match cache.lookup(&k2).unwrap() {
            LookupOutcome::Hit(_) => {}
            other => panic!("k2 should still hit, got {other:?}"),
        }
        match cache.lookup(&k1).unwrap() {
            LookupOutcome::Miss => {}
            other => panic!("k1 should miss, got {other:?}"),
        }
    }

    #[test]
    fn invalidate_path_matches_case_insensitively() {
        // On a case-insensitive filesystem `Src/Foo.rs` and `src/foo.rs`
        // name the same file; a path edit reported under one casing must
        // still invalidate an entry whose file root was recorded under
        // another. A missed invalidation leaves a stale hit.
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "CaseFile.txt", b"contents");
        let key = Key::from_bytes(b"read|casefile");
        cache
            .persist(&key, b"formatted", "read", vec![root_for(&p)])
            .unwrap();
        assert_eq!(cache.entry_count(), 1);

        let differently_cased = dir.path().join("casefile.txt");
        let n = cache.invalidate_path(&differently_cased);
        assert_eq!(n, 1, "case-differing path must still invalidate the entry");
        assert_eq!(cache.entry_count(), 0);
    }

    #[test]
    fn multi_root_revalidation() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p1 = write_file(&dir, "g1.txt", b"golf1");
        let p2 = write_file(&dir, "g2.txt", b"golf2");
        let key = Key::from_bytes(b"grep|pattern|g1+g2");
        cache
            .persist(
                &key,
                b"merged-output",
                "grep",
                vec![root_for(&p1), root_for(&p2)],
            )
            .unwrap();

        // First revalidation: clean
        match cache.lookup_revalidate(&key).unwrap() {
            LookupOutcome::Hit(_) => {}
            other => panic!("expected Hit, got {other:?}"),
        }
        // Modify only the second root: must invalidate
        std::fs::write(&p2, b"changed").unwrap();
        match cache.lookup_revalidate(&key).unwrap() {
            LookupOutcome::Invalidated => {}
            other => panic!("expected Invalidated, got {other:?}"),
        }
    }

    #[test]
    fn upstream_invalidation_drops_dependents() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "src.txt", b"alpha");
        let read_key = Key::from_bytes(b"read|src");
        cache
            .persist(&read_key, b"alpha-formatted", "read", vec![root_for(&p)])
            .unwrap();
        // Two LlmCalls both depend on the read result.
        let llm1 = Key::from_bytes(b"llm|first-prompt");
        let llm2 = Key::from_bytes(b"llm|second-prompt");
        cache
            .persist_with_upstreams(
                &llm1,
                b"completion-1",
                "llm_call",
                vec![],
                vec![read_key.clone()],
            )
            .unwrap();
        cache
            .persist_with_upstreams(
                &llm2,
                b"completion-2",
                "llm_call",
                vec![],
                vec![read_key.clone()],
            )
            .unwrap();
        assert_eq!(cache.entry_count(), 3);

        // Invalidating the read key must cascade to both LlmCalls.
        let dropped = cache.invalidate_upstream(&read_key);
        assert_eq!(dropped, 2);
        assert_eq!(cache.lookup(&llm1).unwrap(), LookupOutcome::Miss);
        assert_eq!(cache.lookup(&llm2).unwrap(), LookupOutcome::Miss);
    }

    #[test]
    fn invalidate_path_cascades_to_dependent_llm_calls() {
        let dir = TempDir::new().unwrap();
        let cache = cache(&dir);
        let p = write_file(&dir, "input.txt", b"hello");
        let read_key = Key::from_bytes(b"read|input");
        cache
            .persist(&read_key, b"hello-formatted", "read", vec![root_for(&p)])
            .unwrap();
        let llm = Key::from_bytes(b"llm|sees-read");
        cache
            .persist_with_upstreams(
                &llm,
                b"completion",
                "llm_call",
                vec![],
                vec![read_key.clone()],
            )
            .unwrap();
        assert_eq!(cache.entry_count(), 2);

        // Modify the file and invalidate by path.
        std::fs::write(&p, b"changed").unwrap();
        let n = cache.invalidate_path(&p);
        assert_eq!(n, 1, "the read entry was the direct path match");
        // The LlmCall must also be gone via the cascade.
        assert_eq!(cache.lookup(&llm).unwrap(), LookupOutcome::Miss);
        assert_eq!(cache.entry_count(), 0);
    }

    #[test]
    fn transitive_invalidation_walks_multi_hop_chain() {
        // Dependency chain: C names B as its upstream and B names A, so
        // invalidating A is expected to take out both B and C.
        let tmp = TempDir::new().unwrap();
        let live = cache(&tmp);
        let leaf_file = write_file(&tmp, "f.txt", b"x");
        let key_a = Key::from_bytes(b"a");
        let key_b = Key::from_bytes(b"b");
        let key_c = Key::from_bytes(b"c");

        // A is the only entry backed by a file root; B and C hang off it
        // purely through upstream keys.
        live.persist(&key_a, b"a-bytes", "read", vec![root_for(&leaf_file)])
            .unwrap();
        live.persist_with_upstreams(&key_b, b"b-bytes", "llm_call", vec![], vec![key_a.clone()])
            .unwrap();
        live.persist_with_upstreams(&key_c, b"c-bytes", "llm_call", vec![], vec![key_b.clone()])
            .unwrap();

        let dropped = live.invalidate_upstream(&key_a);
        assert_eq!(dropped, 2);
        for downstream in [&key_b, &key_c] {
            assert_eq!(live.lookup(downstream).unwrap(), LookupOutcome::Miss);
        }
    }

    #[test]
    fn upstream_keys_persist_across_rehydration() {
        let tmp = TempDir::new().unwrap();
        let data_path = write_file(&tmp, "g.txt", b"data");
        let read_key = Key::from_bytes(b"read|g");
        let llm_key = Key::from_bytes(b"llm|g-consumer");

        // First instance: persist a read entry and an llm_call that names
        // it as an upstream, then discard the instance.
        let first = cache(&tmp);
        first
            .persist(&read_key, b"data-formatted", "read", vec![root_for(&data_path)])
            .unwrap();
        first
            .persist_with_upstreams(
                &llm_key,
                b"completion",
                "llm_call",
                vec![],
                vec![read_key.clone()],
            )
            .unwrap();
        drop(first);

        // Second instance over the same store directory: both entries and
        // the upstream edge between them must be back, otherwise the
        // invalidation below would have nothing to cascade to.
        let reopened_store = crate::store::FileStore::open(tmp.path().join("store")).unwrap();
        let second = LiveCache::new(reopened_store);
        assert_eq!(second.entry_count(), 2);
        let dropped = second.invalidate_upstream(&read_key);
        assert_eq!(dropped, 1, "rehydrated edge must support cascade");
    }

    #[test]
    fn fresh_cache_rehydrates_from_store_on_disk() {
        // Simulates a process restart: one cache instance persists an
        // entry and is dropped, then a brand-new instance is opened over
        // the same store directory and must still serve that entry as a
        // Hit.
        let tmp = TempDir::new().unwrap();
        let file_path = write_file(&tmp, "rehydrate.txt", b"persist me");
        let key = Key::from_bytes(b"read|rehydrate|persist me");

        let before_restart = cache(&tmp);
        before_restart
            .persist(&key, b"served-once", "read", vec![root_for(&file_path)])
            .unwrap();
        assert_eq!(before_restart.entry_count(), 1);
        // Dropping the instance destroys its in-memory registry.
        drop(before_restart);

        let reopened_store = crate::store::FileStore::open(tmp.path().join("store")).unwrap();
        let after_restart = LiveCache::new(reopened_store);
        // If the new instance did not rehydrate, the count would be 0 and
        // the lookup below would miss.
        assert_eq!(after_restart.entry_count(), 1);
        let outcome = after_restart.lookup_revalidate(&key).unwrap();
        match outcome {
            LookupOutcome::Hit(payload) => assert_eq!(payload.bytes, b"served-once"),
            other => panic!("expected Hit after rehydrate, got {other:?}"),
        }
    }

    #[test]
    fn hit_returns_byte_identical_payload() {
        // A hit must hand back exactly the bytes that were persisted,
        // including leading spaces, tabs and newlines, rather than a
        // re-formatted rendering of them.
        let tmp = TempDir::new().unwrap();
        let live = cache(&tmp);
        let source_path = write_file(&tmp, "h.txt", b"hotel");
        let key = Key::from_bytes(b"read|h");
        let original = b"  1\thotel-formatted-with-line-numbers\n  2\tetc\n";
        live.persist(&key, original, "read", vec![root_for(&source_path)])
            .unwrap();

        let outcome = live.lookup_revalidate(&key).unwrap();
        match outcome {
            LookupOutcome::Hit(payload) => assert_eq!(payload.bytes, original),
            other => panic!("expected Hit, got {other:?}"),
        }
    }

    #[test]
    fn hash_file_with_limit_content_hashes_within_limit() {
        // A file smaller than the 1024-byte limit must come back as a
        // content hash equal to what the unlimited `hash_file` produces.
        let tmp = TempDir::new().unwrap();
        let small = write_file(&tmp, "small.bin", b"comfortably within the limit");
        let limited = hash_file_with_limit(&small, 1024).unwrap();
        match limited {
            FileHash::Oversized => panic!("a file within the limit must content-hash"),
            FileHash::Content(digest) => {
                let unlimited = hash_file(&small).unwrap();
                assert_eq!(digest, unlimited);
            }
        }
    }

    #[test]
    fn hash_file_with_limit_reports_oversized_above_limit() {
        // A 4096-byte file checked against a 64-byte limit must be
        // reported as `Oversized`.
        let tmp = TempDir::new().unwrap();
        let big = write_file(&tmp, "big.bin", &[7u8; 4096]);
        let verdict = hash_file_with_limit(&big, 64).unwrap();
        assert_eq!(verdict, FileHash::Oversized);
    }

    #[test]
    fn oversized_files_yield_no_keyable_digest() {
        // Regression guard for the removed metadata fallback, which hashed
        // (path, size, mtime) and therefore let two different oversized
        // files with equal size and mtime share one key. With
        // `FileHash::Oversized` there is no digest to key on, so this
        // test pins that both files report `Oversized` and expose no
        // content hash.
        let tmp = TempDir::new().unwrap();
        let first_path = write_file(&tmp, "a.bin", &[1u8; 4096]);
        let second_path = write_file(&tmp, "b.bin", &[2u8; 4096]);
        let first_hash = hash_file_with_limit(&first_path, 64).unwrap();
        let second_hash = hash_file_with_limit(&second_path, 64).unwrap();
        assert_eq!(first_hash, FileHash::Oversized);
        assert_eq!(second_hash, FileHash::Oversized);
        assert!(first_hash.content().is_none() && second_hash.content().is_none());
    }
}