use crate::store::{FileRootSerde, Key, Payload, Store, StoreError};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::RwLock;
/// Errors surfaced by fallible [`LiveCache`] operations.
///
/// Both variants are built through `From` (via `#[from]`), so `?` converts the
/// underlying error automatically.
#[derive(Debug, thiserror::Error)]
pub enum CacheError {
/// A failure reported by the backing store; rendered as `store: <inner>`.
#[error("store: {0}")]
Store(#[from] StoreError),
/// A standard I/O failure; rendered as `io: <inner>`.
#[error("io: {0}")]
Io(#[from] std::io::Error),
}
/// A file that a cache entry depends on, paired with the hash recorded for it.
///
/// During revalidation the path is joined onto the cache's workspace base,
/// the file is re-hashed with `hash_file`, and the digest is compared with
/// `expected_hash`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileRoot {
/// Path of the dependency as recorded at persist time.
pub path: PathBuf,
/// Hash string the file is expected to produce while the entry is still valid.
pub expected_hash: String,
}
/// In-memory registry record kept per cache key.
///
/// It carries the metadata needed for revalidation and cascade invalidation
/// without reading the payload back from the store.
#[derive(Debug, Clone)]
struct EntryMeta {
// Tool kind label supplied when the entry was persisted (reported by `known_kinds`).
tool_kind: String,
// Files whose hashes are checked by `lookup_revalidate` and matched by `invalidate_path`.
file_roots: Vec<FileRoot>,
// String forms of the keys this entry depends on; walked by `invalidate_upstream`.
upstream_keys: Vec<String>,
}
/// A cache front-end that pairs a backing [`Store`] with an in-memory registry
/// of per-entry metadata.
pub struct LiveCache {
// Backing store that holds the payloads.
store: Box<dyn Store>,
// Key string -> metadata; every access in this file recovers from lock poisoning
// with `into_inner` instead of panicking.
registry: RwLock<HashMap<String, EntryMeta>>,
// Base directory that recorded file-root paths are joined onto.
workspace_base: PathBuf,
}
/// Result of a cache lookup.
#[derive(Debug, Clone, PartialEq)]
pub enum LookupOutcome {
/// The store returned a payload for the key.
Hit(Payload),
/// The store has no payload for the key.
Miss,
/// Returned only by `lookup_revalidate`: a recorded file root could not be
/// hashed or its hash no longer matches the recorded one.
Invalidated,
}
impl LiveCache {
/// Builds a cache over `store`, using the process's current directory as the
/// workspace base.
///
/// If the current directory cannot be determined, the base falls back to `"."`.
pub fn new<S: Store + 'static>(store: S) -> Self {
    let workspace_base = match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    };
    let boxed: Box<dyn Store> = Box::new(store);
    Self::from_box_with_workspace(boxed, workspace_base)
}
/// Builds a cache over `store` with an explicit workspace base directory.
///
/// Recorded file-root paths are later joined onto `workspace_base`.
pub fn with_workspace<S: Store + 'static>(
    store: S,
    workspace_base: impl Into<PathBuf>,
) -> Self {
    let boxed: Box<dyn Store> = Box::new(store);
    let base: PathBuf = workspace_base.into();
    Self::from_box_with_workspace(boxed, base)
}
/// Builds a cache over an already boxed store, using the process's current
/// directory as the workspace base (or `"."` when it cannot be determined).
pub fn from_box(store: Box<dyn Store>) -> Self {
    let workspace_base = match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    };
    Self::from_box_with_workspace(store, workspace_base)
}
pub fn from_box_with_workspace(store: Box<dyn Store>, workspace_base: PathBuf) -> Self {
let mut reg = HashMap::new();
if let Ok(items) = store.iter_meta() {
for (key, meta) in items {
let file_roots = meta
.file_roots
.into_iter()
.map(|f| FileRoot {
path: PathBuf::from(f.path),
expected_hash: f.expected_hash,
})
.collect();
reg.insert(
key.0,
EntryMeta {
tool_kind: meta.tool_kind,
file_roots,
upstream_keys: meta.upstream_keys,
},
);
}
}
Self {
store,
registry: RwLock::new(reg),
workspace_base,
}
}
/// Borrows the backing store.
pub fn store(&self) -> &dyn Store {
    &*self.store
}
/// Returns the base directory that recorded file-root paths are joined onto.
pub fn workspace_base(&self) -> &Path {
    self.workspace_base.as_path()
}
/// Returns the number of entries currently tracked in the in-memory registry.
///
/// A poisoned registry lock is recovered rather than treated as fatal.
pub fn entry_count(&self) -> usize {
    let registry = match self.registry.read() {
        Ok(guard) => guard,
        Err(poisoned) => poisoned.into_inner(),
    };
    registry.len()
}
/// Looks `key` up in the store without revalidating its file roots.
///
/// The registry is reconciled with what the store reports:
/// * a hit for an unregistered key registers the payload's metadata;
/// * a miss for a registered key drops the stale registry entry.
///
/// # Errors
/// Returns [`CacheError::Store`] when the store lookup fails.
pub fn lookup(&self, key: &Key) -> Result<LookupOutcome, CacheError> {
    // Snapshot registration state; the read guard is released before the store is touched.
    let was_registered = {
        let registry = self.registry.read().unwrap_or_else(|e| e.into_inner());
        registry.contains_key(&key.0)
    };
    let found = self.store.lookup(key)?;
    if let Some(payload) = found {
        if !was_registered {
            self.populate_registry_from_meta(key, &payload);
        }
        return Ok(LookupOutcome::Hit(payload));
    }
    if was_registered {
        let mut registry = self.registry.write().unwrap_or_else(|e| e.into_inner());
        registry.remove(&key.0);
    }
    Ok(LookupOutcome::Miss)
}
pub fn lookup_revalidate(&self, key: &Key) -> Result<LookupOutcome, CacheError> {
let cached_meta = {
let reg = self.registry.read().unwrap_or_else(|e| e.into_inner());
reg.get(&key.0).cloned()
};
if let Some(meta) = &cached_meta {
match revalidate_file_roots(&self.workspace_base, &meta.file_roots) {
RevalidationOutcome::Ok => {}
RevalidationOutcome::Invalidated => {
self.registry
.write()
.unwrap_or_else(|e| e.into_inner())
.remove(&key.0);
return Ok(LookupOutcome::Invalidated);
}
}
}
match self.store.lookup(key)? {
Some(p) => {
if cached_meta.is_none() {
let local_roots: Vec<FileRoot> = p
.meta
.file_roots
.iter()
.map(|f| FileRoot {
path: PathBuf::from(&f.path),
expected_hash: f.expected_hash.clone(),
})
.collect();
match revalidate_file_roots(&self.workspace_base, &local_roots) {
RevalidationOutcome::Ok => {
self.populate_registry_from_meta(key, &p);
}
RevalidationOutcome::Invalidated => {
return Ok(LookupOutcome::Invalidated);
}
}
}
Ok(LookupOutcome::Hit(p))
}
None => {
if cached_meta.is_some() {
self.registry
.write()
.unwrap_or_else(|e| e.into_inner())
.remove(&key.0);
}
Ok(LookupOutcome::Miss)
}
}
}
fn populate_registry_from_meta(&self, key: &Key, p: &Payload) {
let file_roots = p
.meta
.file_roots
.iter()
.map(|f| FileRoot {
path: PathBuf::from(&f.path),
expected_hash: f.expected_hash.clone(),
})
.collect();
self.registry
.write()
.unwrap_or_else(|e| e.into_inner())
.insert(
key.0.clone(),
EntryMeta {
tool_kind: p.meta.tool_kind.clone(),
file_roots,
upstream_keys: p.meta.upstream_keys.clone(),
},
);
}
/// Persists an entry that has no upstream dependencies.
///
/// Shorthand for [`Self::persist_with_upstreams`] with an empty upstream list.
///
/// # Errors
/// Propagates any error from `persist_with_upstreams`.
pub fn persist(
    &self,
    key: &Key,
    bytes: &[u8],
    tool_kind: &str,
    file_roots: Vec<FileRoot>,
) -> Result<(), CacheError> {
    let no_upstreams: Vec<Key> = Vec::new();
    self.persist_with_upstreams(key, bytes, tool_kind, file_roots, no_upstreams)
}
/// Writes an entry to the store and, once the write succeeds, records its
/// metadata in the registry.
///
/// `file_roots` are the files the entry depends on; `upstream_keys` are the
/// cache keys it was derived from (used by `invalidate_upstream`).
///
/// # Errors
/// Returns [`CacheError::Store`] when the store write fails; the registry is
/// left untouched in that case.
pub fn persist_with_upstreams(
    &self,
    key: &Key,
    bytes: &[u8],
    tool_kind: &str,
    file_roots: Vec<FileRoot>,
    upstream_keys: Vec<Key>,
) -> Result<(), CacheError> {
    // Serialized form of the roots; `display()` is lossy for non-UTF-8 paths.
    let mut serde_roots: Vec<FileRootSerde> = Vec::with_capacity(file_roots.len());
    for root in &file_roots {
        serde_roots.push(FileRootSerde {
            path: root.path.display().to_string(),
            expected_hash: root.expected_hash.clone(),
        });
    }
    let mut upstream_names: Vec<String> = Vec::with_capacity(upstream_keys.len());
    for upstream in &upstream_keys {
        upstream_names.push(upstream.0.clone());
    }

    self.store.persist_with_upstreams(
        key,
        bytes,
        tool_kind,
        serde_roots,
        upstream_names.clone(),
    )?;

    let entry = EntryMeta {
        tool_kind: tool_kind.to_owned(),
        file_roots,
        upstream_keys: upstream_names,
    };
    let mut registry = self.registry.write().unwrap_or_else(|e| e.into_inner());
    registry.insert(key.0.clone(), entry);
    Ok(())
}
/// Drops `key` from both the registry and the store.
///
/// The store removal is best-effort: its result is discarded. Entries that
/// list `key` as an upstream are not touched here.
pub fn mark_dirty(&self, key: &Key) {
    {
        let mut registry = self.registry.write().unwrap_or_else(|e| e.into_inner());
        registry.remove(&key.0);
    }
    let _ = self.store.remove(key);
}
pub fn invalidate_upstream(&self, upstream_key: &Key) -> usize {
let mut reg = self.registry.write().unwrap_or_else(|e| e.into_inner());
let mut dirty: std::collections::HashSet<String> =
std::collections::HashSet::from([upstream_key.0.clone()]);
loop {
let before = dirty.len();
for (k, meta) in reg.iter() {
if dirty.contains(k) {
continue;
}
if meta.upstream_keys.iter().any(|u| dirty.contains(u)) {
dirty.insert(k.clone());
}
}
if dirty.len() == before {
break;
}
}
let mut dropped = 0;
for k in &dirty {
if k == &upstream_key.0 {
continue;
}
if reg.remove(k).is_some() {
dropped += 1;
let _ = self.store.remove(&Key(k.clone()));
}
}
dropped
}
/// Drops every entry that has `path` among its file roots, together with all
/// entries that depend on those entries.
///
/// Paths are compared on their lowercased string form. When a recorded root
/// can be canonicalized, its canonical form is compared with the canonical
/// (or, failing that, raw) form of `path`. Otherwise the resolved and recorded
/// paths are compared with the raw `path`.
///
/// Returns the number of entries that matched `path` directly; cascaded
/// dependents are not included in the count.
pub fn invalidate_path(&self, path: &Path) -> usize {
    let canonical_target = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
    let target_ci = lower_path(&canonical_target);
    let path_ci = lower_path(path);

    // Collect matching keys under a read lock, then release it before mutating.
    let mut matched: Vec<String> = Vec::new();
    {
        let registry = self.registry.read().unwrap_or_else(|e| e.into_inner());
        for (name, meta) in registry.iter() {
            let touches = meta.file_roots.iter().any(|root| {
                let resolved = resolve_root_path(&self.workspace_base, &root.path);
                if let Ok(canonical) = resolved.canonicalize() {
                    lower_path(&canonical) == target_ci
                } else {
                    lower_path(&resolved) == path_ci || lower_path(&root.path) == path_ci
                }
            });
            if touches {
                matched.push(name.clone());
            }
        }
    }

    let direct_matches = matched.len();
    for name in matched {
        let key = Key(name);
        // Dependents go first, then the matching entry itself.
        self.invalidate_upstream(&key);
        {
            let mut registry = self.registry.write().unwrap_or_else(|e| e.into_inner());
            registry.remove(&key.0);
        }
        let _ = self.store.remove(&key);
    }
    direct_matches
}
/// Returns the distinct tool kinds of all registered entries, sorted ascending.
pub fn known_kinds(&self) -> Vec<String> {
    let registry = self.registry.read().unwrap_or_else(|e| e.into_inner());
    // An ordered set yields the kinds already sorted and de-duplicated.
    let distinct: std::collections::BTreeSet<&str> = registry
        .values()
        .map(|meta| meta.tool_kind.as_str())
        .collect();
    let kinds: Vec<String> = distinct.into_iter().map(String::from).collect();
    kinds
}
}
// Result of re-hashing an entry's file roots (see `revalidate_file_roots`).
enum RevalidationOutcome {
// Every root was hashed and matched its recorded hash (also the result for an empty root list).
Ok,
// At least one root could not be hashed or produced a different hash.
Invalidated,
}
/// Re-hashes each root (resolved against `workspace_base`) and reports
/// whether all of them still match their recorded hashes.
///
/// Checking stops at the first root that cannot be hashed or whose hash
/// differs. An empty `roots` slice is reported as `Ok`.
fn revalidate_file_roots(workspace_base: &Path, roots: &[FileRoot]) -> RevalidationOutcome {
    let all_current = roots.iter().all(|root| {
        let on_disk = resolve_root_path(workspace_base, &root.path);
        match hash_file(&on_disk) {
            Ok(digest) => digest == root.expected_hash,
            // An unreadable or missing file counts as a mismatch.
            Err(_) => false,
        }
    });
    if all_current {
        RevalidationOutcome::Ok
    } else {
        RevalidationOutcome::Invalidated
    }
}
/// Resolves a recorded root path against the workspace base.
///
/// This follows `PathBuf::push` semantics: an absolute `recorded` path
/// replaces the base entirely.
fn resolve_root_path(workspace_base: &Path, recorded: &Path) -> PathBuf {
    let mut resolved = workspace_base.to_path_buf();
    resolved.push(recorded);
    resolved
}
/// Renders a path as a lowercased string for case-insensitive comparison.
///
/// Non-UTF-8 sequences are replaced lossily before lowercasing.
fn lower_path(p: &Path) -> String {
    let text = p.to_string_lossy();
    text.to_lowercase()
}
/// Default size limit (100 MiB) used when no override is configured.
const HASH_MAX_BYTES: u64 = 100 * 1024 * 1024;

/// Returns the configured hashing size limit in bytes.
///
/// The `VERDANT_HASH_MAX_BYTES` environment variable overrides the default
/// when it is set and parses as a `u64`. An unset or unparsable value yields
/// the 100 MiB default.
pub fn hash_max_bytes() -> u64 {
    match std::env::var("VERDANT_HASH_MAX_BYTES") {
        Ok(raw) => raw.parse::<u64>().unwrap_or(HASH_MAX_BYTES),
        Err(_) => HASH_MAX_BYTES,
    }
}
/// Outcome of a size-limited hashing attempt (see `hash_file_with_limit`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileHash {
    /// The file was hashed; holds the digest string.
    Content(String),
    /// The file exceeded the size limit and was not hashed.
    Oversized,
}

impl FileHash {
    /// Returns the digest when the file was hashed, or `None` for `Oversized`.
    pub fn content(&self) -> Option<&str> {
        if let FileHash::Content(digest) = self {
            Some(digest.as_str())
        } else {
            None
        }
    }
}
/// Streams the file at `path` through BLAKE3 and returns the digest as a
/// hex string.
///
/// The file is read in 64 KiB chunks, so memory use does not depend on the
/// file size.
///
/// # Errors
/// Returns the underlying I/O error if the file cannot be opened or read.
/// Reads that fail with `ErrorKind::Interrupted` are retried rather than
/// reported, because that error kind is documented as non-fatal. Callers such
/// as `revalidate_file_roots` treat any error as an invalidation, so an
/// interrupted read must not surface as a failure.
pub fn hash_file(path: &Path) -> std::io::Result<String> {
    let mut hasher = blake3::Hasher::new();
    let mut f = std::fs::File::open(path)?;
    let mut buf = [0u8; 1 << 16];
    loop {
        let n = match std::io::Read::read(&mut f, &mut buf) {
            // End of file.
            Ok(0) => break,
            Ok(n) => n,
            // A signal interrupted the read before any data arrived: retry.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        hasher.update(&buf[..n]);
    }
    Ok(hasher.finalize().to_hex().to_string())
}
/// Hashes the file at `path` unless its size exceeds `max` bytes.
///
/// Returns `FileHash::Oversized` when the size reported by the file's
/// metadata is strictly greater than `max`; otherwise returns
/// `FileHash::Content` with the digest from `hash_file`.
///
/// # Errors
/// Returns the I/O error from reading the metadata or from hashing.
pub fn hash_file_with_limit(path: &Path, max: u64) -> std::io::Result<FileHash> {
    let size = std::fs::metadata(path)?.len();
    if size <= max {
        hash_file(path).map(FileHash::Content)
    } else {
        Ok(FileHash::Oversized)
    }
}
// Unit tests for `LiveCache` and the hashing helpers. Each test works inside
// its own temporary directory and uses a `FileStore` rooted at `<tempdir>/store`.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
// Opens a `FileStore` under `<dir>/store` and wraps it with `LiveCache::new`
// (so the workspace base is the process's current directory).
fn cache(dir: &TempDir) -> LiveCache {
let store = crate::store::FileStore::open(dir.path().join("store")).unwrap();
LiveCache::new(store)
}
// Writes `content` to `<dir>/<name>` and returns the full path.
fn write_file(dir: &TempDir, name: &str, content: &[u8]) -> PathBuf {
let p = dir.path().join(name);
std::fs::write(&p, content).unwrap();
p
}
// Builds a `FileRoot` for `p` whose expected hash is the file's current hash.
fn root_for(p: &Path) -> FileRoot {
FileRoot {
path: p.to_path_buf(),
expected_hash: hash_file(p).unwrap(),
}
}
// A key is a miss before it is persisted and a hit, with the stored bytes and
// tool kind, afterwards.
#[test]
fn miss_then_persist_then_hit() {
let dir = TempDir::new().unwrap();
let cache = cache(&dir);
let p = write_file(&dir, "a.txt", b"alpha");
let key = Key::from_bytes(b"read|a.txt|alpha");
assert_eq!(cache.lookup(&key).unwrap(), LookupOutcome::Miss);
cache
.persist(&key, b"alpha-formatted", "read", vec![root_for(&p)])
.unwrap();
match cache.lookup(&key).unwrap() {
LookupOutcome::Hit(payload) => {
assert_eq!(payload.bytes, b"alpha-formatted");
assert_eq!(payload.meta.tool_kind, "read");
}
other => panic!("expected Hit, got {other:?}"),
}
}
// Revalidation of an entry whose root file is unchanged still hits.
#[test]
fn revalidate_unchanged_returns_hit() {
let dir = TempDir::new().unwrap();
let cache = cache(&dir);
let p = write_file(&dir, "b.txt", b"beta");
let key = Key::from_bytes(b"read|b.txt|beta");
cache
.persist(&key, b"beta-formatted", "read", vec![root_for(&p)])
.unwrap();
match cache.lookup_revalidate(&key).unwrap() {
LookupOutcome::Hit(_) => {}
other => panic!("expected Hit, got {other:?}"),
}
}
// Rewriting the root file makes revalidation report `Invalidated` and removes
// the entry from the registry.
#[test]
fn revalidate_modified_invalidates() {
let dir = TempDir::new().unwrap();
let cache = cache(&dir);
let p = write_file(&dir, "c.txt", b"charlie");
let key = Key::from_bytes(b"read|c.txt|charlie");
cache
.persist(&key, b"charlie-formatted", "read", vec![root_for(&p)])
.unwrap();
std::fs::write(&p, b"DELTA").unwrap();
match cache.lookup_revalidate(&key).unwrap() {
LookupOutcome::Invalidated => {}
other => panic!("expected Invalidated, got {other:?}"),
}
assert_eq!(cache.entry_count(), 0);
}
// Deleting the root file makes revalidation report `Invalidated`.
#[test]
fn revalidate_deleted_invalidates() {
let dir = TempDir::new().unwrap();
let cache = cache(&dir);
let p = write_file(&dir, "d.txt", b"delta");
let key = Key::from_bytes(b"read|d.txt|delta");
cache
.persist(&key, b"delta-formatted", "read", vec![root_for(&p)])
.unwrap();
std::fs::remove_file(&p).unwrap();
match cache.lookup_revalidate(&key).unwrap() {
LookupOutcome::Invalidated => {}
other => panic!("expected Invalidated, got {other:?}"),
}
}
// `mark_dirty` removes the entry from the registry and from the store.
#[test]
fn mark_dirty_drops_entry() {
let dir = TempDir::new().unwrap();
let cache = cache(&dir);
let p = write_file(&dir, "e.txt", b"echo");
let key = Key::from_bytes(b"read|e.txt|echo");
cache
.persist(&key, b"echo-formatted", "read", vec![root_for(&p)])
.unwrap();
assert_eq!(cache.entry_count(), 1);
cache.mark_dirty(&key);
assert_eq!(cache.entry_count(), 0);
assert_eq!(cache.lookup(&key).unwrap(), LookupOutcome::Miss);
}
// `invalidate_path` drops only the entry whose root is the given path.
#[test]
fn invalidate_path_drops_matching_entries() {
let dir = TempDir::new().unwrap();
let cache = cache(&dir);
let p1 = write_file(&dir, "f1.txt", b"foxtrot");
let p2 = write_file(&dir, "f2.txt", b"foxtrot2");
let k1 = Key::from_bytes(b"read|f1");
let k2 = Key::from_bytes(b"read|f2");
cache
.persist(&k1, b"f1-out", "read", vec![root_for(&p1)])
.unwrap();
cache
.persist(&k2, b"f2-out", "read", vec![root_for(&p2)])
.unwrap();
assert_eq!(cache.entry_count(), 2);
let n = cache.invalidate_path(&p1);
assert_eq!(n, 1);
assert_eq!(cache.entry_count(), 1);
match cache.lookup(&k2).unwrap() {
LookupOutcome::Hit(_) => {}
other => panic!("k2 should still hit, got {other:?}"),
}
match cache.lookup(&k1).unwrap() {
LookupOutcome::Miss => {}
other => panic!("k1 should miss, got {other:?}"),
}
}
// A path that differs from the recorded root only by letter case still
// invalidates the entry.
#[test]
fn invalidate_path_matches_case_insensitively() {
let dir = TempDir::new().unwrap();
let cache = cache(&dir);
let p = write_file(&dir, "CaseFile.txt", b"contents");
let key = Key::from_bytes(b"read|casefile");
cache
.persist(&key, b"formatted", "read", vec![root_for(&p)])
.unwrap();
assert_eq!(cache.entry_count(), 1);
let differently_cased = dir.path().join("casefile.txt");
let n = cache.invalidate_path(&differently_cased);
assert_eq!(n, 1, "case-differing path must still invalidate the entry");
assert_eq!(cache.entry_count(), 0);
}
// With two roots, the entry hits while both are unchanged and is invalidated
// once either one changes.
#[test]
fn multi_root_revalidation() {
let dir = TempDir::new().unwrap();
let cache = cache(&dir);
let p1 = write_file(&dir, "g1.txt", b"golf1");
let p2 = write_file(&dir, "g2.txt", b"golf2");
let key = Key::from_bytes(b"grep|pattern|g1+g2");
cache
.persist(
&key,
b"merged-output",
"grep",
vec![root_for(&p1), root_for(&p2)],
)
.unwrap();
match cache.lookup_revalidate(&key).unwrap() {
LookupOutcome::Hit(_) => {}
other => panic!("expected Hit, got {other:?}"),
}
std::fs::write(&p2, b"changed").unwrap();
match cache.lookup_revalidate(&key).unwrap() {
LookupOutcome::Invalidated => {}
other => panic!("expected Invalidated, got {other:?}"),
}
}
// `invalidate_upstream` drops both entries that list the read key as an
// upstream and reports the number dropped.
#[test]
fn upstream_invalidation_drops_dependents() {
let dir = TempDir::new().unwrap();
let cache = cache(&dir);
let p = write_file(&dir, "src.txt", b"alpha");
let read_key = Key::from_bytes(b"read|src");
cache
.persist(&read_key, b"alpha-formatted", "read", vec![root_for(&p)])
.unwrap();
let llm1 = Key::from_bytes(b"llm|first-prompt");
let llm2 = Key::from_bytes(b"llm|second-prompt");
cache
.persist_with_upstreams(
&llm1,
b"completion-1",
"llm_call",
vec![],
vec![read_key.clone()],
)
.unwrap();
cache
.persist_with_upstreams(
&llm2,
b"completion-2",
"llm_call",
vec![],
vec![read_key.clone()],
)
.unwrap();
assert_eq!(cache.entry_count(), 3);
let dropped = cache.invalidate_upstream(&read_key);
assert_eq!(dropped, 2);
assert_eq!(cache.lookup(&llm1).unwrap(), LookupOutcome::Miss);
assert_eq!(cache.lookup(&llm2).unwrap(), LookupOutcome::Miss);
}
// `invalidate_path` returns the count of direct matches only, but also removes
// the dependent entry.
#[test]
fn invalidate_path_cascades_to_dependent_llm_calls() {
let dir = TempDir::new().unwrap();
let cache = cache(&dir);
let p = write_file(&dir, "input.txt", b"hello");
let read_key = Key::from_bytes(b"read|input");
cache
.persist(&read_key, b"hello-formatted", "read", vec![root_for(&p)])
.unwrap();
let llm = Key::from_bytes(b"llm|sees-read");
cache
.persist_with_upstreams(
&llm,
b"completion",
"llm_call",
vec![],
vec![read_key.clone()],
)
.unwrap();
assert_eq!(cache.entry_count(), 2);
std::fs::write(&p, b"changed").unwrap();
let n = cache.invalidate_path(&p);
assert_eq!(n, 1, "the read entry was the direct path match");
assert_eq!(cache.lookup(&llm).unwrap(), LookupOutcome::Miss);
assert_eq!(cache.entry_count(), 0);
}
// Invalidation follows a chain a <- b <- c: invalidating `a` drops both `b` and `c`.
#[test]
fn transitive_invalidation_walks_multi_hop_chain() {
let dir = TempDir::new().unwrap();
let cache = cache(&dir);
let key_a = Key::from_bytes(b"a");
let key_b = Key::from_bytes(b"b");
let key_c = Key::from_bytes(b"c");
let p = write_file(&dir, "f.txt", b"x");
cache
.persist(&key_a, b"a-bytes", "read", vec![root_for(&p)])
.unwrap();
cache
.persist_with_upstreams(&key_b, b"b-bytes", "llm_call", vec![], vec![key_a.clone()])
.unwrap();
cache
.persist_with_upstreams(&key_c, b"c-bytes", "llm_call", vec![], vec![key_b.clone()])
.unwrap();
let dropped = cache.invalidate_upstream(&key_a);
assert_eq!(dropped, 2);
assert_eq!(cache.lookup(&key_b).unwrap(), LookupOutcome::Miss);
assert_eq!(cache.lookup(&key_c).unwrap(), LookupOutcome::Miss);
}
// Upstream edges survive a fresh cache being built over the same store directory.
#[test]
fn upstream_keys_persist_across_rehydration() {
let dir = TempDir::new().unwrap();
let p = write_file(&dir, "g.txt", b"data");
let read_key = Key::from_bytes(b"read|g");
let llm_key = Key::from_bytes(b"llm|g-consumer");
{
let cache = cache(&dir);
cache
.persist(&read_key, b"data-formatted", "read", vec![root_for(&p)])
.unwrap();
cache
.persist_with_upstreams(
&llm_key,
b"completion",
"llm_call",
vec![],
vec![read_key.clone()],
)
.unwrap();
}
// Re-open the same store directory with a brand-new cache instance.
let store_root = dir.path().join("store");
let store2 = crate::store::FileStore::open(store_root).unwrap();
let cache2 = LiveCache::new(store2);
assert_eq!(cache2.entry_count(), 2);
let dropped = cache2.invalidate_upstream(&read_key);
assert_eq!(dropped, 1, "rehydrated edge must support cascade");
}
// A fresh cache over an existing store directory rebuilds its registry and
// serves the previously persisted payload.
#[test]
fn fresh_cache_rehydrates_from_store_on_disk() {
let dir = TempDir::new().unwrap();
let p = write_file(&dir, "rehydrate.txt", b"persist me");
let key = Key::from_bytes(b"read|rehydrate|persist me");
{
let cache = cache(&dir);
cache
.persist(&key, b"served-once", "read", vec![root_for(&p)])
.unwrap();
assert_eq!(cache.entry_count(), 1);
}
let store_root = dir.path().join("store");
let store2 = crate::store::FileStore::open(store_root).unwrap();
let cache2 = LiveCache::new(store2);
assert_eq!(cache2.entry_count(), 1);
match cache2.lookup_revalidate(&key).unwrap() {
LookupOutcome::Hit(payload) => assert_eq!(payload.bytes, b"served-once"),
other => panic!("expected Hit after rehydrate, got {other:?}"),
}
}
// The bytes returned on a hit equal the bytes that were persisted, including
// whitespace and control characters.
#[test]
fn hit_returns_byte_identical_payload() {
let dir = TempDir::new().unwrap();
let cache = cache(&dir);
let p = write_file(&dir, "h.txt", b"hotel");
let key = Key::from_bytes(b"read|h");
let original = b" 1\thotel-formatted-with-line-numbers\n 2\tetc\n";
cache
.persist(&key, original, "read", vec![root_for(&p)])
.unwrap();
match cache.lookup_revalidate(&key).unwrap() {
LookupOutcome::Hit(p) => assert_eq!(p.bytes, original),
other => panic!("expected Hit, got {other:?}"),
}
}
// A file within the limit is hashed, and the digest equals `hash_file`'s.
#[test]
fn hash_file_with_limit_content_hashes_within_limit() {
let dir = TempDir::new().unwrap();
let p = write_file(&dir, "small.bin", b"comfortably within the limit");
match hash_file_with_limit(&p, 1024).unwrap() {
FileHash::Content(h) => assert_eq!(h, hash_file(&p).unwrap()),
FileHash::Oversized => panic!("a file within the limit must content-hash"),
}
}
// A 4096-byte file checked against a 64-byte limit is reported as `Oversized`.
#[test]
fn hash_file_with_limit_reports_oversized_above_limit() {
let dir = TempDir::new().unwrap();
let p = write_file(&dir, "big.bin", &[7u8; 4096]);
assert_eq!(hash_file_with_limit(&p, 64).unwrap(), FileHash::Oversized);
}
// Two different oversized files both report `Oversized` and expose no digest
// through `content()`.
#[test]
fn oversized_files_yield_no_keyable_digest() {
let dir = TempDir::new().unwrap();
let a = write_file(&dir, "a.bin", &[1u8; 4096]);
let b = write_file(&dir, "b.bin", &[2u8; 4096]);
let ha = hash_file_with_limit(&a, 64).unwrap();
let hb = hash_file_with_limit(&b, 64).unwrap();
assert_eq!(ha, FileHash::Oversized);
assert_eq!(hb, FileHash::Oversized);
assert!(ha.content().is_none() && hb.content().is_none());
}
}