use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
// Refuse to parse an on-disk cache larger than this (50 MiB); `load` starts fresh instead.
const MAX_CACHE_SIZE_BYTES: u64 = 50 * 1024 * 1024;
// Upper bound on cached entries; `put` rejects brand-new keys once this is reached.
const MAX_CACHE_ENTRIES: usize = 50_000;
// File name of the persisted cache, created directly inside the content root.
pub const CACHE_FILENAME: &str = ".build-cache.json";
/// One cached file: its content hash plus the paths of files it depends on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BuildCacheEntry {
    /// Hex-encoded content hash (produced by `hash_file`/`hash_bytes`).
    pub hash: String,
    /// Cache keys of dependency files; omitted from the JSON when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub deps: Vec<String>,
}
/// Build cache persisted as JSON at `path`, mapping file-path keys to entries.
#[derive(Debug)]
pub struct BuildCache {
    // Where `save` writes the cache; empty for `BuildCache::empty()`, which makes `save` a no-op.
    path: PathBuf,
    // Entries keyed by file path string (e.g. "cases/test.md" in the tests).
    entries: HashMap<String, BuildCacheEntry>,
}
impl BuildCache {
    /// Creates an in-memory cache with no backing file; `save` on it is a no-op.
    pub fn empty() -> Self {
        Self {
            path: PathBuf::new(),
            entries: HashMap::new(),
        }
    }

    /// Loads the cache from `content_root/CACHE_FILENAME`.
    ///
    /// Missing, oversized, or unparseable cache files are non-fatal: the
    /// cache starts empty (with a stderr warning for the latter two cases).
    ///
    /// # Errors
    /// Returns `Err` only for I/O failures (stat or read of an existing file).
    pub fn load(content_root: &Path) -> Result<Self, String> {
        let path = content_root.join(CACHE_FILENAME);
        let entries = if path.exists() {
            let meta =
                std::fs::metadata(&path).map_err(|e| format!("failed to stat cache file: {e}"))?;
            if meta.len() > MAX_CACHE_SIZE_BYTES {
                // An oversized cache is either corrupt or too costly to parse; discard it.
                eprintln!("build cache exceeds {MAX_CACHE_SIZE_BYTES} bytes, starting fresh");
                HashMap::new()
            } else {
                let content = std::fs::read_to_string(&path)
                    .map_err(|e| format!("failed to read cache file: {e}"))?;
                match serde_json::from_str(&content) {
                    Ok(entries) => entries,
                    Err(e) => {
                        // A corrupt cache only costs a rebuild, but don't discard it silently
                        // (previously swallowed via unwrap_or_default()).
                        eprintln!("build cache is unreadable ({e}), starting fresh");
                        HashMap::new()
                    }
                }
            }
        } else {
            HashMap::new()
        };
        Ok(Self { path, entries })
    }

    /// Returns `true` if `path` is cached with exactly `current_hash` and every
    /// recorded dependency still has *some* entry in the cache.
    ///
    /// Note: only dependency presence is checked here, not dependency hashes;
    /// use [`Self::is_unchanged_with_hashes`] for the stronger check.
    pub fn is_unchanged(&self, path: &str, current_hash: &str) -> bool {
        match self.entries.get(path) {
            Some(entry) => {
                entry.hash == current_hash
                    && entry.deps.iter().all(|dep| self.entries.contains_key(dep))
            }
            None => false,
        }
    }

    /// Like [`Self::is_unchanged`], but additionally requires every dependency's
    /// cached hash to match its current hash in `current_hashes`.
    ///
    /// A dependency missing from either the cache or `current_hashes` counts
    /// as changed.
    pub fn is_unchanged_with_hashes(
        &self,
        path: &str,
        current_hash: &str,
        current_hashes: &HashMap<String, String>,
    ) -> bool {
        let Some(entry) = self.entries.get(path) else {
            return false;
        };
        if entry.hash != current_hash {
            return false;
        }
        entry.deps.iter().all(|dep| {
            matches!(
                (self.entries.get(dep), current_hashes.get(dep)),
                (Some(cached), Some(current)) if cached.hash == *current
            )
        })
    }

    /// Records (or updates) the entry for `path`.
    ///
    /// Inserting a brand-new key is silently dropped once `MAX_CACHE_ENTRIES`
    /// is reached; updating an existing key is always allowed.
    pub fn put(&mut self, path: &str, hash: String, deps: Vec<String>) {
        if self.entries.len() >= MAX_CACHE_ENTRIES && !self.entries.contains_key(path) {
            return;
        }
        self.entries
            .insert(path.to_string(), BuildCacheEntry { hash, deps });
    }

    /// Drops every entry whose key is not in `existing_files`.
    pub fn prune(&mut self, existing_files: &HashSet<String>) {
        self.entries.retain(|k, _| existing_files.contains(k));
    }

    /// Writes the cache as pretty-printed JSON to its backing path.
    ///
    /// A cache created via [`Self::empty`] has no path and saves nothing.
    ///
    /// # Errors
    /// Returns `Err` if serialization or the file write fails.
    pub fn save(&self) -> Result<(), String> {
        if self.path.as_os_str().is_empty() {
            return Ok(());
        }
        let json = serde_json::to_string_pretty(&self.entries)
            .map_err(|e| format!("failed to serialize build cache: {e}"))?;
        std::fs::write(&self.path, json).map_err(|e| format!("failed to write build cache: {e}"))
    }

    /// Number of cached entries.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// `true` when the cache holds no entries.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }
}
/// Reads the file at `path` and returns the hex digest of its contents.
///
/// # Errors
/// Returns `Err` with a descriptive message when the file cannot be read.
pub fn hash_file(path: &Path) -> Result<String, String> {
    match std::fs::read(path) {
        Ok(bytes) => Ok(hash_bytes(&bytes)),
        Err(e) => Err(format!("failed to read {}: {e}", path.display())),
    }
}
/// Returns the lowercase hex SHA-256 digest of `data`.
pub fn hash_bytes(data: &[u8]) -> String {
    let digest = Sha256::new().chain_update(data).finalize();
    hex_encode(&digest)
}
/// Formats `bytes` as a lowercase hex string, two characters per byte.
fn hex_encode(bytes: &[u8]) -> String {
    use std::fmt::Write;
    bytes
        .iter()
        .fold(String::with_capacity(bytes.len() * 2), |mut acc, b| {
            // write! into a String cannot fail; discard the fmt::Result.
            let _ = write!(acc, "{b:02x}");
            acc
        })
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hash_bytes_deterministic() {
        // Identical input must produce the identical 64-hex-char digest.
        let first = hash_bytes(b"hello world");
        let second = hash_bytes(b"hello world");
        assert_eq!(first, second);
        assert_eq!(first.len(), 64);
    }

    #[test]
    fn hash_bytes_different_input() {
        // Distinct inputs should not collide.
        assert_ne!(hash_bytes(b"hello"), hash_bytes(b"world"));
    }

    #[test]
    fn cache_put_and_check() {
        let mut cache = BuildCache::empty();
        cache.put("cases/test.md", "abc123".to_string(), Vec::new());
        assert!(cache.is_unchanged("cases/test.md", "abc123"));
        assert!(!cache.is_unchanged("cases/test.md", "different"));
    }

    #[test]
    fn cache_missing_entry() {
        // A path that was never inserted always counts as changed.
        assert!(!BuildCache::empty().is_unchanged("missing.md", "abc"));
    }

    #[test]
    fn cache_with_deps() {
        let mut cache = BuildCache::empty();
        cache.put("people/test.md", "entity_hash".to_string(), Vec::new());
        cache.put(
            "cases/test.md",
            "case_hash".to_string(),
            vec!["people/test.md".to_string()],
        );
        let mut current = HashMap::new();
        current.insert("cases/test.md".to_string(), "case_hash".to_string());
        current.insert("people/test.md".to_string(), "entity_hash".to_string());
        assert!(cache.is_unchanged_with_hashes("cases/test.md", "case_hash", &current));
        // Changing a dependency's current hash invalidates the dependent entry.
        current.insert("people/test.md".to_string(), "changed".to_string());
        assert!(!cache.is_unchanged_with_hashes("cases/test.md", "case_hash", &current));
    }

    #[test]
    fn cache_prune() {
        let mut cache = BuildCache::empty();
        cache.put("keep.md", "h1".to_string(), Vec::new());
        cache.put("remove.md", "h2".to_string(), Vec::new());
        let existing: HashSet<String> = ["keep.md".to_string()].into();
        cache.prune(&existing);
        assert_eq!(cache.len(), 1);
        assert!(cache.is_unchanged("keep.md", "h1"));
    }

    #[test]
    fn cache_boundary_enforced() {
        // Fill to capacity, then a brand-new key must be rejected.
        let mut cache = BuildCache::empty();
        for i in 0..MAX_CACHE_ENTRIES {
            cache.put(&format!("file{i}.md"), format!("h{i}"), Vec::new());
        }
        assert_eq!(cache.len(), MAX_CACHE_ENTRIES);
        cache.put("overflow.md", "hx".to_string(), Vec::new());
        assert_eq!(cache.len(), MAX_CACHE_ENTRIES);
        assert!(!cache.is_unchanged("overflow.md", "hx"));
    }

    #[test]
    fn cache_update_existing_within_boundary() {
        // At capacity, updating an already-present key is still allowed.
        let mut cache = BuildCache::empty();
        for i in 0..MAX_CACHE_ENTRIES {
            cache.put(&format!("file{i}.md"), format!("h{i}"), Vec::new());
        }
        cache.put("file0.md", "updated".to_string(), Vec::new());
        assert!(cache.is_unchanged("file0.md", "updated"));
        assert_eq!(cache.len(), MAX_CACHE_ENTRIES);
    }
}