use std::path::{Path, PathBuf};
use thiserror::Error;
use crate::manifest::{Manifest, PathType};
use crate::merkle::Hasher;
use crate::store::{manifest_path, object_path, OBJECTS_DIR};
/// Errors produced while loading, verifying, or flushing the local cache.
///
/// Marked `#[non_exhaustive]`, so matches outside this crate need a wildcard arm.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum CacheError {
/// The manifest file for a snapshot id does not exist in the cache
/// (the read failed with `std::io::ErrorKind::NotFound`).
#[error("manifest not found locally for {id}. Did you forget to fetch {id} from the store?")]
ManifestNotFound {
/// Snapshot id whose manifest was looked up.
id: String,
},
/// An object referenced by a manifest entry does not exist in the cache.
#[error("object not found in cache: {checksum}")]
ObjectNotFound {
/// Checksum of the missing object, as recorded in the manifest.
checksum: String,
},
/// A cached object's bytes hash to something other than the checksum it is stored under.
#[error("checksum mismatch for {expected}: cached bytes hash to {actual}")]
Integrity {
/// Checksum recorded in the manifest entry.
expected: String,
/// Checksum computed from the bytes actually read from the cache.
actual: String,
},
/// The cached manifest could not be parsed; wraps `crate::manifest::ParseError`.
#[error("failed to parse cached manifest: {0}")]
Parse(#[from] crate::manifest::ParseError),
/// Any other filesystem failure; wraps `std::io::Error`.
#[error("cache I/O error: {0}")]
Io(#[from] std::io::Error),
}
/// Reads and parses the manifest cached for snapshot `id` under `cache_dir`.
///
/// The file location is `cache_dir` joined with [`manifest_path`]`(id)`.
///
/// # Errors
///
/// - [`CacheError::ManifestNotFound`] when the manifest file does not exist.
/// - [`CacheError::Io`] for any other failure while reading the file.
/// - A [`Manifest::parse`] failure, converted into [`CacheError`] via `From`.
pub fn load_cached_manifest(cache_dir: &Path, id: &str) -> Result<Manifest, CacheError> {
    let location = cache_dir.join(manifest_path(id));
    // A missing file gets its own variant so the message can hint at fetching first.
    let contents = std::fs::read_to_string(&location).map_err(|err| {
        if err.kind() == std::io::ErrorKind::NotFound {
            CacheError::ManifestNotFound { id: id.to_owned() }
        } else {
            CacheError::Io(err)
        }
    })?;
    Manifest::parse(&contents).map_err(CacheError::from)
}
/// Loads the cached manifest for snapshot `id` and verifies every object it references.
///
/// Equivalent to [`load_cached_manifest`] followed by [`check_manifest_integrity`].
///
/// # Errors
///
/// Returns whatever error either of those two steps produces; the manifest
/// is loaded first, so its errors take precedence.
pub fn check_snapshot_integrity(
    cache_dir: &Path,
    id: &str,
    hasher: &dyn Hasher,
) -> Result<(), CacheError> {
    load_cached_manifest(cache_dir, id)
        .and_then(|manifest| check_manifest_integrity(cache_dir, &manifest, hasher))
}
/// Verifies that every non-directory entry of `manifest` has an intact object in the cache.
///
/// For each entry whose `path_type` is not [`PathType::Directory`], the object at
/// `cache_dir` joined with [`object_path`]`(checksum)` is read in full and hashed
/// with `hasher`; the resulting hex digest must equal the entry's checksum.
/// Entries are processed in manifest order and the first failure is returned.
///
/// Several entries may share one checksum (identical content at different
/// paths). Each distinct checksum is read and hashed only once per call.
///
/// # Errors
///
/// - [`CacheError::ObjectNotFound`] when an object file does not exist.
/// - [`CacheError::Io`] for any other failure while reading an object.
/// - [`CacheError::Integrity`] when the computed digest differs from the checksum.
pub fn check_manifest_integrity(
    cache_dir: &Path,
    manifest: &Manifest,
    hasher: &dyn Hasher,
) -> Result<(), CacheError> {
    // Checksums that already passed during this call; lets duplicate entries
    // skip a redundant read + hash of the same object file.
    let mut verified: std::collections::HashSet<String> = std::collections::HashSet::new();
    for entry in manifest.entries() {
        // Directory entries have no backing object to verify.
        if entry.path_type == PathType::Directory {
            continue;
        }
        let checksum = &entry.checksum;
        if verified.contains(checksum) {
            continue;
        }
        let object = cache_dir.join(object_path(checksum));
        let bytes = std::fs::read(&object).map_err(|err| {
            if err.kind() == std::io::ErrorKind::NotFound {
                CacheError::ObjectNotFound {
                    checksum: checksum.clone(),
                }
            } else {
                CacheError::Io(err)
            }
        })?;
        let actual = hasher.hash_hex(&bytes);
        if &actual != checksum {
            return Err(CacheError::Integrity {
                expected: checksum.clone(),
                actual,
            });
        }
        verified.insert(checksum.clone());
    }
    Ok(())
}
/// Summary returned by [`verify_cache`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CacheReport {
/// Number of object files visited by the scan. This includes files that were
/// skipped because their path could not be turned back into a checksum.
pub checked: usize,
/// Expected checksums of objects whose bytes hash to a different value, sorted.
pub corrupt: Vec<String>,
/// Expected checksums of corrupt objects that were deleted (only filled when
/// purging was requested), sorted.
pub purged: Vec<String>,
}
impl CacheReport {
/// Returns `true` when the scan recorded no corrupt objects, i.e. `corrupt` is empty.
///
/// `checked` and `purged` are not consulted.
#[must_use]
pub fn is_clean(&self) -> bool {
self.corrupt.is_empty()
}
}
/// Scans every object stored under `cache_dir`/[`OBJECTS_DIR`] and checks its content hash.
///
/// The expected checksum of each object is rebuilt from its path relative to the
/// objects directory and compared with `hasher.hash_hex` of the file's bytes.
/// Mismatching objects are listed in [`CacheReport::corrupt`]; when `purge` is
/// `true` they are also deleted and listed in [`CacheReport::purged`]. Both
/// lists are sorted before returning.
///
/// If the objects directory is not a directory (for example, it does not
/// exist), an empty default report is returned.
///
/// Every collected file increments `checked`, including files whose path
/// cannot be converted to a checksum string; those are skipped without hashing.
///
/// # Errors
///
/// Returns [`CacheError::Io`] when listing directories, reading an object, or
/// removing a corrupt object fails. The scan stops at the first such error.
pub fn verify_cache(
    cache_dir: &Path,
    purge: bool,
    hasher: &dyn Hasher,
) -> Result<CacheReport, CacheError> {
    let objects_root = cache_dir.join(OBJECTS_DIR);
    let mut report = CacheReport::default();
    if !objects_root.is_dir() {
        return Ok(report);
    }

    for object in collect_objects(&objects_root)? {
        report.checked += 1;
        if let Some(expected) = expected_checksum_from_path(&objects_root, &object) {
            let actual = hasher.hash_hex(&std::fs::read(&object)?);
            if actual == expected {
                continue;
            }
            if purge {
                std::fs::remove_file(&object)?;
                report.purged.push(expected.clone());
            }
            report.corrupt.push(expected);
        }
    }

    report.corrupt.sort();
    report.purged.sort();
    Ok(report)
}
/// Lists every regular file stored exactly three directory levels below `objects_root`.
///
/// Walks `objects_root/<dir>/<dir>/<dir>/` and gathers the entries for which
/// `Path::is_file` is true. The returned paths are sorted.
///
/// # Errors
///
/// Returns [`CacheError::Io`] if any directory along the way cannot be listed.
fn collect_objects(objects_root: &Path) -> Result<Vec<PathBuf>, CacheError> {
    let mut files = Vec::new();
    for top in read_subdirs(objects_root)? {
        for middle in read_subdirs(&top)? {
            for leaf in read_subdirs(&middle)? {
                std::fs::read_dir(&leaf)?.try_for_each(|item| -> Result<(), CacheError> {
                    let candidate = item?.path();
                    if candidate.is_file() {
                        files.push(candidate);
                    }
                    Ok(())
                })?;
            }
        }
    }
    files.sort();
    Ok(files)
}
/// Returns the paths of the immediate children of `dir` for which `Path::is_dir` is true.
///
/// The result is in directory-listing order; it is not sorted here.
///
/// # Errors
///
/// Returns [`CacheError::Io`] if `dir` cannot be listed or an entry cannot be read.
fn read_subdirs(dir: &Path) -> Result<Vec<PathBuf>, CacheError> {
    let subdirs = std::fs::read_dir(dir)?
        .map(|entry| entry.map(|e| e.path()))
        // Errors pass the filter untouched so `collect` can stop at the first one.
        .filter(|candidate| candidate.as_ref().map_or(true, |path| path.is_dir()))
        .collect::<Result<Vec<PathBuf>, std::io::Error>>()?;
    Ok(subdirs)
}
/// Rebuilds an object's checksum by concatenating the components of its path
/// relative to `objects_root`.
///
/// Returns `None` when `object` does not live under `objects_root`, or when any
/// path component is not valid UTF-8.
fn expected_checksum_from_path(objects_root: &Path, object: &Path) -> Option<String> {
    object
        .strip_prefix(objects_root)
        .ok()?
        .components()
        .map(|part| part.as_os_str().to_str())
        .collect()
}
/// Removes everything inside `cache_dir` while leaving `cache_dir` itself in place.
///
/// Child directories are removed recursively, and anything else is removed as a
/// file. A `cache_dir` that does not exist is treated as already flushed.
///
/// # Errors
///
/// Returns [`CacheError::Io`] if the directory cannot be listed (for a reason
/// other than not existing) or if removing any entry fails. Entries removed
/// before the failure stay removed.
pub fn flush_cache(cache_dir: &Path) -> Result<(), CacheError> {
    let listing = match std::fs::read_dir(cache_dir) {
        Ok(listing) => listing,
        // Nothing to flush when the cache directory was never created.
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
        Err(err) => return Err(CacheError::Io(err)),
    };

    for item in listing {
        let target = item?.path();
        let removal = if target.is_dir() {
            std::fs::remove_dir_all(&target)
        } else {
            std::fs::remove_file(&target)
        };
        removal?;
    }
    Ok(())
}
// Unit tests for the cache checks. Every test works inside its own scratch
// directory on the real filesystem and hashes with `Blake3Hasher`.
#[cfg(test)]
mod tests {
use super::*;
use crate::manifest::ManifestEntry;
use crate::merkle::Blake3Hasher;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};
/// A per-test temporary directory that is deleted when the value is dropped.
struct Scratch {
path: PathBuf,
}
impl Scratch {
/// Creates a new directory under the system temp dir. The name combines the
/// process id with a process-wide counter so tests in one run do not collide.
fn new() -> Self {
static COUNTER: AtomicU64 = AtomicU64::new(0);
let n = COUNTER.fetch_add(1, Ordering::Relaxed);
let pid = std::process::id();
let base = std::env::temp_dir();
let path = base.join(format!("snapdir-cache-test-{pid}-{n}"));
fs::create_dir_all(&path).expect("create scratch dir");
Scratch { path }
}
/// Path of the scratch directory, used as the cache directory in tests.
fn path(&self) -> &Path {
&self.path
}
}
impl Drop for Scratch {
fn drop(&mut self) {
// Best-effort cleanup: a failed removal must not panic during drop.
let _ = fs::remove_dir_all(&self.path);
}
}
/// Writes `bytes` into the cache at the object path derived from their
/// Blake3 checksum and returns that checksum.
fn put_object(cache_dir: &Path, bytes: &[u8]) -> String {
let checksum = Blake3Hasher.hash_hex(bytes);
let path = cache_dir.join(object_path(&checksum));
fs::create_dir_all(path.parent().unwrap()).unwrap();
fs::write(&path, bytes).unwrap();
checksum
}
/// Writes `manifest` (via its `Display` output) to the manifest path for `id`.
fn put_manifest(cache_dir: &Path, id: &str, manifest: &Manifest) {
let path = cache_dir.join(manifest_path(id));
fs::create_dir_all(path.parent().unwrap()).unwrap();
fs::write(&path, format!("{manifest}")).unwrap();
}
/// Populates `cache_dir` with two objects ("foo\n" and "bar\n") plus a manifest
/// listing a root directory entry and both files.
///
/// Returns `(snapshot id, checksum of foo, checksum of bar)`.
fn build_clean_cache(cache_dir: &Path) -> (String, String, String) {
let foo = b"foo\n";
let bar = b"bar\n";
let foo_sum = put_object(cache_dir, foo);
let bar_sum = put_object(cache_dir, bar);
let mut manifest = Manifest::new();
// The directory entry's checksum is a placeholder with no object behind it;
// integrity checks skip directory entries.
manifest.push(ManifestEntry::new(
PathType::Directory,
"700",
"rootsum",
0,
"./",
));
manifest.push(ManifestEntry::new(
PathType::File,
"600",
&foo_sum,
foo.len() as u64,
"./foo",
));
manifest.push(ManifestEntry::new(
PathType::File,
"600",
&bar_sum,
bar.len() as u64,
"./bar",
));
let id = "cafef00dcafef00dcafef00dcafef00dcafef00dcafef00dcafef00dcafef00d".to_string();
put_manifest(cache_dir, &id, &manifest);
(id, foo_sum, bar_sum)
}
// An untouched cache passes both the manifest-driven check and the full scan.
#[test]
fn cache_clean_passes_integrity_and_verify() {
let tmp = Scratch::new();
let (id, _foo, _bar) = build_clean_cache(tmp.path());
check_snapshot_integrity(tmp.path(), &id, &Blake3Hasher).expect("clean cache passes");
let report = verify_cache(tmp.path(), false, &Blake3Hasher).unwrap();
assert_eq!(report.checked, 2, "two objects scanned");
assert!(report.is_clean(), "no corruption: {report:?}");
assert!(report.purged.is_empty());
}
// Overwriting an object's bytes is reported by both checks; without the
// purge flag the corrupt file stays on disk.
#[test]
fn cache_tampered_object_detected_by_both_checks() {
let tmp = Scratch::new();
let (id, foo_sum, _bar) = build_clean_cache(tmp.path());
let foo_path = tmp.path().join(object_path(&foo_sum));
fs::write(&foo_path, b"TAMPERED").unwrap();
match check_snapshot_integrity(tmp.path(), &id, &Blake3Hasher) {
Err(CacheError::Integrity { expected, .. }) => assert_eq!(expected, foo_sum),
other => panic!("expected Integrity error, got {other:?}"),
}
let report = verify_cache(tmp.path(), false, &Blake3Hasher).unwrap();
assert_eq!(report.checked, 2);
assert_eq!(report.corrupt, vec![foo_sum.clone()]);
assert!(report.purged.is_empty(), "no purge without flag");
assert!(!report.is_clean());
assert!(foo_path.exists());
}
// With purge enabled only the corrupt object is deleted, and a second scan
// sees one remaining, clean object.
#[test]
fn cache_purge_removes_only_corrupt_object() {
let tmp = Scratch::new();
let (_id, foo_sum, bar_sum) = build_clean_cache(tmp.path());
let foo_path = tmp.path().join(object_path(&foo_sum));
let bar_path = tmp.path().join(object_path(&bar_sum));
fs::write(&foo_path, b"TAMPERED").unwrap();
let report = verify_cache(tmp.path(), true, &Blake3Hasher).unwrap();
assert_eq!(report.checked, 2);
assert_eq!(report.corrupt, vec![foo_sum.clone()]);
assert_eq!(report.purged, vec![foo_sum]);
assert!(!foo_path.exists(), "corrupt object purged");
assert!(bar_path.exists(), "clean object kept");
let rescan = verify_cache(tmp.path(), false, &Blake3Hasher).unwrap();
assert_eq!(rescan.checked, 1);
assert!(rescan.is_clean());
}
// A snapshot id with no manifest file maps to `ManifestNotFound` carrying that id.
#[test]
fn cache_missing_manifest_yields_not_found() {
let tmp = Scratch::new();
let id = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
match check_snapshot_integrity(tmp.path(), id, &Blake3Hasher) {
Err(CacheError::ManifestNotFound { id: got }) => assert_eq!(got, id),
other => panic!("expected ManifestNotFound, got {other:?}"),
}
}
// Deleting an object referenced by the manifest maps to `ObjectNotFound`
// carrying the missing checksum.
#[test]
fn cache_missing_object_yields_not_found() {
let tmp = Scratch::new();
let (id, foo_sum, _bar) = build_clean_cache(tmp.path());
fs::remove_file(tmp.path().join(object_path(&foo_sum))).unwrap();
match check_snapshot_integrity(tmp.path(), &id, &Blake3Hasher) {
Err(CacheError::ObjectNotFound { checksum }) => assert_eq!(checksum, foo_sum),
other => panic!("expected ObjectNotFound, got {other:?}"),
}
}
// A manifest holding only a directory entry passes even though no object
// exists for its checksum.
#[test]
fn cache_directory_lines_excluded_from_integrity() {
let tmp = Scratch::new();
let mut manifest = Manifest::new();
manifest.push(ManifestEntry::new(
PathType::Directory,
"700",
"deadbeef",
0,
"./",
));
let id = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
put_manifest(tmp.path(), id, &manifest);
check_snapshot_integrity(tmp.path(), id, &Blake3Hasher)
.expect("directory-only manifest passes");
}
// Both a missing objects directory and an empty one produce a clean report
// with zero objects checked.
#[test]
fn cache_empty_or_absent_objects_dir_is_clean_pass() {
let tmp = Scratch::new();
let report = verify_cache(tmp.path(), false, &Blake3Hasher).unwrap();
assert_eq!(report, CacheReport::default());
assert!(report.is_clean());
assert_eq!(report.checked, 0);
fs::create_dir_all(tmp.path().join(OBJECTS_DIR)).unwrap();
let report = verify_cache(tmp.path(), false, &Blake3Hasher).unwrap();
assert_eq!(report.checked, 0);
assert!(report.is_clean());
}
// Round trip: the checksum rebuilt from an object's on-disk path equals the
// checksum that `object_path` was given.
#[test]
fn cache_verify_reconstructs_expected_checksum_from_path() {
let tmp = Scratch::new();
let checksum = put_object(tmp.path(), b"hello cache\n");
let objects_root = tmp.path().join(OBJECTS_DIR);
let object = tmp.path().join(object_path(&checksum));
let got = expected_checksum_from_path(&objects_root, &object).unwrap();
assert_eq!(got, checksum);
}
// Flushing removes both the objects and manifests directories but keeps the
// (now empty) cache directory itself.
#[test]
fn cache_flush_empties_objects_and_manifests() {
let tmp = Scratch::new();
let (_id, _foo, _bar) = build_clean_cache(tmp.path());
assert!(tmp.path().join(OBJECTS_DIR).exists());
assert!(tmp.path().join(MANIFESTS_DIR_TEST).exists());
flush_cache(tmp.path()).expect("flush succeeds");
assert!(!tmp.path().join(OBJECTS_DIR).exists());
assert!(!tmp.path().join(MANIFESTS_DIR_TEST).exists());
assert!(tmp.path().is_dir());
assert_eq!(fs::read_dir(tmp.path()).unwrap().count(), 0);
}
// Flushing a directory that does not exist succeeds without doing anything.
#[test]
fn cache_flush_is_idempotent_on_missing_dir() {
let tmp = Scratch::new();
let missing = tmp.path().join("does-not-exist");
flush_cache(&missing).expect("flush on missing dir is a no-op");
}
// Directory name the flush test expects manifests to be stored under.
// NOTE(review): hard-coded here; presumably mirrors the directory used by
// `manifest_path` in `crate::store` — confirm it stays in sync.
const MANIFESTS_DIR_TEST: &str = ".manifests";
}