use std::collections::BTreeMap;
use std::path::Path;
use haz_vfs::{EntryKind, Filesystem, FsError};
use snafu::{ResultExt, Snafu};
use crate::layout;
use crate::manifest::{HashFunctionLabel, Manifest};
use crate::reader::CacheReader;
/// Error produced while walking the cache to build a [`CacheInfoReport`].
#[derive(Debug, Snafu)]
pub enum CacheInfoError {
/// A filesystem operation failed during the walk.
#[snafu(display("filesystem error during cache info walk: {source}"))]
Io {
/// The underlying filesystem error that interrupted the walk.
source: FsError,
},
}
/// Key used to group entries in [`CacheInfoReport::by_schema`]: the manifest's
/// `chapter_revision` paired with its `hash_function` label.
pub type SchemaPrefix = (u8, HashFunctionLabel);
/// Summary of a cache root produced by `CacheReader::info`.
///
/// The `Default` value (all counters zero, empty map) is what is reported when
/// the cache root does not exist.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct CacheInfoReport {
/// Entries whose manifest was read and parsed, and whose chapter revision and
/// hash function both passed the reader's checks.
pub well_formed_entries: u64,
/// Entries whose manifest is missing, is not a file, fails to parse, or does
/// not pass the chapter-revision / hash-function checks.
pub corrupt_entries: u64,
/// Count of `.tmp-`-prefixed staging directories found inside shard directories.
pub orphan_tmp_dirs: u64,
/// Count of `.restore-`-prefixed staging directories found directly under the
/// cache root.
pub orphan_restore_dirs: u64,
/// Saturating sum of the sizes of all regular files seen during the walk.
pub total_bytes: u64,
/// Number of well-formed entries per `(chapter_revision, hash_function)` pair.
pub by_schema: BTreeMap<SchemaPrefix, u64>,
}
impl<Fs: Filesystem> CacheReader<Fs> {
    /// Walks the cache root and summarises its contents without modifying it.
    ///
    /// Only `read_dir` and `read` are issued against the filesystem. Top-level
    /// entries are handled as follows:
    ///
    /// * directories whose name starts with `.restore-` are counted as orphaned
    ///   restore staging directories and their file sizes are added to
    ///   `total_bytes`;
    /// * every other directory is treated as a shard and walked;
    /// * regular files contribute their size to `total_bytes`;
    /// * any other entry kind is ignored.
    ///
    /// A missing cache root yields the default (all-zero) report.
    ///
    /// # Errors
    ///
    /// Returns [`CacheInfoError::Io`] when a directory listing fails, or when a
    /// manifest read fails for a reason other than the manifest being absent
    /// or not a file.
    pub fn info(&self) -> Result<CacheInfoReport, CacheInfoError> {
        let mut report = CacheInfoReport::default();
        let cache_entries = match self.fs().read_dir(self.cache_root()) {
            Ok(es) => es,
            // A cache that was never populated is reported as empty, not as an error.
            Err(FsError::NotFound { .. }) => return Ok(report),
            Err(e) => return Err(CacheInfoError::Io { source: e }),
        };
        for entry in cache_entries {
            match entry.metadata.kind {
                // Only directories can be restore staging areas. A stray file that
                // happens to carry the prefix must not be handed to `read_dir`,
                // which would abort the whole report; it is accounted as a file.
                EntryKind::Dir if Self::file_name_starts_with(&entry.path, ".restore-") => {
                    report.orphan_restore_dirs += 1;
                    report.total_bytes = report
                        .total_bytes
                        .saturating_add(self.sum_recursive(&entry.path)?);
                }
                EntryKind::Dir => self.walk_shard(&entry.path, &mut report)?,
                EntryKind::File => {
                    report.total_bytes = report.total_bytes.saturating_add(entry.metadata.size);
                }
                _ => {}
            }
        }
        Ok(report)
    }

    /// Walks one shard directory, folding its contents into `report`.
    ///
    /// Directories whose name starts with `.tmp-` are counted as orphaned
    /// temporary directories; every other directory is classified as a cache
    /// entry; regular files only contribute their size.
    fn walk_shard(
        &self,
        shard_dir: &Path,
        report: &mut CacheInfoReport,
    ) -> Result<(), CacheInfoError> {
        for shard_entry in self.fs().read_dir(shard_dir).context(IoSnafu)? {
            match shard_entry.metadata.kind {
                // Same reasoning as for `.restore-` in `info`: only a directory is a
                // staging area; a prefixed file is just a file.
                EntryKind::Dir if Self::file_name_starts_with(&shard_entry.path, ".tmp-") => {
                    report.orphan_tmp_dirs += 1;
                    report.total_bytes = report
                        .total_bytes
                        .saturating_add(self.sum_recursive(&shard_entry.path)?);
                }
                EntryKind::Dir => self.classify_entry(&shard_entry.path, report)?,
                EntryKind::File => {
                    report.total_bytes =
                        report.total_bytes.saturating_add(shard_entry.metadata.size);
                }
                _ => {}
            }
        }
        Ok(())
    }

    /// Classifies a single entry directory as well-formed or corrupt.
    ///
    /// The entry's file sizes are always added to `total_bytes`. The entry is
    /// counted as corrupt when its manifest is missing, is not a file, cannot
    /// be parsed, or fails either the chapter-revision check or the comparison
    /// of its hash function against this reader's hash algorithm. Otherwise it
    /// is counted as well-formed and tallied in `by_schema`.
    fn classify_entry(
        &self,
        entry_dir: &Path,
        report: &mut CacheInfoReport,
    ) -> Result<(), CacheInfoError> {
        // Bytes are accounted first so corrupt entries still show up in the total.
        report.total_bytes = report
            .total_bytes
            .saturating_add(self.sum_recursive(entry_dir)?);

        let manifest_path = entry_dir.join(layout::MANIFEST_FILE_NAME);
        let bytes = match self.fs().read(&manifest_path) {
            Ok(b) => b,
            Err(FsError::NotFound { .. } | FsError::NotAFile { .. }) => {
                report.corrupt_entries += 1;
                return Ok(());
            }
            Err(e) => return Err(CacheInfoError::Io { source: e }),
        };
        let Ok(manifest) = Manifest::from_json(&bytes) else {
            report.corrupt_entries += 1;
            return Ok(());
        };

        let chapter_ok = manifest.current_chapter_revision_matches();
        let hash_ok = HashFunctionLabel::from(self.hash_algo()) == manifest.hash_function;
        if chapter_ok && hash_ok {
            report.well_formed_entries += 1;
            let schema_key: SchemaPrefix = (manifest.chapter_revision, manifest.hash_function);
            *report.by_schema.entry(schema_key).or_insert(0) += 1;
        } else {
            report.corrupt_entries += 1;
        }
        Ok(())
    }

    /// Returns the saturating sum of the sizes of all regular files beneath
    /// `path`, recursing into sub-directories. Other entry kinds add nothing.
    fn sum_recursive(&self, path: &Path) -> Result<u64, CacheInfoError> {
        let mut sum = 0u64;
        for entry in self.fs().read_dir(path).context(IoSnafu)? {
            match entry.metadata.kind {
                EntryKind::File => sum = sum.saturating_add(entry.metadata.size),
                EntryKind::Dir => sum = sum.saturating_add(self.sum_recursive(&entry.path)?),
                _ => {}
            }
        }
        Ok(sum)
    }

    /// Reports whether the final component of `path` starts with `prefix`.
    ///
    /// The name is compared after lossy UTF-8 conversion; a path with no final
    /// component never matches.
    fn file_name_starts_with(path: &Path, prefix: &str) -> bool {
        match path.file_name() {
            Some(name) => name.to_string_lossy().starts_with(prefix),
            None => false,
        }
    }
}
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;
    use std::path::Path;
    use haz_domain::path::CanonicalPath;
    use haz_domain::settings::cache::HashAlgo;
    use haz_vfs::{Filesystem, WritableFilesystem};
    use haz_vfs_testing::MemFilesystem;
    use crate::info::{CacheInfoReport, SchemaPrefix};
    use crate::key::CacheKey;
    use crate::key::prefix::CHAPTER_REVISION;
    use crate::layout;
    use crate::manifest::{HashFunctionLabel, Manifest, OutputBlob};
    use crate::store::{StoreInputs, StoredOutput};
    use crate::writer::CacheWriter;

    /// Workspace root used by every test in this module.
    const WORKSPACE_ROOT: &str = "/ws";

    /// Parses a workspace-absolute path, panicking on invalid input.
    fn cp(s: &str) -> CanonicalPath {
        CanonicalPath::parse_workspace_absolute(s)
            .expect("test helper expects a valid workspace-absolute path")
    }

    /// Builds a cache writer rooted at the test workspace.
    fn make_cache(fs: MemFilesystem, algo: HashAlgo) -> CacheWriter<MemFilesystem> {
        CacheWriter::new(fs, Path::new(WORKSPACE_ROOT), algo)
    }

    /// Builds a Blake3 cache over a fresh in-memory filesystem that contains
    /// only the workspace directory.
    fn blake3_cache() -> CacheWriter<MemFilesystem> {
        let mut mem = MemFilesystem::new();
        mem.add_dir("/ws").unwrap();
        make_cache(mem, HashAlgo::Blake3)
    }

    /// Runs `info()` through the cache's reader and unwraps the report.
    fn info_report(cache: &CacheWriter<MemFilesystem>) -> CacheInfoReport {
        cache.reader().info().unwrap()
    }

    /// Returns a key that is all zeroes except for its first byte.
    fn key_with_first_byte(first: u8) -> CacheKey {
        let mut raw = [0u8; 32];
        raw[0] = first;
        CacheKey::from_bytes(raw)
    }

    /// Writes `bytes` to `rel` under the workspace and stores it as the single
    /// output of a cache entry for `key`.
    fn store_a_valid_entry(
        cache: &CacheWriter<MemFilesystem>,
        key: &CacheKey,
        rel: &str,
        bytes: &[u8],
    ) {
        let on_disk = Path::new(WORKSPACE_ROOT).join(rel);
        let workspace_path = format!("/{rel}");
        cache.fs().create_dir_all(on_disk.parent().unwrap()).unwrap();
        cache.fs().write_file(&on_disk, bytes).unwrap();
        let outputs = [StoredOutput {
            workspace_absolute_path: &workspace_path,
            on_disk_path: &on_disk,
            mode: 0o644,
        }];
        let inputs = StoreInputs {
            outputs: &outputs,
            stdout: b"",
            stderr: b"",
            created_at_unix: 0,
        };
        cache.store(key, &inputs).unwrap();
    }

    /// Creates the entry directory for `key` and writes `manifest` into it,
    /// bypassing the normal store path.
    fn write_manifest_to_entry(
        cache: &CacheWriter<MemFilesystem>,
        key: &CacheKey,
        manifest: &Manifest,
    ) {
        let fs = cache.fs();
        fs.create_dir_all(&layout::entry_dir(cache.cache_root(), key))
            .unwrap();
        fs.write_file(
            &layout::manifest_path(cache.cache_root(), key),
            &manifest.to_json_bytes(),
        )
        .unwrap();
    }

    /// Builds a manifest labelled Sha256 at the current chapter revision, so it
    /// mismatches a Blake3 reader's hash function.
    fn sha256_manifest(key: CacheKey, outputs: Vec<OutputBlob>) -> Manifest {
        Manifest {
            chapter_revision: CHAPTER_REVISION,
            hash_function: HashFunctionLabel::Sha256,
            key,
            outputs,
            stdout_len: 0,
            stderr_len: 0,
            stdout_hash: [0u8; 32],
            stderr_hash: [0u8; 32],
            exit_status: 0,
            created_at_unix: 0,
        }
    }

    /// Schema prefix of entries written by the Blake3 test cache.
    fn schema_blake3_current() -> SchemaPrefix {
        (CHAPTER_REVISION, HashFunctionLabel::Blake3)
    }

    /// Expected `by_schema` map holding `count` entries under the current
    /// Blake3 schema prefix.
    fn blake3_schema_counts(count: u64) -> BTreeMap<SchemaPrefix, u64> {
        let mut expected = BTreeMap::new();
        expected.insert(schema_blake3_current(), count);
        expected
    }

    /// A cache root that was never created reports all zeroes.
    #[test]
    fn aux_019_info_on_absent_cache_root_reports_zero() {
        let cache = blake3_cache();
        assert_eq!(info_report(&cache), CacheInfoReport::default());
    }

    /// One stored entry is counted as well-formed and tallied by schema.
    #[test]
    fn aux_019_info_counts_one_well_formed_entry() {
        let cache = blake3_cache();
        store_a_valid_entry(&cache, &key_with_first_byte(0xAB), "proj/out", b"hello");

        let report = info_report(&cache);

        assert_eq!(report.well_formed_entries, 1);
        assert_eq!(report.corrupt_entries, 0);
        assert_eq!(report.orphan_tmp_dirs, 0);
        assert_eq!(report.orphan_restore_dirs, 0);
        assert_eq!(report.by_schema, blake3_schema_counts(1));
        assert!(
            report.total_bytes >= 5,
            "expected at least 5 bytes for the `hello` blob, got {}",
            report.total_bytes,
        );
    }

    /// An entry directory with no manifest is corrupt.
    #[test]
    fn aux_019_info_counts_entry_without_a_manifest_as_corrupt() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);
        cache
            .fs()
            .create_dir_all(&layout::entry_dir(cache.cache_root(), &key))
            .unwrap();

        let report = info_report(&cache);

        assert_eq!(report.corrupt_entries, 1);
        assert_eq!(report.well_formed_entries, 0);
        assert!(report.by_schema.is_empty());
    }

    /// An entry whose manifest is not valid JSON is corrupt.
    #[test]
    fn aux_019_info_counts_entry_with_unparseable_manifest_as_corrupt() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);
        let fs = cache.fs();
        fs.create_dir_all(&layout::entry_dir(cache.cache_root(), &key))
            .unwrap();
        fs.write_file(
            &layout::manifest_path(cache.cache_root(), &key),
            b"this is not json",
        )
        .unwrap();

        let report = info_report(&cache);

        assert_eq!(report.corrupt_entries, 1);
        assert_eq!(report.well_formed_entries, 0);
        assert!(report.by_schema.is_empty());
    }

    /// A manifest labelled with a different hash function is corrupt.
    #[test]
    fn aux_019_info_counts_schema_mismatched_entry_as_corrupt() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);
        write_manifest_to_entry(&cache, &key, &sha256_manifest(key, vec![]));

        let report = info_report(&cache);

        assert_eq!(report.corrupt_entries, 1);
        assert_eq!(report.well_formed_entries, 0);
        assert!(report.by_schema.is_empty());
    }

    /// A leftover temporary entry directory is counted as an orphan and its
    /// bytes are included in the total.
    #[test]
    fn aux_019_info_counts_orphan_tmp_directory() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);
        let tmp = layout::tmp_entry_dir(cache.cache_root(), &key, "abcdef");
        cache.fs().create_dir_all(&tmp).unwrap();
        cache
            .fs()
            .write_file(&tmp.join("partial.bin"), &[0u8; 17])
            .unwrap();

        let report = info_report(&cache);

        assert_eq!(report.orphan_tmp_dirs, 1);
        assert_eq!(report.well_formed_entries, 0);
        assert_eq!(report.corrupt_entries, 0);
        assert!(report.total_bytes >= 17);
    }

    /// A leftover restore staging directory is counted as an orphan and its
    /// bytes are included in the total.
    #[test]
    fn aux_019_info_counts_orphan_restore_directory() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);
        let staging = layout::restore_staging_dir(cache.cache_root(), &key, "feedface");
        cache.fs().create_dir_all(&staging).unwrap();
        cache
            .fs()
            .write_file(&staging.join("leftover.bin"), &[0u8; 9])
            .unwrap();

        let report = info_report(&cache);

        assert_eq!(report.orphan_restore_dirs, 1);
        assert_eq!(report.orphan_tmp_dirs, 0);
        assert!(report.total_bytes >= 9);
    }

    /// Two stored entries land under the same schema prefix.
    #[test]
    fn aux_019_info_breaks_down_by_schema_prefix() {
        let cache = blake3_cache();
        store_a_valid_entry(&cache, &key_with_first_byte(0xAA), "proj/out_a", b"x");
        store_a_valid_entry(&cache, &key_with_first_byte(0xBB), "proj/out_b", b"y");

        let report = info_report(&cache);

        assert_eq!(report.well_formed_entries, 2);
        assert_eq!(report.by_schema, blake3_schema_counts(2));
    }

    /// One of each category is classified independently of the others.
    #[test]
    fn aux_019_info_classifies_mixed_state_correctly() {
        let cache = blake3_cache();

        store_a_valid_entry(&cache, &key_with_first_byte(0xAB), "proj/out", b"x");

        let key_stale = key_with_first_byte(0xCD);
        write_manifest_to_entry(&cache, &key_stale, &sha256_manifest(key_stale, vec![]));

        let key_tmp = key_with_first_byte(0xEF);
        let tmp = layout::tmp_entry_dir(cache.cache_root(), &key_tmp, "rnd1");
        cache.fs().create_dir_all(&tmp).unwrap();

        let key_restore = key_with_first_byte(0x12);
        let staging = layout::restore_staging_dir(cache.cache_root(), &key_restore, "rnd2");
        cache.fs().create_dir_all(&staging).unwrap();

        let report = info_report(&cache);

        assert_eq!(report.well_formed_entries, 1);
        assert_eq!(report.corrupt_entries, 1);
        assert_eq!(report.orphan_tmp_dirs, 1);
        assert_eq!(report.orphan_restore_dirs, 1);
        assert_eq!(report.by_schema, blake3_schema_counts(1));
    }

    /// The total covers at least the combined size of the stored blobs.
    #[test]
    fn aux_019_info_total_bytes_sums_blob_sizes() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);
        let small = Path::new(WORKSPACE_ROOT).join("proj/small.txt");
        let larger = Path::new(WORKSPACE_ROOT).join("proj/larger.txt");
        cache.fs().create_dir_all(small.parent().unwrap()).unwrap();
        cache.fs().write_file(&small, b"hello").unwrap();
        cache.fs().write_file(&larger, b"helloXX").unwrap();
        let outputs = [
            StoredOutput {
                workspace_absolute_path: "/proj/small.txt",
                on_disk_path: &small,
                mode: 0o644,
            },
            StoredOutput {
                workspace_absolute_path: "/proj/larger.txt",
                on_disk_path: &larger,
                mode: 0o644,
            },
        ];
        let inputs = StoreInputs {
            outputs: &outputs,
            stdout: b"",
            stderr: b"",
            created_at_unix: 0,
        };
        cache.store(&key, &inputs).unwrap();

        let report = info_report(&cache);

        assert!(
            report.total_bytes >= 12,
            "expected at least 12 bytes for the two blobs; got {}",
            report.total_bytes,
        );
        assert_eq!(report.well_formed_entries, 1);
    }

    /// `info()` leaves every file under the cache root byte-for-byte unchanged.
    #[test]
    fn aux_018_info_does_not_mutate_the_cache_root() {
        let cache = blake3_cache();

        store_a_valid_entry(&cache, &key_with_first_byte(0xAB), "proj/out", b"x");

        let key_stale = key_with_first_byte(0xCD);
        let missing_output = OutputBlob {
            workspace_absolute_path: cp("/proj/missing"),
            content_hash: [0u8; 32],
            size: 0,
            mode: 0o644,
        };
        write_manifest_to_entry(
            &cache,
            &key_stale,
            &sha256_manifest(key_stale, vec![missing_output]),
        );

        let tmp = layout::tmp_entry_dir(cache.cache_root(), &key_with_first_byte(0xEF), "r1");
        cache.fs().create_dir_all(&tmp).unwrap();
        cache.fs().write_file(&tmp.join("x"), b"y").unwrap();

        let staging =
            layout::restore_staging_dir(cache.cache_root(), &key_with_first_byte(0x12), "r2");
        cache.fs().create_dir_all(&staging).unwrap();

        let before = snapshot_cache(&cache);
        cache.reader().info().unwrap();
        let after = snapshot_cache(&cache);

        assert_eq!(
            before, after,
            "cache root state must not change under info()",
        );
    }

    /// Captures every file under the cache root as a path-to-contents map.
    fn snapshot_cache(cache: &CacheWriter<MemFilesystem>) -> BTreeMap<String, Vec<u8>> {
        let mut files = BTreeMap::new();
        snapshot_into(cache, cache.cache_root(), &mut files);
        files
    }

    /// Recursively records the files beneath `path` into `out`. Directories
    /// that cannot be listed and files that cannot be read are tolerated: the
    /// former are skipped, the latter recorded with empty contents.
    fn snapshot_into(
        cache: &CacheWriter<MemFilesystem>,
        path: &Path,
        out: &mut BTreeMap<String, Vec<u8>>,
    ) {
        use haz_vfs::EntryKind;

        if let Ok(children) = cache.fs().read_dir(path) {
            for child in children {
                match child.metadata.kind {
                    EntryKind::Dir => snapshot_into(cache, &child.path, out),
                    EntryKind::File => {
                        let contents = cache.fs().read(&child.path).unwrap_or_default();
                        out.insert(child.path.to_string_lossy().into_owned(), contents);
                    }
                    _ => {}
                }
            }
        }
    }
}