use haz_vfs::{EntryKind, Filesystem, FsError, FsMetadata};
use snafu::Snafu;
use crate::key::CacheKey;
use crate::layout;
use crate::manifest::{HashFunctionLabel, Manifest};
use crate::reader::CacheReader;
/// Error returned by `CacheReader::lookup_status`.
#[derive(Debug, Snafu)]
pub enum CacheLookupError {
/// A filesystem call made during the lookup failed with an error other
/// than `FsError::NotFound` / `FsError::NotAFile`; those two are reported
/// as misses instead of errors.
#[snafu(display("filesystem error during cache lookup: {source}"))]
Io {
/// The underlying filesystem error.
source: FsError,
},
}
/// Outcome of `CacheReader::lookup_status` when no filesystem error occurred.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CacheLookupStatus {
/// The manifest parsed, matched the expected schema, and every referenced
/// file was found with the recorded size. Carries the parsed manifest.
Hit(Manifest),
/// The manifest could not be read because it was not found or was not a file.
MissNoEntry,
/// The manifest parsed, but its chapter revision or hash-function label
/// did not match what the reader expects.
MissSchemaMismatch,
/// The manifest failed to parse, or an output blob / stdout / stderr file
/// was missing, was not a regular file, or had a different size than the
/// manifest records.
MissCorruptEntry,
}
impl<Fs: Filesystem> CacheReader<Fs> {
#[must_use]
pub fn lookup(&self, key: &CacheKey) -> Option<Manifest> {
let manifest_path = layout::manifest_path(self.cache_root(), key);
let bytes = self.fs().read(&manifest_path).ok()?;
let manifest = Manifest::from_json(&bytes).ok()?;
if !manifest.current_chapter_revision_matches() {
return None;
}
if HashFunctionLabel::from(self.hash_algo()) != manifest.hash_function {
return None;
}
for blob in &manifest.outputs {
let blob_path = layout::output_blob_path(self.cache_root(), key, &blob.content_hash);
let m = self.fs().metadata(&blob_path).ok()?;
if m.kind != EntryKind::File || m.size != blob.size {
return None;
}
}
let stdout_m = self
.fs()
.metadata(&layout::stdout_path(self.cache_root(), key))
.ok()?;
if stdout_m.kind != EntryKind::File || stdout_m.size != manifest.stdout_len {
return None;
}
let stderr_m = self
.fs()
.metadata(&layout::stderr_path(self.cache_root(), key))
.ok()?;
if stderr_m.kind != EntryKind::File || stderr_m.size != manifest.stderr_len {
return None;
}
Some(manifest)
}
pub fn lookup_status(&self, key: &CacheKey) -> Result<CacheLookupStatus, CacheLookupError> {
let manifest_path = layout::manifest_path(self.cache_root(), key);
let bytes = match self.fs().read(&manifest_path) {
Ok(b) => b,
Err(FsError::NotFound { .. } | FsError::NotAFile { .. }) => {
return Ok(CacheLookupStatus::MissNoEntry);
}
Err(source) => return Err(CacheLookupError::Io { source }),
};
let Ok(manifest) = Manifest::from_json(&bytes) else {
return Ok(CacheLookupStatus::MissCorruptEntry);
};
if !manifest.current_chapter_revision_matches()
|| HashFunctionLabel::from(self.hash_algo()) != manifest.hash_function
{
return Ok(CacheLookupStatus::MissSchemaMismatch);
}
for blob in &manifest.outputs {
let blob_path = layout::output_blob_path(self.cache_root(), key, &blob.content_hash);
let Some(m) = self.metadata_for_lookup(&blob_path)? else {
return Ok(CacheLookupStatus::MissCorruptEntry);
};
if m.kind != EntryKind::File || m.size != blob.size {
return Ok(CacheLookupStatus::MissCorruptEntry);
}
}
let stdout_path = layout::stdout_path(self.cache_root(), key);
let Some(stdout_m) = self.metadata_for_lookup(&stdout_path)? else {
return Ok(CacheLookupStatus::MissCorruptEntry);
};
if stdout_m.kind != EntryKind::File || stdout_m.size != manifest.stdout_len {
return Ok(CacheLookupStatus::MissCorruptEntry);
}
let stderr_path = layout::stderr_path(self.cache_root(), key);
let Some(stderr_m) = self.metadata_for_lookup(&stderr_path)? else {
return Ok(CacheLookupStatus::MissCorruptEntry);
};
if stderr_m.kind != EntryKind::File || stderr_m.size != manifest.stderr_len {
return Ok(CacheLookupStatus::MissCorruptEntry);
}
Ok(CacheLookupStatus::Hit(manifest))
}
fn metadata_for_lookup(
&self,
path: &std::path::Path,
) -> Result<Option<FsMetadata>, CacheLookupError> {
match self.fs().metadata(path) {
Ok(m) => Ok(Some(m)),
Err(FsError::NotFound { .. } | FsError::NotAFile { .. }) => Ok(None),
Err(source) => Err(CacheLookupError::Io { source }),
}
}
}
#[cfg(test)]
mod tests {
use std::path::Path;
use haz_domain::path::CanonicalPath;
use haz_domain::settings::cache::HashAlgo;
use haz_vfs::WritableFilesystem;
use haz_vfs_testing::MemFilesystem;
use crate::hasher::Hasher;
use crate::key::CacheKey;
use crate::key::prefix::CHAPTER_REVISION;
use crate::layout;
use crate::manifest::{HashFunctionLabel, Manifest, OutputBlob};
use crate::reader::CacheReader;
use crate::writer::CacheWriter;
/// Test helper: parses `s` with `CanonicalPath::parse_workspace_absolute`
/// and panics if the parse does not succeed.
fn cp(s: &str) -> CanonicalPath {
    let parsed = CanonicalPath::parse_workspace_absolute(s);
    parsed.expect("test helper expects a valid workspace-absolute path")
}
/// Path passed as the second argument to `CacheReader::new` and
/// `CacheWriter::new` throughout these tests.
const WORKSPACE_ROOT: &str = "/ws";
/// Builds the fixed key used by most tests: bytes `0xAB, 0xCD` followed by
/// thirty zero bytes.
fn sample_key() -> CacheKey {
    let mut raw = [0u8; 32];
    raw[..2].copy_from_slice(&[0xAB, 0xCD]);
    CacheKey::from_bytes(raw)
}
/// Feeds `data` through a fresh `Hasher` for `algo` and returns the digest.
fn hash_bytes(algo: HashAlgo, data: &[u8]) -> [u8; 32] {
    let mut hasher = Hasher::new(algo);
    hasher.update(data);
    hasher.finalize()
}
/// Writes a complete entry for `key` under `cache_root` and returns the
/// manifest that was stored.
///
/// The entry has one output blob plus stdout and stderr files. Every size
/// and hash recorded in the manifest is computed from the bytes written.
fn write_valid_entry(
    fs: &MemFilesystem,
    cache_root: &Path,
    key: &CacheKey,
    algo: HashAlgo,
) -> Manifest {
    let stdout_body: &[u8] = b"stdout-body";
    let stderr_body: &[u8] = b"stderr-body";
    let blob_body: &[u8] = b"blob-body";
    let blob_hash = hash_bytes(algo, blob_body);

    #[allow(clippy::cast_possible_truncation)]
    let blob_size = blob_body.len() as u64;
    #[allow(clippy::cast_possible_truncation)]
    let stdout_len = stdout_body.len() as u64;
    #[allow(clippy::cast_possible_truncation)]
    let stderr_len = stderr_body.len() as u64;

    let output = OutputBlob {
        workspace_absolute_path: cp("/proj/out"),
        content_hash: blob_hash,
        size: blob_size,
        mode: 0o644,
    };
    let manifest = Manifest {
        chapter_revision: CHAPTER_REVISION,
        hash_function: HashFunctionLabel::from(algo),
        key: *key,
        outputs: vec![output],
        stdout_len,
        stderr_len,
        stdout_hash: hash_bytes(algo, stdout_body),
        stderr_hash: hash_bytes(algo, stderr_body),
        exit_status: 0,
        created_at_unix: 1_715_700_000,
    };

    // Create the outputs directory first, then write manifest, stdout,
    // stderr and finally the blob.
    let outputs_dir = layout::outputs_dir(cache_root, key);
    fs.create_dir_all(&outputs_dir).unwrap();

    let manifest_path = layout::manifest_path(cache_root, key);
    fs.write_file(&manifest_path, &manifest.to_json_bytes())
        .unwrap();

    let stdout_path = layout::stdout_path(cache_root, key);
    fs.write_file(&stdout_path, stdout_body).unwrap();

    let stderr_path = layout::stderr_path(cache_root, key);
    fs.write_file(&stderr_path, stderr_body).unwrap();

    let blob_path = layout::output_blob_path(cache_root, key, &blob_hash);
    fs.write_file(&blob_path, blob_body).unwrap();

    manifest
}
/// Returns a reader over a brand-new in-memory filesystem, paired with the
/// sample key.
fn fresh_cache(algo: HashAlgo) -> (CacheReader<MemFilesystem>, CacheKey) {
    let reader = CacheReader::new(MemFilesystem::new(), Path::new(WORKSPACE_ROOT), algo);
    (reader, sample_key())
}
/// An entry written through a writer's filesystem is found by the reader
/// obtained from that same writer.
#[test]
fn writer_lookup_through_reader_matches_direct_reader() {
    let writer = CacheWriter::new(
        MemFilesystem::new(),
        Path::new(WORKSPACE_ROOT),
        HashAlgo::Blake3,
    );
    let key = sample_key();
    write_valid_entry(writer.fs(), writer.cache_root(), &key, HashAlgo::Blake3);

    let found = writer.reader().lookup(&key);
    assert!(found.is_some());
}
/// A fully valid entry is a hit, and the returned manifest equals the one
/// that was written.
#[test]
fn cache_015_hit_returns_manifest() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let written = write_valid_entry(cache.fs(), cache.cache_root(), &key, HashAlgo::Blake3);

    match cache.lookup(&key) {
        Some(found) => assert_eq!(found, written),
        None => panic!("expected a hit"),
    }
}
/// Nothing has been written, so the lookup is a miss.
#[test]
fn cache_016_miss_when_manifest_absent() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);

    let outcome = cache.lookup(&key);
    assert!(outcome.is_none());
}
/// A manifest file whose bytes are not valid JSON is a miss.
#[test]
fn cache_016_miss_when_manifest_unparseable() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());

    let entry_dir = layout::entry_dir(root, &key);
    fs.create_dir_all(&entry_dir).unwrap();

    let manifest_path = layout::manifest_path(root, &key);
    fs.write_file(&manifest_path, b"not json at all").unwrap();

    let outcome = cache.lookup(&key);
    assert!(outcome.is_none());
}
/// The reader is configured for Blake3 but the entry was written with a
/// Sha256 label, so the lookup is a miss.
#[test]
fn cache_016_miss_when_hash_function_mismatches() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());
    write_valid_entry(fs, root, &key, HashAlgo::Sha256);

    let outcome = cache.lookup(&key);
    assert!(outcome.is_none());
}
/// Overwriting the manifest with a bumped chapter revision turns the entry
/// into a miss.
#[test]
fn cache_016_miss_when_chapter_revision_mismatches() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());

    let original = write_valid_entry(fs, root, &key, HashAlgo::Blake3);
    let stale = Manifest {
        chapter_revision: CHAPTER_REVISION.saturating_add(1),
        ..original
    };

    let manifest_path = layout::manifest_path(root, &key);
    fs.write_file(&manifest_path, &stale.to_json_bytes())
        .unwrap();

    let outcome = cache.lookup(&key);
    assert!(outcome.is_none());
}
/// The manifest is rewritten to reference a blob hash with no file on disk,
/// so the lookup is a miss.
#[test]
fn cache_016_miss_when_output_blob_missing() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());

    let mut tampered = write_valid_entry(fs, root, &key, HashAlgo::Blake3);
    let first_output = &mut tampered.outputs[0];
    first_output.content_hash = [0x99u8; 32];

    let manifest_path = layout::manifest_path(root, &key);
    fs.write_file(&manifest_path, &tampered.to_json_bytes())
        .unwrap();

    let outcome = cache.lookup(&key);
    assert!(outcome.is_none());
}
/// Overwriting the blob with a payload of a different length makes the
/// entry a miss.
#[test]
fn cache_016_miss_when_output_blob_size_mismatch() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());

    let manifest = write_valid_entry(fs, root, &key, HashAlgo::Blake3);
    let recorded_hash = &manifest.outputs[0].content_hash;
    let blob_path = layout::output_blob_path(root, &key, recorded_hash);
    fs.write_file(&blob_path, b"a-much-longer-payload").unwrap();

    let outcome = cache.lookup(&key);
    assert!(outcome.is_none());
}
/// An entry that has a manifest, stderr and blob but no stdout file is a miss.
#[test]
fn cache_016_miss_when_stdout_missing() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());

    // Write a valid entry, wipe its directory, then rebuild everything
    // except the stdout file.
    write_valid_entry(fs, root, &key, HashAlgo::Blake3);
    let entry_dir = layout::entry_dir(root, &key);
    fs.remove_dir_all(&entry_dir).unwrap();

    let stderr_body: &[u8] = b"stderr-body";
    let blob_body: &[u8] = b"blob-body";
    let blob_hash = hash_bytes(HashAlgo::Blake3, blob_body);
    #[allow(clippy::cast_possible_truncation)]
    let blob_size = blob_body.len() as u64;
    #[allow(clippy::cast_possible_truncation)]
    let stderr_len = stderr_body.len() as u64;

    let output = OutputBlob {
        workspace_absolute_path: cp("/proj/out"),
        content_hash: blob_hash,
        size: blob_size,
        mode: 0o644,
    };
    let manifest = Manifest {
        chapter_revision: CHAPTER_REVISION,
        hash_function: HashFunctionLabel::Blake3,
        key,
        outputs: vec![output],
        stdout_len: 11,
        stderr_len,
        stdout_hash: [0u8; 32],
        stderr_hash: hash_bytes(HashAlgo::Blake3, stderr_body),
        exit_status: 0,
        created_at_unix: 0,
    };

    let outputs_dir = layout::outputs_dir(root, &key);
    fs.create_dir_all(&outputs_dir).unwrap();

    let manifest_path = layout::manifest_path(root, &key);
    fs.write_file(&manifest_path, &manifest.to_json_bytes())
        .unwrap();

    let stderr_path = layout::stderr_path(root, &key);
    fs.write_file(&stderr_path, stderr_body).unwrap();

    let blob_path = layout::output_blob_path(root, &key, &blob_hash);
    fs.write_file(&blob_path, blob_body).unwrap();

    let outcome = cache.lookup(&key);
    assert!(outcome.is_none());
}
/// Replacing stdout with a payload of a different length makes the entry a
/// miss.
#[test]
fn cache_016_miss_when_stdout_size_mismatch() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());
    write_valid_entry(fs, root, &key, HashAlgo::Blake3);

    let stdout_path = layout::stdout_path(root, &key);
    fs.write_file(&stdout_path, b"different-length-stdout-payload")
        .unwrap();

    let outcome = cache.lookup(&key);
    assert!(outcome.is_none());
}
/// An entry that has a manifest, stdout and blob but no stderr file is a miss.
#[test]
fn cache_016_miss_when_stderr_missing() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());

    // Write a valid entry, wipe its directory, then rebuild everything
    // except the stderr file.
    write_valid_entry(fs, root, &key, HashAlgo::Blake3);
    let entry_dir = layout::entry_dir(root, &key);
    fs.remove_dir_all(&entry_dir).unwrap();

    let stdout_body: &[u8] = b"stdout-body";
    let blob_body: &[u8] = b"blob-body";
    let blob_hash = hash_bytes(HashAlgo::Blake3, blob_body);
    #[allow(clippy::cast_possible_truncation)]
    let blob_size = blob_body.len() as u64;
    #[allow(clippy::cast_possible_truncation)]
    let stdout_len = stdout_body.len() as u64;

    let output = OutputBlob {
        workspace_absolute_path: cp("/proj/out"),
        content_hash: blob_hash,
        size: blob_size,
        mode: 0o644,
    };
    let manifest = Manifest {
        chapter_revision: CHAPTER_REVISION,
        hash_function: HashFunctionLabel::Blake3,
        key,
        outputs: vec![output],
        stdout_len,
        stderr_len: 11,
        stdout_hash: hash_bytes(HashAlgo::Blake3, stdout_body),
        stderr_hash: [0u8; 32],
        exit_status: 0,
        created_at_unix: 0,
    };

    let outputs_dir = layout::outputs_dir(root, &key);
    fs.create_dir_all(&outputs_dir).unwrap();

    let manifest_path = layout::manifest_path(root, &key);
    fs.write_file(&manifest_path, &manifest.to_json_bytes())
        .unwrap();

    let stdout_path = layout::stdout_path(root, &key);
    fs.write_file(&stdout_path, stdout_body).unwrap();

    let blob_path = layout::output_blob_path(root, &key, &blob_hash);
    fs.write_file(&blob_path, blob_body).unwrap();

    let outcome = cache.lookup(&key);
    assert!(outcome.is_none());
}
/// Replacing stderr with a payload of a different length makes the entry a
/// miss.
#[test]
fn cache_016_miss_when_stderr_size_mismatch() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());
    write_valid_entry(fs, root, &key, HashAlgo::Blake3);

    let stderr_path = layout::stderr_path(root, &key);
    fs.write_file(&stderr_path, b"different-length-stderr-payload")
        .unwrap();

    let outcome = cache.lookup(&key);
    assert!(outcome.is_none());
}
use crate::lookup::CacheLookupStatus;
/// A valid entry is reported as `Hit`, carrying the manifest that was written.
#[test]
fn lookup_status_returns_hit_for_valid_entry() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());
    let expected = write_valid_entry(fs, root, &key, HashAlgo::Blake3);

    let status = cache.lookup_status(&key).unwrap();
    match &status {
        CacheLookupStatus::Hit(found) => assert_eq!(*found, expected),
        other => panic!("expected Hit, got {other:?}"),
    }
}
/// With nothing written, the status is `MissNoEntry`.
#[test]
fn lookup_status_returns_miss_no_entry_for_absent_manifest() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);

    let status = cache.lookup_status(&key).unwrap();
    assert_eq!(status, CacheLookupStatus::MissNoEntry);
}
/// A manifest file that is not valid JSON is reported as `MissCorruptEntry`.
#[test]
fn lookup_status_returns_miss_corrupt_entry_for_unparseable_manifest() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());

    let entry_dir = layout::entry_dir(root, &key);
    fs.create_dir_all(&entry_dir).unwrap();

    let manifest_path = layout::manifest_path(root, &key);
    fs.write_file(&manifest_path, b"not json").unwrap();

    let status = cache.lookup_status(&key).unwrap();
    assert_eq!(status, CacheLookupStatus::MissCorruptEntry);
}
/// A manifest with a bumped chapter revision is reported as
/// `MissSchemaMismatch`.
#[test]
fn lookup_status_returns_miss_schema_mismatch_for_chapter_revision() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());

    let original = write_valid_entry(fs, root, &key, HashAlgo::Blake3);
    let stale = Manifest {
        chapter_revision: CHAPTER_REVISION.saturating_add(1),
        ..original
    };

    let manifest_path = layout::manifest_path(root, &key);
    fs.write_file(&manifest_path, &stale.to_json_bytes())
        .unwrap();

    let status = cache.lookup_status(&key).unwrap();
    assert_eq!(status, CacheLookupStatus::MissSchemaMismatch);
}
/// A Blake3 reader looking at an entry labelled Sha256 reports
/// `MissSchemaMismatch`.
#[test]
fn lookup_status_returns_miss_schema_mismatch_for_hash_function() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());
    write_valid_entry(fs, root, &key, HashAlgo::Sha256);

    let status = cache.lookup_status(&key).unwrap();
    assert_eq!(status, CacheLookupStatus::MissSchemaMismatch);
}
/// A manifest that references a blob hash with no file on disk is reported
/// as `MissCorruptEntry`.
#[test]
fn lookup_status_returns_miss_corrupt_entry_for_missing_blob() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());

    let mut tampered = write_valid_entry(fs, root, &key, HashAlgo::Blake3);
    let first_output = &mut tampered.outputs[0];
    first_output.content_hash = [0x99u8; 32];

    let manifest_path = layout::manifest_path(root, &key);
    fs.write_file(&manifest_path, &tampered.to_json_bytes())
        .unwrap();

    let status = cache.lookup_status(&key).unwrap();
    assert_eq!(status, CacheLookupStatus::MissCorruptEntry);
}
/// A blob whose on-disk length differs from the manifest is reported as
/// `MissCorruptEntry`.
#[test]
fn lookup_status_returns_miss_corrupt_entry_for_blob_size_mismatch() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());

    let manifest = write_valid_entry(fs, root, &key, HashAlgo::Blake3);
    let recorded_hash = &manifest.outputs[0].content_hash;
    let blob_path = layout::output_blob_path(root, &key, recorded_hash);
    fs.write_file(&blob_path, b"different-payload-length")
        .unwrap();

    let status = cache.lookup_status(&key).unwrap();
    assert_eq!(status, CacheLookupStatus::MissCorruptEntry);
}
/// A stdout file whose length differs from the manifest is reported as
/// `MissCorruptEntry`.
#[test]
fn lookup_status_returns_miss_corrupt_entry_for_stdout_size_mismatch() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());
    write_valid_entry(fs, root, &key, HashAlgo::Blake3);

    let stdout_path = layout::stdout_path(root, &key);
    fs.write_file(&stdout_path, b"different-length-stdout-payload")
        .unwrap();

    let status = cache.lookup_status(&key).unwrap();
    assert_eq!(status, CacheLookupStatus::MissCorruptEntry);
}
/// A stderr file whose length differs from the manifest is reported as
/// `MissCorruptEntry`.
#[test]
fn lookup_status_returns_miss_corrupt_entry_for_stderr_size_mismatch() {
    let (cache, key) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());
    write_valid_entry(fs, root, &key, HashAlgo::Blake3);

    let stderr_path = layout::stderr_path(root, &key);
    fs.write_file(&stderr_path, b"different-length-stderr-payload")
        .unwrap();

    let status = cache.lookup_status(&key).unwrap();
    assert_eq!(status, CacheLookupStatus::MissCorruptEntry);
}
/// A half-written temporary directory for `key_b` does not disturb the hit
/// for `key_a`, which shares its shard, and `key_b` itself stays a miss.
#[test]
fn partial_tmp_dir_does_not_affect_other_keys() {
    let (cache, key_a) = fresh_cache(HashAlgo::Blake3);
    let (fs, root) = (cache.fs(), cache.cache_root());

    let key_b = {
        let mut raw = [0u8; 32];
        raw[0] = 0xAB;
        raw[31] = 0xFF;
        CacheKey::from_bytes(raw)
    };
    // Precondition for the scenario: both keys map to the same shard.
    assert_eq!(layout::shard(&key_a), layout::shard(&key_b));

    write_valid_entry(fs, root, &key_a, HashAlgo::Blake3);

    let tmp_dir = layout::tmp_entry_dir(root, &key_b, "rnd123");
    fs.create_dir_all(&tmp_dir).unwrap();
    let partial_manifest = tmp_dir.join("manifest.json");
    fs.write_file(&partial_manifest, b"partial junk").unwrap();

    let hit_a = cache.lookup(&key_a);
    assert!(hit_a.is_some());
    let hit_b = cache.lookup(&key_b);
    assert!(hit_b.is_none());
}
}