use std::path::Path;
use haz_domain::path::{CanonicalPath, ParseAbsoluteError};
use haz_domain::settings::cache::HashAlgo;
use haz_vfs::{FsError, WritableFilesystem};
use snafu::{ResultExt, Snafu};
use crate::hasher::Hasher;
use crate::hex;
use crate::key::CacheKey;
use crate::key::prefix::CHAPTER_REVISION;
use crate::layout;
use crate::manifest::{HashFunctionLabel, Manifest, OutputBlob};
use crate::writer::CacheWriter;
/// One output file to capture into a cache entry.
#[derive(Debug, Clone, Copy)]
pub struct StoredOutput<'a> {
/// Path under which the output is recorded in the manifest. `store` parses it
/// with `CanonicalPath::parse_workspace_absolute` and fails with
/// `StoreError::InvalidOutputPath` when that parse is rejected.
pub workspace_absolute_path: &'a str,
/// Location the output's bytes are read from, through the writer's filesystem.
pub on_disk_path: &'a Path,
/// Mode applied to the stored blob via `set_permissions` and recorded in the
/// manifest (the tests use values such as `0o644` and `0o755`).
pub mode: u32,
}
/// Everything `CacheWriter::store` needs, besides the key, to build one entry.
#[derive(Debug, Clone, Copy)]
pub struct StoreInputs<'a> {
/// Output files to copy into the entry; the manifest lists them in this order.
pub outputs: &'a [StoredOutput<'a>],
/// Captured stdout bytes: written to the entry's stdout file, with their hash
/// and length recorded in the manifest.
pub stdout: &'a [u8],
/// Captured stderr bytes: written to the entry's stderr file, with their hash
/// and length recorded in the manifest.
pub stderr: &'a [u8],
/// Copied verbatim into the manifest's `created_at_unix` field.
/// NOTE(review): presumably seconds since the Unix epoch — confirm with callers.
pub created_at_unix: u64,
}
/// Failures that `CacheWriter::store` can report.
#[derive(Debug, Snafu)]
pub enum StoreError {
/// A filesystem operation (directory creation, read, write, fsync, removal or
/// rename) failed while building or publishing the entry.
#[snafu(display("filesystem error during cache store: {source}"))]
Io {
source: FsError,
},
/// An output's `workspace_absolute_path` was rejected by
/// `CanonicalPath::parse_workspace_absolute`; `path` holds the offending input.
#[snafu(display("invalid workspace-absolute output path '{path}': {source}"))]
InvalidOutputPath {
path: String,
source: ParseAbsoluteError,
},
}
impl<Fs: WritableFilesystem> CacheWriter<Fs> {
    /// Stores a cache entry for `key`, replacing any existing entry for that key.
    ///
    /// The entry is assembled in a temporary directory carrying a random suffix
    /// (output blobs, captured stdout/stderr, manifest). Every file and directory
    /// written is fsynced, and the finished directory is then renamed onto the
    /// final entry path.
    ///
    /// If any step fails, the temporary directory is removed on a best-effort
    /// basis so that failed stores do not accumulate partial entries.
    ///
    /// # Errors
    ///
    /// * [`StoreError::InvalidOutputPath`] when an output's
    ///   `workspace_absolute_path` is rejected by
    ///   `CanonicalPath::parse_workspace_absolute`.
    /// * [`StoreError::Io`] when any filesystem operation fails.
    pub fn store(&self, key: &CacheKey, inputs: &StoreInputs<'_>) -> Result<(), StoreError> {
        let suffix = random_suffix_hex();
        let tmp_dir = layout::tmp_entry_dir(self.cache_root(), key, &suffix);
        let outcome = self.stage_and_publish(key, inputs, &tmp_dir);
        if outcome.is_err() {
            // Best-effort cleanup: the original error is what the caller needs
            // to see, so a failure to remove the staging directory (including
            // "not found" when the rename already happened) is ignored.
            let _ = self.fs().remove_dir_all(&tmp_dir);
        }
        outcome
    }

    /// Builds the complete entry inside `tmp_dir`, then swaps it into place.
    fn stage_and_publish(
        &self,
        key: &CacheKey,
        inputs: &StoreInputs<'_>,
        tmp_dir: &Path,
    ) -> Result<(), StoreError> {
        let outputs_dir = tmp_dir.join(layout::OUTPUTS_SUBDIR);
        self.fs().create_dir_all(&outputs_dir).context(IoSnafu)?;
        let manifest_outputs = self.write_output_blobs(&outputs_dir, inputs.outputs)?;
        // Syncing each blob file persists its contents; the blob's directory
        // entry only becomes durable once the containing directory is synced.
        self.fs().fsync_dir(&outputs_dir).context(IoSnafu)?;

        self.write_synced_file(&tmp_dir.join(layout::STDOUT_FILE_NAME), inputs.stdout)?;
        self.write_synced_file(&tmp_dir.join(layout::STDERR_FILE_NAME), inputs.stderr)?;

        let stdout_hash = hash_bytes(self.hash_algo(), inputs.stdout);
        let stderr_hash = hash_bytes(self.hash_algo(), inputs.stderr);
        // `usize` is at most 64 bits wide on every target Rust currently
        // supports, so these widening casts cannot lose data.
        #[allow(clippy::cast_possible_truncation)]
        let stdout_len = inputs.stdout.len() as u64;
        #[allow(clippy::cast_possible_truncation)]
        let stderr_len = inputs.stderr.len() as u64;

        let manifest = Manifest {
            chapter_revision: CHAPTER_REVISION,
            hash_function: HashFunctionLabel::from(self.hash_algo()),
            key: *key,
            outputs: manifest_outputs,
            stdout_len,
            stderr_len,
            stdout_hash,
            stderr_hash,
            // NOTE(review): always recorded as 0; presumably only successful
            // runs are stored — confirm with callers.
            exit_status: 0,
            created_at_unix: inputs.created_at_unix,
        };
        self.write_synced_file(
            &tmp_dir.join(layout::MANIFEST_FILE_NAME),
            &manifest.to_json_bytes(),
        )?;
        self.fs().fsync_dir(tmp_dir).context(IoSnafu)?;

        // Publish: drop any previous entry for this key, then move the staged
        // directory into place. Between the removal and the rename no entry
        // exists for the key.
        let entry_dir = layout::entry_dir(self.cache_root(), key);
        match self.fs().remove_dir_all(&entry_dir) {
            Ok(()) | Err(FsError::NotFound { .. }) => {}
            Err(source) => return Err(StoreError::Io { source }),
        }
        self.fs().rename(tmp_dir, &entry_dir).context(IoSnafu)?;

        // Sync the shard directory so the rename itself is persisted.
        let shard_dir = layout::shard_dir(self.cache_root(), key);
        self.fs().fsync_dir(&shard_dir).context(IoSnafu)
    }

    /// Writes `bytes` to `path` and fsyncs the resulting file.
    fn write_synced_file(&self, path: &Path, bytes: &[u8]) -> Result<(), StoreError> {
        self.fs().write_file(path, bytes).context(IoSnafu)?;
        self.fs().fsync_file(path).context(IoSnafu)
    }

    /// Copies every output into `outputs_dir` as a content-addressed blob and
    /// returns the matching manifest records, in the order of `outputs`.
    ///
    /// For each output the workspace-absolute path is validated first, then the
    /// file is read from `on_disk_path`, hashed with the writer's hash
    /// algorithm, written under the hex form of that hash, given the requested
    /// mode, and fsynced.
    ///
    /// Outputs with identical content map to the same blob path, so the later
    /// write (and its mode) is the one left on disk.
    ///
    /// # Errors
    ///
    /// [`StoreError::InvalidOutputPath`] for a rejected path, [`StoreError::Io`]
    /// for any filesystem failure. Processing stops at the first error.
    fn write_output_blobs(
        &self,
        outputs_dir: &Path,
        outputs: &[StoredOutput<'_>],
    ) -> Result<Vec<OutputBlob>, StoreError> {
        let mut entries = Vec::with_capacity(outputs.len());
        for out in outputs {
            let workspace_absolute_path =
                CanonicalPath::parse_workspace_absolute(out.workspace_absolute_path).map_err(
                    |source| StoreError::InvalidOutputPath {
                        path: out.workspace_absolute_path.to_owned(),
                        source,
                    },
                )?;

            let bytes = self.fs().read(out.on_disk_path).context(IoSnafu)?;
            let content_hash = hash_bytes(self.hash_algo(), &bytes);
            let blob_path = outputs_dir.join(hex::encode_32(&content_hash));
            self.fs().write_file(&blob_path, &bytes).context(IoSnafu)?;
            self.fs()
                .set_permissions(&blob_path, out.mode)
                .context(IoSnafu)?;
            self.fs().fsync_file(&blob_path).context(IoSnafu)?;

            // Widening cast; see the note on the stream lengths in
            // `stage_and_publish`.
            #[allow(clippy::cast_possible_truncation)]
            let size = bytes.len() as u64;
            entries.push(OutputBlob {
                workspace_absolute_path,
                content_hash,
                size,
                mode: out.mode,
            });
        }
        Ok(entries)
    }
}
/// Returns a random 64-bit value rendered as 16 zero-padded lowercase hex digits.
fn random_suffix_hex() -> String {
    format!("{:016x}", rand::random::<u64>())
}
/// Hashes `data` in one shot with a fresh [`Hasher`] for `algo` and returns the
/// 32-byte digest.
fn hash_bytes(algo: HashAlgo, data: &[u8]) -> [u8; 32] {
    let mut state = Hasher::new(algo);
    state.update(data);
    state.finalize()
}
#[cfg(test)]
mod tests {
use std::path::{Path, PathBuf};
use haz_domain::path::ParseAbsoluteError;
use haz_domain::settings::cache::HashAlgo;
use haz_vfs::{EntryKind, Filesystem, WritableFilesystem};
use haz_vfs_testing::MemFilesystem;
use crate::hasher::Hasher;
use crate::key::CacheKey;
use crate::key::prefix::CHAPTER_REVISION;
use crate::layout;
use crate::manifest::HashFunctionLabel;
use crate::store::{StoreError, StoreInputs, StoredOutput};
use crate::writer::CacheWriter;
/// Workspace root handed to `CacheWriter::new` by `make_cache`.
const WORKSPACE_ROOT: &str = "/ws";
/// Workspace-absolute form of the sample output path, used as
/// `StoredOutput::workspace_absolute_path`.
const PROJ_OUT_ABS: &str = "/proj/out";
/// On-disk location of the same sample output (`WORKSPACE_ROOT` followed by
/// `PROJ_OUT_ABS`), used as `StoredOutput::on_disk_path`.
const PROJ_OUT_DISK: &str = "/ws/proj/out";
/// Fixed key shared by the tests: bytes `0xAB 0xCD` followed by thirty zeros.
fn sample_key() -> CacheKey {
    let mut raw = [0u8; 32];
    raw[..2].copy_from_slice(&[0xAB, 0xCD]);
    CacheKey::from_bytes(raw)
}
/// Test-side digest helper: feeds `data` to a fresh [`Hasher`] for `algo` and
/// returns the finalised 32-byte hash.
fn hash_bytes(algo: HashAlgo, data: &[u8]) -> [u8; 32] {
    let mut hasher = Hasher::new(algo);
    hasher.update(data);
    hasher.finalize()
}
/// Builds an in-memory filesystem containing `path`'s parent directory and a
/// single file at `path` with the given contents and mode.
fn fs_with_one_output(path: &Path, bytes: &[u8], mode: u32) -> MemFilesystem {
    let parent_dir = path.parent().unwrap();
    let contents = bytes.to_vec();
    let mut mem = MemFilesystem::new();
    mem.add_dir(parent_dir).unwrap();
    mem.add_file_with_mode(path, contents, mode).unwrap();
    mem
}
/// Wraps `fs` in a [`CacheWriter`] rooted at [`WORKSPACE_ROOT`] using `algo`.
fn make_cache(fs: MemFilesystem, algo: HashAlgo) -> CacheWriter<MemFilesystem> {
    let workspace_root = Path::new(WORKSPACE_ROOT);
    CacheWriter::new(fs, workspace_root, algo)
}
/// Storing one output and then looking the key up yields a manifest that
/// describes exactly that output (path, size, mode and content hash).
#[test]
fn cache_017_store_then_lookup_round_trips() {
    // Arrange: a workspace holding a single output file.
    let payload = b"output-bytes-v1";
    let disk_path = PathBuf::from(PROJ_OUT_DISK);
    let cache = make_cache(
        fs_with_one_output(&disk_path, payload, 0o644),
        HashAlgo::Blake3,
    );
    let key = sample_key();
    let stored = [StoredOutput {
        workspace_absolute_path: PROJ_OUT_ABS,
        on_disk_path: &disk_path,
        mode: 0o644,
    }];

    // Act.
    cache
        .store(
            &key,
            &StoreInputs {
                outputs: &stored,
                stdout: b"hello, stdout",
                stderr: b"hello, stderr",
                created_at_unix: 1_715_700_000,
            },
        )
        .unwrap();

    // Assert.
    let manifest = cache
        .reader()
        .lookup(&key)
        .expect("expected a hit after store");
    assert_eq!(manifest.outputs.len(), 1);
    let recorded = &manifest.outputs[0];
    assert_eq!(recorded.workspace_absolute_path.to_string(), PROJ_OUT_ABS);
    #[allow(clippy::cast_possible_truncation)]
    let expected_size = payload.len() as u64;
    assert_eq!(recorded.size, expected_size);
    assert_eq!(recorded.mode, 0o644);
    assert_eq!(
        recorded.content_hash,
        hash_bytes(HashAlgo::Blake3, payload)
    );
}
/// The stored manifest carries the chapter revision, the label of the writer's
/// hash algorithm, a zero exit status, the supplied timestamp and the key.
#[test]
fn cache_011_manifest_records_chapter_revision_and_active_hash_function() {
    let disk_path = PathBuf::from(PROJ_OUT_DISK);
    let cache = make_cache(
        fs_with_one_output(&disk_path, b"x", 0o600),
        HashAlgo::Sha256,
    );
    let key = sample_key();
    let stored = [StoredOutput {
        workspace_absolute_path: PROJ_OUT_ABS,
        on_disk_path: &disk_path,
        mode: 0o600,
    }];

    cache
        .store(
            &key,
            &StoreInputs {
                outputs: &stored,
                stdout: b"",
                stderr: b"",
                created_at_unix: 7,
            },
        )
        .unwrap();

    let manifest = cache.reader().lookup(&key).unwrap();
    assert_eq!(manifest.chapter_revision, CHAPTER_REVISION);
    assert_eq!(manifest.hash_function, HashFunctionLabel::Sha256);
    assert_eq!(manifest.exit_status, 0);
    assert_eq!(manifest.created_at_unix, 7);
    assert_eq!(manifest.key, key);
}
/// The manifest's stdout/stderr hashes and lengths equal what hashing and
/// measuring the captured bytes directly produces.
#[test]
fn cache_011_stream_hashes_match_finalised_hasher_output() {
    let disk_path = PathBuf::from(PROJ_OUT_DISK);
    let cache = make_cache(
        fs_with_one_output(&disk_path, b"", 0o644),
        HashAlgo::Blake3,
    );
    let key = sample_key();
    let stdout: &[u8] = b"line on stdout\n";
    let stderr: &[u8] = b"line on stderr\n";
    let stored = [StoredOutput {
        workspace_absolute_path: PROJ_OUT_ABS,
        on_disk_path: &disk_path,
        mode: 0o644,
    }];

    cache
        .store(
            &key,
            &StoreInputs {
                outputs: &stored,
                stdout,
                stderr,
                created_at_unix: 0,
            },
        )
        .unwrap();

    let manifest = cache.reader().lookup(&key).unwrap();

    // Digests.
    assert_eq!(manifest.stdout_hash, hash_bytes(HashAlgo::Blake3, stdout));
    assert_eq!(manifest.stderr_hash, hash_bytes(HashAlgo::Blake3, stderr));

    // Lengths.
    #[allow(clippy::cast_possible_truncation)]
    let expected_stdout_len = stdout.len() as u64;
    #[allow(clippy::cast_possible_truncation)]
    let expected_stderr_len = stderr.len() as u64;
    assert_eq!(manifest.stdout_len, expected_stdout_len);
    assert_eq!(manifest.stderr_len, expected_stderr_len);
}
/// An entry with no outputs and empty streams is still stored and found, with
/// zero lengths and the empty-input digests.
#[test]
fn cache_017_store_with_no_outputs_and_empty_streams_still_round_trips() {
    let mut mem = MemFilesystem::new();
    mem.add_dir("/ws").unwrap();
    let cache = make_cache(mem, HashAlgo::Blake3);
    let key = sample_key();

    cache
        .store(
            &key,
            &StoreInputs {
                outputs: &[],
                stdout: b"",
                stderr: b"",
                created_at_unix: 0,
            },
        )
        .unwrap();

    let manifest = cache
        .reader()
        .lookup(&key)
        .expect("zero-output entry is still a hit");
    let empty_digest = hash_bytes(HashAlgo::Blake3, b"");
    assert_eq!(manifest.outputs.len(), 0);
    assert_eq!(manifest.stdout_len, 0);
    assert_eq!(manifest.stderr_len, 0);
    assert_eq!(manifest.stdout_hash, empty_digest);
    assert_eq!(manifest.stderr_hash, empty_digest);
}
/// Two outputs are recorded in the manifest in the order they were supplied,
/// each with its own path and mode.
#[test]
fn cache_017_store_with_multiple_outputs_records_them_in_order() {
    let mut mem = MemFilesystem::new();
    mem.add_dir("/ws/proj").unwrap();
    mem.add_file_with_mode("/ws/proj/a", b"alpha".to_vec(), 0o644)
        .unwrap();
    mem.add_file_with_mode("/ws/proj/b", b"beta-bytes".to_vec(), 0o755)
        .unwrap();
    let cache = make_cache(mem, HashAlgo::Blake3);
    let key = sample_key();

    let disk_a = PathBuf::from("/ws/proj/a");
    let disk_b = PathBuf::from("/ws/proj/b");
    let stored = [
        StoredOutput {
            workspace_absolute_path: "/proj/a",
            on_disk_path: &disk_a,
            mode: 0o644,
        },
        StoredOutput {
            workspace_absolute_path: "/proj/b",
            on_disk_path: &disk_b,
            mode: 0o755,
        },
    ];
    cache
        .store(
            &key,
            &StoreInputs {
                outputs: &stored,
                stdout: b"",
                stderr: b"",
                created_at_unix: 0,
            },
        )
        .unwrap();

    let manifest = cache.reader().lookup(&key).unwrap();
    let expected = [("/proj/a", 0o644_u32), ("/proj/b", 0o755_u32)];
    assert_eq!(manifest.outputs.len(), 2);
    for (recorded, (path, mode)) in manifest.outputs.iter().zip(expected) {
        assert_eq!(recorded.workspace_absolute_path.to_string(), path);
        assert_eq!(recorded.mode, mode);
    }
}
/// After a store, the blob file inside the entry carries the mode that was
/// requested for the output.
#[test]
fn cache_011_after_store_blob_file_has_recorded_mode() {
    let payload = b"executable";
    let disk_path = PathBuf::from(PROJ_OUT_DISK);
    let cache = make_cache(
        fs_with_one_output(&disk_path, payload, 0o755),
        HashAlgo::Blake3,
    );
    let key = sample_key();
    let stored = [StoredOutput {
        workspace_absolute_path: PROJ_OUT_ABS,
        on_disk_path: &disk_path,
        mode: 0o755,
    }];

    cache
        .store(
            &key,
            &StoreInputs {
                outputs: &stored,
                stdout: b"",
                stderr: b"",
                created_at_unix: 0,
            },
        )
        .unwrap();

    // Locate the blob through the layout helpers, keyed by its content hash.
    let digest = hash_bytes(HashAlgo::Blake3, payload);
    let stored_blob = layout::output_blob_path(cache.cache_root(), &key, &digest);
    let observed_mode = cache.fs().mode_of(&stored_blob).unwrap();
    assert_eq!(observed_mode, 0o755);
}
/// A successful store leaves no `.tmp-` directory in the shard, and the final
/// entry directory (named after the key's hex form) is present.
#[test]
fn cache_017_after_store_tmp_directory_no_longer_exists() {
    let disk_path = PathBuf::from(PROJ_OUT_DISK);
    let cache = make_cache(
        fs_with_one_output(&disk_path, b"", 0o644),
        HashAlgo::Blake3,
    );
    let key = sample_key();
    let stored = [StoredOutput {
        workspace_absolute_path: PROJ_OUT_ABS,
        on_disk_path: &disk_path,
        mode: 0o644,
    }];
    let inputs = StoreInputs {
        outputs: &stored,
        stdout: b"",
        stderr: b"",
        created_at_unix: 0,
    };
    cache.store(&key, &inputs).unwrap();

    let shard = layout::shard_dir(cache.cache_root(), &key);
    let mut saw_entry = false;
    for listed in cache.fs().read_dir(&shard).unwrap() {
        let name = listed
            .path
            .file_name()
            .unwrap()
            .to_string_lossy()
            .into_owned();
        assert!(
            !name.starts_with(".tmp-"),
            "expected no tmp directory after a successful store, found: {name}"
        );
        if name != key.to_hex() {
            continue;
        }
        saw_entry = true;
        assert_eq!(listed.metadata.kind, EntryKind::Dir);
    }
    assert!(saw_entry, "final entry directory must be present");
}
/// Storing twice under the same key replaces the first entry wholesale: the
/// lookup afterwards reflects only the second store's streams, timestamp and
/// output content.
#[test]
fn cache_014_second_store_of_same_key_overwrites_and_remains_a_hit() {
    let blob_v1 = b"v1";
    let on_disk = PathBuf::from(PROJ_OUT_DISK);
    let fs = fs_with_one_output(&on_disk, blob_v1, 0o644);
    let cache = make_cache(fs, HashAlgo::Blake3);
    let key = sample_key();
    let outs = [StoredOutput {
        workspace_absolute_path: PROJ_OUT_ABS,
        on_disk_path: &on_disk,
        mode: 0o644,
    }];

    // First store.
    cache
        .store(
            &key,
            &StoreInputs {
                outputs: &outs,
                stdout: b"first",
                stderr: b"first-err",
                created_at_unix: 1,
            },
        )
        .unwrap();

    // Change the output on disk, then store again under the same key.
    cache.fs().write_file(&on_disk, b"v2-longer").unwrap();
    cache.fs().set_permissions(&on_disk, 0o644).unwrap();
    cache
        .store(
            &key,
            &StoreInputs {
                outputs: &outs,
                stdout: b"second",
                stderr: b"second-err",
                created_at_unix: 2,
            },
        )
        .unwrap();

    let manifest = cache
        .reader()
        .lookup(&key)
        .expect("entry must still hit after a second store");

    // Both streams must come from the second store; checking only stdout
    // would let a stale stderr slip through.
    #[allow(clippy::cast_possible_truncation)]
    let expected_stdout_len = b"second".len() as u64;
    #[allow(clippy::cast_possible_truncation)]
    let expected_stderr_len = b"second-err".len() as u64;
    assert_eq!(manifest.stdout_len, expected_stdout_len);
    assert_eq!(manifest.stderr_len, expected_stderr_len);
    assert_eq!(manifest.created_at_unix, 2);

    // The single output must describe the rewritten file.
    #[allow(clippy::cast_possible_truncation)]
    let expected_size = b"v2-longer".len() as u64;
    assert_eq!(manifest.outputs.len(), 1);
    assert_eq!(manifest.outputs[0].size, expected_size);
    assert_eq!(
        manifest.outputs[0].content_hash,
        hash_bytes(HashAlgo::Blake3, b"v2-longer")
    );
}
/// A missing output file makes `store` fail with `StoreError::Io`, and no
/// entry becomes visible for the key.
#[test]
fn store_propagates_missing_output_file_as_io_error() {
    let mut fs = MemFilesystem::new();
    fs.add_dir("/ws").unwrap();
    let cache = make_cache(fs, HashAlgo::Blake3);
    let key = sample_key();
    let on_disk = PathBuf::from("/ws/missing");
    let outs = [StoredOutput {
        workspace_absolute_path: "/missing",
        on_disk_path: &on_disk,
        mode: 0o644,
    }];
    let err = cache
        .store(
            &key,
            &StoreInputs {
                outputs: &outs,
                stdout: b"",
                stderr: b"",
                created_at_unix: 0,
            },
        )
        .unwrap_err();

    // Match the variant, as the sibling rejection tests do, so the check does
    // not hinge solely on the wording of the Display message.
    assert!(
        matches!(err, StoreError::Io { .. }),
        "expected Io, got {err:?}"
    );
    let msg = format!("{err}");
    assert!(msg.contains("filesystem error"), "got: {msg}");
    assert!(cache.reader().lookup(&key).is_none());
}
/// A workspace-absolute path containing `..` is rejected with
/// `InvalidOutputPath`, and no entry becomes visible for the key.
#[test]
fn store_rejects_output_with_traversal_in_workspace_absolute_path() {
    let mut mem = MemFilesystem::new();
    mem.add_dir("/ws/proj").unwrap();
    mem.add_file_with_mode("/ws/proj/out", b"x".to_vec(), 0o644)
        .unwrap();
    let cache = make_cache(mem, HashAlgo::Blake3);
    let key = sample_key();

    let disk_path = PathBuf::from("/ws/proj/out");
    let stored = [StoredOutput {
        workspace_absolute_path: "/proj/../etc/passwd",
        on_disk_path: &disk_path,
        mode: 0o644,
    }];
    let inputs = StoreInputs {
        outputs: &stored,
        stdout: b"",
        stderr: b"",
        created_at_unix: 0,
    };

    let err = cache.store(&key, &inputs).unwrap_err();
    assert!(
        matches!(err, StoreError::InvalidOutputPath { .. }),
        "expected InvalidOutputPath, got {err:?}"
    );
    assert!(cache.reader().lookup(&key).is_none());
}
/// A path without a leading `/` is rejected with
/// `InvalidOutputPath { source: NotWorkspaceAbsolute, .. }`, and no entry
/// becomes visible for the key.
#[test]
fn store_rejects_output_with_project_relative_workspace_absolute_path() {
    let mut fs = MemFilesystem::new();
    fs.add_dir("/ws/proj").unwrap();
    fs.add_file_with_mode("/ws/proj/out", b"x".to_vec(), 0o644)
        .unwrap();
    let cache = make_cache(fs, HashAlgo::Blake3);
    let key = sample_key();
    let on_disk = PathBuf::from("/ws/proj/out");
    let outs = [StoredOutput {
        workspace_absolute_path: "proj/out",
        on_disk_path: &on_disk,
        mode: 0o644,
    }];
    let err = cache
        .store(
            &key,
            &StoreInputs {
                outputs: &outs,
                stdout: b"",
                stderr: b"",
                created_at_unix: 0,
            },
        )
        .unwrap_err();
    assert!(
        matches!(
            err,
            StoreError::InvalidOutputPath {
                source: ParseAbsoluteError::NotWorkspaceAbsolute,
                ..
            }
        ),
        "expected NotWorkspaceAbsolute, got {err:?}"
    );
    // Same post-condition as the traversal rejection test: a rejected store
    // must not publish anything.
    assert!(cache.reader().lookup(&key).is_none());
}
}