use anodizer_core::DeterminismReport;
use anyhow::{Context, Result};
use std::collections::{BTreeSet, HashMap};
use std::fs::File;
use std::io::Read;
use std::path::Path;
/// One file listed in the preserved-dist manifest (`context.json`).
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct PreservedArtifact {
/// File name component only (no directory part), lossily converted to UTF-8.
pub name: String,
/// Path relative to the preserved-dist root, with `\` normalized to `/`.
pub path: String,
/// Digest string: the hash recorded in the determinism report when one is
/// available for this path, otherwise a freshly computed `sha256:<hex>`.
pub sha256: String,
/// Size in bytes: the report's recorded size when the report hash is used,
/// otherwise the number of bytes read while hashing.
pub size: u64,
}
/// Manifest describing a preserved dist tree; serialized as `context.json`
/// at the root of that tree.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct PreservedDistContext {
/// Every preserved file (sidecar manifests excluded), ordered by file name.
pub artifacts: Vec<PreservedArtifact>,
/// Distinct, sorted target strings taken from `artifacts.json`, or from the
/// harness-supplied fallback when none are found there.
pub targets: Vec<String>,
/// Version read from `metadata.json`, or the caller's hint when absent/empty.
pub version: String,
/// Commit copied from the determinism report.
pub commit: String,
}
pub(super) fn preserve_dist_tree(worktree_path: &Path, dest: &Path) -> Result<()> {
let src = worktree_path.join("dist");
match std::fs::remove_dir_all(dest) {
Ok(()) => {}
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
Err(e) => {
return Err(e)
.with_context(|| format!("clearing stale preserved-dist at {}", dest.display()));
}
}
std::fs::create_dir_all(dest)
.with_context(|| format!("creating preserved-dist root at {}", dest.display()))?;
match std::fs::read_dir(&src) {
Ok(entries) => {
for entry in entries {
let entry = entry.with_context(|| format!("reading entry in {}", src.display()))?;
let name = entry.file_name();
let src_path = entry.path();
let dst_path = dest.join(&name);
let ft = entry
.file_type()
.with_context(|| format!("stat {}", src_path.display()))?;
if ft.is_dir() {
copy_dir_recursive(&src_path, &dst_path).with_context(|| {
format!("copying {} -> {}", src_path.display(), dst_path.display())
})?;
} else {
std::fs::copy(&src_path, &dst_path).with_context(|| {
format!("copying {} -> {}", src_path.display(), dst_path.display())
})?;
}
}
}
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(()),
Err(e) => {
return Err(e).with_context(|| format!("reading source dir {}", src.display()));
}
}
Ok(())
}
fn copy_dir_recursive(src: &Path, dst: &Path) -> Result<()> {
std::fs::create_dir_all(dst)
.with_context(|| format!("creating destination dir {}", dst.display()))?;
for entry in
std::fs::read_dir(src).with_context(|| format!("reading source dir {}", src.display()))?
{
let entry = entry?;
let ft = entry.file_type()?;
let src_path = entry.path();
let dst_path = dst.join(entry.file_name());
if ft.is_dir() {
copy_dir_recursive(&src_path, &dst_path)?;
} else {
std::fs::copy(&src_path, &dst_path).with_context(|| {
format!("copying {} -> {}", src_path.display(), dst_path.display())
})?;
}
}
Ok(())
}
/// Caller-supplied inputs for [`write_preserved_dist_context`].
pub(super) struct ContextInputs<'a> {
/// Determinism report; supplies the commit and the recorded per-artifact
/// hashes and sizes.
pub report: &'a DeterminismReport,
/// Fallback target list, consulted only when no targets can be read from
/// the preserved `artifacts.json`.
pub harness_targets: Option<&'a [String]>,
/// Fallback version, used when the preserved `metadata.json` yields no
/// non-empty `version` string.
pub version_hint: &'a str,
}
/// Builds the preserved-dist manifest and writes it to `<dest>/context.json`.
///
/// Manifest fields are derived as follows:
///
/// * `targets`: the distinct non-empty `target` strings found in the entries
///   of `<dest>/artifacts.json` (when it parses as a JSON array), sorted. If
///   that yields nothing, the non-empty `inputs.harness_targets`,
///   deduplicated and sorted.
/// * `version`: the non-empty `version` string of `<dest>/metadata.json`,
///   otherwise `inputs.version_hint`.
/// * `artifacts`: every regular file under `dest` as gathered by
///   [`collect_preserved_entries`]. Report rows are keyed by their `name` and
///   matched against each file's dest-relative path; a matching row with a
///   recorded hash is preferred over re-hashing the file.
/// * `commit`: copied from the report.
///
/// Artifacts are ordered by file name and then by relative path. The
/// secondary key matters: directory listing order is platform-dependent, so
/// without it same-named files in different directories (e.g. per-arch
/// staging dirs) could serialize in a different order from run to run.
///
/// The JSON is first written to `context.json.tmp` and then renamed over
/// `context.json`.
///
/// # Errors
/// Fails if the tree cannot be walked or hashed, if serialization fails, or
/// if the temporary file cannot be written or renamed. Missing or malformed
/// sidecar JSON is not an error; the fallbacks above apply.
pub(super) fn write_preserved_dist_context(dest: &Path, inputs: ContextInputs<'_>) -> Result<()> {
    let report = inputs.report;

    let artifacts_json: Option<serde_json::Value> =
        read_optional_json(&dest.join("artifacts.json"));
    let mut targets: Vec<String> = artifacts_json
        .as_ref()
        .and_then(|v| v.as_array())
        .map(|arr| {
            // BTreeSet gives deduplication and sorted order in one step.
            arr.iter()
                .filter_map(|entry| entry.get("target").and_then(|t| t.as_str()))
                .filter(|t| !t.is_empty())
                .map(str::to_string)
                .collect::<BTreeSet<String>>()
                .into_iter()
                .collect::<Vec<String>>()
        })
        .unwrap_or_default();
    if targets.is_empty()
        && let Some(harness_targets) = inputs.harness_targets
    {
        targets = harness_targets
            .iter()
            .filter(|t| !t.is_empty())
            .cloned()
            .collect::<BTreeSet<String>>()
            .into_iter()
            .collect();
    }

    let version: String = read_optional_json(&dest.join("metadata.json"))
        .as_ref()
        .and_then(|meta| meta.get("version"))
        .and_then(|v| v.as_str())
        .filter(|s| !s.is_empty())
        .map(str::to_string)
        .unwrap_or_else(|| inputs.version_hint.to_string());

    let report_by_rel_path: HashMap<String, &anodizer_core::ArtifactRow> = report
        .artifacts
        .iter()
        .map(|a| (a.name.clone(), a))
        .collect();

    let mut entries: Vec<PreservedArtifact> = Vec::new();
    collect_preserved_entries(dest, dest, &report_by_rel_path, &mut entries)?;
    // Tie-break on the relative path so the order is total and does not
    // depend on the order in which the filesystem listed the directories.
    entries.sort_by(|a, b| {
        (a.name.as_str(), a.path.as_str()).cmp(&(b.name.as_str(), b.path.as_str()))
    });

    let ctx = PreservedDistContext {
        artifacts: entries,
        targets,
        version,
        commit: report.commit.clone(),
    };
    let json =
        serde_json::to_string_pretty(&ctx).context("serializing PreservedDistContext to JSON")?;

    // Write-then-rename so readers never observe a half-written manifest.
    let ctx_path = dest.join("context.json");
    let tmp_path = ctx_path.with_extension("json.tmp");
    std::fs::write(&tmp_path, &json)
        .with_context(|| format!("writing context.json tmp to {}", tmp_path.display()))?;
    std::fs::rename(&tmp_path, &ctx_path).with_context(|| {
        format!(
            "atomically renaming {} -> {}",
            tmp_path.display(),
            ctx_path.display()
        )
    })?;
    Ok(())
}
/// Best-effort loader for an optional sidecar JSON file.
///
/// Returns `Some(value)` only when `path` can be read and parses as JSON.
/// A missing file yields `None` silently; an unreadable or malformed file
/// yields `None` after printing a warning to stderr. This function never
/// fails, so callers fall back to their own defaults.
fn read_optional_json(path: &Path) -> Option<serde_json::Value> {
    let raw = match std::fs::read(path) {
        Ok(raw) => raw,
        Err(err) => {
            // Absence is the expected "no sidecar" case; anything else is worth a warning.
            if err.kind() != std::io::ErrorKind::NotFound {
                eprintln!(
                    "warn: preserved-dist {} unreadable ({}); proceeding with \
                     harness-supplied defaults",
                    path.display(),
                    err
                );
            }
            return None;
        }
    };

    let parsed = serde_json::from_slice::<serde_json::Value>(&raw);
    if let Err(err) = &parsed {
        eprintln!(
            "warn: preserved-dist {} present but malformed ({}); \
             proceeding with harness-supplied defaults",
            path.display(),
            err
        );
    }
    parsed.ok()
}
/// Walks `dir` recursively and appends one [`PreservedArtifact`] to `out` for
/// every regular file found.
///
/// * `root` is the preserved-dist root; each artifact's `path` is expressed
///   relative to it with `/` separators.
/// * `report_by_rel_path` maps a relative path to its report row. When a row
///   exists and carries a hash, that hash and the row's `size_bytes` are used
///   verbatim; otherwise the file is hashed from disk.
///
/// Entries that are neither directories nor regular files are skipped, as are
/// files named like the manifest sidecars (`context.json`, `artifacts.json`,
/// `metadata.json` and their `.tmp` variants) at any depth.
///
/// # Errors
/// Returns the first I/O failure, annotated with the directory or file path
/// involved.
fn collect_preserved_entries(
    root: &Path,
    dir: &Path,
    report_by_rel_path: &HashMap<String, &anodizer_core::ArtifactRow>,
    out: &mut Vec<PreservedArtifact>,
) -> Result<()> {
    let listing = std::fs::read_dir(dir)
        .with_context(|| format!("reading preserved-dist dir {}", dir.display()))?;
    for entry in listing {
        let entry = entry
            .with_context(|| format!("reading entry in preserved-dist dir {}", dir.display()))?;
        let path = entry.path();
        let ft = entry
            .file_type()
            .with_context(|| format!("stat {}", path.display()))?;
        if ft.is_dir() {
            collect_preserved_entries(root, &path, report_by_rel_path, out)?;
            continue;
        }
        if !ft.is_file() {
            continue;
        }
        let name = entry.file_name().to_string_lossy().into_owned();
        // Sidecar manifests describe the tree; they are not artifacts themselves.
        if matches!(
            name.as_str(),
            "context.json"
                | "context.json.tmp"
                | "artifacts.json"
                | "artifacts.json.tmp"
                | "metadata.json"
                | "metadata.json.tmp"
        ) {
            continue;
        }
        // Normalize separators so the manifest is identical across platforms.
        let rel = path
            .strip_prefix(root)
            .unwrap_or(&path)
            .to_string_lossy()
            .replace('\\', "/");
        let (sha256, size) = if let Some(row) = report_by_rel_path.get(rel.as_str())
            && let Some(hash) = row.hash.as_ref()
        {
            (hash.clone(), row.size_bytes)
        } else {
            hash_file_streaming(&path)?
        };
        out.push(PreservedArtifact {
            name,
            path: rel,
            sha256,
            size,
        });
    }
    Ok(())
}
/// Computes the SHA-256 digest of the file at `path`, reading it in 64 KiB
/// chunks so the whole file is never held in memory.
///
/// Returns `("sha256:<lowercase hex digest>", total_bytes_read)`.
///
/// # Errors
/// Fails if the file cannot be opened or read; the error is annotated with
/// the path. A read interrupted by a signal (`ErrorKind::Interrupted`) is
/// retried rather than reported, as the `Read::read` contract recommends.
fn hash_file_streaming(path: &Path) -> Result<(String, u64)> {
    use sha2::{Digest, Sha256};
    let mut file = File::open(path)
        .with_context(|| format!("opening preserved artifact {}", path.display()))?;
    let mut hasher = Sha256::new();
    let mut buf = [0u8; 64 * 1024];
    let mut total: u64 = 0;
    loop {
        let n = match file.read(&mut buf) {
            // End of file.
            Ok(0) => break,
            Ok(n) => n,
            // Non-fatal: nothing was consumed, so simply try again.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => {
                return Err(e)
                    .with_context(|| format!("reading preserved artifact {}", path.display()));
            }
        };
        Digest::update(&mut hasher, &buf[..n]);
        total += n as u64;
    }
    Ok((format!("sha256:{:x}", hasher.finalize()), total))
}
/// Best-effort removal of the preserved-dist directory `dest`.
///
/// Never fails: an already-missing directory is ignored silently, and any
/// other removal error is only reported as a warning on stderr.
pub(super) fn remove_preserved_on_drift(dest: &Path) {
    let Err(err) = std::fs::remove_dir_all(dest) else {
        return;
    };
    if err.kind() == std::io::ErrorKind::NotFound {
        return;
    }
    eprintln!(
        "warn: failed to remove preserved-dist `{}` after drift detection: {}",
        dest.display(),
        err
    );
}
#[cfg(test)]
mod tests {
use super::*;
use anodizer_core::{ArtifactRow, DeterminismReport};
use tempfile::TempDir;
// Fixture: a report for `commit` with no artifact rows and no drift.
// Tests push `ArtifactRow`s onto `artifacts` when they need recorded hashes.
fn empty_report(commit: &str) -> DeterminismReport {
DeterminismReport {
schema_version: 1,
anodize_version: "test".into(),
commit: commit.into(),
commit_timestamp: 1_715_000_000,
runs: 2,
stages_under_test: vec![],
allowlist: anodizer_core::AllowList::default(),
artifacts: vec![],
drift: vec![],
drift_count: 0,
}
}
// The manifest must carry the hash recorded in the report, not a digest of
// whatever bytes happen to be on disk when the manifest is written.
#[test]
fn write_context_prefers_report_hash_over_fresh_rehash() {
let tmp = TempDir::new().unwrap();
let dest = tmp.path();
std::fs::write(dest.join("foo.tar.gz"), b"original-bytes").unwrap();
// Digest of the original bytes, in the same `sha256:<hex>` form the code emits.
let recorded_hash = {
use sha2::{Digest, Sha256};
let mut h = Sha256::new();
h.update(b"original-bytes");
format!("sha256:{:x}", h.finalize())
};
let mut report = empty_report("deadbeef");
// The row's `name` is what the manifest writer matches against the
// file's dest-relative path.
report.artifacts.push(ArtifactRow {
name: "foo.tar.gz".into(),
path: "dist/foo.tar.gz".into(),
size_bytes: b"original-bytes".len() as u64,
stage: "archive".into(),
deterministic: true,
nondeterministic_reason: None,
hash: Some(recorded_hash.clone()),
hashes: vec![],
});
// Overwrite the file after recording: a fresh re-hash would now disagree
// with the report, which is how the test tells the two code paths apart.
std::fs::write(dest.join("foo.tar.gz"), b"mutated-bytes-after-record").unwrap();
write_preserved_dist_context(
dest,
ContextInputs {
report: &report,
harness_targets: None,
version_hint: "",
},
)
.expect("write_preserved_dist_context");
let ctx_bytes = std::fs::read(dest.join("context.json")).unwrap();
let ctx: PreservedDistContext = serde_json::from_slice(&ctx_bytes).unwrap();
let entry = ctx
.artifacts
.iter()
.find(|a| a.name == "foo.tar.gz")
.expect("manifest must include foo.tar.gz");
assert_eq!(
entry.sha256, recorded_hash,
"context.json must prefer the report's hash over re-hashing disk bytes"
);
}
// With no sidecar files present, `targets` comes from the harness list
// (sorted) and `version` comes from the hint.
#[test]
fn targets_falls_back_to_harness_targets_when_artifacts_json_lacks_them() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path();
    let report = empty_report("c0ffee");
    // Deliberately unsorted so the assertion also checks the ordering.
    let fallback_targets: Vec<String> = ["x86_64-unknown-linux-gnu", "aarch64-unknown-linux-gnu"]
        .iter()
        .map(|t| t.to_string())
        .collect();

    let inputs = ContextInputs {
        report: &report,
        harness_targets: Some(&fallback_targets),
        version_hint: "0.0.0-fixture",
    };
    write_preserved_dist_context(root, inputs).unwrap();

    let raw = std::fs::read(root.join("context.json")).unwrap();
    let ctx: PreservedDistContext = serde_json::from_slice(&raw).unwrap();
    let expected_targets = vec![
        "aarch64-unknown-linux-gnu".to_string(),
        "x86_64-unknown-linux-gnu".to_string(),
    ];
    assert_eq!(
        ctx.targets, expected_targets,
        "harness_targets must populate `targets` when artifacts.json is missing"
    );
    assert_eq!(
        ctx.version, "0.0.0-fixture",
        "version_hint must populate `version` when metadata.json is missing"
    );
}
// Unparseable sidecar files must be treated like missing ones: the write
// succeeds and the harness-supplied fallbacks are used.
#[test]
fn malformed_sibling_json_falls_back_to_defaults() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path();
    let garbage: [(&str, &[u8]); 2] = [
        ("artifacts.json", b"{not valid json"),
        ("metadata.json", b"also not valid"),
    ];
    for (file_name, bytes) in garbage {
        std::fs::write(root.join(file_name), bytes).unwrap();
    }

    let report = empty_report("badf00d");
    let fallback_targets = vec!["x86_64-pc-windows-msvc".to_string()];
    let inputs = ContextInputs {
        report: &report,
        harness_targets: Some(&fallback_targets),
        version_hint: "1.2.3-snapshot",
    };
    write_preserved_dist_context(root, inputs)
        .expect("malformed sibling JSON must not abort the manifest write");

    let raw = std::fs::read(root.join("context.json")).unwrap();
    let ctx: PreservedDistContext = serde_json::from_slice(&raw).unwrap();
    assert_eq!(ctx.targets, vec!["x86_64-pc-windows-msvc".to_string()]);
    assert_eq!(ctx.version, "1.2.3-snapshot");
}
// After a successful write only `context.json` may remain; the staging
// `.tmp` file must have been renamed away.
#[test]
fn write_context_is_atomic_no_tmp_left_behind() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path();
    let report = empty_report("a1b2c3d");
    let inputs = ContextInputs {
        report: &report,
        harness_targets: None,
        version_hint: "",
    };
    write_preserved_dist_context(root, inputs).unwrap();

    let final_path = root.join("context.json");
    let staging_path = root.join("context.json.tmp");
    assert!(final_path.exists());
    assert!(
        !staging_path.exists(),
        "atomic write must rename the .tmp away on success"
    );
}
// Sidecar manifests live next to the artifacts but must not be listed as
// artifacts themselves; ordinary files must still be listed.
#[test]
fn context_excludes_harness_sidecar_manifests() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path();
    let fixture_files: [(&str, &[u8]); 3] = [
        ("artifacts.json", b"[]"),
        ("metadata.json", b"{}"),
        ("foo.tar.gz", b"real artifact bytes"),
    ];
    for (file_name, bytes) in fixture_files {
        std::fs::write(root.join(file_name), bytes).unwrap();
    }

    let report = empty_report("c0ffee");
    let inputs = ContextInputs {
        report: &report,
        harness_targets: None,
        version_hint: "0.0.0-fixture",
    };
    write_preserved_dist_context(root, inputs).unwrap();

    let raw = std::fs::read(root.join("context.json")).unwrap();
    let ctx: PreservedDistContext = serde_json::from_slice(&raw).unwrap();
    let names: Vec<&str> = ctx.artifacts.iter().map(|a| a.name.as_str()).collect();
    let listed = |wanted: &str| names.iter().any(|n| *n == wanted);

    assert!(
        !listed("artifacts.json"),
        "artifacts.json must not appear as a preserved artifact (would dangle after rename): {names:?}"
    );
    assert!(
        !listed("metadata.json"),
        "metadata.json must not appear as a preserved artifact (would dangle after rename): {names:?}"
    );
    assert!(
        listed("foo.tar.gz"),
        "real artifacts must still be preserved: {names:?}"
    );
}
// Nested per-architecture staging directories must be copied along with
// the top-level shippable files.
#[test]
fn preserve_dist_tree_includes_makeself_per_arch_dirs() {
    let worktree = TempDir::new().unwrap();
    let preserved = TempDir::new().unwrap();
    let dist = worktree.path().join("dist");

    // One shippable installer directly under dist/linux.
    let linux_dir = dist.join("linux");
    std::fs::create_dir_all(&linux_dir).unwrap();
    std::fs::write(
        linux_dir.join("anodizer-0.3.0-linux-amd64-installer.run"),
        b"shippable .run bytes",
    )
    .unwrap();

    // Two staging dirs three levels deep, each holding same-named files.
    let staging_root = dist.join("makeself").join("default");
    for arch in ["linux_amd64", "linux_arm64"] {
        let stage_dir = staging_root.join(arch);
        std::fs::create_dir_all(&stage_dir).unwrap();
        std::fs::write(stage_dir.join("anodizer"), format!("staging-{}", arch)).unwrap();
        std::fs::write(stage_dir.join("makeself-install.sh"), b"install").unwrap();
    }

    preserve_dist_tree(worktree.path(), preserved.path())
        .expect("preserve_dist_tree must succeed");

    let kept = |rel: &str| preserved.path().join(rel).exists();
    assert!(
        kept("linux/anodizer-0.3.0-linux-amd64-installer.run"),
        "shippable .run must survive preservation",
    );
    assert!(
        kept("makeself/default/linux_amd64/anodizer"),
        "makeself/linux_amd64/anodizer must be preserved",
    );
    assert!(
        kept("makeself/default/linux_arm64/anodizer"),
        "makeself/linux_arm64/anodizer must be preserved",
    );
}
// End-to-end: discover + hash in the worktree, build a report from those
// hashes, preserve the tree, tamper with one preserved file, then check the
// manifest still carries the report's hashes for both same-named binaries.
#[test]
fn multi_arch_round_trip_preserves_distinct_hashes_from_report() {
use super::super::artifacts::{discover_artifacts, hash_artifacts};
let wt = TempDir::new().unwrap();
let dest = TempDir::new().unwrap();
let dist = wt.path().join("dist");
// Two files with the same file name but different directories and contents.
std::fs::create_dir_all(dist.join("makeself/default/linux_amd64")).unwrap();
std::fs::create_dir_all(dist.join("makeself/default/linux_arm64")).unwrap();
std::fs::write(
dist.join("makeself/default/linux_amd64/anodizer"),
b"amd64-bytes-original",
)
.unwrap();
std::fs::write(
dist.join("makeself/default/linux_arm64/anodizer"),
b"arm64-bytes-original",
)
.unwrap();
let paths = discover_artifacts(wt.path()).unwrap();
let hash_map = hash_artifacts(wt.path(), &paths).unwrap();
// The test expects `hash_map` to be keyed by dist-relative path; indexing
// below panics if a key is absent.
let amd64_key = "makeself/default/linux_amd64/anodizer";
let arm64_key = "makeself/default/linux_arm64/anodizer";
let amd64_hash = hash_map[amd64_key].hash.clone();
let arm64_hash = hash_map[arm64_key].hash.clone();
assert_ne!(
amd64_hash, arm64_hash,
"fixture must produce distinct hashes"
);
let mut report = empty_report("e2e-commit");
// Row `name` is set to the dist-relative key, which is what the manifest
// writer looks up for each preserved file.
for (key, info) in &hash_map {
report.artifacts.push(ArtifactRow {
name: key.clone(),
path: format!("dist/{}", key),
size_bytes: info.size_bytes,
stage: info.stage.clone(),
deterministic: true,
nondeterministic_reason: None,
hash: Some(info.hash.clone()),
hashes: vec![],
});
}
preserve_dist_tree(wt.path(), dest.path()).expect("preserve_dist_tree");
// Tamper with the preserved copy only; a re-hash would now differ from
// the recorded arm64 hash.
std::fs::write(
dest.path().join("makeself/default/linux_arm64/anodizer"),
b"arm64-bytes-MUTATED",
)
.unwrap();
write_preserved_dist_context(
dest.path(),
ContextInputs {
report: &report,
harness_targets: None,
version_hint: "0.0.0-fixture",
},
)
.expect("write_preserved_dist_context");
let ctx: PreservedDistContext =
serde_json::from_slice(&std::fs::read(dest.path().join("context.json")).unwrap())
.unwrap();
// Look entries up by `path`: both share the file name `anodizer`.
let amd64_entry = ctx
.artifacts
.iter()
.find(|a| a.path == amd64_key)
.unwrap_or_else(|| panic!("amd64 entry missing in {:?}", ctx.artifacts));
let arm64_entry = ctx
.artifacts
.iter()
.find(|a| a.path == arm64_key)
.unwrap_or_else(|| panic!("arm64 entry missing in {:?}", ctx.artifacts));
assert_eq!(
amd64_entry.sha256, amd64_hash,
"amd64 entry must carry the harness-recorded hash"
);
assert_eq!(
arm64_entry.sha256, arm64_hash,
"arm64 entry must carry the harness-recorded hash even after \
the bytes on disk were tampered with — proves the lookup hit \
the report instead of re-hashing"
);
}
// A file one byte larger than the 64 KiB read buffer forces at least two
// reads; size and digest must still match a one-shot hash of the same bytes.
#[test]
fn hash_file_streaming_handles_multi_chunk_files() {
    use sha2::{Digest, Sha256};

    let scratch = TempDir::new().unwrap();
    let payload = vec![0xAB_u8; 64 * 1024 + 1];
    let file_path = scratch.path().join("big.bin");
    std::fs::write(&file_path, &payload).unwrap();

    let (digest, byte_count) = hash_file_streaming(&file_path).unwrap();
    assert_eq!(byte_count, payload.len() as u64);
    assert!(digest.starts_with("sha256:"));

    let mut reference = Sha256::new();
    reference.update(&payload);
    let expected_digest = format!("sha256:{:x}", reference.finalize());
    assert_eq!(digest, expected_digest);
}
}