mod artifacts;
mod drift;
mod env;
mod preserve;
use anodizer_core::git::worktree::Worktree;
use anodizer_core::harness_signing::EphemeralSigningKeys;
use anodizer_core::{AllowList, ArtifactRow, CURRENT_SCHEMA_VERSION, DeterminismReport, DriftRow};
use anyhow::{Context, Result};
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::path::{Path, PathBuf};
use artifacts::{
ArtifactInfo, copy_artifacts_to_dump, discover_artifacts, hash_artifacts, prune_dump_to_drifted,
};
use drift::{inject_drift_byte, pick_first_artifact_for_stage, summarize_drift};
use env::{BuildSubprocessEnv, build_subprocess_env};
use preserve::{
ContextInputs, preserve_dist_tree, remove_preserved_on_drift, write_preserved_dist_context,
};
/// A pipeline stage the determinism harness can be asked to exercise.
///
/// Every variant has a fixed lowercase name, available through [`StageId::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StageId {
    Build,
    Source,
    Upx,
    Archive,
    Nfpm,
    Makeself,
    Snapcraft,
    Sbom,
    Sign,
    Checksum,
}

impl StageId {
    /// Returns the lowercase name of this stage, e.g. `"build"` for [`StageId::Build`].
    ///
    /// The match is exhaustive on purpose: adding a variant without a name is a
    /// compile error.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Build => "build",
            Self::Source => "source",
            Self::Upx => "upx",
            Self::Archive => "archive",
            Self::Nfpm => "nfpm",
            Self::Makeself => "makeself",
            Self::Snapcraft => "snapcraft",
            Self::Sbom => "sbom",
            Self::Sign => "sign",
            Self::Checksum => "checksum",
        }
    }
}
/// Stage names that [`compute_extra_skip`] never places on the skip list.
const PRESERVE_SET: &[&str] = &["validate", "before", "templatefiles"];

/// Builds the list of additional stage names to skip, given the stages under test.
///
/// Walks `VALID_RELEASE_SKIPS` in its declared order and keeps every name that is
/// not one of the `requested` stages, not in [`PRESERVE_SET`], and not in
/// `SIDE_EFFECT_STAGES`.
fn compute_extra_skip(requested: &[StageId]) -> Vec<String> {
    use anodizer_core::context::VALID_RELEASE_SKIPS;
    use anodizer_core::determinism_runner::SIDE_EFFECT_STAGES;

    let wanted: BTreeSet<&str> = requested.iter().map(|stage| stage.as_str()).collect();

    let mut skips = Vec::new();
    for &candidate in VALID_RELEASE_SKIPS.iter() {
        let excluded = wanted.contains(&candidate)
            || PRESERVE_SET.contains(&candidate)
            || SIDE_EFFECT_STAGES.contains(&candidate);
        if !excluded {
            skips.push(candidate.to_string());
        }
    }
    skips
}
/// Tests an allowlist `pattern` against an artifact name.
///
/// A pattern that starts with `*` matches any artifact ending with the rest of the
/// pattern (only that single leading `*` is special). Any other pattern must equal
/// the artifact name exactly.
fn matches_artifact_pattern(pattern: &str, artifact: &str) -> bool {
    match pattern.strip_prefix('*') {
        Some(tail) => artifact.ends_with(tail),
        None => pattern == artifact,
    }
}
/// Configuration for one determinism check: which commit to build, how many times,
/// and where the outputs go. `Harness::run` drives the whole check.
pub struct Harness {
/// Repository the per-run worktrees are added to. Also the parent of the fallback
/// `.det-worktrees` directory used when `RUNNER_TEMP` is not set.
pub repo_root: PathBuf,
/// Commit passed to `Worktree::add` for every run and copied into the report.
pub commit: String,
/// Stages requested for the check; reported verbatim as `stages_under_test`.
/// `run` may drop `StageId::Sign` from the set it actually executes.
pub stages: Vec<StageId>,
/// Number of build iterations whose artifact hashes are compared.
pub runs: u32,
/// Timestamp handed to key provisioning and the subprocess environment, and
/// stored as the report's `commit_timestamp`.
/// NOTE(review): presumably a SOURCE_DATE_EPOCH value — confirm.
pub sde: i64,
/// Patterns whose matching artifacts are excluded from the drift count; copied
/// into the report.
pub allowlist: AllowList,
/// Report location. Within this file only its parent directory is used, as the
/// root of the `drift-bins` artifact dumps.
pub report_path: PathBuf,
/// When set, a stage name: on each run the first artifact picked for that stage
/// has a drift byte injected before hashing.
pub inject_drift: Option<String>,
/// Optional target list forwarded to the build subprocess and to the preserved
/// dist context.
pub targets: Option<Vec<String>>,
/// When set, run 0's dist tree is copied here; it is removed again if drift is
/// counted, otherwise a context file is written into it.
pub preserve_dist: Option<PathBuf>,
/// Forwarded into the preserved dist context.
pub version_hint: String,
/// Forwarded unchanged to `run_build_pipeline_subprocess`.
pub child_snapshot: bool,
}
impl Harness {
pub fn run(&self) -> Result<DeterminismReport> {
let mut per_run_hashes: Vec<BTreeMap<String, ArtifactInfo>> =
Vec::with_capacity(self.runs as usize);
let skip_sign_for_preserve = self.preserve_dist.is_some()
&& (std::env::var_os("COSIGN_KEY").is_some()
|| std::env::var_os("GPG_PRIVATE_KEY").is_some());
let effective_stages: Vec<StageId> = if skip_sign_for_preserve {
self.stages
.iter()
.copied()
.filter(|s| *s != StageId::Sign)
.collect()
} else {
self.stages.clone()
};
let signing_keys: Option<EphemeralSigningKeys> =
if effective_stages.contains(&StageId::Sign) {
Some(anodizer_core::harness_signing::provision_ephemeral_keys(
self.sde,
)?)
} else {
None
};
let worktree_root = std::env::var_os("RUNNER_TEMP")
.map(std::path::PathBuf::from)
.unwrap_or_else(|| self.repo_root.join(".det-worktrees"));
let _ = std::fs::create_dir_all(&worktree_root);
let worktree_path =
worktree_root.join(format!("anodize-determinism-{}", std::process::id()));
for run_idx in 0..self.runs {
let _ = std::fs::remove_dir_all(&worktree_path);
let worktree = Worktree::add(&self.repo_root, &worktree_path, &self.commit)
.with_context(|| format!("creating worktree for determinism run {}", run_idx))?;
let env = self.build_isolated_env(&worktree, signing_keys.as_ref())?;
self.run_build_pipeline(worktree.path(), &env, &effective_stages)
.with_context(|| format!("building pipeline for determinism run {}", run_idx))?;
let artifacts = discover_artifacts(worktree.path())?;
if let Some(stage) = self.inject_drift.as_deref() {
match pick_first_artifact_for_stage(&artifacts, stage) {
Some(victim) => {
inject_drift_byte(victim).with_context(|| {
format!(
"injecting drift byte into {} on run {}",
victim.display(),
run_idx
)
})?;
}
None => {
let summary: Vec<String> = artifacts
.iter()
.map(|p| {
let s = p.to_string_lossy();
format!(
" {} -> {}",
p.display(),
artifacts::infer_stage_from_path(&s)
)
})
.collect();
eprintln!(
"warn: --inject-drift={} matched no artifact on run {}; \
discovered artifacts ({}):\n{}",
stage,
run_idx,
artifacts.len(),
summary.join("\n")
);
}
}
}
per_run_hashes.push(hash_artifacts(worktree.path(), &artifacts)?);
if let Some(parent) = self.report_path.parent() {
let dump_root = parent.join("drift-bins").join(format!("run-{}", run_idx));
copy_artifacts_to_dump(worktree.path(), &artifacts, &dump_root).with_context(
|| {
format!(
"dumping artifacts to {} for determinism run {}",
dump_root.display(),
run_idx
)
},
)?;
}
if run_idx == 0
&& let Some(dest) = self.preserve_dist.as_ref()
{
preserve_dist_tree(worktree.path(), dest).with_context(|| {
format!(
"preserving run-0 dist tree from {} to {}",
worktree.path().join("dist").display(),
dest.display()
)
})?;
}
}
let report = self.build_report(per_run_hashes);
if let Some(parent) = self.report_path.parent() {
prune_dump_to_drifted(&parent.join("drift-bins"), &report);
}
if let Some(dest) = self.preserve_dist.as_ref() {
if report.drift_count > 0 {
remove_preserved_on_drift(dest);
} else {
write_preserved_dist_context(
dest,
ContextInputs {
report: &report,
harness_targets: self.targets.as_deref(),
version_hint: &self.version_hint,
},
)
.with_context(|| {
format!(
"writing context.json under preserved dist {}",
dest.display()
)
})?;
}
}
Ok(report)
}
/// Creates the per-worktree scratch directories and returns the environment map
/// for the build subprocess.
///
/// Everything lives under `<worktree>/.det-tmp`: `cargo`, `home` and the scratch
/// root itself are created on disk. The `target` path is only computed and passed
/// on, not created here.
///
/// # Errors
///
/// Returns the I/O error if any of the directories cannot be created.
fn build_isolated_env(
    &self,
    worktree: &Worktree,
    signing_keys: Option<&EphemeralSigningKeys>,
) -> Result<HashMap<String, String>> {
    let scratch = worktree.path().join(".det-tmp");
    let cargo_home = scratch.join("cargo");
    let cargo_target = scratch.join("target");
    let home_dir = scratch.join("home");

    for dir in [&scratch, &cargo_home, &home_dir] {
        std::fs::create_dir_all(dir)?;
    }

    let spec = BuildSubprocessEnv {
        cargo_home: &cargo_home,
        cargo_target: &cargo_target,
        tmpdir: &scratch,
        home_dir: &home_dir,
        sde: self.sde,
        worktree: worktree.path(),
        signing_keys,
    };
    Ok(build_subprocess_env(&spec))
}
/// Runs the build pipeline once, as a subprocess of the current anodize binary,
/// inside `worktree_path`.
///
/// The skip list is derived from `effective_stages` via `compute_extra_skip`;
/// `self.targets` and `self.child_snapshot` are forwarded unchanged.
///
/// # Errors
///
/// Fails if the current binary cannot be resolved or if the subprocess call
/// reports an error.
fn run_build_pipeline(
    &self,
    worktree_path: &Path,
    env: &HashMap<String, String>,
    effective_stages: &[StageId],
) -> Result<()> {
    use anodizer_core::determinism_runner::{
        current_anodize_binary, run_build_pipeline_subprocess,
    };

    let binary = current_anodize_binary()?;
    let skip_list = compute_extra_skip(effective_stages);
    let targets = self.targets.as_deref();
    run_build_pipeline_subprocess(
        &binary,
        worktree_path,
        env,
        targets,
        &skip_list,
        self.child_snapshot,
    )
}
/// Compares the per-run artifact hashes and assembles the determinism report.
///
/// For every artifact name seen in any run (in sorted order) one `ArtifactRow` is
/// emitted. An artifact is deterministic only if it is present in every run with
/// the same hash; a run that lacks it contributes the placeholder `"<missing>"`.
///
/// A non-deterministic artifact counts as drift unless it has an allow reason:
/// * a matching allowlist entry (see `resolve_allow_reason`), or
/// * it belongs to the `sign` stage, is present in **every** run, and only its
///   bytes differ.
///
/// A signature that is absent from a run is not signature-byte variance, so it is
/// reported as drift rather than auto-exempted.
///
/// Row metadata (path, size, stage) comes from the last run that produced the
/// artifact.
fn build_report(
    &self,
    per_run_hashes: Vec<BTreeMap<String, ArtifactInfo>>,
) -> DeterminismReport {
    const MISSING: &str = "<missing>";

    // Sorted union of artifact names, borrowed from the run maps.
    let all_names: BTreeSet<&String> = per_run_hashes
        .iter()
        .flat_map(|run| run.keys())
        .collect();

    let mut artifacts: Vec<ArtifactRow> = Vec::with_capacity(all_names.len());
    let mut drift: Vec<DriftRow> = Vec::new();
    let mut drift_count: u32 = 0;

    for name in all_names {
        let per_run: Vec<Option<&ArtifactInfo>> =
            per_run_hashes.iter().map(|run| run.get(name)).collect();
        let info = per_run
            .iter()
            .rev()
            .find_map(|slot| *slot)
            .expect("artifact name came from union of run maps");

        let any_missing = per_run.iter().any(Option::is_none);
        let hashes: Vec<String> = per_run
            .iter()
            .map(|slot| slot.map_or_else(|| MISSING.to_string(), |found| found.hash.clone()))
            .collect();
        let all_equal = !any_missing && hashes.windows(2).all(|pair| pair[0] == pair[1]);

        // Only byte-level variance of a signature present in every run is excused.
        let signed_artifact_drift = !all_equal && !any_missing && info.stage == "sign";
        let allow_reason = self.resolve_allow_reason(name).or_else(|| {
            signed_artifact_drift.then(|| {
                "signed artifact: signature bytes vary by signer \
                 (cosign ECDSA random nonce); validate via \
                 `cosign verify-blob` / `gpg --verify`"
                    .to_string()
            })
        });
        let counts_as_drift = !all_equal && allow_reason.is_none();

        // Deterministic rows carry the single shared hash; others carry every
        // per-run hash.
        let (hash, row_hashes) = if all_equal {
            (hashes.first().cloned(), Vec::new())
        } else {
            (None, hashes.clone())
        };
        artifacts.push(ArtifactRow {
            name: name.clone(),
            path: info.relative_path.clone(),
            size_bytes: info.size_bytes,
            stage: info.stage.clone(),
            deterministic: all_equal,
            nondeterministic_reason: allow_reason,
            hash,
            hashes: row_hashes,
        });

        if counts_as_drift {
            let summary = summarize_drift(name, &per_run_hashes);
            drift.push(DriftRow {
                artifact: name.clone(),
                hashes,
                differing_bytes_summary: summary,
            });
            drift_count += 1;
        }
    }

    DeterminismReport {
        schema_version: CURRENT_SCHEMA_VERSION,
        anodize_version: env!("CARGO_PKG_VERSION").into(),
        commit: self.commit.clone(),
        commit_timestamp: self.sde,
        runs: self.runs,
        stages_under_test: self.stages.iter().map(|s| s.as_str().into()).collect(),
        allowlist: self.allowlist.clone(),
        artifacts,
        drift,
        drift_count,
    }
}
/// Looks up the allowlist reason for `artifact_name`, if any.
///
/// Compile-time entries are checked first, then runtime entries. Within each list
/// the first entry whose pattern matches wins.
fn resolve_allow_reason(&self, artifact_name: &str) -> Option<String> {
    self.allowlist
        .compile_time
        .iter()
        .find(|entry| matches_artifact_pattern(&entry.artifact, artifact_name))
        .map(|entry| entry.reason.clone())
        .or_else(|| {
            self.allowlist
                .runtime
                .iter()
                .find(|entry| matches_artifact_pattern(&entry.artifact, artifact_name))
                .map(|entry| entry.reason.clone())
        })
}
}
#[cfg(test)]
mod tests {
use super::artifacts::{HEAD_SAMPLE_BYTES, TAIL_SAMPLE_BYTES, infer_stage_from_path};
use super::*;
use anodizer_core::AllowListEntry;
/// Test fixture: a `Harness` with placeholder paths, two runs, the archive and
/// checksum stages, and a default allowlist.
fn empty_harness() -> Harness {
    let stages = vec![StageId::Archive, StageId::Checksum];
    Harness {
        repo_root: PathBuf::from("/tmp/unused"),
        report_path: PathBuf::from("/tmp/unused/report.json"),
        commit: String::from("deadbeef"),
        stages,
        runs: 2,
        sde: 1_715_000_000,
        allowlist: AllowList::default(),
        inject_drift: None,
        targets: None,
        preserve_dist: None,
        version_hint: String::new(),
        child_snapshot: true,
    }
}
/// Test helper: converts in-memory `(name, bytes)` lists, one list per run, into
/// the per-run hash maps that `build_report` consumes.
///
/// Each entry gets a `sha256:<hex>` hash, a `dist/<name>` relative path, a stage
/// inferred from its name, a head sample of at most `HEAD_SAMPLE_BYTES`, and a
/// tail sample only when the content is longer than head and tail combined.
/// The harness argument is accepted but unused.
fn run_with_files(
    h: &Harness,
    runs: Vec<Vec<(&str, &[u8])>>,
) -> Vec<BTreeMap<String, ArtifactInfo>> {
    use sha2::{Digest, Sha256};

    let _ = h;
    let mut per_run = Vec::with_capacity(runs.len());
    for files in runs {
        let mut map = BTreeMap::new();
        for (name, bytes) in files {
            let total = bytes.len();
            let head_len = total.min(HEAD_SAMPLE_BYTES);
            let tail_sample = if total > HEAD_SAMPLE_BYTES + TAIL_SAMPLE_BYTES {
                bytes[total - TAIL_SAMPLE_BYTES..].to_vec()
            } else {
                Vec::new()
            };
            let info = ArtifactInfo {
                hash: format!("sha256:{:x}", Sha256::digest(bytes)),
                size_bytes: total as u64,
                relative_path: format!("dist/{}", name),
                stage: infer_stage_from_path(name),
                head_sample: bytes[..head_len].to_vec(),
                tail_sample,
            };
            map.insert(name.to_string(), info);
        }
        per_run.push(map);
    }
    per_run
}
/// Two identical runs yield a single deterministic row, and the report survives a
/// JSON round trip unchanged.
#[test]
fn harness_report_shape_serializes_correctly() {
    let harness = empty_harness();
    let report = harness.build_report(run_with_files(
        &harness,
        vec![
            vec![("anodizer_0.2.1.tar.gz", b"hello")],
            vec![("anodizer_0.2.1.tar.gz", b"hello")],
        ],
    ));

    assert_eq!(report.schema_version, 1);
    assert_eq!(report.runs, 2);
    assert_eq!(report.commit, "deadbeef");
    assert_eq!(report.stages_under_test, vec!["archive", "checksum"]);
    assert_eq!(report.drift_count, 0);
    assert_eq!(report.artifacts.len(), 1);

    let only_row = &report.artifacts[0];
    assert!(only_row.deterministic);
    assert!(only_row.hash.is_some());
    assert!(only_row.hashes.is_empty());

    let json = serde_json::to_string_pretty(&report).unwrap();
    let decoded: DeterminismReport = serde_json::from_str(&json).unwrap();
    assert_eq!(decoded, report);
}
/// One stable and one drifting artifact: only the drifting one produces a drift
/// row, with both hashes and a byte-offset summary.
#[test]
fn harness_diffs_artifacts_by_sha256() {
    let harness = empty_harness();
    let report = harness.build_report(run_with_files(
        &harness,
        vec![
            vec![("stable.tar.gz", b"hello"), ("drifting.tar.gz", b"first")],
            vec![("stable.tar.gz", b"hello"), ("drifting.tar.gz", b"second")],
        ],
    ));

    assert_eq!(report.drift_count, 1);
    assert_eq!(report.drift.len(), 1);

    let drift_row = &report.drift[0];
    assert_eq!(drift_row.artifact, "drifting.tar.gz");
    assert_eq!(drift_row.hashes.len(), 2);
    assert_ne!(drift_row.hashes[0], drift_row.hashes[1]);

    let summary = drift_row
        .differing_bytes_summary
        .as_deref()
        .expect("drift row must populate differing_bytes_summary");
    assert!(
        summary.contains("offset 0x0"),
        "summary should point at byte 0 for diverging single-byte prefixes. got={summary}"
    );

    let stable = report
        .artifacts
        .iter()
        .find(|row| row.name == "stable.tar.gz")
        .unwrap();
    assert!(stable.deterministic);

    let drifting = report
        .artifacts
        .iter()
        .find(|row| row.name == "drifting.tar.gz")
        .unwrap();
    assert!(!drifting.deterministic);
    assert!(drifting.hash.is_none());
    assert_eq!(drifting.hashes.len(), 2);
}
/// An artifact matched by a compile-time allowlist entry is still marked
/// non-deterministic, carries the entry's reason, and does not count as drift.
#[test]
fn harness_excludes_allowlisted_artifacts_from_drift() {
    let mut harness = empty_harness();
    let crate_exemption = AllowListEntry {
        artifact: "*.crate".into(),
        reason: "cargo package non-determinism".into(),
    };
    harness.allowlist.compile_time.push(crate_exemption);

    let report = harness.build_report(run_with_files(
        &harness,
        vec![
            vec![("anodizer-0.2.1.crate", b"crate-bytes-A")],
            vec![("anodizer-0.2.1.crate", b"crate-bytes-B")],
        ],
    ));

    assert_eq!(
        report.drift_count, 0,
        "allowlisted artifact must not bump drift_count"
    );

    let crate_row = &report.artifacts[0];
    assert_eq!(crate_row.name, "anodizer-0.2.1.crate");
    assert!(!crate_row.deterministic);
    assert_eq!(
        crate_row.nondeterministic_reason.as_deref(),
        Some("cargo package non-determinism")
    );
    assert_eq!(crate_row.hashes.len(), 2);
}
/// An artifact produced by only one of the runs is drift, and its hash list
/// contains the `<missing>` placeholder.
#[test]
fn harness_treats_missing_artifact_in_one_run_as_drift() {
    let harness = empty_harness();
    let report = harness.build_report(run_with_files(
        &harness,
        vec![vec![("only-in-run-1.tar.gz", b"present")], vec![]],
    ));

    assert_eq!(report.drift_count, 1);

    let drift_row = &report.drift[0];
    assert_eq!(drift_row.artifact, "only-in-run-1.tar.gz");
    assert!(drift_row
        .hashes
        .iter()
        .any(|hash| hash.as_str() == "<missing>"));
}
/// Leading-`*` patterns match by suffix; all other patterns match by equality.
#[test]
fn matches_artifact_pattern_handles_glob_and_exact() {
    let cases = [
        ("*.crate", "foo.crate", true),
        ("*.crate", "foo.tar.gz", false),
        ("exact.bin", "exact.bin", true),
        ("exact.bin", "other.bin", false),
    ];
    for (pattern, artifact, expected) in cases {
        assert!(matches_artifact_pattern(pattern, artifact) == expected);
    }
}
/// Pins the string name of every `StageId` variant.
///
/// The table lists all ten variants, so a renamed or mistyped stage name is caught
/// for every stage, not only a sample of them.
#[test]
fn stage_id_round_trips_to_string() {
    let expected = [
        (StageId::Build, "build"),
        (StageId::Source, "source"),
        (StageId::Upx, "upx"),
        (StageId::Archive, "archive"),
        (StageId::Nfpm, "nfpm"),
        (StageId::Makeself, "makeself"),
        (StageId::Snapcraft, "snapcraft"),
        (StageId::Sbom, "sbom"),
        (StageId::Sign, "sign"),
        (StageId::Checksum, "checksum"),
    ];
    for (stage, name) in expected {
        assert_eq!(stage.as_str(), name, "unexpected name for {stage:?}");
    }
}
/// With build, archive, sbom, sign and checksum requested, every packaging stage
/// listed below must show up in the extra skip list.
#[test]
fn harness_extra_skip_with_default_stages_includes_nfpm() {
    let extra = compute_extra_skip(&[
        StageId::Build,
        StageId::Archive,
        StageId::Sbom,
        StageId::Sign,
        StageId::Checksum,
    ]);
    let listed = |name: &str| extra.iter().any(|skip| skip == name);

    let must_be_skipped = [
        "nfpm",
        "nsis",
        "msi",
        "dmg",
        "pkg",
        "snapcraft",
        "source",
        "flatpak",
        "appbundle",
        "srpm",
        "upx",
        "makeself",
        "notarize",
    ];
    for name in must_be_skipped {
        assert!(
            listed(name),
            "compute_extra_skip(default-stages) missing `{name}`: {extra:?}"
        );
    }
}
/// Names in `PRESERVE_SET` must never appear in the extra skip list.
#[test]
fn harness_extra_skip_omits_preserve_set() {
    let extra = compute_extra_skip(&[StageId::Build, StageId::Archive]);
    let listed = |name: &str| extra.iter().any(|skip| skip == name);

    for &name in PRESERVE_SET {
        assert!(
            !listed(name),
            "compute_extra_skip emitted PRESERVE_SET stage `{name}`: {extra:?}"
        );
    }
}
/// `changelog` is not requested here, so it must be part of the extra skip list.
#[test]
fn harness_extra_skip_includes_changelog() {
    let extra = compute_extra_skip(&[StageId::Build, StageId::Archive]);
    let has_changelog = extra.iter().any(|skip| skip.as_str() == "changelog");
    assert!(
        has_changelog,
        "compute_extra_skip missing `changelog`: {extra:?}"
    );
}
/// Stages that were explicitly requested must not be placed on the skip list.
#[test]
fn harness_extra_skip_omits_requested_stages() {
    let requested = [StageId::Build, StageId::Archive, StageId::Sign];
    let extra = compute_extra_skip(&requested);
    let listed = |name: &str| extra.iter().any(|skip| skip == name);

    for name in ["build", "archive", "sign"] {
        assert!(
            !listed(name),
            "compute_extra_skip dropped requested stage `{name}`: {extra:?}"
        );
    }
}
/// No name from `SIDE_EFFECT_STAGES` may appear in the extra skip list.
#[test]
fn harness_extra_skip_excludes_side_effect_stages() {
    use anodizer_core::determinism_runner::SIDE_EFFECT_STAGES;

    let extra = compute_extra_skip(&[StageId::Build]);
    let listed = |name: &str| extra.iter().any(|skip| skip == name);

    for &name in SIDE_EFFECT_STAGES {
        assert!(
            !listed(name),
            "compute_extra_skip double-listed side-effect stage `{name}`: {extra:?}"
        );
    }
}
/// `drift_count` must equal the number of drift rows; here two of the three
/// artifacts differ between the runs.
#[test]
fn report_drift_count_matches_drift_array_len() {
    let harness = empty_harness();
    let report = harness.build_report(run_with_files(
        &harness,
        vec![
            vec![("a.tar.gz", b"x"), ("b.tar.gz", b"y"), ("c.tar.gz", b"z")],
            vec![
                ("a.tar.gz", b"x"),
                ("b.tar.gz", b"y-different"),
                ("c.tar.gz", b"z-different"),
            ],
        ],
    ));

    let drift_rows = report.drift.len() as u32;
    assert_eq!(drift_rows, report.drift_count);
    assert_eq!(report.drift_count, 2);
}
}