use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};
use anyhow::{anyhow, Context, Result};
use crate::provenance::{blake3_file, CommandCapture, FileHash, RunManifest};
/// Result of comparing a re-run's outputs against the hashes recorded in a receipt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Verdict {
    /// Every recorded output hash was matched by the re-run.
    Reproduced,
    /// At least one recorded output hash was not matched.
    Diverged,
    /// No comparison could be carried out.
    Inconclusive,
}

impl Verdict {
    /// Single source of truth pairing each verdict with its exit code and label.
    fn describe(self) -> (i32, &'static str) {
        match self {
            Self::Reproduced => (0, "REPRODUCED"),
            Self::Diverged => (6, "DIVERGED"),
            Self::Inconclusive => (7, "INCONCLUSIVE"),
        }
    }

    /// Process exit code for this verdict: 0, 6 or 7.
    pub fn exit_code(self) -> i32 {
        self.describe().0
    }

    /// Upper-case label used in the printed `VERDICT` line.
    pub fn label(self) -> &'static str {
        self.describe().1
    }
}
/// What [`classify_outputs`] returns: the overall verdict plus one message per mismatch.
#[derive(Debug)]
pub struct Outcome {
    /// `Reproduced` when `diffs` is empty, `Diverged` otherwise.
    pub verdict: Verdict,
    /// One human-readable line per recorded output that was not matched.
    pub diffs: Vec<String>,
}
/// Compares recorded output hashes against the hashes a re-run produced.
///
/// Matching is positional: `recorded[i]` is checked against `produced[i]`, where each
/// `produced` entry is a `(role, hash)` pair. A recorded entry with no counterpart at
/// the same index is reported as not produced. Entries of `produced` beyond
/// `recorded.len()` are never inspected.
///
/// Returns `Verdict::Reproduced` when no difference was found and `Verdict::Diverged`
/// otherwise, together with the list of differences.
pub fn classify_outputs(recorded: &[FileHash], produced: &[(String, String)]) -> Outcome {
    let diffs: Vec<String> = recorded
        .iter()
        .enumerate()
        .filter_map(|(idx, expected)| match produced.get(idx) {
            // Same hash at the same position: nothing to report.
            Some((_, actual)) if *actual == expected.blake3 => None,
            Some((role, actual)) => Some(format!(
                "output {role}: recorded {} got {actual}",
                expected.blake3
            )),
            None => Some(format!(
                "output {idx}: recorded {} but not produced",
                expected.blake3
            )),
        })
        .collect();

    let verdict = match diffs.len() {
        0 => Verdict::Reproduced,
        _ => Verdict::Diverged,
    };
    Outcome { verdict, diffs }
}
/// Side-by-side comparison of one recorded output with what the re-run produced.
#[derive(Debug, Clone)]
pub struct OutputCmp {
    /// Label identifying the output; `reproduce` fills this with `output[<index>]`.
    pub role: String,
    /// BLAKE3 hash stored in the receipt.
    pub recorded: String,
    /// BLAKE3 hash of the file written by the re-run.
    pub observed: String,
    /// Whether `observed` equals `recorded`.
    pub matched: bool,
}
/// Resource observations for the re-run on the current machine.
///
/// Every field is optional; `Default` yields all `None`.
#[derive(Debug, Clone, Default)]
pub struct ResourceHere {
    /// Peak RSS in bytes as recorded by the re-run's own manifest, when readable.
    pub peak_rss_bytes: Option<u64>,
    /// Memory budget in MB declared in the original receipt, when present and numeric.
    pub declared_budget_mb: Option<u64>,
    /// Whether the peak was admitted by the declared budget; `None` if either is unknown.
    pub fit: Option<bool>,
}
/// Everything `reproduce` reports back to its caller.
#[derive(Debug)]
pub struct ReproReport {
    /// Verdict text; one of the `Verdict` labels, or `"TAMPERED"` for a failed hash check.
    pub verdict_label: String,
    /// Process exit code that goes with `verdict_label`.
    pub exit_code: i32,
    /// Human-readable report lines, ending with the `VERDICT` line.
    pub lines: Vec<String>,
    /// Content hash of the receipt that was reproduced.
    pub parent_claim: String,
    /// Subcommand recorded in that receipt.
    pub parent_subcommand: String,
    /// Per-output comparison results; empty when no comparison took place.
    pub outputs: Vec<OutputCmp>,
    /// Resource observations of the re-run; all `None` when no comparison took place.
    pub resource_here: ResourceHere,
    /// Build-identity values of the binary performing the reproduction.
    pub reproducer_code: BTreeMap<String, String>,
    /// `true` only when the re-run finished and its outputs were hashed and compared.
    pub compared: bool,
}
/// Reports whether `path` ends in one of the plain-text output suffixes
/// (`.vcf`, `.tsv`, `.txt`, `.gvcf`), ignoring ASCII case.
fn output_is_text(path: &str) -> bool {
    const TEXT_SUFFIXES: [&str; 4] = [".vcf", ".tsv", ".txt", ".gvcf"];
    let lowered = path.to_ascii_lowercase();
    TEXT_SUFFIXES
        .iter()
        .any(|suffix| lowered.ends_with(suffix))
}
/// Builds a map from content hash to path for the regular files directly inside
/// `inputs_dir` (no recursion).
///
/// Files that cannot be hashed are skipped rather than reported. When two files share
/// a hash, the one encountered first is kept.
///
/// # Errors
/// Fails if the directory cannot be read or a directory entry cannot be obtained.
fn index_inputs(inputs_dir: &Path) -> Result<BTreeMap<String, PathBuf>> {
    let entries = std::fs::read_dir(inputs_dir)
        .with_context(|| format!("failed to read --inputs dir {}", inputs_dir.display()))?;

    let mut by_hash = BTreeMap::new();
    for entry in entries {
        let candidate = entry?.path();
        if !candidate.is_file() {
            continue;
        }
        match blake3_file(&candidate) {
            Ok(digest) => {
                // `or_insert` keeps the first path seen for a given digest.
                by_hash.entry(digest).or_insert(candidate);
            }
            // Best-effort: an unhashable file simply does not take part in the index.
            Err(_) => {}
        }
    }
    Ok(by_hash)
}
fn make_temp_dir() -> Result<PathBuf> {
static COUNTER: AtomicU64 = AtomicU64::new(0);
let n = COUNTER.fetch_add(1, Ordering::Relaxed);
let d = std::env::temp_dir().join(format!("rosalind-reproduce-{}-{n}", std::process::id()));
std::fs::create_dir_all(&d)
.with_context(|| format!("failed to create temp dir {}", d.display()))?;
Ok(d)
}
/// Collects the build-identity parameters of the running binary.
///
/// A throwaway manifest named `reproduce-probe` is created and finalized, and the
/// identity keys present in its `params` are copied out. Keys the manifest does not
/// carry are omitted from the result.
fn current_build_identity() -> BTreeMap<String, String> {
    const IDENTITY_KEYS: [&str; 5] = [
        "code_git_sha",
        "code_dirty",
        "rustc_version",
        "target_triple",
        "deps_lock_blake3",
    ];

    let mut probe = RunManifest::new("reproduce-probe");
    probe.finalize();

    IDENTITY_KEYS
        .iter()
        .filter_map(|key| {
            probe
                .params
                .get(*key)
                .map(|value| ((*key).to_string(), value.clone()))
        })
        .collect()
}
/// Reads the `peak_rss_bytes` value from the manifest written next to a re-run output.
///
/// The manifest path is the output path with `.manifest.json` appended to the full
/// file name, not substituted for its extension. Returns `None` if the file cannot be
/// read, does not parse as a manifest, lacks the value, or the value is not a `u64`.
fn read_rerun_peak(output_temp: &Path) -> Option<u64> {
    let mut sidecar_name = output_temp.as_os_str().to_os_string();
    sidecar_name.push(".manifest.json");
    let sidecar = PathBuf::from(sidecar_name);

    let raw = std::fs::read_to_string(sidecar).ok()?;
    let rerun = RunManifest::from_canonical_json(&raw).ok()?;
    let recorded = rerun.get_recorded("peak_rss_bytes")?;
    recorded.parse().ok()
}
pub fn reproduce(manifest_path: &Path, inputs_dir: &Path) -> Result<ReproReport> {
let text = std::fs::read_to_string(manifest_path)
.with_context(|| format!("failed to read receipt {}", manifest_path.display()))?;
let manifest = RunManifest::from_canonical_json(&text)
.map_err(|e| anyhow!("malformed receipt {}: {e}", manifest_path.display()))?;
let parent_claim = manifest.content_hash();
let parent_subcommand = manifest.subcommand.clone();
let reproducer_code = current_build_identity();
let report =
|verdict_label: &str, exit_code: i32, lines: Vec<String>, compared: bool| ReproReport {
verdict_label: verdict_label.to_string(),
exit_code,
lines,
parent_claim: parent_claim.clone(),
parent_subcommand: parent_subcommand.clone(),
outputs: Vec::new(),
resource_here: ResourceHere::default(),
reproducer_code: reproducer_code.clone(),
compared,
};
if manifest.self_hash_ok() == Some(false) || manifest.measurement_hash_ok() == Some(false) {
return Ok(report(
"TAMPERED",
5,
vec![
" the receipt's self-hash does not match — it was modified after it was written"
.to_string(),
" VERDICT : TAMPERED".to_string(),
],
false,
));
}
let command = match manifest.params.get("command") {
Some(c) => c.clone(),
None => {
return Ok(report(
"INCONCLUSIVE",
7,
vec![
" pre-schema-5 receipt: no recorded command to replay".to_string(),
" VERDICT : INCONCLUSIVE".to_string(),
],
false,
))
}
};
if manifest.outputs.is_empty() {
return Ok(report(
"INCONCLUSIVE",
7,
vec![
" receipt records no outputs to compare (e.g. a stdout run)".to_string(),
" VERDICT : INCONCLUSIVE".to_string(),
],
false,
));
}
if let Some(o) = manifest.outputs.iter().find(|o| !output_is_text(&o.path)) {
return Ok(report(
"INCONCLUSIVE",
7,
vec![
format!(
" output {} is not byte-comparable in v1 (BAM/bgzf rests on a C zlib outside the contract)",
o.path
),
" VERDICT : INCONCLUSIVE".to_string(),
],
false,
));
}
let index = index_inputs(inputs_dir)?;
let located = index.len();
let locate = |h: &str| index.get(h).map(|p| p.display().to_string());
let work = make_temp_dir()?;
let mut out_temp: BTreeMap<String, PathBuf> = BTreeMap::new();
for (i, o) in manifest.outputs.iter().enumerate() {
let ext = Path::new(&o.path)
.extension()
.and_then(|e| e.to_str())
.unwrap_or("out");
out_temp.insert(o.blake3.clone(), work.join(format!("out_{i}.{ext}")));
}
let temp_output = |h: &str| {
out_temp
.get(h)
.map(|p| p.display().to_string())
.unwrap_or_else(|| work.join("out_unknown").display().to_string())
};
let argv = match CommandCapture::argv_from_command(&command, &locate, &temp_output) {
Ok(a) => a,
Err(e) => {
return Ok(report(
"INCONCLUSIVE",
7,
vec![
format!(" cannot reproduce: {e}"),
" (pass --inputs pointing at a directory holding the recorded files)"
.to_string(),
" VERDICT : INCONCLUSIVE".to_string(),
],
false,
))
}
};
let exe = std::env::current_exe().context("locating the rosalind binary to re-run")?;
let child = std::process::Command::new(&exe)
.args(&argv)
.output()
.context("re-running the recorded command")?;
if !child.status.success() {
return Ok(report(
"INCONCLUSIVE",
7,
vec![
format!(
" the re-run did not complete cleanly (exit {:?})",
child.status.code()
),
format!(
" stderr: {}",
String::from_utf8_lossy(&child.stderr).trim()
),
" VERDICT : INCONCLUSIVE".to_string(),
],
false,
));
}
let mut produced: Vec<(String, String)> = Vec::new();
let mut cmps: Vec<OutputCmp> = Vec::new();
for (i, o) in manifest.outputs.iter().enumerate() {
let temp = out_temp
.get(&o.blake3)
.expect("temp path per recorded output");
let observed = blake3_file(temp).with_context(|| {
format!(
"the re-run did not produce the expected output {}",
temp.display()
)
})?;
produced.push((format!("output[{i}]"), observed.clone()));
cmps.push(OutputCmp {
role: format!("output[{i}]"),
recorded: o.blake3.clone(),
observed: observed.clone(),
matched: observed == o.blake3,
});
}
let outcome = classify_outputs(&manifest.outputs, &produced);
let declared_budget_mb = manifest
.get_recorded("memory_budget_mb")
.and_then(|v| v.parse().ok());
let peak_here = manifest
.outputs
.first()
.and_then(|o| out_temp.get(&o.blake3))
.and_then(|p| read_rerun_peak(p));
let fit = match (peak_here, declared_budget_mb) {
(Some(peak), Some(mb)) => Some(crate::core::MemoryBudget::from_mb(mb).admits(peak)),
_ => None,
};
let resource_here = ResourceHere {
peak_rss_bytes: peak_here,
declared_budget_mb,
fit,
};
let recorded_sha = manifest.params.get("code_git_sha").map(String::as_str);
let here_sha = reproducer_code.get("code_git_sha").map(String::as_str);
let code_line = match (recorded_sha, here_sha) {
(Some(r), Some(h)) if r == h => format!(" code : git {} — matches", short(h)),
(Some(r), Some(h)) => format!(
" code : git {} — DIFFERS from recorded {} (byte-match still counts)",
short(h),
short(r)
),
_ => " code : (build-identity unavailable)".to_string(),
};
let mut lines = vec![
format!(" claim : {} (re-derived)", short(&parent_claim)),
code_line,
format!(" inputs : {located} file(s) indexed by content hash"),
];
for c in &cmps {
if c.matched {
lines.push(format!(
" output : {} OK byte-identical (blake3 {})",
c.role,
short(&c.observed)
));
} else {
lines.push(format!(
" output : {} DIVERGED (recorded {} got {})",
c.role,
short(&c.recorded),
short(&c.observed)
));
}
}
if let (Some(peak), Some(mb)) = (
resource_here.peak_rss_bytes,
resource_here.declared_budget_mb,
) {
lines.push(format!(
" resource : peak {} MiB vs declared {} MiB (here: this machine)",
peak / (1 << 20),
mb
));
}
lines.push(format!(" VERDICT : {}", outcome.verdict.label()));
std::fs::remove_dir_all(&work).ok();
Ok(ReproReport {
verdict_label: outcome.verdict.label().to_string(),
exit_code: outcome.verdict.exit_code(),
lines,
parent_claim,
parent_subcommand,
outputs: cmps,
resource_here,
reproducer_code,
compared: true,
})
}
/// Returns at most the first 10 bytes of `hex`, for compact display of hashes.
///
/// The input comes from receipt contents and is not guaranteed to be ASCII, so the cut
/// point is moved back to the nearest UTF-8 character boundary instead of slicing at a
/// fixed byte offset, which would panic in the middle of a multi-byte character.
fn short(hex: &str) -> &str {
    let mut end = hex.len().min(10);
    // Offset 0 is always a boundary, so this loop terminates.
    while !hex.is_char_boundary(end) {
        end -= 1;
    }
    &hex[..end]
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::provenance::FileHash;

    /// Builds a recorded-output entry with a placeholder path and the given hash.
    fn fh(hash: &str) -> FileHash {
        FileHash {
            path: "x".to_string(),
            blake3: hash.to_string(),
        }
    }

    /// A single output whose hash matches yields `Reproduced` with no diffs.
    #[test]
    fn classify_reproduced_when_all_outputs_match() {
        let outcome = classify_outputs(&[fh("h1")], &[("o0".to_string(), "h1".to_string())]);
        assert_eq!(outcome.verdict, Verdict::Reproduced);
        assert!(outcome.diffs.is_empty());
    }

    /// A mismatching hash yields `Diverged`, and the diff names both hashes.
    #[test]
    fn classify_diverged_names_the_first_mismatch() {
        let outcome = classify_outputs(
            &[fh("h1")],
            &[("o0".to_string(), "DIFFERENT".to_string())],
        );
        assert_eq!(outcome.verdict, Verdict::Diverged);
        let names_both_hashes = outcome
            .diffs
            .iter()
            .any(|line| line.contains("h1") && line.contains("DIFFERENT"));
        assert!(names_both_hashes);
    }

    /// Exit codes are pinned so that changing them shows up as a test failure.
    #[test]
    fn exit_codes_are_stable() {
        let expected = [
            (Verdict::Reproduced, 0),
            (Verdict::Diverged, 6),
            (Verdict::Inconclusive, 7),
        ];
        for (verdict, code) in expected {
            assert_eq!(verdict.exit_code(), code);
        }
    }
}