use anyhow::{Context, Result};
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use super::artifacts::{ArtifactInfo, infer_stage_from_path};
/// Describes where the artifact `name` first differs between runs, using only the head/tail
/// byte samples and the sizes recorded for each run.
///
/// Runs without an entry for `name` are skipped, so the `runN` labels in the message are
/// positions among the runs that do contain the artifact. Returns `None` when fewer than two
/// runs contain it.
///
/// The checks run in this order and the first one that matches produces the summary:
/// 1. a differing byte within the overlapping part of two head samples,
/// 2. head samples of different lengths,
/// 3. a differing byte within equally long tail samples of equally sized files,
/// 4. differing total sizes,
/// 5. otherwise the drift is attributed to the un-sampled middle.
pub(super) fn summarize_drift(
    name: &str,
    per_run_hashes: &[BTreeMap<String, ArtifactInfo>],
) -> Option<String> {
    // One (head, tail, size) triple per run that produced `name`.
    let mut samples: Vec<(&[u8], &[u8], u64)> = Vec::with_capacity(per_run_hashes.len());
    for run in per_run_hashes {
        if let Some(info) = run.get(name) {
            samples.push((
                info.head_sample.as_slice(),
                info.tail_sample.as_slice(),
                info.size_bytes,
            ));
        }
    }
    if samples.len() < 2 {
        return None;
    }

    let (head0, tail0, size0) = samples[0];
    // Every sample after the baseline, paired with its index into `samples`.
    let later_runs = || samples.iter().copied().enumerate().skip(1);

    // 1. First mismatching byte inside the shared prefix of the head samples.
    let head_hit = later_runs().find_map(|(idx, (head_n, _, _))| {
        head0
            .iter()
            .zip(head_n)
            .position(|(a, b)| a != b)
            .map(|offset| (idx, head_n, offset))
    });
    if let Some((idx, head_n, offset)) = head_hit {
        let base = format!(
            "first diff at offset {:#x} (run0={:#04x}, run{idx}={:#04x})",
            offset, head0[offset], head_n[offset]
        );
        return Some(append_text_diff(base, name, &samples, idx, offset));
    }

    // 2. Heads agree on their shared prefix but one of them is shorter.
    let length_mismatch = later_runs()
        .map(|(idx, (head_n, _, _))| (idx, head_n.len()))
        .find(|&(_, len_n)| len_n != head0.len());
    if let Some((idx, len_n)) = length_mismatch {
        return Some(format!(
            "head samples differ in length: run0={} bytes, run{idx}={} bytes",
            head0.len(),
            len_n
        ));
    }

    // 3. Tail samples are only comparable byte-for-byte when both files have the same size
    //    and both tails the same length (which, with a non-empty baseline tail, also means
    //    the other tail is non-empty).
    let tail_hit = if tail0.is_empty() {
        None
    } else {
        later_runs().find_map(|(idx, (_, tail_n, size_n))| {
            let comparable = size_n == size0 && tail_n.len() == tail0.len();
            if !comparable {
                return None;
            }
            tail0
                .iter()
                .zip(tail_n)
                .position(|(a, b)| a != b)
                .map(|off| (idx, tail_n, off))
        })
    };
    if let Some((idx, tail_n, off_in_tail)) = tail_hit {
        // Translate the offset inside the tail sample into an absolute file offset.
        let tail_start = (size0 as usize).saturating_sub(tail0.len());
        let abs_offset = tail_start + off_in_tail;
        let base = format!(
            "tail diff at offset {:#x} (size {}, run0={:#04x}, run{idx}={:#04x})",
            abs_offset, size0, tail0[off_in_tail], tail_n[off_in_tail]
        );
        return Some(append_text_diff(base, name, &samples, idx, abs_offset));
    }

    // 4 / 5. Nothing differs in the sampled regions; report sizes instead.
    let size_mismatch = later_runs()
        .map(|(idx, (_, _, size_n))| (idx, size_n))
        .find(|&(_, size_n)| size_n != size0);
    let summary = match size_mismatch {
        Some((idx, size_n)) => format!(
            "no diff in first {} or last {} bytes; total size run0={} run{idx}={} \
             (drift in un-sampled middle)",
            head0.len(),
            tail0.len(),
            size0,
            size_n
        ),
        None => format!(
            "no diff in first {} or last {} bytes; sizes equal at {} bytes \
             (drift in un-sampled middle)",
            head0.len(),
            tail0.len(),
            size0
        ),
    };
    Some(summary)
}
/// Rebuilds the complete file contents from its head and tail samples.
///
/// Succeeds only when the two samples together cover all `size` bytes, i.e. there is no
/// un-sampled gap between them. Where the samples overlap, the tail's bytes are used for the
/// overlapping region. Returns `None` when `size` does not fit in `usize`, when the samples
/// leave a gap, or when the tail sample is longer than `size`.
fn reconstruct_full(head: &[u8], tail: &[u8], size: u64) -> Option<Vec<u8>> {
    let total = usize::try_from(size).ok()?;
    let sampled = head.len() + tail.len();
    if sampled < total {
        return None;
    }
    // Number of leading bytes that must come from the head sample; the tail supplies the rest.
    let head_keep = total.checked_sub(tail.len())?;
    // `get` yields `None` if the head sample is too short to reach the start of the tail.
    let head_part = head.get(..head_keep)?;
    Some([head_part, tail].concat())
}
/// Reports whether an artifact should be treated as text.
///
/// An artifact counts as textual when its name, compared case-insensitively, ends with one of
/// the known text suffixes and the supplied `head` bytes contain no NUL byte.
fn looks_textual(name: &str, head: &[u8]) -> bool {
    const TEXT_SUFFIXES: [&str; 7] = [".json", ".txt", ".yaml", ".yml", ".toml", ".csv", ".md"];

    let folded = name.to_lowercase();
    let has_text_suffix = TEXT_SUFFIXES
        .iter()
        .any(|suffix| folded.ends_with(*suffix));
    has_text_suffix && head.iter().all(|&b| b != 0)
}
/// Returns the 1-based line number and the text of the line that contains byte `abs_offset`.
///
/// `abs_offset` is clamped to the last valid index, so an offset past the end resolves to the
/// final line. Empty input yields `(1, String::new())` rather than panicking.
///
/// Lines are split on `\n`. A newline byte belongs to the line it terminates, so an offset
/// that lands exactly on a `\n` reports the line before it, not the one after. The returned
/// text excludes the terminator and is decoded lossily as UTF-8.
fn line_at_offset(bytes: &[u8], abs_offset: usize) -> (usize, String) {
    let clamped = abs_offset.min(bytes.len().saturating_sub(1));
    // Search strictly *before* the offset: using an inclusive range here would both panic on
    // empty input and treat a newline at the offset as the start of the next line.
    let line_start = bytes[..clamped]
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |p| p + 1);
    let line_end = bytes[line_start..]
        .iter()
        .position(|&b| b == b'\n')
        .map_or(bytes.len(), |p| line_start + p);
    // Every newline before the line start closes one earlier line.
    let line_no = bytes[..line_start].iter().filter(|&&b| b == b'\n').count() + 1;
    let text = String::from_utf8_lossy(&bytes[line_start..line_end]).into_owned();
    (line_no, text)
}
/// Extends a byte-level drift summary with the differing text lines, when that is possible.
///
/// `samples[0]` is the baseline run and `samples[idx]` the run it is compared against; both
/// are indexed up front, so an out-of-range `idx` panics. `base` is returned unchanged when
/// the artifact does not look textual (judged from `name` and the baseline head sample only)
/// or when either file cannot be fully rebuilt from its samples. Otherwise the line containing
/// `abs_offset` in each run is appended, each capped at 240 characters.
fn append_text_diff(
    base: String,
    name: &str,
    samples: &[(&[u8], &[u8], u64)],
    idx: usize,
    abs_offset: usize,
) -> String {
    let (first_head, first_tail, first_size) = samples[0];
    let (other_head, other_tail, other_size) = samples[idx];

    if !looks_textual(name, first_head) {
        return base;
    }

    // Rebuild the baseline first; the other run is only rebuilt if that succeeded.
    let rebuilt = reconstruct_full(first_head, first_tail, first_size).and_then(|bytes0| {
        reconstruct_full(other_head, other_tail, other_size).map(|bytes_n| (bytes0, bytes_n))
    });

    match rebuilt {
        None => base,
        Some((bytes0, bytes_n)) => {
            let (ln0, full_line0) = line_at_offset(&bytes0, abs_offset);
            let (ln_n, full_line_n) = line_at_offset(&bytes_n, abs_offset);
            let line0 = truncate_for_summary(&full_line0, 240);
            let line_n = truncate_for_summary(&full_line_n, 240);
            format!(
                "{base}\ntext drift detected:\n run0 line {ln0}: {line0}\n run{idx} line {ln_n}: {line_n}"
            )
        }
    }
}
/// Caps `s` at `max` characters for display.
///
/// Strings of at most `max` characters are returned unchanged; longer ones are cut after the
/// `max`-th character (never inside a multi-byte character) and suffixed with `...`.
fn truncate_for_summary(s: &str, max: usize) -> String {
    // The byte index of the character at position `max` exists only if the string is too long.
    match s.char_indices().nth(max) {
        None => s.to_owned(),
        Some((cut, _)) => {
            let mut clipped = String::with_capacity(cut + 3);
            clipped.push_str(&s[..cut]);
            clipped.push_str("...");
            clipped
        }
    }
}
/// Returns the first path in `artifacts` whose stage, as inferred by `infer_stage_from_path`
/// from the lossily decoded path, equals `stage_name`; `None` if no path matches.
pub(super) fn pick_first_artifact_for_stage<'a>(
    artifacts: &'a [PathBuf],
    stage_name: &str,
) -> Option<&'a PathBuf> {
    for candidate in artifacts {
        if infer_stage_from_path(&candidate.to_string_lossy()) == stage_name {
            return Some(candidate);
        }
    }
    None
}
/// Appends a single byte to the existing file at `path` so that its contents change.
///
/// The byte value comes from a process-wide counter that starts at 1 and advances by one on
/// every call (`fetch_add` on an `AtomicU8` wraps around on overflow).
///
/// # Errors
///
/// Fails, with the path included in the error context, if the file cannot be opened for
/// appending or the write does not complete.
pub(super) fn inject_drift_byte(path: &Path) -> Result<()> {
    use std::io::Write;
    use std::sync::atomic::{AtomicU8, Ordering};

    static DRIFT_BYTE_COUNTER: AtomicU8 = AtomicU8::new(1);
    let drift = [DRIFT_BYTE_COUNTER.fetch_add(1, Ordering::Relaxed)];

    let mut file = std::fs::OpenOptions::new()
        .append(true)
        .open(path)
        .with_context(|| format!("opening {} for append", path.display()))?;
    file.write_all(&drift)
        .with_context(|| format!("appending drift byte to {}", path.display()))
}
// Unit tests for the drift helpers: stage-based artifact selection, drift summaries built
// from head/tail samples, and the drift-byte injector.
#[cfg(test)]
mod tests {
use super::*;
// The first path whose inferred stage matches is returned, even when later paths match too.
#[test]
fn pick_first_artifact_for_stage_picks_first_by_inferred_stage() {
let artifacts = vec![
PathBuf::from("dist/checksums.txt"),
PathBuf::from("dist/foo.tar.gz"),
PathBuf::from("dist/bar.tar.gz"),
];
let pick = pick_first_artifact_for_stage(&artifacts, "archive").unwrap();
assert_eq!(pick, &PathBuf::from("dist/foo.tar.gz"));
let pick = pick_first_artifact_for_stage(&artifacts, "checksum").unwrap();
assert_eq!(pick, &PathBuf::from("dist/checksums.txt"));
}
// A stage that no artifact maps to, or an unknown stage name, yields `None`.
#[test]
fn pick_first_artifact_for_stage_returns_none_for_missing_stage() {
let artifacts = vec![PathBuf::from("dist/foo.tar.gz")];
assert!(pick_first_artifact_for_stage(&artifacts, "sbom").is_none());
assert!(pick_first_artifact_for_stage(&artifacts, "bogus-stage").is_none());
}
// Fixture builder: produces one single-entry map per run for the artifact `name`.
// The head sample is the first `min(len, HEAD_SAMPLE_BYTES)` bytes. The tail sample is empty
// when the whole payload fits in the head; otherwise it starts at
// `max(len - TAIL_SAMPLE_BYTES, HEAD_SAMPLE_BYTES)`, so it never overlaps the head.
fn samples_from_bytes(
name: &str,
bytes_per_run: &[&[u8]],
) -> Vec<BTreeMap<String, super::ArtifactInfo>> {
use super::super::artifacts::{HEAD_SAMPLE_BYTES, TAIL_SAMPLE_BYTES};
bytes_per_run
.iter()
.map(|bytes| {
let head_len = bytes.len().min(HEAD_SAMPLE_BYTES);
let head_sample = bytes[..head_len].to_vec();
let tail_sample: Vec<u8> = if bytes.len() <= HEAD_SAMPLE_BYTES {
Vec::new()
} else {
let tail_start = bytes
.len()
.saturating_sub(TAIL_SAMPLE_BYTES)
.max(HEAD_SAMPLE_BYTES);
bytes[tail_start..].to_vec()
};
let mut map = BTreeMap::new();
map.insert(
name.to_string(),
super::ArtifactInfo {
hash: "sha256:fixture".into(),
size_bytes: bytes.len() as u64,
relative_path: format!("dist/{name}"),
stage: "unknown".into(),
head_sample,
tail_sample,
},
);
map
})
.collect()
}
// For a 24 KiB payload the fixture's tail sample must begin exactly where the head sample
// ends, leaving no un-sampled bytes between them.
#[test]
fn samples_from_bytes_mid_size_files_have_no_unsampled_gap() {
use super::super::artifacts::{HEAD_SAMPLE_BYTES, TAIL_SAMPLE_BYTES};
let mut payload = vec![0u8; 24 * 1024];
for (i, byte) in payload.iter_mut().enumerate() {
*byte = (i & 0xff) as u8;
}
let samples = samples_from_bytes("artifacts.json", &[&payload]);
let info = samples[0].get("artifacts.json").unwrap();
assert_eq!(info.head_sample.len(), HEAD_SAMPLE_BYTES);
assert!(
!info.tail_sample.is_empty(),
"tail must be non-empty for mid-size files; got {} bytes",
info.tail_sample.len()
);
let tail_start = payload.len() - info.tail_sample.len();
assert_eq!(
tail_start, HEAD_SAMPLE_BYTES,
"tail must start exactly where head ends to close the gap"
);
// `TAIL_SAMPLE_BYTES` is imported above but otherwise unused in this test; the discard
// below references it once.
let _ = TAIL_SAMPLE_BYTES; }
// Two 20 KiB runs that differ only at byte 18000: the summary must report that absolute
// offset (0x4650) together with both differing byte values.
#[test]
fn summarize_drift_localizes_mid_size_file_drift() {
let mut run0 = vec![0xaau8; 20 * 1024];
let mut run1 = run0.clone();
run0[18_000] = 0x11;
run1[18_000] = 0x22;
let samples = samples_from_bytes("artifacts.json", &[&run0, &run1]);
let summary = summarize_drift("artifacts.json", &samples).unwrap();
assert!(
summary.contains("offset 0x4650"),
"expected absolute offset 0x4650 (=18000), got: {summary}"
);
assert!(summary.contains("0x11"), "got: {summary}");
assert!(summary.contains("0x22"), "got: {summary}");
}
// For a `.json` artifact the summary must also carry the text-diff block showing the
// differing line from each run.
#[test]
fn summarize_drift_emits_text_diff_for_json_artifacts() {
let run0 = br#"{
"name": "anodize",
"size": 119,
"kind": "Signature"
}"#;
let run1 = br#"{
"name": "anodize",
"size": 120,
"kind": "Signature"
}"#;
let samples = samples_from_bytes("artifacts.json", &[run0.as_slice(), run1.as_slice()]);
let summary = summarize_drift("artifacts.json", &samples).unwrap();
assert!(
summary.contains("text drift detected"),
"expected text-diff block; got: {summary}"
);
assert!(
summary.contains("\"size\": 119"),
"expected run0 line content; got: {summary}"
);
assert!(
summary.contains("\"size\": 120"),
"expected run1 line content; got: {summary}"
);
}
// A `.exe` artifact full of NUL bytes must get the byte-offset summary only, with no
// text-diff block.
#[test]
fn summarize_drift_skips_text_diff_for_binary_artifacts() {
let mut run0 = vec![0u8; 256];
let mut run1 = run0.clone();
run0[100] = 0x11;
run1[100] = 0x22;
let samples = samples_from_bytes("anodize.exe", &[&run0, &run1]);
let summary = summarize_drift("anodize.exe", &samples).unwrap();
assert!(
!summary.contains("text drift detected"),
"binary artifact must not trigger text diff; got: {summary}"
);
assert!(
summary.contains("offset 0x64"),
"byte-offset summary still required; got: {summary}"
);
}
// The payload is 8 KiB larger than head + tail samples combined, so there is an un-sampled
// middle. The runs differ only in the last byte; the summary must give that position as an
// absolute offset from the start of the file and must not contain "from end".
#[test]
fn summarize_drift_tail_diff_reports_absolute_offset() {
use super::super::artifacts::{HEAD_SAMPLE_BYTES, TAIL_SAMPLE_BYTES};
let size = HEAD_SAMPLE_BYTES + TAIL_SAMPLE_BYTES + 8 * 1024;
let mut run0 = vec![0x55u8; size];
let mut run1 = run0.clone();
run0[size - 1] = 0xaa;
run1[size - 1] = 0xbb;
let samples = samples_from_bytes("big.bin", &[&run0, &run1]);
let summary = summarize_drift("big.bin", &samples).unwrap();
let expected_offset = format!("{:#x}", size - 1);
assert!(
summary.contains(&expected_offset),
"expected absolute offset {expected_offset}; got: {summary}"
);
assert!(
!summary.contains("from end"),
"tail summary must no longer use from-end coordinates; got: {summary}"
);
}
// Injecting drift into a temp file must change its SHA-256 digest and grow the file by
// exactly one byte.
#[test]
fn inject_drift_byte_mutates_file_so_hash_differs() {
use sha2::{Digest, Sha256};
let tmp = tempfile::tempdir().unwrap();
let p = tmp.path().join("victim.bin");
std::fs::write(&p, b"hello world").unwrap();
let before = {
let mut h = Sha256::new();
h.update(std::fs::read(&p).unwrap());
format!("{:x}", h.finalize())
};
inject_drift_byte(&p).expect("inject");
let after_bytes = std::fs::read(&p).unwrap();
let after = {
let mut h = Sha256::new();
h.update(&after_bytes);
format!("{:x}", h.finalize())
};
assert_ne!(before, after, "hash must change after drift injection");
assert_eq!(
after_bytes.len(),
b"hello world".len() + 1,
"exactly one byte must be appended"
);
}
}