use crate::normalize::{compute_hash, sha256_hex};
use crate::path::ArtifactPath;
use crate::record::ArtifactRecord;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use std::process::Command;
use walkdir::WalkDir;
/// Tunable inputs for a verification run.
#[derive(Debug, Clone)]
pub struct VerifyOptions {
    /// Repository root. Artifacts are looked up under its `.review` directory
    /// and recorded commands are re-run with it as the working directory.
    pub repo_root: PathBuf,
    /// Percentage of the discovered artifacts to re-verify; `verify` rejects
    /// values outside `1..=100`.
    pub sample_ratio_percent: u32,
    /// Seed mixed into each artifact's sampling key, so the same seed always
    /// selects the same subset.
    pub seed: String,
    /// Maximum number of diff lines attached to a mismatch.
    pub diff_lines: usize,
}

impl Default for VerifyOptions {
    /// Current directory as the root, a 10% sample, the stock seed, and at
    /// most five diff lines per mismatch.
    fn default() -> Self {
        let repo_root = PathBuf::from(".");
        let seed = "koala-artifact-default".to_owned();
        Self {
            repo_root,
            sample_ratio_percent: 10,
            seed,
            diff_lines: 5,
        }
    }
}
/// One line of a mismatch diff, tagged with the side it was found on.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffLine {
/// Which side of the comparison this line belongs to.
pub side: DiffSide,
/// Text of the line.
pub text: String,
}
/// Side of a comparison that a diff line belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffSide {
    /// The line is rendered with a `-` marker.
    Removed,
    /// The line is rendered with a `+` marker.
    Added,
}

impl DiffSide {
    /// Returns the one-character prefix for this side: `-` for
    /// [`DiffSide::Removed`], `+` for [`DiffSide::Added`].
    pub fn marker(&self) -> char {
        match *self {
            DiffSide::Added => '+',
            DiffSide::Removed => '-',
        }
    }
}
/// Result of re-verifying a single artifact.
#[derive(Debug, Clone)]
pub struct VerifyOutcome {
/// Path of the artifact, as passed to the verifier (relative to the repo root).
pub artifact: PathBuf,
/// What the re-verification found for this artifact.
pub status: VerifyStatus,
}
/// Verdict for one re-verified artifact.
#[derive(Debug, Clone)]
pub enum VerifyStatus {
/// The hash recomputed from the re-run equals the recorded hash.
Match { hash: String },
/// The recomputed hash differs from the recorded one.
Mismatch {
/// Hash stored in the artifact record.
expected: String,
/// Hash recomputed from the re-run.
actual: String,
/// Capped line diff between the recorded output and the re-run output.
diff: Vec<DiffLine>,
},
/// The artifact could not be checked; carries a human-readable reason
/// (read, parse, or re-run failure).
Error(String),
}
#[derive(Debug, Clone)]
pub struct VerifyReport {
pub total: usize,
pub sampled: usize,
pub results: Vec<VerifyOutcome>,
}
impl VerifyReport {
pub fn pass_count(&self) -> usize {
self.results
.iter()
.filter(|r| matches!(r.status, VerifyStatus::Match { .. }))
.count()
}
pub fn mismatch_count(&self) -> usize {
self.results
.iter()
.filter(|r| matches!(r.status, VerifyStatus::Mismatch { .. }))
.count()
}
pub fn error_count(&self) -> usize {
self.results
.iter()
.filter(|r| matches!(r.status, VerifyStatus::Error(_)))
.count()
}
pub fn is_clean(&self) -> bool {
self.mismatch_count() == 0 && self.error_count() == 0
}
}
/// Failures that abort a whole verification run.
#[derive(Debug)]
pub enum VerifyError {
    /// An I/O failure while walking the `.review` directory.
    Walk(io::Error),
    /// The supplied options were rejected; carries the message to display.
    BadOptions(String),
}

impl std::fmt::Display for VerifyError {
    /// Renders the walk failure with a fixed prefix and the options failure
    /// as its bare message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            VerifyError::BadOptions(message) => f.write_str(message),
            VerifyError::Walk(cause) => write!(f, "walking .review failed: {cause}"),
        }
    }
}

impl std::error::Error for VerifyError {}
pub fn verify(opts: &VerifyOptions) -> Result<VerifyReport, VerifyError> {
if opts.sample_ratio_percent == 0 || opts.sample_ratio_percent > 100 {
return Err(VerifyError::BadOptions(format!(
"sample ratio must be 1..=100, got {}",
opts.sample_ratio_percent
)));
}
let mut artifacts = collect_artifacts(&opts.repo_root);
artifacts.sort();
let total = artifacts.len();
let sample = select_sample(&artifacts, opts.sample_ratio_percent, &opts.seed);
let sampled = sample.len();
let mut results = Vec::with_capacity(sampled);
for rel in sample {
results.push(verify_one(&opts.repo_root, &rel, opts.diff_lines));
}
Ok(VerifyReport {
total,
sampled,
results,
})
}
fn collect_artifacts(repo_root: &Path) -> Vec<PathBuf> {
let dir = repo_root.join(".review");
if !dir.is_dir() {
return Vec::new();
}
WalkDir::new(&dir)
.into_iter()
.filter_map(Result::ok)
.filter(|e| e.file_type().is_file())
.filter_map(|e| e.path().strip_prefix(repo_root).ok().map(Path::to_path_buf))
.filter(|rel| ArtifactPath::parse_relative(rel).is_ok())
.collect()
}
/// Picks a deterministic subset of `items`.
///
/// Each path is ranked by the string `sha256_hex` returns for `seed`, a
/// U+001F separator, and the path's display form; the lowest-ranked paths are
/// kept, in rank order. The subset size is `ratio_percent` percent of
/// `items.len()`, rounded up, and never less than one for a non-empty input.
/// An empty input yields an empty sample.
fn select_sample(items: &[PathBuf], ratio_percent: u32, seed: &str) -> Vec<PathBuf> {
    if items.is_empty() {
        return Vec::new();
    }
    let quota = (items.len() * ratio_percent as usize)
        .div_ceil(100)
        .max(1);

    let mut ranked: Vec<(String, &PathBuf)> = Vec::with_capacity(items.len());
    for path in items {
        let digest = sha256_hex(&format!("{seed}\u{1f}{}", path.display()));
        ranked.push((digest, path));
    }
    // Stable sort: paths with equal digests keep their input order.
    ranked.sort_by(|(left, _), (right, _)| left.cmp(right));
    ranked.truncate(quota);

    ranked.into_iter().map(|(_, path)| path.clone()).collect()
}
fn verify_one(repo_root: &Path, rel: &Path, diff_cap: usize) -> VerifyOutcome {
let abs = repo_root.join(rel);
let text = match fs::read_to_string(&abs) {
Ok(s) => s,
Err(e) => {
return VerifyOutcome {
artifact: rel.to_path_buf(),
status: VerifyStatus::Error(format!("read failed: {e}")),
};
}
};
let record = match ArtifactRecord::parse(&text) {
Ok(r) => r,
Err(e) => {
return VerifyOutcome {
artifact: rel.to_path_buf(),
status: VerifyStatus::Error(format!("parse failed: {e}")),
};
}
};
let actual = match rerun(repo_root, &record.command) {
Ok(out) => out,
Err(e) => {
return VerifyOutcome {
artifact: rel.to_path_buf(),
status: VerifyStatus::Error(format!("rerun failed: {e}")),
};
}
};
let actual_hash = compute_hash(&record.command, actual.exit_code, &actual.output, repo_root);
if actual_hash == record.hash {
VerifyOutcome {
artifact: rel.to_path_buf(),
status: VerifyStatus::Match { hash: actual_hash },
}
} else {
let diff = line_diff(&record.output, &actual.output, diff_cap);
VerifyOutcome {
artifact: rel.to_path_buf(),
status: VerifyStatus::Mismatch {
expected: record.hash.clone(),
actual: actual_hash,
diff,
},
}
}
}
/// Exit code and combined output captured from a re-run command.
struct RerunOutput {
    /// Process exit code, or `-1` when the exit status carries no code.
    exit_code: i32,
    /// Stdout followed by stderr, decoded lossily as UTF-8.
    output: String,
}

/// Runs `command` (program followed by its arguments) with `repo_root` as the
/// working directory and captures its exit code and output.
///
/// Stderr is appended after stdout; a newline is inserted between the two
/// when stdout is non-empty and does not already end with one.
///
/// # Errors
///
/// Returns an `InvalidInput` error for an empty `command`, or the I/O error
/// from launching the process and collecting its output.
fn rerun(repo_root: &Path, command: &[String]) -> Result<RerunOutput, io::Error> {
    let Some((program, args)) = command.split_first() else {
        return Err(io::Error::new(io::ErrorKind::InvalidInput, "empty command"));
    };

    let finished = Command::new(program)
        .args(args)
        .current_dir(repo_root)
        .output()?;

    let mut bytes = finished.stdout;
    if !finished.stderr.is_empty() {
        let needs_separator = matches!(bytes.last(), Some(last) if *last != b'\n');
        if needs_separator {
            bytes.push(b'\n');
        }
        bytes.extend_from_slice(&finished.stderr);
    }

    Ok(RerunOutput {
        exit_code: finished.status.code().unwrap_or(-1),
        output: String::from_utf8_lossy(&bytes).into_owned(),
    })
}
/// Builds a capped, set-based diff between two texts.
///
/// Lines are compared as sets, so duplicates and ordering are ignored. Lines
/// found only in `expected` come first as [`DiffSide::Removed`], followed by
/// lines found only in `actual` as [`DiffSide::Added`]; each group is sorted.
/// At most `cap` entries are returned.
fn line_diff(expected: &str, actual: &str, cap: usize) -> Vec<DiffLine> {
    use std::collections::HashSet;

    let expected_lines: HashSet<&str> = expected.lines().collect();
    let actual_lines: HashSet<&str> = actual.lines().collect();

    // Set members are unique, so an unstable sort cannot reorder equal lines.
    let mut only_expected: Vec<&str> = expected_lines
        .difference(&actual_lines)
        .copied()
        .collect();
    only_expected.sort_unstable();

    let mut only_actual: Vec<&str> = actual_lines
        .difference(&expected_lines)
        .copied()
        .collect();
    only_actual.sort_unstable();

    let removed = only_expected
        .into_iter()
        .map(|text| (DiffSide::Removed, text));
    let added = only_actual.into_iter().map(|text| (DiffSide::Added, text));

    removed
        .chain(added)
        .take(cap)
        .map(|(side, text)| DiffLine {
            side,
            text: text.to_owned(),
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `count` paths named `a-0.md`, `a-1.md`, and so on.
    fn numbered_paths(count: usize) -> Vec<PathBuf> {
        (0..count)
            .map(|i| PathBuf::from(format!("a-{i}.md")))
            .collect()
    }

    /// The same seed must always pick the same subset; another seed should not.
    #[test]
    fn select_sample_is_deterministic_per_seed() {
        let items = numbered_paths(20);
        let first = select_sample(&items, 25, "seed-X");
        let repeat = select_sample(&items, 25, "seed-X");
        assert_eq!(first, repeat);

        let other_seed = select_sample(&items, 25, "seed-Y");
        assert_ne!(first, other_seed, "different seed should pick a different subset");
    }

    /// Sample size follows the ratio, rounding up to at least one item.
    #[test]
    fn select_sample_respects_ratio() {
        let items = numbered_paths(10);
        for (ratio, expected_len) in [(100, 10), (50, 5), (10, 1)] {
            assert_eq!(select_sample(&items, ratio, "s").len(), expected_len);
        }

        let single = [PathBuf::from("x.md")];
        assert_eq!(select_sample(&single, 10, "s").len(), 1);
    }

    /// A repository without a `.review` directory produces an empty report.
    #[test]
    fn empty_repo_returns_empty_report() {
        let dir = tempfile::tempdir().unwrap();
        let opts = VerifyOptions {
            repo_root: dir.path().to_path_buf(),
            ..Default::default()
        };
        let report = verify(&opts).unwrap();
        assert_eq!(report.total, 0);
        assert_eq!(report.sampled, 0);
    }
}