1use crate::normalize::{compute_hash, sha256_hex};
9use crate::path::ArtifactPath;
10use crate::record::ArtifactRecord;
11use std::fs;
12use std::io;
13use std::path::{Path, PathBuf};
14use std::process::Command;
15use walkdir::WalkDir;
16
/// Settings that control a single [`verify`] run.
#[derive(Debug, Clone)]
pub struct VerifyOptions {
    /// Repository root. Artifacts are discovered under `<repo_root>/.review`
    /// and recorded commands are re-run with this directory as their working
    /// directory.
    pub repo_root: PathBuf,
    /// Percentage of discovered artifacts to re-verify. [`verify`] rejects
    /// values outside `1..=100`.
    pub sample_ratio_percent: u32,
    /// Seed mixed into the sampling key, so the same seed over the same set of
    /// artifact paths selects the same sample.
    pub seed: String,
    /// Upper bound on the number of diff lines attached to a mismatch.
    pub diff_lines: usize,
}
30
31impl Default for VerifyOptions {
32 fn default() -> Self {
33 Self {
34 repo_root: PathBuf::from("."),
35 sample_ratio_percent: 10,
36 seed: String::from("koala-artifact-default"),
37 diff_lines: 5,
38 }
39 }
40}
41
/// One line of the abbreviated diff attached to a mismatch.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffLine {
    /// Which of the two compared outputs the line belongs to.
    pub side: DiffSide,
    /// The line's text, without its trailing line terminator.
    pub text: String,
}
47
/// Direction of a [`DiffLine`] relative to the recorded output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffSide {
    /// The line occurs in the recorded output but not in the re-run output.
    Removed,
    /// The line occurs in the re-run output but not in the recorded output.
    Added,
}
55
56impl DiffSide {
57 pub fn marker(&self) -> char {
58 match self {
59 Self::Removed => '-',
60 Self::Added => '+',
61 }
62 }
63}
64
/// Result of re-verifying a single artifact.
#[derive(Debug, Clone)]
pub struct VerifyOutcome {
    /// Path of the artifact, relative to the repository root.
    pub artifact: PathBuf,
    /// What the re-verification found.
    pub status: VerifyStatus,
}
71
/// Verdict for one re-verified artifact.
#[derive(Debug, Clone)]
pub enum VerifyStatus {
    /// The hash recomputed from the re-run equals the recorded hash.
    Match { hash: String },
    /// The recomputed hash differs from the recorded one.
    Mismatch {
        /// Hash stored in the artifact record.
        expected: String,
        /// Hash recomputed from the re-run.
        actual: String,
        /// Capped list of lines that differ between the recorded and the
        /// re-run output; may be empty even though the hashes differ.
        diff: Vec<DiffLine>,
    },
    /// The artifact could not be checked; the message says whether reading,
    /// parsing, or re-running failed.
    Error(String),
}
86
/// Aggregate result of a [`verify`] run.
#[derive(Debug, Clone)]
pub struct VerifyReport {
    /// Number of artifacts discovered under `.review`.
    pub total: usize,
    /// Number of artifacts selected for re-verification.
    pub sampled: usize,
    /// One outcome per sampled artifact, in sampling order.
    pub results: Vec<VerifyOutcome>,
}
95
96impl VerifyReport {
97 pub fn pass_count(&self) -> usize {
98 self.results
99 .iter()
100 .filter(|r| matches!(r.status, VerifyStatus::Match { .. }))
101 .count()
102 }
103
104 pub fn mismatch_count(&self) -> usize {
105 self.results
106 .iter()
107 .filter(|r| matches!(r.status, VerifyStatus::Mismatch { .. }))
108 .count()
109 }
110
111 pub fn error_count(&self) -> usize {
112 self.results
113 .iter()
114 .filter(|r| matches!(r.status, VerifyStatus::Error(_)))
115 .count()
116 }
117
118 pub fn is_clean(&self) -> bool {
121 self.mismatch_count() == 0 && self.error_count() == 0
122 }
123}
124
/// Failure that prevents a [`verify`] run from producing a report.
#[derive(Debug)]
pub enum VerifyError {
    /// Walking the `.review` directory failed.
    // NOTE(review): nothing in the code visible here constructs this variant;
    // confirm whether another module does.
    Walk(io::Error),
    /// The supplied options were rejected; the payload is the full message.
    BadOptions(String),
}
130
131impl std::fmt::Display for VerifyError {
132 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
133 match self {
134 Self::Walk(e) => write!(f, "walking .review failed: {e}"),
135 Self::BadOptions(s) => write!(f, "{s}"),
136 }
137 }
138}
139
// Relies on the trait's default methods: there is no `source()` override, so
// the wrapped `io::Error` is surfaced only through the `Display` text.
impl std::error::Error for VerifyError {}
141
142pub fn verify(opts: &VerifyOptions) -> Result<VerifyReport, VerifyError> {
143 if opts.sample_ratio_percent == 0 || opts.sample_ratio_percent > 100 {
144 return Err(VerifyError::BadOptions(format!(
145 "sample ratio must be 1..=100, got {}",
146 opts.sample_ratio_percent
147 )));
148 }
149
150 let mut artifacts = collect_artifacts(&opts.repo_root);
151 artifacts.sort();
152 let total = artifacts.len();
153
154 let sample = select_sample(&artifacts, opts.sample_ratio_percent, &opts.seed);
155 let sampled = sample.len();
156
157 let mut results = Vec::with_capacity(sampled);
158 for rel in sample {
159 results.push(verify_one(&opts.repo_root, &rel, opts.diff_lines));
160 }
161
162 Ok(VerifyReport {
163 total,
164 sampled,
165 results,
166 })
167}
168
169fn collect_artifacts(repo_root: &Path) -> Vec<PathBuf> {
173 let dir = repo_root.join(".review");
174 if !dir.is_dir() {
175 return Vec::new();
176 }
177 WalkDir::new(&dir)
178 .into_iter()
179 .filter_map(Result::ok)
180 .filter(|e| e.file_type().is_file())
181 .filter_map(|e| e.path().strip_prefix(repo_root).ok().map(Path::to_path_buf))
182 .filter(|rel| ArtifactPath::parse_relative(rel).is_ok())
183 .collect()
184}
185
186fn select_sample(items: &[PathBuf], ratio_percent: u32, seed: &str) -> Vec<PathBuf> {
190 if items.is_empty() {
191 return Vec::new();
192 }
193 let target = std::cmp::max(1, (items.len() * ratio_percent as usize).div_ceil(100));
194 let mut scored: Vec<(String, &PathBuf)> = items
195 .iter()
196 .map(|p| {
197 let key = format!("{seed}\u{1f}{}", p.display());
198 (sha256_hex(&key), p)
199 })
200 .collect();
201 scored.sort_by(|a, b| a.0.cmp(&b.0));
202 scored
203 .into_iter()
204 .take(target)
205 .map(|(_, p)| p.clone())
206 .collect()
207}
208
209fn verify_one(repo_root: &Path, rel: &Path, diff_cap: usize) -> VerifyOutcome {
210 let abs = repo_root.join(rel);
211 let text = match fs::read_to_string(&abs) {
212 Ok(s) => s,
213 Err(e) => {
214 return VerifyOutcome {
215 artifact: rel.to_path_buf(),
216 status: VerifyStatus::Error(format!("read failed: {e}")),
217 };
218 }
219 };
220 let record = match ArtifactRecord::parse(&text) {
221 Ok(r) => r,
222 Err(e) => {
223 return VerifyOutcome {
224 artifact: rel.to_path_buf(),
225 status: VerifyStatus::Error(format!("parse failed: {e}")),
226 };
227 }
228 };
229
230 let actual = match rerun(repo_root, &record.command) {
231 Ok(out) => out,
232 Err(e) => {
233 return VerifyOutcome {
234 artifact: rel.to_path_buf(),
235 status: VerifyStatus::Error(format!("rerun failed: {e}")),
236 };
237 }
238 };
239 let actual_hash = compute_hash(&record.command, actual.exit_code, &actual.output, repo_root);
240 if actual_hash == record.hash {
241 VerifyOutcome {
242 artifact: rel.to_path_buf(),
243 status: VerifyStatus::Match { hash: actual_hash },
244 }
245 } else {
246 let diff = line_diff(&record.output, &actual.output, diff_cap);
247 VerifyOutcome {
248 artifact: rel.to_path_buf(),
249 status: VerifyStatus::Mismatch {
250 expected: record.hash.clone(),
251 actual: actual_hash,
252 diff,
253 },
254 }
255 }
256}
257
/// Exit code and captured output of a re-executed command.
struct RerunOutput {
    /// Exit code of the process, or `-1` when the platform reports none.
    exit_code: i32,
    /// Captured stdout followed by captured stderr, decoded lossily as UTF-8.
    output: String,
}

/// Executes `command` (program followed by its arguments) with `repo_root` as
/// the working directory and captures its output.
///
/// The program is spawned directly, not through a shell. When stderr is
/// non-empty it is appended after stdout, and a newline is inserted between
/// them if stdout is non-empty and does not already end with one.
///
/// # Errors
///
/// Returns `InvalidInput` for an empty `command`, and any I/O error raised
/// while spawning or waiting for the process.
fn rerun(repo_root: &Path, command: &[String]) -> Result<RerunOutput, io::Error> {
    let Some((program, args)) = command.split_first() else {
        return Err(io::Error::new(io::ErrorKind::InvalidInput, "empty command"));
    };

    let finished = Command::new(program)
        .args(args)
        .current_dir(repo_root)
        .output()?;

    // Reuse the stdout buffer as the start of the combined stream.
    let mut bytes = finished.stdout;
    if !finished.stderr.is_empty() {
        let needs_separator = !bytes.is_empty() && !bytes.ends_with(b"\n");
        if needs_separator {
            bytes.push(b'\n');
        }
        bytes.extend_from_slice(&finished.stderr);
    }

    Ok(RerunOutput {
        exit_code: finished.status.code().unwrap_or(-1),
        output: String::from_utf8_lossy(&bytes).into_owned(),
    })
}
285
286fn line_diff(expected: &str, actual: &str, cap: usize) -> Vec<DiffLine> {
290 use std::collections::HashSet;
291 let exp: HashSet<&str> = expected.lines().collect();
292 let act: HashSet<&str> = actual.lines().collect();
293 let mut out = Vec::new();
294
295 let mut removed: Vec<&&str> = exp.difference(&act).collect();
296 removed.sort();
297 for s in removed {
298 if out.len() >= cap {
299 return out;
300 }
301 out.push(DiffLine {
302 side: DiffSide::Removed,
303 text: (*s).to_string(),
304 });
305 }
306 let mut added: Vec<&&str> = act.difference(&exp).collect();
307 added.sort();
308 for s in added {
309 if out.len() >= cap {
310 return out;
311 }
312 out.push(DiffLine {
313 side: DiffSide::Added,
314 text: (*s).to_string(),
315 });
316 }
317 out
318}
319
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `count` placeholder artifact paths: `a-0.md`, `a-1.md`, ...
    fn numbered_paths(count: usize) -> Vec<PathBuf> {
        (0..count)
            .map(|i| PathBuf::from(format!("a-{i}.md")))
            .collect()
    }

    /// The same seed must reproduce the sample; another seed must change it.
    #[test]
    fn select_sample_is_deterministic_per_seed() {
        let items = numbered_paths(20);
        let first = select_sample(&items, 25, "seed-X");
        let second = select_sample(&items, 25, "seed-X");
        assert_eq!(first, second);

        let other_seed = select_sample(&items, 25, "seed-Y");
        assert_ne!(
            first, other_seed,
            "different seed should pick a different subset"
        );
    }

    /// The sample size tracks the ratio and never drops below one.
    #[test]
    fn select_sample_respects_ratio() {
        let items = numbered_paths(10);
        for (ratio, expected_len) in [(100, 10), (50, 5), (10, 1)] {
            assert_eq!(select_sample(&items, ratio, "s").len(), expected_len);
        }

        let single = [PathBuf::from("x.md")];
        assert_eq!(select_sample(&single, 10, "s").len(), 1);
    }

    /// A repository without a `.review` directory yields an empty report.
    #[test]
    fn empty_repo_returns_empty_report() {
        let scratch = tempfile::tempdir().unwrap();
        let opts = VerifyOptions {
            repo_root: scratch.path().to_path_buf(),
            ..Default::default()
        };
        let report = verify(&opts).unwrap();
        assert_eq!(report.total, 0);
        assert_eq!(report.sampled, 0);
    }
}