1use std::collections::{BTreeMap, BTreeSet};
4use std::path::{Path, PathBuf};
5
6use anyhow::{Context, Result, bail};
7use serde::{Deserialize, Serialize};
8
9use super::equivalence::{
10 CommandBackend, DiffReport, InputSequence, compare_outputs, execute_test_run,
11};
12use super::events::{EventSink, TeamEvent};
13use super::parity::{ParityReport, ParitySummary, VerificationStatus};
14
/// Location of the verification manifest, joined onto the project root by `load_manifest`.
const MANIFEST_PATH: &str = ".batty/verification.yml";
/// Directory (joined onto the artifact root by `write_report`) that receives verification reports.
const REPORTS_DIR: &str = ".batty/reports/verification";
/// File name inside `REPORTS_DIR` that `write_report` overwrites with the newest report.
const LATEST_REPORT: &str = "latest.md";
18
/// Phase recorded in [`VerificationState::phase`].
///
/// Serialized with `snake_case` variant names (e.g. `executing`).
// NOTE(review): the precise meaning of each phase is decided by the code that
// drives `VerificationState::transition`, which is not part of this file; the
// only thing established here is that a fresh state starts in `Executing`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VerificationPhase {
    /// Phase assigned by `VerificationState::new`.
    Executing,
    Verifying,
    Fixing,
    Complete,
    Failed,
}
28
/// Category tag attached to a [`VerificationEvidence`] entry.
///
/// Serialized with `snake_case` variant names (e.g. `commits_ahead`).
// NOTE(review): nothing in this file constructs these variants; their intended
// meaning is presumably what the names suggest — confirm against the callers
// of `VerificationState::record_evidence`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EvidenceKind {
    CommitsAhead,
    FilesChanged,
    CodeFilesChanged,
    TestsPassed,
    TestsFailed,
}
38
/// One evidence entry stored in [`VerificationState::evidence`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct VerificationEvidence {
    /// Category of the evidence.
    pub kind: EvidenceKind,
    /// Free-form text supplied by the caller of `record_evidence`.
    pub detail: String,
    /// UTC time at which `record_evidence` created this entry.
    pub timestamp: chrono::DateTime<chrono::Utc>,
}
45
/// Serializable bookkeeping for an iterative verification loop.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct VerificationState {
    /// Current phase; replaced through `transition`.
    pub phase: VerificationPhase,
    /// Number of iterations started so far; bumped by `begin_iteration`.
    pub iteration: u32,
    /// Iteration budget; `new` clamps it to at least 1.
    pub max_iterations: u32,
    /// Starts as `None`; this file never writes it afterwards.
    // NOTE(review): presumably the captured output of the latest test run,
    // filled in by callers — confirm.
    pub last_test_output: Option<String>,
    /// Starts as `false`; this file never writes it afterwards.
    pub last_test_passed: bool,
    /// Evidence appended by `record_evidence` and emptied by `clear_evidence`.
    pub evidence: Vec<VerificationEvidence>,
}
55
56impl VerificationState {
57 pub fn new(max_iterations: u32) -> Self {
58 Self {
59 phase: VerificationPhase::Executing,
60 iteration: 0,
61 max_iterations: max_iterations.max(1),
62 last_test_output: None,
63 last_test_passed: false,
64 evidence: Vec::new(),
65 }
66 }
67
68 pub fn transition(&mut self, phase: VerificationPhase) -> VerificationPhase {
69 let previous = self.phase.clone();
70 self.phase = phase;
71 previous
72 }
73
74 pub fn begin_iteration(&mut self) {
75 self.iteration = self.iteration.saturating_add(1);
76 }
77
78 pub fn record_evidence(&mut self, kind: EvidenceKind, detail: impl Into<String>) {
79 self.evidence.push(VerificationEvidence {
80 kind,
81 detail: detail.into(),
82 timestamp: chrono::Utc::now(),
83 });
84 }
85
86 pub fn reached_max_iterations(&self) -> bool {
87 self.iteration >= self.max_iterations
88 }
89
90 pub fn clear_evidence(&mut self) {
91 self.evidence.clear();
92 }
93}
94
/// Overall result reported by [`verify_project`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VerifyStatus {
    /// `PARITY.md` was not found, so nothing was run.
    Skipped,
    /// Verification ran and no previously passing behavior failed.
    Passed,
    /// Reported as an error by `cmd_verify`; `verify_project` produces it when
    /// at least one previously passing behavior failed.
    Failed,
}
101
/// Everything [`verify_project`] reports back to its caller.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VerificationOutcome {
    /// Overall result of the run.
    pub status: VerifyStatus,
    /// Path of the timestamped report file; `None` when verification was skipped.
    pub report_path: Option<PathBuf>,
    /// Parity summary taken after the run; `None` when verification was skipped.
    pub summary: Option<ParitySummary>,
    /// Behaviors whose previous status was `Pass` but which failed in this run.
    pub regressions: Vec<String>,
}
109
/// Result of verifying a single behavior during one `verify_project` run.
#[derive(Debug, Clone, PartialEq, Eq)]
struct BehaviorRun {
    /// Behavior name taken from the parity row.
    behavior: String,
    /// Verification status the parity row held before this run updated it.
    previous_status: VerificationStatus,
    /// Diff between baseline and candidate outputs.
    report: DiffReport,
}
116
/// Deserialized form of the verification manifest read by `load_manifest`.
#[derive(Debug, Deserialize)]
struct VerificationManifest {
    /// One entry per behavior to verify.
    behaviors: Vec<VerificationCase>,
}
121
/// Manifest entry describing how to verify one behavior.
#[derive(Debug, Deserialize)]
struct VerificationCase {
    /// Behavior name; `verify_project` matches it against the rows of `PARITY.md`.
    behavior: String,
    /// Path of the baseline run target, joined onto the project root.
    baseline: String,
    /// Path of the candidate run target, joined onto the project root.
    candidate: String,
    /// Inputs passed to both runs; an absent key deserializes to an empty list.
    #[serde(default)]
    inputs: Vec<String>,
}
130
131pub fn cmd_verify(project_root: &Path) -> Result<()> {
132 let outcome = verify_project(project_root, project_root)?;
133 let report_path = outcome
134 .report_path
135 .as_deref()
136 .map(|path| path.display().to_string())
137 .unwrap_or_else(|| "(none)".to_string());
138 match outcome.status {
139 VerifyStatus::Skipped => {
140 println!("No PARITY.md found. Verification skipped.");
141 }
142 VerifyStatus::Passed => {
143 if let Some(summary) = outcome.summary.as_ref() {
144 println!(
145 "Verification passed: {}/{} behaviors verified. Report: {}",
146 summary.verified_pass, summary.total_behaviors, report_path
147 );
148 }
149 }
150 VerifyStatus::Failed => {
151 if outcome.regressions.is_empty() {
152 bail!("verification failed. Report: {report_path}");
153 }
154 bail!(
155 "verification regressions: {}. Report: {}",
156 outcome.regressions.join(", "),
157 report_path
158 );
159 }
160 }
161
162 Ok(())
163}
164
165pub fn verify_project(project_root: &Path, artifact_root: &Path) -> Result<VerificationOutcome> {
166 let parity_path = project_root.join("PARITY.md");
167 if !parity_path.exists() {
168 return Ok(VerificationOutcome {
169 status: VerifyStatus::Skipped,
170 report_path: None,
171 summary: None,
172 regressions: Vec::new(),
173 });
174 }
175
176 let manifest = load_manifest(project_root)?;
177 let mut report = ParityReport::load(project_root)?;
178 let manifest_by_behavior = manifest_by_behavior(&manifest)?;
179
180 let parity_behaviors: BTreeSet<String> =
181 report.rows.iter().map(|row| row.behavior.clone()).collect();
182 let manifest_behaviors: BTreeSet<String> = manifest
183 .behaviors
184 .iter()
185 .map(|case| case.behavior.clone())
186 .collect();
187
188 let missing_behaviors: Vec<String> = parity_behaviors
189 .difference(&manifest_behaviors)
190 .cloned()
191 .collect();
192 if !missing_behaviors.is_empty() {
193 bail!(
194 "verification manifest missing behaviors: {}",
195 missing_behaviors.join(", ")
196 );
197 }
198
199 let extra_behaviors: Vec<String> = manifest_behaviors
200 .difference(&parity_behaviors)
201 .cloned()
202 .collect();
203 if !extra_behaviors.is_empty() {
204 bail!(
205 "verification manifest has behaviors not present in PARITY.md: {}",
206 extra_behaviors.join(", ")
207 );
208 }
209
210 let mut runs = Vec::new();
211 let backend = CommandBackend;
212 for row in report.rows.clone() {
213 let case = manifest_by_behavior
214 .get(row.behavior.as_str())
215 .context("verification manifest lookup failed")?;
216 let inputs = InputSequence::new(case.inputs.iter().cloned());
217 let baseline = project_root.join(&case.baseline);
218 let candidate = project_root.join(&case.candidate);
219 let expected = execute_test_run(&backend, "baseline", &baseline, inputs.clone())
220 .with_context(|| format!("verification baseline failed for `{}`", row.behavior))?;
221 let actual = execute_test_run(&backend, "candidate", &candidate, inputs)
222 .with_context(|| format!("verification candidate failed for `{}`", row.behavior))?;
223 let diff = compare_outputs(&expected.outputs, &actual.outputs);
224
225 let status = if diff.passed() {
226 VerificationStatus::Pass
227 } else {
228 VerificationStatus::Fail
229 };
230 report.update_verification(&row.behavior, status, &diff.summary())?;
231 runs.push(BehaviorRun {
232 behavior: row.behavior,
233 previous_status: row.verified,
234 report: diff,
235 });
236 }
237
238 std::fs::write(&parity_path, report.render())
239 .with_context(|| format!("failed to write {}", parity_path.display()))?;
240
241 let summary = report.summary();
242 let regressions: Vec<String> = runs
243 .iter()
244 .filter(|run| run.previous_status == VerificationStatus::Pass && !run.report.passed())
245 .map(|run| run.behavior.clone())
246 .collect();
247 let report_path = write_report(artifact_root, &summary, &runs, ®ressions)?;
248 record_summary_event(artifact_root, &summary)?;
249
250 Ok(VerificationOutcome {
251 status: if regressions.is_empty() {
252 VerifyStatus::Passed
253 } else {
254 VerifyStatus::Failed
255 },
256 report_path: Some(report_path),
257 summary: Some(summary),
258 regressions,
259 })
260}
261
262fn load_manifest(project_root: &Path) -> Result<VerificationManifest> {
263 let path = project_root.join(MANIFEST_PATH);
264 let content = std::fs::read_to_string(&path)
265 .with_context(|| format!("failed to read {}", path.display()))?;
266 serde_yaml::from_str(&content).with_context(|| format!("failed to parse {}", path.display()))
267}
268
269fn manifest_by_behavior(
270 manifest: &VerificationManifest,
271) -> Result<BTreeMap<&str, &VerificationCase>> {
272 let mut map = BTreeMap::new();
273 for case in &manifest.behaviors {
274 if map.insert(case.behavior.as_str(), case).is_some() {
275 bail!(
276 "duplicate verification manifest behavior `{}`",
277 case.behavior
278 );
279 }
280 }
281 Ok(map)
282}
283
284fn write_report(
285 artifact_root: &Path,
286 summary: &ParitySummary,
287 runs: &[BehaviorRun],
288 regressions: &[String],
289) -> Result<PathBuf> {
290 let report_dir = artifact_root.join(REPORTS_DIR);
291 std::fs::create_dir_all(&report_dir)
292 .with_context(|| format!("failed to create {}", report_dir.display()))?;
293
294 let timestamp = chrono::Utc::now().format("%Y%m%d-%H%M%S");
295 let report_path = report_dir.join(format!("verification-{timestamp}.md"));
296 let latest_path = report_dir.join(LATEST_REPORT);
297 let content = render_report(summary, runs, regressions);
298 std::fs::write(&report_path, &content)
299 .with_context(|| format!("failed to write {}", report_path.display()))?;
300 std::fs::write(&latest_path, &content)
301 .with_context(|| format!("failed to write {}", latest_path.display()))?;
302 Ok(report_path)
303}
304
305fn render_report(summary: &ParitySummary, runs: &[BehaviorRun], regressions: &[String]) -> String {
306 let mut out = String::new();
307 out.push_str("# Verification Report\n\n");
308 out.push_str(&format!(
309 "- Generated: {}\n",
310 chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true)
311 ));
312 out.push_str(&format!("- Total behaviors: {}\n", summary.total_behaviors));
313 out.push_str(&format!("- Verified PASS: {}\n", summary.verified_pass));
314 out.push_str(&format!("- Verified FAIL: {}\n", summary.verified_fail));
315 out.push_str(&format!(
316 "- Overall parity: {}%\n",
317 summary.overall_parity_pct
318 ));
319 if regressions.is_empty() {
320 out.push_str("- Regressions: none\n\n");
321 } else {
322 out.push_str(&format!("- Regressions: {}\n\n", regressions.join(", ")));
323 }
324
325 out.push_str("| Behavior | Previous | Result | Summary |\n");
326 out.push_str("| --- | --- | --- | --- |\n");
327 for run in runs {
328 let result = if run.report.passed() { "PASS" } else { "FAIL" };
329 out.push_str(&format!(
330 "| {} | {} | {} | {} |\n",
331 run.behavior,
332 run.previous_status,
333 result,
334 run.report.summary()
335 ));
336 }
337
338 out
339}
340
341fn record_summary_event(project_root: &Path, summary: &ParitySummary) -> Result<()> {
342 let event = TeamEvent::parity_updated(summary);
343 let mut sink = EventSink::new(&super::team_events_path(project_root))?;
344 sink.emit(event.clone())?;
345
346 let conn = super::telemetry_db::open(project_root)?;
347 super::telemetry_db::insert_event(&conn, &event)?;
348 Ok(())
349}
350
#[cfg(test)]
mod tests {
    use super::*;
    use std::os::unix::fs::PermissionsExt;

    /// Minimal `PARITY.md` with a single "Screen fill" row whose `Verified`
    /// column holds `previous_verified`.
    fn parity_fixture(previous_verified: &str) -> String {
        format!(
            r#"---
project: trivial
target: trivial.z80
source_platform: zx-spectrum-z80
target_language: rust
last_verified: 2026-04-05
overall_parity: 100%
---

| Behavior | Spec | Test | Implementation | Verified | Notes |
| --- | --- | --- | --- | --- | --- |
| Screen fill | complete | complete | complete | {previous_verified} | previous |
"#
        )
    }

    /// Writes an executable shell script that prints each of `lines` on its own line.
    fn write_script(path: &Path, lines: &[&str]) {
        let body = format!("#!/bin/sh\nprintf '%s\\n' {}\n", lines.join(" "));
        std::fs::write(path, body).unwrap();
        let mut perms = std::fs::metadata(path).unwrap().permissions();
        perms.set_mode(0o755);
        std::fs::set_permissions(path, perms).unwrap();
    }

    /// Writes a manifest with one "Screen fill" case pointing at the two scripts.
    fn write_manifest(root: &Path) {
        let batty_dir = root.join(".batty");
        std::fs::create_dir_all(&batty_dir).unwrap();
        std::fs::write(
            batty_dir.join("verification.yml"),
            r#"behaviors:
  - behavior: Screen fill
    baseline: scripts/baseline.sh
    candidate: scripts/candidate.sh
    inputs:
      - fill
      - flip
"#,
        )
        .unwrap();
    }

    /// Creates a temporary project with `PARITY.md`, the manifest, a baseline
    /// script printing `frame-a`/`frame-b`, and a candidate script printing
    /// `candidate_frames`.
    fn setup_project(previous_verified: &str, candidate_frames: &[&str]) -> tempfile::TempDir {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::create_dir_all(tmp.path().join("scripts")).unwrap();
        std::fs::write(
            tmp.path().join("PARITY.md"),
            parity_fixture(previous_verified),
        )
        .unwrap();
        write_manifest(tmp.path());
        write_script(
            &tmp.path().join("scripts/baseline.sh"),
            &["frame-a", "frame-b"],
        );
        write_script(&tmp.path().join("scripts/candidate.sh"), candidate_frames);
        tmp
    }

    /// Reads the `latest.md` report written below `root`.
    fn read_latest_report(root: &Path) -> String {
        std::fs::read_to_string(root.join(REPORTS_DIR).join(LATEST_REPORT)).unwrap()
    }

    #[test]
    fn verify_project_updates_parity_and_writes_report() {
        let tmp = setup_project("--", &["frame-a", "frame-b"]);

        let outcome = verify_project(tmp.path(), tmp.path()).unwrap();
        assert_eq!(outcome.status, VerifyStatus::Passed);
        assert!(outcome.regressions.is_empty());
        assert!(outcome.summary.is_some());

        // The timestamped report returned to the caller must exist on disk.
        let report_path = outcome.report_path.expect("report path for a completed run");
        assert!(report_path.exists());

        let updated = std::fs::read_to_string(tmp.path().join("PARITY.md")).unwrap();
        assert!(updated.contains("| Screen fill | complete | complete | complete | PASS |"));
        assert!(updated.contains("matching_frames=2"));

        let latest_report = read_latest_report(tmp.path());
        assert!(latest_report.contains("# Verification Report"));
        assert!(latest_report.contains("| Screen fill |"));
        assert!(!latest_report.contains("Repressions"));
        assert!(latest_report.contains("Regressions: none"));
    }

    #[test]
    fn verify_project_detects_regressions_from_previous_pass() {
        let tmp = setup_project("PASS", &["frame-a", "frame-x"]);

        let outcome = verify_project(tmp.path(), tmp.path()).unwrap();
        assert_eq!(outcome.status, VerifyStatus::Failed);
        assert_eq!(outcome.regressions, vec!["Screen fill".to_string()]);
        assert!(outcome.report_path.is_some());

        let updated = std::fs::read_to_string(tmp.path().join("PARITY.md")).unwrap();
        assert!(updated.contains("| Screen fill | complete | complete | complete | FAIL |"));

        // The regression must also be named in the written report.
        let latest_report = read_latest_report(tmp.path());
        assert!(latest_report.contains("Regressions: Screen fill"));
    }

    #[test]
    fn verify_project_skips_when_parity_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let outcome = verify_project(tmp.path(), tmp.path()).unwrap();
        assert_eq!(outcome.status, VerifyStatus::Skipped);
        assert!(outcome.report_path.is_none());
        assert!(outcome.summary.is_none());
        assert!(outcome.regressions.is_empty());
    }
}