Skip to main content

batty_cli/team/
verification.rs

1//! Clean-room equivalence verification driven by `PARITY.md`.
2
3use std::collections::{BTreeMap, BTreeSet};
4use std::path::{Path, PathBuf};
5
6use anyhow::{Context, Result, bail};
7use serde::{Deserialize, Serialize};
8
9use super::equivalence::{
10    CommandBackend, DiffReport, InputSequence, compare_outputs, execute_test_run,
11};
12use super::events::{EventSink, TeamEvent};
13use super::parity::{ParityReport, ParitySummary, VerificationStatus};
14
/// Location of the verification manifest, relative to the project root
/// (see `load_manifest`).
const MANIFEST_PATH: &str = ".batty/verification.yml";
/// Directory that receives rendered verification reports, relative to the
/// artifact root (see `write_report`).
const REPORTS_DIR: &str = ".batty/reports/verification";
/// File name inside `REPORTS_DIR` that always holds a copy of the most recent
/// report.
const LATEST_REPORT: &str = "latest.md";
18
19#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
20#[serde(rename_all = "snake_case")]
21pub enum VerificationPhase {
22    Executing,
23    Verifying,
24    Fixing,
25    Complete,
26    Failed,
27}
28
29#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
30#[serde(rename_all = "snake_case")]
31pub enum EvidenceKind {
32    CommitsAhead,
33    FilesChanged,
34    CodeFilesChanged,
35    TestsPassed,
36    TestsFailed,
37}
38
39#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
40pub struct VerificationEvidence {
41    pub kind: EvidenceKind,
42    pub detail: String,
43    pub timestamp: chrono::DateTime<chrono::Utc>,
44}
45
46#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
47pub struct VerificationState {
48    pub phase: VerificationPhase,
49    pub iteration: u32,
50    pub max_iterations: u32,
51    pub last_test_output: Option<String>,
52    pub last_test_passed: bool,
53    pub evidence: Vec<VerificationEvidence>,
54}
55
56impl VerificationState {
57    pub fn new(max_iterations: u32) -> Self {
58        Self {
59            phase: VerificationPhase::Executing,
60            iteration: 0,
61            max_iterations: max_iterations.max(1),
62            last_test_output: None,
63            last_test_passed: false,
64            evidence: Vec::new(),
65        }
66    }
67
68    pub fn transition(&mut self, phase: VerificationPhase) -> VerificationPhase {
69        let previous = self.phase.clone();
70        self.phase = phase;
71        previous
72    }
73
74    pub fn begin_iteration(&mut self) {
75        self.iteration = self.iteration.saturating_add(1);
76    }
77
78    pub fn record_evidence(&mut self, kind: EvidenceKind, detail: impl Into<String>) {
79        self.evidence.push(VerificationEvidence {
80            kind,
81            detail: detail.into(),
82            timestamp: chrono::Utc::now(),
83        });
84    }
85
86    pub fn reached_max_iterations(&self) -> bool {
87        self.iteration >= self.max_iterations
88    }
89
90    pub fn clear_evidence(&mut self) {
91        self.evidence.clear();
92    }
93}
94
/// Overall result of a `verify_project` run.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VerifyStatus {
    /// The project has no `PARITY.md`, so nothing was verified.
    Skipped,
    /// Verification ran and no previously passing behavior regressed.
    Passed,
    /// Verification ran and at least one previously passing behavior no
    /// longer passes.
    Failed,
}
101
102#[derive(Debug, Clone, PartialEq, Eq)]
103pub struct VerificationOutcome {
104    pub status: VerifyStatus,
105    pub report_path: Option<PathBuf>,
106    pub summary: Option<ParitySummary>,
107    pub regressions: Vec<String>,
108}
109
110#[derive(Debug, Clone, PartialEq, Eq)]
111struct BehaviorRun {
112    behavior: String,
113    previous_status: VerificationStatus,
114    report: DiffReport,
115}
116
117#[derive(Debug, Deserialize)]
118struct VerificationManifest {
119    behaviors: Vec<VerificationCase>,
120}
121
122#[derive(Debug, Deserialize)]
123struct VerificationCase {
124    behavior: String,
125    baseline: String,
126    candidate: String,
127    #[serde(default)]
128    inputs: Vec<String>,
129}
130
131pub fn cmd_verify(project_root: &Path) -> Result<()> {
132    let outcome = verify_project(project_root, project_root)?;
133    let report_path = outcome
134        .report_path
135        .as_deref()
136        .map(|path| path.display().to_string())
137        .unwrap_or_else(|| "(none)".to_string());
138    match outcome.status {
139        VerifyStatus::Skipped => {
140            println!("No PARITY.md found. Verification skipped.");
141        }
142        VerifyStatus::Passed => {
143            if let Some(summary) = outcome.summary.as_ref() {
144                println!(
145                    "Verification passed: {}/{} behaviors verified. Report: {}",
146                    summary.verified_pass, summary.total_behaviors, report_path
147                );
148            }
149        }
150        VerifyStatus::Failed => {
151            if outcome.regressions.is_empty() {
152                bail!("verification failed. Report: {report_path}");
153            }
154            bail!(
155                "verification regressions: {}. Report: {}",
156                outcome.regressions.join(", "),
157                report_path
158            );
159        }
160    }
161
162    Ok(())
163}
164
165pub fn verify_project(project_root: &Path, artifact_root: &Path) -> Result<VerificationOutcome> {
166    let parity_path = project_root.join("PARITY.md");
167    if !parity_path.exists() {
168        return Ok(VerificationOutcome {
169            status: VerifyStatus::Skipped,
170            report_path: None,
171            summary: None,
172            regressions: Vec::new(),
173        });
174    }
175
176    let manifest = load_manifest(project_root)?;
177    let mut report = ParityReport::load(project_root)?;
178    let manifest_by_behavior = manifest_by_behavior(&manifest)?;
179
180    let parity_behaviors: BTreeSet<String> =
181        report.rows.iter().map(|row| row.behavior.clone()).collect();
182    let manifest_behaviors: BTreeSet<String> = manifest
183        .behaviors
184        .iter()
185        .map(|case| case.behavior.clone())
186        .collect();
187
188    let missing_behaviors: Vec<String> = parity_behaviors
189        .difference(&manifest_behaviors)
190        .cloned()
191        .collect();
192    if !missing_behaviors.is_empty() {
193        bail!(
194            "verification manifest missing behaviors: {}",
195            missing_behaviors.join(", ")
196        );
197    }
198
199    let extra_behaviors: Vec<String> = manifest_behaviors
200        .difference(&parity_behaviors)
201        .cloned()
202        .collect();
203    if !extra_behaviors.is_empty() {
204        bail!(
205            "verification manifest has behaviors not present in PARITY.md: {}",
206            extra_behaviors.join(", ")
207        );
208    }
209
210    let mut runs = Vec::new();
211    let backend = CommandBackend;
212    for row in report.rows.clone() {
213        let case = manifest_by_behavior
214            .get(row.behavior.as_str())
215            .context("verification manifest lookup failed")?;
216        let inputs = InputSequence::new(case.inputs.iter().cloned());
217        let baseline = project_root.join(&case.baseline);
218        let candidate = project_root.join(&case.candidate);
219        let expected = execute_test_run(&backend, "baseline", &baseline, inputs.clone())
220            .with_context(|| format!("verification baseline failed for `{}`", row.behavior))?;
221        let actual = execute_test_run(&backend, "candidate", &candidate, inputs)
222            .with_context(|| format!("verification candidate failed for `{}`", row.behavior))?;
223        let diff = compare_outputs(&expected.outputs, &actual.outputs);
224
225        let status = if diff.passed() {
226            VerificationStatus::Pass
227        } else {
228            VerificationStatus::Fail
229        };
230        report.update_verification(&row.behavior, status, &diff.summary())?;
231        runs.push(BehaviorRun {
232            behavior: row.behavior,
233            previous_status: row.verified,
234            report: diff,
235        });
236    }
237
238    std::fs::write(&parity_path, report.render())
239        .with_context(|| format!("failed to write {}", parity_path.display()))?;
240
241    let summary = report.summary();
242    let regressions: Vec<String> = runs
243        .iter()
244        .filter(|run| run.previous_status == VerificationStatus::Pass && !run.report.passed())
245        .map(|run| run.behavior.clone())
246        .collect();
247    let report_path = write_report(artifact_root, &summary, &runs, &regressions)?;
248    record_summary_event(artifact_root, &summary)?;
249
250    Ok(VerificationOutcome {
251        status: if regressions.is_empty() {
252            VerifyStatus::Passed
253        } else {
254            VerifyStatus::Failed
255        },
256        report_path: Some(report_path),
257        summary: Some(summary),
258        regressions,
259    })
260}
261
262fn load_manifest(project_root: &Path) -> Result<VerificationManifest> {
263    let path = project_root.join(MANIFEST_PATH);
264    let content = std::fs::read_to_string(&path)
265        .with_context(|| format!("failed to read {}", path.display()))?;
266    serde_yaml::from_str(&content).with_context(|| format!("failed to parse {}", path.display()))
267}
268
269fn manifest_by_behavior(
270    manifest: &VerificationManifest,
271) -> Result<BTreeMap<&str, &VerificationCase>> {
272    let mut map = BTreeMap::new();
273    for case in &manifest.behaviors {
274        if map.insert(case.behavior.as_str(), case).is_some() {
275            bail!(
276                "duplicate verification manifest behavior `{}`",
277                case.behavior
278            );
279        }
280    }
281    Ok(map)
282}
283
284fn write_report(
285    artifact_root: &Path,
286    summary: &ParitySummary,
287    runs: &[BehaviorRun],
288    regressions: &[String],
289) -> Result<PathBuf> {
290    let report_dir = artifact_root.join(REPORTS_DIR);
291    std::fs::create_dir_all(&report_dir)
292        .with_context(|| format!("failed to create {}", report_dir.display()))?;
293
294    let timestamp = chrono::Utc::now().format("%Y%m%d-%H%M%S");
295    let report_path = report_dir.join(format!("verification-{timestamp}.md"));
296    let latest_path = report_dir.join(LATEST_REPORT);
297    let content = render_report(summary, runs, regressions);
298    std::fs::write(&report_path, &content)
299        .with_context(|| format!("failed to write {}", report_path.display()))?;
300    std::fs::write(&latest_path, &content)
301        .with_context(|| format!("failed to write {}", latest_path.display()))?;
302    Ok(report_path)
303}
304
305fn render_report(summary: &ParitySummary, runs: &[BehaviorRun], regressions: &[String]) -> String {
306    let mut out = String::new();
307    out.push_str("# Verification Report\n\n");
308    out.push_str(&format!(
309        "- Generated: {}\n",
310        chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true)
311    ));
312    out.push_str(&format!("- Total behaviors: {}\n", summary.total_behaviors));
313    out.push_str(&format!("- Verified PASS: {}\n", summary.verified_pass));
314    out.push_str(&format!("- Verified FAIL: {}\n", summary.verified_fail));
315    out.push_str(&format!(
316        "- Overall parity: {}%\n",
317        summary.overall_parity_pct
318    ));
319    if regressions.is_empty() {
320        out.push_str("- Regressions: none\n\n");
321    } else {
322        out.push_str(&format!("- Regressions: {}\n\n", regressions.join(", ")));
323    }
324
325    out.push_str("| Behavior | Previous | Result | Summary |\n");
326    out.push_str("| --- | --- | --- | --- |\n");
327    for run in runs {
328        let result = if run.report.passed() { "PASS" } else { "FAIL" };
329        out.push_str(&format!(
330            "| {} | {} | {} | {} |\n",
331            run.behavior,
332            run.previous_status,
333            result,
334            run.report.summary()
335        ));
336    }
337
338    out
339}
340
341fn record_summary_event(project_root: &Path, summary: &ParitySummary) -> Result<()> {
342    let event = TeamEvent::parity_updated(summary);
343    let mut sink = EventSink::new(&super::team_events_path(project_root))?;
344    sink.emit(event.clone())?;
345
346    let conn = super::telemetry_db::open(project_root)?;
347    super::telemetry_db::insert_event(&conn, &event)?;
348    Ok(())
349}
350
#[cfg(test)]
mod tests {
    use super::*;
    use std::os::unix::fs::PermissionsExt;

    /// Builds a one-row `PARITY.md` whose "Verified" cell holds
    /// `previous_verified`.
    fn parity_fixture(previous_verified: &str) -> String {
        format!(
            r#"---
project: trivial
target: trivial.z80
source_platform: zx-spectrum-z80
target_language: rust
last_verified: 2026-04-05
overall_parity: 100%
---

| Behavior | Spec | Test | Implementation | Verified | Notes |
| --- | --- | --- | --- | --- | --- |
| Screen fill | complete | complete | complete | {previous_verified} | previous |
"#
        )
    }

    /// Writes an executable shell script at `path` that prints each entry of
    /// `lines` on its own line.
    fn write_script(path: &Path, lines: &[&str]) {
        let body = format!("#!/bin/sh\nprintf '%s\\n' {}\n", lines.join(" "));
        std::fs::write(path, body).unwrap();
        let mut perms = std::fs::metadata(path).unwrap().permissions();
        perms.set_mode(0o755);
        std::fs::set_permissions(path, perms).unwrap();
    }

    /// Writes a manifest under `root/.batty` with a single "Screen fill" case.
    fn write_manifest(root: &Path) {
        let batty_dir = root.join(".batty");
        std::fs::create_dir_all(&batty_dir).unwrap();
        std::fs::write(
            batty_dir.join("verification.yml"),
            r#"behaviors:
  - behavior: Screen fill
    baseline: scripts/baseline.sh
    candidate: scripts/candidate.sh
    inputs:
      - fill
      - flip
"#,
        )
        .unwrap();
    }

    #[test]
    fn verify_project_updates_parity_and_writes_report() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::create_dir_all(tmp.path().join("scripts")).unwrap();
        std::fs::write(tmp.path().join("PARITY.md"), parity_fixture("--")).unwrap();
        write_manifest(tmp.path());
        write_script(
            &tmp.path().join("scripts/baseline.sh"),
            &["frame-a", "frame-b"],
        );
        write_script(
            &tmp.path().join("scripts/candidate.sh"),
            &["frame-a", "frame-b"],
        );

        let outcome = verify_project(tmp.path(), tmp.path()).unwrap();
        assert_eq!(outcome.status, VerifyStatus::Passed);
        assert!(outcome.regressions.is_empty());
        assert!(outcome.summary.is_some());

        // The timestamped report returned to the caller must exist on disk.
        let report_path = outcome
            .report_path
            .expect("a completed run returns a report path");
        assert!(report_path.exists());

        let updated = std::fs::read_to_string(tmp.path().join("PARITY.md")).unwrap();
        assert!(updated.contains("| Screen fill | complete | complete | complete | PASS |"));
        assert!(updated.contains("matching_frames=2"));

        // The previous check here searched for the misspelled "Repressions"
        // and negated it, so it could never fail; assert on real content.
        let latest_report =
            std::fs::read_to_string(tmp.path().join(REPORTS_DIR).join(LATEST_REPORT)).unwrap();
        assert!(latest_report.starts_with("# Verification Report"));
        assert!(latest_report.contains("Regressions: none"));
    }

    #[test]
    fn verify_project_detects_regressions_from_previous_pass() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::create_dir_all(tmp.path().join("scripts")).unwrap();
        std::fs::write(tmp.path().join("PARITY.md"), parity_fixture("PASS")).unwrap();
        write_manifest(tmp.path());
        write_script(
            &tmp.path().join("scripts/baseline.sh"),
            &["frame-a", "frame-b"],
        );
        write_script(
            &tmp.path().join("scripts/candidate.sh"),
            &["frame-a", "frame-x"],
        );

        let outcome = verify_project(tmp.path(), tmp.path()).unwrap();
        assert_eq!(outcome.status, VerifyStatus::Failed);
        assert_eq!(outcome.regressions, vec!["Screen fill".to_string()]);

        let updated = std::fs::read_to_string(tmp.path().join("PARITY.md")).unwrap();
        assert!(updated.contains("| Screen fill | complete | complete | complete | FAIL |"));

        // The report must name the regressed behavior.
        let latest_report =
            std::fs::read_to_string(tmp.path().join(REPORTS_DIR).join(LATEST_REPORT)).unwrap();
        assert!(latest_report.contains("- Regressions: Screen fill"));
    }

    #[test]
    fn verify_project_skips_when_parity_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let outcome = verify_project(tmp.path(), tmp.path()).unwrap();
        assert_eq!(outcome.status, VerifyStatus::Skipped);
        assert!(outcome.report_path.is_none());
    }
}