karpal_verify/
runner.rs

1// Copyright (C) 2026 Industrial Algebra
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::{
5    Certificate, CommandKind, InvocationPlan, LeanCertificate, LeanTheorem, SmtCertificate,
6    VerificationBackend,
7};
8
9#[cfg(feature = "std")]
10use std::process::Command;
11
12#[cfg(not(feature = "std"))]
13use alloc::{string::String, vec::Vec};
14#[cfg(feature = "std")]
15use std::{string::String, vec::Vec};
16
17/// Outcome of an external verification run.
18#[derive(Debug, Clone, Copy, PartialEq, Eq)]
19pub enum ExecutionStatus {
20    Success,
21    Failure,
22    Sat,
23    Unsat,
24    Unknown,
25    DryRun,
26}
27
28/// Parsed SMT output details.
29#[derive(Debug, Clone, PartialEq, Eq)]
30pub struct SmtOutput {
31    pub status: Option<ExecutionStatus>,
32    pub model: Option<String>,
33    pub reason_unknown: Option<String>,
34}
35
36/// One parsed Lean diagnostic line.
37#[derive(Debug, Clone, PartialEq, Eq)]
38pub struct LeanDiagnostic {
39    pub file: Option<String>,
40    pub line: Option<usize>,
41    pub column: Option<usize>,
42    pub severity: String,
43    pub message: String,
44    pub theorem_hits: Vec<String>,
45}
46
47/// Parsed Lean output details.
48#[derive(Debug, Clone, PartialEq, Eq)]
49pub struct LeanOutput {
50    pub diagnostics: Vec<LeanDiagnostic>,
51    pub theorem_hits: Vec<String>,
52}
53
54/// Backend-specific verification policy.
55#[derive(Debug, Clone, Copy, PartialEq, Eq)]
56pub struct VerificationPolicy {
57    pub kind: CommandKind,
58    pub success_status: ExecutionStatus,
59    pub witness_suffix: &'static str,
60}
61
62impl VerificationPolicy {
63    pub fn for_kind(kind: CommandKind) -> Self {
64        match kind {
65            CommandKind::Smt => Self {
66                kind,
67                success_status: ExecutionStatus::Unsat,
68                witness_suffix: "unsat",
69            },
70            CommandKind::Lean | CommandKind::Kani => Self {
71                kind,
72                success_status: ExecutionStatus::Success,
73                witness_suffix: "ok",
74            },
75        }
76    }
77
78    pub fn accepts(self, status: ExecutionStatus) -> bool {
79        status == self.success_status
80    }
81}
82
83/// Result captured from a verifier invocation.
84#[derive(Debug, Clone, PartialEq, Eq)]
85pub struct ExecutionResult {
86    pub plan: InvocationPlan,
87    pub status: ExecutionStatus,
88    pub stdout: String,
89    pub stderr: String,
90    pub exit_code: Option<i32>,
91    pub backend_version: Option<String>,
92    pub smt_output: Option<SmtOutput>,
93    pub lean_output: Option<LeanOutput>,
94}
95
96impl ExecutionResult {
97    pub fn verification_policy(&self) -> VerificationPolicy {
98        VerificationPolicy::for_kind(self.plan.kind)
99    }
100
101    pub fn is_success(&self) -> bool {
102        self.verification_policy().accepts(self.status)
103    }
104
105    pub fn certificate_for_obligation(&self, obligation: &str) -> Option<Certificate> {
106        if !self.is_success() {
107            return None;
108        }
109
110        let backend = match self.plan.kind {
111            CommandKind::Smt => SmtCertificate::NAME,
112            CommandKind::Lean => LeanCertificate::NAME,
113            CommandKind::Kani => crate::KaniCertificate::NAME,
114        };
115
116        let witness = format!(
117            "{}:{}",
118            self.plan.executable,
119            self.verification_policy().witness_suffix
120        );
121
122        let artifact_path = self.plan.input_files.first().cloned();
123        let mut cert = Certificate::new(backend, obligation, witness);
124        if let Some(version) = &self.backend_version {
125            cert = cert.with_backend_version(version.clone());
126        }
127        if let Some(path) = artifact_path {
128            cert = cert.with_artifact_path(path);
129        }
130        Some(cert)
131    }
132}
133
134/// Runner abstraction for dry-run and local-process verification.
135pub trait VerifierRunner {
136    fn run(&self, plan: &InvocationPlan) -> ExecutionResult;
137
138    fn run_all(&self, plans: &[InvocationPlan]) -> Vec<ExecutionResult> {
139        plans.iter().map(|plan| self.run(plan)).collect()
140    }
141}
142
143/// Dry-run runner that never spawns processes.
144pub struct DryRunner;
145
146impl VerifierRunner for DryRunner {
147    fn run(&self, plan: &InvocationPlan) -> ExecutionResult {
148        ExecutionResult {
149            plan: plan.clone(),
150            status: ExecutionStatus::DryRun,
151            stdout: plan.render_shell(),
152            stderr: String::new(),
153            exit_code: None,
154            backend_version: None,
155            smt_output: None,
156            lean_output: None,
157        }
158    }
159}
160
161/// Local process runner using `std::process::Command`.
162#[cfg(feature = "std")]
163pub struct LocalProcessRunner;
164
165#[cfg(feature = "std")]
166impl VerifierRunner for LocalProcessRunner {
167    fn run(&self, plan: &InvocationPlan) -> ExecutionResult {
168        let mut command = Command::new(&plan.executable);
169        command.args(&plan.args);
170        if let Some(dir) = &plan.working_directory {
171            command.current_dir(dir);
172        }
173
174        let backend_version = probe_backend_version(&plan.executable);
175
176        match command.output() {
177            Ok(output) => {
178                let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
179                let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
180                let smt_output = match plan.kind {
181                    CommandKind::Smt => Some(parse_smt_output(&stdout)),
182                    CommandKind::Lean | CommandKind::Kani => None,
183                };
184                let lean_output = match plan.kind {
185                    CommandKind::Lean => Some(parse_lean_output(&stdout, &stderr)),
186                    CommandKind::Smt | CommandKind::Kani => None,
187                };
188                let status = classify_status(plan.kind, output.status.success(), &stdout, &stderr);
189                ExecutionResult {
190                    plan: plan.clone(),
191                    status,
192                    stdout,
193                    stderr,
194                    exit_code: output.status.code(),
195                    backend_version,
196                    smt_output,
197                    lean_output,
198                }
199            }
200            Err(err) => ExecutionResult {
201                plan: plan.clone(),
202                status: ExecutionStatus::Failure,
203                stdout: String::new(),
204                stderr: err.to_string(),
205                exit_code: None,
206                backend_version,
207                smt_output: None,
208                lean_output: None,
209            },
210        }
211    }
212}
213
214fn classify_status(
215    kind: CommandKind,
216    process_success: bool,
217    stdout: &str,
218    stderr: &str,
219) -> ExecutionStatus {
220    match kind {
221        CommandKind::Smt => parse_smt_output(stdout)
222            .status
223            .unwrap_or(if process_success {
224                ExecutionStatus::Success
225            } else {
226                ExecutionStatus::Failure
227            }),
228        CommandKind::Lean => {
229            let parsed = parse_lean_output(stdout, stderr);
230            if process_success && parsed.error_count() == 0 {
231                ExecutionStatus::Success
232            } else {
233                ExecutionStatus::Failure
234            }
235        }
236        CommandKind::Kani => {
237            if process_success {
238                ExecutionStatus::Success
239            } else {
240                ExecutionStatus::Failure
241            }
242        }
243    }
244}
245
246/// Parse SMT solver output into structured details.
247pub fn parse_smt_output(stdout: &str) -> SmtOutput {
248    let mut status = None;
249    let mut model_lines = Vec::new();
250    let mut reason_unknown = None;
251    let mut capture_model = false;
252
253    for line in stdout.lines() {
254        let trimmed = line.trim();
255        match trimmed {
256            "sat" => {
257                status = Some(ExecutionStatus::Sat);
258                capture_model = true;
259                continue;
260            }
261            "unsat" => {
262                status = Some(ExecutionStatus::Unsat);
263                continue;
264            }
265            "unknown" => {
266                status = Some(ExecutionStatus::Unknown);
267                continue;
268            }
269            _ => {}
270        }
271
272        if let Some(rest) = trimmed.strip_prefix("(:reason-unknown") {
273            reason_unknown = Some(
274                rest.trim()
275                    .trim_end_matches(')')
276                    .trim()
277                    .trim_matches('"')
278                    .to_string(),
279            );
280            continue;
281        }
282
283        if capture_model && !trimmed.is_empty() {
284            model_lines.push(trimmed.to_string());
285        }
286    }
287
288    SmtOutput {
289        status,
290        model: (!model_lines.is_empty()).then(|| model_lines.join("\n")),
291        reason_unknown,
292    }
293}
294
295/// Parse the first SMT solver status token from stdout.
296pub fn parse_smt_status(stdout: &str) -> Option<ExecutionStatus> {
297    parse_smt_output(stdout).status
298}
299
300impl LeanOutput {
301    pub fn error_count(&self) -> usize {
302        self.diagnostics
303            .iter()
304            .filter(|diagnostic| diagnostic.severity == "error")
305            .count()
306    }
307
308    pub fn theorem_diagnostics<'a>(&'a self, theorem: &LeanTheorem) -> Vec<&'a LeanDiagnostic> {
309        self.diagnostics
310            .iter()
311            .filter(|diagnostic| diagnostic_matches_theorem(diagnostic, theorem))
312            .collect()
313    }
314
315    pub fn has_theorem_failure(&self, theorem: &LeanTheorem) -> bool {
316        self.theorem_hits
317            .iter()
318            .any(|hit| hit == &theorem.theorem_name)
319            || self
320                .diagnostics
321                .iter()
322                .any(|diagnostic| diagnostic_matches_theorem(diagnostic, theorem))
323    }
324}
325
326fn diagnostic_matches_theorem(diagnostic: &LeanDiagnostic, theorem: &LeanTheorem) -> bool {
327    if !diagnostic.theorem_hits.is_empty() {
328        diagnostic
329            .theorem_hits
330            .iter()
331            .any(|hit| hit == &theorem.theorem_name)
332    } else {
333        diagnostic
334            .line
335            .is_some_and(|line| theorem.contains_line(line))
336    }
337}
338
339/// Parse Lean stdout/stderr into structured diagnostics and theorem references.
340pub fn parse_lean_output(stdout: &str, stderr: &str) -> LeanOutput {
341    let mut diagnostics = Vec::new();
342    let mut theorem_hits = Vec::new();
343
344    for line in stdout.lines().chain(stderr.lines()) {
345        let trimmed = line.trim();
346        if trimmed.is_empty() {
347            continue;
348        }
349
350        if let Some(diagnostic) = parse_lean_diagnostic_line(trimmed) {
351            theorem_hits.extend(diagnostic.theorem_hits.iter().cloned());
352            diagnostics.push(diagnostic);
353            continue;
354        }
355
356        theorem_hits.extend(extract_theorem_hits(trimmed));
357    }
358
359    theorem_hits.sort();
360    theorem_hits.dedup();
361
362    LeanOutput {
363        diagnostics,
364        theorem_hits,
365    }
366}
367
368fn parse_lean_diagnostic_line(line: &str) -> Option<LeanDiagnostic> {
369    let (location, rest) = line.split_once(": ")?;
370    let severity = ["error", "warning", "info"]
371        .into_iter()
372        .find(|severity| rest.starts_with(severity))?;
373    let message = rest[severity.len()..]
374        .trim_start_matches(':')
375        .trim()
376        .to_string();
377    let theorem_hits = extract_theorem_hits(&message);
378
379    let mut location_parts = location.split(':');
380    let file = location_parts.next()?.to_string();
381    let line_num = location_parts.next()?.parse().ok();
382    let column = location_parts.next()?.parse().ok();
383
384    Some(LeanDiagnostic {
385        file: Some(file),
386        line: line_num,
387        column,
388        severity: severity.into(),
389        message,
390        theorem_hits,
391    })
392}
393
394fn extract_theorem_hits(text: &str) -> Vec<String> {
395    fn normalize(token: &str) -> String {
396        token
397            .trim_matches(|c: char| matches!(c, '`' | '"' | '\''))
398            .trim_end_matches(':')
399            .to_string()
400    }
401
402    let tokens = text
403        .split(|c: char| c.is_whitespace() || matches!(c, ',' | ';' | '(' | ')' | '[' | ']'))
404        .map(normalize)
405        .filter(|token| !token.is_empty())
406        .collect::<Vec<_>>();
407
408    let mut hits = Vec::new();
409
410    for window in tokens.windows(2) {
411        if let [head, candidate] = window
412            && matches!(head.as_str(), "theorem" | "declaration" | "sorry")
413            && candidate
414                .chars()
415                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.')
416        {
417            hits.push(candidate.clone());
418        }
419    }
420
421    if hits.is_empty() {
422        for token in tokens {
423            if token
424                .chars()
425                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.')
426                && token.contains('_')
427            {
428                hits.push(token);
429            }
430        }
431    }
432
433    hits.sort();
434    hits.dedup();
435    hits
436}
437
438#[cfg(feature = "std")]
439fn probe_backend_version(executable: &str) -> Option<String> {
440    let probes = [["--version"], ["-version"]];
441    for args in probes {
442        if let Ok(output) = Command::new(executable).args(args).output()
443            && output.status.success()
444        {
445            let text = String::from_utf8_lossy(&output.stdout).trim().to_string();
446            if !text.is_empty() {
447                return Some(text.lines().next().unwrap_or_default().to_string());
448            }
449        }
450    }
451    None
452}
453
454#[cfg(test)]
455mod tests {
456    use super::*;
457
458    fn sample_plan(kind: CommandKind) -> InvocationPlan {
459        InvocationPlan {
460            kind,
461            executable: "tool".into(),
462            args: vec!["input".into()],
463            working_directory: None,
464            input_files: vec!["input".into()],
465        }
466    }
467
468    #[test]
469    fn parses_smt_statuses() {
470        assert_eq!(
471            parse_smt_status("unsat\n(model ...)"),
472            Some(ExecutionStatus::Unsat)
473        );
474        assert_eq!(parse_smt_status("sat"), Some(ExecutionStatus::Sat));
475        assert_eq!(parse_smt_status("unknown"), Some(ExecutionStatus::Unknown));
476        assert_eq!(parse_smt_status("noise"), None);
477
478        let parsed = parse_smt_output("sat\n(model\n  (define-fun x () Int 1)\n)");
479        assert_eq!(parsed.status, Some(ExecutionStatus::Sat));
480        assert!(parsed.model.as_deref().unwrap().contains("define-fun x"));
481
482        let parsed = parse_smt_output("unknown\n(:reason-unknown \"incomplete\")");
483        assert_eq!(parsed.reason_unknown.as_deref(), Some("incomplete"));
484    }
485
486    #[test]
487    fn dry_runner_returns_dry_run_result() {
488        let result = DryRunner.run(&sample_plan(CommandKind::Lean));
489        assert_eq!(result.status, ExecutionStatus::DryRun);
490        assert!(result.stdout.contains("tool input"));
491    }
492
493    #[test]
494    fn successful_smt_result_can_yield_certificate() {
495        let result = ExecutionResult {
496            plan: sample_plan(CommandKind::Smt),
497            status: ExecutionStatus::Unsat,
498            stdout: "unsat".into(),
499            stderr: String::new(),
500            exit_code: Some(0),
501            backend_version: Some("Z3 4.13.0".into()),
502            smt_output: Some(parse_smt_output("unsat")),
503            lean_output: None,
504        };
505        let cert = result
506            .certificate_for_obligation("karpal-core::Semigroup for i32 [associativity]")
507            .expect("successful result should yield certificate");
508        assert_eq!(cert.backend, "smtlib2");
509        assert_eq!(cert.artifact_path.as_deref(), Some("input"));
510        assert_eq!(cert.backend_version.as_deref(), Some("Z3 4.13.0"));
511    }
512
513    #[test]
514    fn verification_policy_is_backend_specific() {
515        assert!(VerificationPolicy::for_kind(CommandKind::Smt).accepts(ExecutionStatus::Unsat));
516        assert!(!VerificationPolicy::for_kind(CommandKind::Smt).accepts(ExecutionStatus::Success));
517        assert!(VerificationPolicy::for_kind(CommandKind::Lean).accepts(ExecutionStatus::Success));
518        assert!(!VerificationPolicy::for_kind(CommandKind::Lean).accepts(ExecutionStatus::Unsat));
519        assert!(VerificationPolicy::for_kind(CommandKind::Kani).accepts(ExecutionStatus::Success));
520        assert!(!VerificationPolicy::for_kind(CommandKind::Kani).accepts(ExecutionStatus::Unsat));
521        assert_eq!(
522            VerificationPolicy::for_kind(CommandKind::Kani).witness_suffix,
523            "ok"
524        );
525    }
526
527    #[test]
528    fn parses_lean_diagnostics_and_theorem_hits() {
529        let parsed = parse_lean_output(
530            "",
531            "lean/KarpalVerify.lean:7:2: error: unsolved goals in theorem associativity\nlean/KarpalVerify.lean:12:4: warning: declaration uses sorry: left_inverse",
532        );
533
534        assert_eq!(parsed.error_count(), 1);
535        assert_eq!(parsed.diagnostics.len(), 2);
536        assert_eq!(parsed.diagnostics[0].line, Some(7));
537        assert_eq!(
538            parsed.diagnostics[0].theorem_hits,
539            vec!["associativity".to_string()]
540        );
541        let associativity = LeanTheorem {
542            obligation_name: "associativity".into(),
543            theorem_name: "associativity".into(),
544            property: "karpal_proof::IsAssociative".into(),
545            origin_summary: "demo".into(),
546            declaration_start_line: 7,
547            declaration_end_line: 8,
548        };
549        let left_inverse = LeanTheorem {
550            obligation_name: "left_inverse".into(),
551            theorem_name: "left_inverse".into(),
552            property: "karpal_proof::HasLeftInverse".into(),
553            origin_summary: "demo".into(),
554            declaration_start_line: 12,
555            declaration_end_line: 13,
556        };
557
558        assert!(parsed.theorem_hits.iter().any(|hit| hit == "associativity"));
559        assert!(parsed.theorem_hits.iter().any(|hit| hit == "left_inverse"));
560        assert_eq!(parsed.theorem_diagnostics(&associativity).len(), 1);
561        assert!(parsed.has_theorem_failure(&left_inverse));
562    }
563
564    #[test]
565    fn theorem_location_matching_falls_back_to_line_spans() {
566        let parsed = parse_lean_output(
567            "",
568            "lean/KarpalVerify.lean:10:2: error: type mismatch\nlean/KarpalVerify.lean:18:2: warning: declaration uses sorry",
569        );
570        let theorem = LeanTheorem {
571            obligation_name: "left_distributivity".into(),
572            theorem_name: "left_distributivity".into(),
573            property: "karpal_proof::IsLeftDistributive".into(),
574            origin_summary: "demo".into(),
575            declaration_start_line: 9,
576            declaration_end_line: 11,
577        };
578        let other = LeanTheorem {
579            obligation_name: "right_distributivity".into(),
580            theorem_name: "right_distributivity".into(),
581            property: "karpal_proof::IsRightDistributive".into(),
582            origin_summary: "demo".into(),
583            declaration_start_line: 14,
584            declaration_end_line: 16,
585        };
586
587        assert_eq!(parsed.theorem_diagnostics(&theorem).len(), 1);
588        assert!(parsed.has_theorem_failure(&theorem));
589        assert!(!parsed.has_theorem_failure(&other));
590    }
591}
karpal_verify/runner.rs

karpal_verify/
runner.rs