karpal_verify/
runner.rs

1use crate::{
2    Certificate, CommandKind, InvocationPlan, LeanCertificate, LeanTheorem, SmtCertificate,
3    VerificationBackend,
4};
5
6#[cfg(feature = "std")]
7use std::process::Command;
8
9#[cfg(not(feature = "std"))]
10use alloc::{string::String, vec::Vec};
11#[cfg(feature = "std")]
12use std::{string::String, vec::Vec};
13
14/// Outcome of an external verification run.
15#[derive(Debug, Clone, Copy, PartialEq, Eq)]
16pub enum ExecutionStatus {
17    Success,
18    Failure,
19    Sat,
20    Unsat,
21    Unknown,
22    DryRun,
23}
24
25/// Parsed SMT output details.
26#[derive(Debug, Clone, PartialEq, Eq)]
27pub struct SmtOutput {
28    pub status: Option<ExecutionStatus>,
29    pub model: Option<String>,
30    pub reason_unknown: Option<String>,
31}
32
33/// One parsed Lean diagnostic line.
34#[derive(Debug, Clone, PartialEq, Eq)]
35pub struct LeanDiagnostic {
36    pub file: Option<String>,
37    pub line: Option<usize>,
38    pub column: Option<usize>,
39    pub severity: String,
40    pub message: String,
41    pub theorem_hits: Vec<String>,
42}
43
44/// Parsed Lean output details.
45#[derive(Debug, Clone, PartialEq, Eq)]
46pub struct LeanOutput {
47    pub diagnostics: Vec<LeanDiagnostic>,
48    pub theorem_hits: Vec<String>,
49}
50
51/// Backend-specific verification policy.
52#[derive(Debug, Clone, Copy, PartialEq, Eq)]
53pub struct VerificationPolicy {
54    pub kind: CommandKind,
55    pub success_status: ExecutionStatus,
56    pub witness_suffix: &'static str,
57}
58
59impl VerificationPolicy {
60    pub fn for_kind(kind: CommandKind) -> Self {
61        match kind {
62            CommandKind::Smt => Self {
63                kind,
64                success_status: ExecutionStatus::Unsat,
65                witness_suffix: "unsat",
66            },
67            CommandKind::Lean => Self {
68                kind,
69                success_status: ExecutionStatus::Success,
70                witness_suffix: "ok",
71            },
72        }
73    }
74
75    pub fn accepts(self, status: ExecutionStatus) -> bool {
76        status == self.success_status
77    }
78}
79
80/// Result captured from a verifier invocation.
81#[derive(Debug, Clone, PartialEq, Eq)]
82pub struct ExecutionResult {
83    pub plan: InvocationPlan,
84    pub status: ExecutionStatus,
85    pub stdout: String,
86    pub stderr: String,
87    pub exit_code: Option<i32>,
88    pub backend_version: Option<String>,
89    pub smt_output: Option<SmtOutput>,
90    pub lean_output: Option<LeanOutput>,
91}
92
93impl ExecutionResult {
94    pub fn verification_policy(&self) -> VerificationPolicy {
95        VerificationPolicy::for_kind(self.plan.kind)
96    }
97
98    pub fn is_success(&self) -> bool {
99        self.verification_policy().accepts(self.status)
100    }
101
102    pub fn certificate_for_obligation(&self, obligation: &str) -> Option<Certificate> {
103        if !self.is_success() {
104            return None;
105        }
106
107        let backend = match self.plan.kind {
108            CommandKind::Smt => SmtCertificate::NAME,
109            CommandKind::Lean => LeanCertificate::NAME,
110        };
111
112        let witness = format!(
113            "{}:{}",
114            self.plan.executable,
115            self.verification_policy().witness_suffix
116        );
117
118        let artifact_path = self.plan.input_files.first().cloned();
119        let mut cert = Certificate::new(backend, obligation, witness);
120        if let Some(version) = &self.backend_version {
121            cert = cert.with_backend_version(version.clone());
122        }
123        if let Some(path) = artifact_path {
124            cert = cert.with_artifact_path(path);
125        }
126        Some(cert)
127    }
128}
129
130/// Runner abstraction for dry-run and local-process verification.
131pub trait VerifierRunner {
132    fn run(&self, plan: &InvocationPlan) -> ExecutionResult;
133
134    fn run_all(&self, plans: &[InvocationPlan]) -> Vec<ExecutionResult> {
135        plans.iter().map(|plan| self.run(plan)).collect()
136    }
137}
138
139/// Dry-run runner that never spawns processes.
140pub struct DryRunner;
141
142impl VerifierRunner for DryRunner {
143    fn run(&self, plan: &InvocationPlan) -> ExecutionResult {
144        ExecutionResult {
145            plan: plan.clone(),
146            status: ExecutionStatus::DryRun,
147            stdout: plan.render_shell(),
148            stderr: String::new(),
149            exit_code: None,
150            backend_version: None,
151            smt_output: None,
152            lean_output: None,
153        }
154    }
155}
156
157/// Local process runner using `std::process::Command`.
158#[cfg(feature = "std")]
159pub struct LocalProcessRunner;
160
161#[cfg(feature = "std")]
162impl VerifierRunner for LocalProcessRunner {
163    fn run(&self, plan: &InvocationPlan) -> ExecutionResult {
164        let mut command = Command::new(&plan.executable);
165        command.args(&plan.args);
166        if let Some(dir) = &plan.working_directory {
167            command.current_dir(dir);
168        }
169
170        let backend_version = probe_backend_version(&plan.executable);
171
172        match command.output() {
173            Ok(output) => {
174                let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
175                let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
176                let smt_output = match plan.kind {
177                    CommandKind::Smt => Some(parse_smt_output(&stdout)),
178                    CommandKind::Lean => None,
179                };
180                let lean_output = match plan.kind {
181                    CommandKind::Smt => None,
182                    CommandKind::Lean => Some(parse_lean_output(&stdout, &stderr)),
183                };
184                let status = classify_status(plan.kind, output.status.success(), &stdout, &stderr);
185                ExecutionResult {
186                    plan: plan.clone(),
187                    status,
188                    stdout,
189                    stderr,
190                    exit_code: output.status.code(),
191                    backend_version,
192                    smt_output,
193                    lean_output,
194                }
195            }
196            Err(err) => ExecutionResult {
197                plan: plan.clone(),
198                status: ExecutionStatus::Failure,
199                stdout: String::new(),
200                stderr: err.to_string(),
201                exit_code: None,
202                backend_version,
203                smt_output: None,
204                lean_output: None,
205            },
206        }
207    }
208}
209
210fn classify_status(
211    kind: CommandKind,
212    process_success: bool,
213    stdout: &str,
214    stderr: &str,
215) -> ExecutionStatus {
216    match kind {
217        CommandKind::Smt => parse_smt_output(stdout)
218            .status
219            .unwrap_or(if process_success {
220                ExecutionStatus::Success
221            } else {
222                ExecutionStatus::Failure
223            }),
224        CommandKind::Lean => {
225            let parsed = parse_lean_output(stdout, stderr);
226            if process_success && parsed.error_count() == 0 {
227                ExecutionStatus::Success
228            } else {
229                ExecutionStatus::Failure
230            }
231        }
232    }
233}
234
235/// Parse SMT solver output into structured details.
236pub fn parse_smt_output(stdout: &str) -> SmtOutput {
237    let mut status = None;
238    let mut model_lines = Vec::new();
239    let mut reason_unknown = None;
240    let mut capture_model = false;
241
242    for line in stdout.lines() {
243        let trimmed = line.trim();
244        match trimmed {
245            "sat" => {
246                status = Some(ExecutionStatus::Sat);
247                capture_model = true;
248                continue;
249            }
250            "unsat" => {
251                status = Some(ExecutionStatus::Unsat);
252                continue;
253            }
254            "unknown" => {
255                status = Some(ExecutionStatus::Unknown);
256                continue;
257            }
258            _ => {}
259        }
260
261        if let Some(rest) = trimmed.strip_prefix("(:reason-unknown") {
262            reason_unknown = Some(
263                rest.trim()
264                    .trim_end_matches(')')
265                    .trim()
266                    .trim_matches('"')
267                    .to_string(),
268            );
269            continue;
270        }
271
272        if capture_model && !trimmed.is_empty() {
273            model_lines.push(trimmed.to_string());
274        }
275    }
276
277    SmtOutput {
278        status,
279        model: (!model_lines.is_empty()).then(|| model_lines.join("\n")),
280        reason_unknown,
281    }
282}
283
284/// Parse the first SMT solver status token from stdout.
285pub fn parse_smt_status(stdout: &str) -> Option<ExecutionStatus> {
286    parse_smt_output(stdout).status
287}
288
289impl LeanOutput {
290    pub fn error_count(&self) -> usize {
291        self.diagnostics
292            .iter()
293            .filter(|diagnostic| diagnostic.severity == "error")
294            .count()
295    }
296
297    pub fn theorem_diagnostics<'a>(&'a self, theorem: &LeanTheorem) -> Vec<&'a LeanDiagnostic> {
298        self.diagnostics
299            .iter()
300            .filter(|diagnostic| diagnostic_matches_theorem(diagnostic, theorem))
301            .collect()
302    }
303
304    pub fn has_theorem_failure(&self, theorem: &LeanTheorem) -> bool {
305        self.theorem_hits
306            .iter()
307            .any(|hit| hit == &theorem.theorem_name)
308            || self
309                .diagnostics
310                .iter()
311                .any(|diagnostic| diagnostic_matches_theorem(diagnostic, theorem))
312    }
313}
314
315fn diagnostic_matches_theorem(diagnostic: &LeanDiagnostic, theorem: &LeanTheorem) -> bool {
316    if !diagnostic.theorem_hits.is_empty() {
317        diagnostic
318            .theorem_hits
319            .iter()
320            .any(|hit| hit == &theorem.theorem_name)
321    } else {
322        diagnostic
323            .line
324            .is_some_and(|line| theorem.contains_line(line))
325    }
326}
327
328/// Parse Lean stdout/stderr into structured diagnostics and theorem references.
329pub fn parse_lean_output(stdout: &str, stderr: &str) -> LeanOutput {
330    let mut diagnostics = Vec::new();
331    let mut theorem_hits = Vec::new();
332
333    for line in stdout.lines().chain(stderr.lines()) {
334        let trimmed = line.trim();
335        if trimmed.is_empty() {
336            continue;
337        }
338
339        if let Some(diagnostic) = parse_lean_diagnostic_line(trimmed) {
340            theorem_hits.extend(diagnostic.theorem_hits.iter().cloned());
341            diagnostics.push(diagnostic);
342            continue;
343        }
344
345        theorem_hits.extend(extract_theorem_hits(trimmed));
346    }
347
348    theorem_hits.sort();
349    theorem_hits.dedup();
350
351    LeanOutput {
352        diagnostics,
353        theorem_hits,
354    }
355}
356
357fn parse_lean_diagnostic_line(line: &str) -> Option<LeanDiagnostic> {
358    let (location, rest) = line.split_once(": ")?;
359    let severity = ["error", "warning", "info"]
360        .into_iter()
361        .find(|severity| rest.starts_with(severity))?;
362    let message = rest[severity.len()..]
363        .trim_start_matches(':')
364        .trim()
365        .to_string();
366    let theorem_hits = extract_theorem_hits(&message);
367
368    let mut location_parts = location.split(':');
369    let file = location_parts.next()?.to_string();
370    let line_num = location_parts.next()?.parse().ok();
371    let column = location_parts.next()?.parse().ok();
372
373    Some(LeanDiagnostic {
374        file: Some(file),
375        line: line_num,
376        column,
377        severity: severity.into(),
378        message,
379        theorem_hits,
380    })
381}
382
383fn extract_theorem_hits(text: &str) -> Vec<String> {
384    fn normalize(token: &str) -> String {
385        token
386            .trim_matches(|c: char| matches!(c, '`' | '"' | '\''))
387            .trim_end_matches(':')
388            .to_string()
389    }
390
391    let tokens = text
392        .split(|c: char| c.is_whitespace() || matches!(c, ',' | ';' | '(' | ')' | '[' | ']'))
393        .map(normalize)
394        .filter(|token| !token.is_empty())
395        .collect::<Vec<_>>();
396
397    let mut hits = Vec::new();
398
399    for window in tokens.windows(2) {
400        if let [head, candidate] = window
401            && matches!(head.as_str(), "theorem" | "declaration" | "sorry")
402            && candidate
403                .chars()
404                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.')
405        {
406            hits.push(candidate.clone());
407        }
408    }
409
410    if hits.is_empty() {
411        for token in tokens {
412            if token
413                .chars()
414                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.')
415                && token.contains('_')
416            {
417                hits.push(token);
418            }
419        }
420    }
421
422    hits.sort();
423    hits.dedup();
424    hits
425}
426
427#[cfg(feature = "std")]
428fn probe_backend_version(executable: &str) -> Option<String> {
429    let probes = [["--version"], ["-version"]];
430    for args in probes {
431        if let Ok(output) = Command::new(executable).args(args).output()
432            && output.status.success()
433        {
434            let text = String::from_utf8_lossy(&output.stdout).trim().to_string();
435            if !text.is_empty() {
436                return Some(text.lines().next().unwrap_or_default().to_string());
437            }
438        }
439    }
440    None
441}
442
443#[cfg(test)]
444mod tests {
445    use super::*;
446
447    fn sample_plan(kind: CommandKind) -> InvocationPlan {
448        InvocationPlan {
449            kind,
450            executable: "tool".into(),
451            args: vec!["input".into()],
452            working_directory: None,
453            input_files: vec!["input".into()],
454        }
455    }
456
457    #[test]
458    fn parses_smt_statuses() {
459        assert_eq!(
460            parse_smt_status("unsat\n(model ...)"),
461            Some(ExecutionStatus::Unsat)
462        );
463        assert_eq!(parse_smt_status("sat"), Some(ExecutionStatus::Sat));
464        assert_eq!(parse_smt_status("unknown"), Some(ExecutionStatus::Unknown));
465        assert_eq!(parse_smt_status("noise"), None);
466
467        let parsed = parse_smt_output("sat\n(model\n  (define-fun x () Int 1)\n)");
468        assert_eq!(parsed.status, Some(ExecutionStatus::Sat));
469        assert!(parsed.model.as_deref().unwrap().contains("define-fun x"));
470
471        let parsed = parse_smt_output("unknown\n(:reason-unknown \"incomplete\")");
472        assert_eq!(parsed.reason_unknown.as_deref(), Some("incomplete"));
473    }
474
475    #[test]
476    fn dry_runner_returns_dry_run_result() {
477        let result = DryRunner.run(&sample_plan(CommandKind::Lean));
478        assert_eq!(result.status, ExecutionStatus::DryRun);
479        assert!(result.stdout.contains("tool input"));
480    }
481
482    #[test]
483    fn successful_smt_result_can_yield_certificate() {
484        let result = ExecutionResult {
485            plan: sample_plan(CommandKind::Smt),
486            status: ExecutionStatus::Unsat,
487            stdout: "unsat".into(),
488            stderr: String::new(),
489            exit_code: Some(0),
490            backend_version: Some("Z3 4.13.0".into()),
491            smt_output: Some(parse_smt_output("unsat")),
492            lean_output: None,
493        };
494        let cert = result
495            .certificate_for_obligation("karpal-core::Semigroup for i32 [associativity]")
496            .expect("successful result should yield certificate");
497        assert_eq!(cert.backend, "smtlib2");
498        assert_eq!(cert.artifact_path.as_deref(), Some("input"));
499        assert_eq!(cert.backend_version.as_deref(), Some("Z3 4.13.0"));
500    }
501
502    #[test]
503    fn verification_policy_is_backend_specific() {
504        assert!(VerificationPolicy::for_kind(CommandKind::Smt).accepts(ExecutionStatus::Unsat));
505        assert!(!VerificationPolicy::for_kind(CommandKind::Smt).accepts(ExecutionStatus::Success));
506        assert!(VerificationPolicy::for_kind(CommandKind::Lean).accepts(ExecutionStatus::Success));
507        assert!(!VerificationPolicy::for_kind(CommandKind::Lean).accepts(ExecutionStatus::Unsat));
508    }
509
510    #[test]
511    fn parses_lean_diagnostics_and_theorem_hits() {
512        let parsed = parse_lean_output(
513            "",
514            "lean/KarpalVerify.lean:7:2: error: unsolved goals in theorem associativity\nlean/KarpalVerify.lean:12:4: warning: declaration uses sorry: left_inverse",
515        );
516
517        assert_eq!(parsed.error_count(), 1);
518        assert_eq!(parsed.diagnostics.len(), 2);
519        assert_eq!(parsed.diagnostics[0].line, Some(7));
520        assert_eq!(
521            parsed.diagnostics[0].theorem_hits,
522            vec!["associativity".to_string()]
523        );
524        let associativity = LeanTheorem {
525            obligation_name: "associativity".into(),
526            theorem_name: "associativity".into(),
527            property: "karpal_proof::IsAssociative".into(),
528            origin_summary: "demo".into(),
529            declaration_start_line: 7,
530            declaration_end_line: 8,
531        };
532        let left_inverse = LeanTheorem {
533            obligation_name: "left_inverse".into(),
534            theorem_name: "left_inverse".into(),
535            property: "karpal_proof::HasLeftInverse".into(),
536            origin_summary: "demo".into(),
537            declaration_start_line: 12,
538            declaration_end_line: 13,
539        };
540
541        assert!(parsed.theorem_hits.iter().any(|hit| hit == "associativity"));
542        assert!(parsed.theorem_hits.iter().any(|hit| hit == "left_inverse"));
543        assert_eq!(parsed.theorem_diagnostics(&associativity).len(), 1);
544        assert!(parsed.has_theorem_failure(&left_inverse));
545    }
546
547    #[test]
548    fn theorem_location_matching_falls_back_to_line_spans() {
549        let parsed = parse_lean_output(
550            "",
551            "lean/KarpalVerify.lean:10:2: error: type mismatch\nlean/KarpalVerify.lean:18:2: warning: declaration uses sorry",
552        );
553        let theorem = LeanTheorem {
554            obligation_name: "left_distributivity".into(),
555            theorem_name: "left_distributivity".into(),
556            property: "karpal_proof::IsLeftDistributive".into(),
557            origin_summary: "demo".into(),
558            declaration_start_line: 9,
559            declaration_end_line: 11,
560        };
561        let other = LeanTheorem {
562            obligation_name: "right_distributivity".into(),
563            theorem_name: "right_distributivity".into(),
564            property: "karpal_proof::IsRightDistributive".into(),
565            origin_summary: "demo".into(),
566            declaration_start_line: 14,
567            declaration_end_line: 16,
568        };
569
570        assert_eq!(parsed.theorem_diagnostics(&theorem).len(), 1);
571        assert!(parsed.has_theorem_failure(&theorem));
572        assert!(!parsed.has_theorem_failure(&other));
573    }
574}
karpal_verify/runner.rs

karpal_verify/
runner.rs