Skip to main content

karpal_verify/
runner.rs

1use crate::{
2    Certificate, CommandKind, InvocationPlan, LeanCertificate, LeanTheorem, SmtCertificate,
3    VerificationBackend,
4};
5
6#[cfg(feature = "std")]
7use std::process::Command;
8
9#[cfg(not(feature = "std"))]
10use alloc::{string::String, vec::Vec};
11#[cfg(feature = "std")]
12use std::{string::String, vec::Vec};
13
14/// Outcome of an external verification run.
15#[derive(Debug, Clone, Copy, PartialEq, Eq)]
16pub enum ExecutionStatus {
17    Success,
18    Failure,
19    Sat,
20    Unsat,
21    Unknown,
22    DryRun,
23}
24
25/// Parsed SMT output details.
26#[derive(Debug, Clone, PartialEq, Eq)]
27pub struct SmtOutput {
28    pub status: Option<ExecutionStatus>,
29    pub model: Option<String>,
30    pub reason_unknown: Option<String>,
31}
32
33/// One parsed Lean diagnostic line.
34#[derive(Debug, Clone, PartialEq, Eq)]
35pub struct LeanDiagnostic {
36    pub file: Option<String>,
37    pub line: Option<usize>,
38    pub column: Option<usize>,
39    pub severity: String,
40    pub message: String,
41    pub theorem_hits: Vec<String>,
42}
43
44/// Parsed Lean output details.
45#[derive(Debug, Clone, PartialEq, Eq)]
46pub struct LeanOutput {
47    pub diagnostics: Vec<LeanDiagnostic>,
48    pub theorem_hits: Vec<String>,
49}
50
51/// Backend-specific verification policy.
52#[derive(Debug, Clone, Copy, PartialEq, Eq)]
53pub struct VerificationPolicy {
54    pub kind: CommandKind,
55    pub success_status: ExecutionStatus,
56    pub witness_suffix: &'static str,
57}
58
59impl VerificationPolicy {
60    pub fn for_kind(kind: CommandKind) -> Self {
61        match kind {
62            CommandKind::Smt => Self {
63                kind,
64                success_status: ExecutionStatus::Unsat,
65                witness_suffix: "unsat",
66            },
67            CommandKind::Lean | CommandKind::Kani => Self {
68                kind,
69                success_status: ExecutionStatus::Success,
70                witness_suffix: "ok",
71            },
72        }
73    }
74
75    pub fn accepts(self, status: ExecutionStatus) -> bool {
76        status == self.success_status
77    }
78}
79
80/// Result captured from a verifier invocation.
81#[derive(Debug, Clone, PartialEq, Eq)]
82pub struct ExecutionResult {
83    pub plan: InvocationPlan,
84    pub status: ExecutionStatus,
85    pub stdout: String,
86    pub stderr: String,
87    pub exit_code: Option<i32>,
88    pub backend_version: Option<String>,
89    pub smt_output: Option<SmtOutput>,
90    pub lean_output: Option<LeanOutput>,
91}
92
93impl ExecutionResult {
94    pub fn verification_policy(&self) -> VerificationPolicy {
95        VerificationPolicy::for_kind(self.plan.kind)
96    }
97
98    pub fn is_success(&self) -> bool {
99        self.verification_policy().accepts(self.status)
100    }
101
102    pub fn certificate_for_obligation(&self, obligation: &str) -> Option<Certificate> {
103        if !self.is_success() {
104            return None;
105        }
106
107        let backend = match self.plan.kind {
108            CommandKind::Smt => SmtCertificate::NAME,
109            CommandKind::Lean => LeanCertificate::NAME,
110            CommandKind::Kani => crate::KaniCertificate::NAME,
111        };
112
113        let witness = format!(
114            "{}:{}",
115            self.plan.executable,
116            self.verification_policy().witness_suffix
117        );
118
119        let artifact_path = self.plan.input_files.first().cloned();
120        let mut cert = Certificate::new(backend, obligation, witness);
121        if let Some(version) = &self.backend_version {
122            cert = cert.with_backend_version(version.clone());
123        }
124        if let Some(path) = artifact_path {
125            cert = cert.with_artifact_path(path);
126        }
127        Some(cert)
128    }
129}
130
131/// Runner abstraction for dry-run and local-process verification.
132pub trait VerifierRunner {
133    fn run(&self, plan: &InvocationPlan) -> ExecutionResult;
134
135    fn run_all(&self, plans: &[InvocationPlan]) -> Vec<ExecutionResult> {
136        plans.iter().map(|plan| self.run(plan)).collect()
137    }
138}
139
140/// Dry-run runner that never spawns processes.
141pub struct DryRunner;
142
143impl VerifierRunner for DryRunner {
144    fn run(&self, plan: &InvocationPlan) -> ExecutionResult {
145        ExecutionResult {
146            plan: plan.clone(),
147            status: ExecutionStatus::DryRun,
148            stdout: plan.render_shell(),
149            stderr: String::new(),
150            exit_code: None,
151            backend_version: None,
152            smt_output: None,
153            lean_output: None,
154        }
155    }
156}
157
158/// Local process runner using `std::process::Command`.
159#[cfg(feature = "std")]
160pub struct LocalProcessRunner;
161
162#[cfg(feature = "std")]
163impl VerifierRunner for LocalProcessRunner {
164    fn run(&self, plan: &InvocationPlan) -> ExecutionResult {
165        let mut command = Command::new(&plan.executable);
166        command.args(&plan.args);
167        if let Some(dir) = &plan.working_directory {
168            command.current_dir(dir);
169        }
170
171        let backend_version = probe_backend_version(&plan.executable);
172
173        match command.output() {
174            Ok(output) => {
175                let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
176                let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
177                let smt_output = match plan.kind {
178                    CommandKind::Smt => Some(parse_smt_output(&stdout)),
179                    CommandKind::Lean | CommandKind::Kani => None,
180                };
181                let lean_output = match plan.kind {
182                    CommandKind::Lean => Some(parse_lean_output(&stdout, &stderr)),
183                    CommandKind::Smt | CommandKind::Kani => None,
184                };
185                let status = classify_status(plan.kind, output.status.success(), &stdout, &stderr);
186                ExecutionResult {
187                    plan: plan.clone(),
188                    status,
189                    stdout,
190                    stderr,
191                    exit_code: output.status.code(),
192                    backend_version,
193                    smt_output,
194                    lean_output,
195                }
196            }
197            Err(err) => ExecutionResult {
198                plan: plan.clone(),
199                status: ExecutionStatus::Failure,
200                stdout: String::new(),
201                stderr: err.to_string(),
202                exit_code: None,
203                backend_version,
204                smt_output: None,
205                lean_output: None,
206            },
207        }
208    }
209}
210
211fn classify_status(
212    kind: CommandKind,
213    process_success: bool,
214    stdout: &str,
215    stderr: &str,
216) -> ExecutionStatus {
217    match kind {
218        CommandKind::Smt => parse_smt_output(stdout)
219            .status
220            .unwrap_or(if process_success {
221                ExecutionStatus::Success
222            } else {
223                ExecutionStatus::Failure
224            }),
225        CommandKind::Lean => {
226            let parsed = parse_lean_output(stdout, stderr);
227            if process_success && parsed.error_count() == 0 {
228                ExecutionStatus::Success
229            } else {
230                ExecutionStatus::Failure
231            }
232        }
233        CommandKind::Kani => {
234            if process_success {
235                ExecutionStatus::Success
236            } else {
237                ExecutionStatus::Failure
238            }
239        }
240    }
241}
242
243/// Parse SMT solver output into structured details.
244pub fn parse_smt_output(stdout: &str) -> SmtOutput {
245    let mut status = None;
246    let mut model_lines = Vec::new();
247    let mut reason_unknown = None;
248    let mut capture_model = false;
249
250    for line in stdout.lines() {
251        let trimmed = line.trim();
252        match trimmed {
253            "sat" => {
254                status = Some(ExecutionStatus::Sat);
255                capture_model = true;
256                continue;
257            }
258            "unsat" => {
259                status = Some(ExecutionStatus::Unsat);
260                continue;
261            }
262            "unknown" => {
263                status = Some(ExecutionStatus::Unknown);
264                continue;
265            }
266            _ => {}
267        }
268
269        if let Some(rest) = trimmed.strip_prefix("(:reason-unknown") {
270            reason_unknown = Some(
271                rest.trim()
272                    .trim_end_matches(')')
273                    .trim()
274                    .trim_matches('"')
275                    .to_string(),
276            );
277            continue;
278        }
279
280        if capture_model && !trimmed.is_empty() {
281            model_lines.push(trimmed.to_string());
282        }
283    }
284
285    SmtOutput {
286        status,
287        model: (!model_lines.is_empty()).then(|| model_lines.join("\n")),
288        reason_unknown,
289    }
290}
291
292/// Parse the first SMT solver status token from stdout.
293pub fn parse_smt_status(stdout: &str) -> Option<ExecutionStatus> {
294    parse_smt_output(stdout).status
295}
296
297impl LeanOutput {
298    pub fn error_count(&self) -> usize {
299        self.diagnostics
300            .iter()
301            .filter(|diagnostic| diagnostic.severity == "error")
302            .count()
303    }
304
305    pub fn theorem_diagnostics<'a>(&'a self, theorem: &LeanTheorem) -> Vec<&'a LeanDiagnostic> {
306        self.diagnostics
307            .iter()
308            .filter(|diagnostic| diagnostic_matches_theorem(diagnostic, theorem))
309            .collect()
310    }
311
312    pub fn has_theorem_failure(&self, theorem: &LeanTheorem) -> bool {
313        self.theorem_hits
314            .iter()
315            .any(|hit| hit == &theorem.theorem_name)
316            || self
317                .diagnostics
318                .iter()
319                .any(|diagnostic| diagnostic_matches_theorem(diagnostic, theorem))
320    }
321}
322
323fn diagnostic_matches_theorem(diagnostic: &LeanDiagnostic, theorem: &LeanTheorem) -> bool {
324    if !diagnostic.theorem_hits.is_empty() {
325        diagnostic
326            .theorem_hits
327            .iter()
328            .any(|hit| hit == &theorem.theorem_name)
329    } else {
330        diagnostic
331            .line
332            .is_some_and(|line| theorem.contains_line(line))
333    }
334}
335
336/// Parse Lean stdout/stderr into structured diagnostics and theorem references.
337pub fn parse_lean_output(stdout: &str, stderr: &str) -> LeanOutput {
338    let mut diagnostics = Vec::new();
339    let mut theorem_hits = Vec::new();
340
341    for line in stdout.lines().chain(stderr.lines()) {
342        let trimmed = line.trim();
343        if trimmed.is_empty() {
344            continue;
345        }
346
347        if let Some(diagnostic) = parse_lean_diagnostic_line(trimmed) {
348            theorem_hits.extend(diagnostic.theorem_hits.iter().cloned());
349            diagnostics.push(diagnostic);
350            continue;
351        }
352
353        theorem_hits.extend(extract_theorem_hits(trimmed));
354    }
355
356    theorem_hits.sort();
357    theorem_hits.dedup();
358
359    LeanOutput {
360        diagnostics,
361        theorem_hits,
362    }
363}
364
365fn parse_lean_diagnostic_line(line: &str) -> Option<LeanDiagnostic> {
366    let (location, rest) = line.split_once(": ")?;
367    let severity = ["error", "warning", "info"]
368        .into_iter()
369        .find(|severity| rest.starts_with(severity))?;
370    let message = rest[severity.len()..]
371        .trim_start_matches(':')
372        .trim()
373        .to_string();
374    let theorem_hits = extract_theorem_hits(&message);
375
376    let mut location_parts = location.split(':');
377    let file = location_parts.next()?.to_string();
378    let line_num = location_parts.next()?.parse().ok();
379    let column = location_parts.next()?.parse().ok();
380
381    Some(LeanDiagnostic {
382        file: Some(file),
383        line: line_num,
384        column,
385        severity: severity.into(),
386        message,
387        theorem_hits,
388    })
389}
390
391fn extract_theorem_hits(text: &str) -> Vec<String> {
392    fn normalize(token: &str) -> String {
393        token
394            .trim_matches(|c: char| matches!(c, '`' | '"' | '\''))
395            .trim_end_matches(':')
396            .to_string()
397    }
398
399    let tokens = text
400        .split(|c: char| c.is_whitespace() || matches!(c, ',' | ';' | '(' | ')' | '[' | ']'))
401        .map(normalize)
402        .filter(|token| !token.is_empty())
403        .collect::<Vec<_>>();
404
405    let mut hits = Vec::new();
406
407    for window in tokens.windows(2) {
408        if let [head, candidate] = window
409            && matches!(head.as_str(), "theorem" | "declaration" | "sorry")
410            && candidate
411                .chars()
412                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.')
413        {
414            hits.push(candidate.clone());
415        }
416    }
417
418    if hits.is_empty() {
419        for token in tokens {
420            if token
421                .chars()
422                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.')
423                && token.contains('_')
424            {
425                hits.push(token);
426            }
427        }
428    }
429
430    hits.sort();
431    hits.dedup();
432    hits
433}
434
435#[cfg(feature = "std")]
436fn probe_backend_version(executable: &str) -> Option<String> {
437    let probes = [["--version"], ["-version"]];
438    for args in probes {
439        if let Ok(output) = Command::new(executable).args(args).output()
440            && output.status.success()
441        {
442            let text = String::from_utf8_lossy(&output.stdout).trim().to_string();
443            if !text.is_empty() {
444                return Some(text.lines().next().unwrap_or_default().to_string());
445            }
446        }
447    }
448    None
449}
450
451#[cfg(test)]
452mod tests {
453    use super::*;
454
455    fn sample_plan(kind: CommandKind) -> InvocationPlan {
456        InvocationPlan {
457            kind,
458            executable: "tool".into(),
459            args: vec!["input".into()],
460            working_directory: None,
461            input_files: vec!["input".into()],
462        }
463    }
464
465    #[test]
466    fn parses_smt_statuses() {
467        assert_eq!(
468            parse_smt_status("unsat\n(model ...)"),
469            Some(ExecutionStatus::Unsat)
470        );
471        assert_eq!(parse_smt_status("sat"), Some(ExecutionStatus::Sat));
472        assert_eq!(parse_smt_status("unknown"), Some(ExecutionStatus::Unknown));
473        assert_eq!(parse_smt_status("noise"), None);
474
475        let parsed = parse_smt_output("sat\n(model\n  (define-fun x () Int 1)\n)");
476        assert_eq!(parsed.status, Some(ExecutionStatus::Sat));
477        assert!(parsed.model.as_deref().unwrap().contains("define-fun x"));
478
479        let parsed = parse_smt_output("unknown\n(:reason-unknown \"incomplete\")");
480        assert_eq!(parsed.reason_unknown.as_deref(), Some("incomplete"));
481    }
482
483    #[test]
484    fn dry_runner_returns_dry_run_result() {
485        let result = DryRunner.run(&sample_plan(CommandKind::Lean));
486        assert_eq!(result.status, ExecutionStatus::DryRun);
487        assert!(result.stdout.contains("tool input"));
488    }
489
490    #[test]
491    fn successful_smt_result_can_yield_certificate() {
492        let result = ExecutionResult {
493            plan: sample_plan(CommandKind::Smt),
494            status: ExecutionStatus::Unsat,
495            stdout: "unsat".into(),
496            stderr: String::new(),
497            exit_code: Some(0),
498            backend_version: Some("Z3 4.13.0".into()),
499            smt_output: Some(parse_smt_output("unsat")),
500            lean_output: None,
501        };
502        let cert = result
503            .certificate_for_obligation("karpal-core::Semigroup for i32 [associativity]")
504            .expect("successful result should yield certificate");
505        assert_eq!(cert.backend, "smtlib2");
506        assert_eq!(cert.artifact_path.as_deref(), Some("input"));
507        assert_eq!(cert.backend_version.as_deref(), Some("Z3 4.13.0"));
508    }
509
510    #[test]
511    fn verification_policy_is_backend_specific() {
512        assert!(VerificationPolicy::for_kind(CommandKind::Smt).accepts(ExecutionStatus::Unsat));
513        assert!(!VerificationPolicy::for_kind(CommandKind::Smt).accepts(ExecutionStatus::Success));
514        assert!(VerificationPolicy::for_kind(CommandKind::Lean).accepts(ExecutionStatus::Success));
515        assert!(!VerificationPolicy::for_kind(CommandKind::Lean).accepts(ExecutionStatus::Unsat));
516        assert!(VerificationPolicy::for_kind(CommandKind::Kani).accepts(ExecutionStatus::Success));
517        assert!(!VerificationPolicy::for_kind(CommandKind::Kani).accepts(ExecutionStatus::Unsat));
518        assert_eq!(
519            VerificationPolicy::for_kind(CommandKind::Kani).witness_suffix,
520            "ok"
521        );
522    }
523
524    #[test]
525    fn parses_lean_diagnostics_and_theorem_hits() {
526        let parsed = parse_lean_output(
527            "",
528            "lean/KarpalVerify.lean:7:2: error: unsolved goals in theorem associativity\nlean/KarpalVerify.lean:12:4: warning: declaration uses sorry: left_inverse",
529        );
530
531        assert_eq!(parsed.error_count(), 1);
532        assert_eq!(parsed.diagnostics.len(), 2);
533        assert_eq!(parsed.diagnostics[0].line, Some(7));
534        assert_eq!(
535            parsed.diagnostics[0].theorem_hits,
536            vec!["associativity".to_string()]
537        );
538        let associativity = LeanTheorem {
539            obligation_name: "associativity".into(),
540            theorem_name: "associativity".into(),
541            property: "karpal_proof::IsAssociative".into(),
542            origin_summary: "demo".into(),
543            declaration_start_line: 7,
544            declaration_end_line: 8,
545        };
546        let left_inverse = LeanTheorem {
547            obligation_name: "left_inverse".into(),
548            theorem_name: "left_inverse".into(),
549            property: "karpal_proof::HasLeftInverse".into(),
550            origin_summary: "demo".into(),
551            declaration_start_line: 12,
552            declaration_end_line: 13,
553        };
554
555        assert!(parsed.theorem_hits.iter().any(|hit| hit == "associativity"));
556        assert!(parsed.theorem_hits.iter().any(|hit| hit == "left_inverse"));
557        assert_eq!(parsed.theorem_diagnostics(&associativity).len(), 1);
558        assert!(parsed.has_theorem_failure(&left_inverse));
559    }
560
561    #[test]
562    fn theorem_location_matching_falls_back_to_line_spans() {
563        let parsed = parse_lean_output(
564            "",
565            "lean/KarpalVerify.lean:10:2: error: type mismatch\nlean/KarpalVerify.lean:18:2: warning: declaration uses sorry",
566        );
567        let theorem = LeanTheorem {
568            obligation_name: "left_distributivity".into(),
569            theorem_name: "left_distributivity".into(),
570            property: "karpal_proof::IsLeftDistributive".into(),
571            origin_summary: "demo".into(),
572            declaration_start_line: 9,
573            declaration_end_line: 11,
574        };
575        let other = LeanTheorem {
576            obligation_name: "right_distributivity".into(),
577            theorem_name: "right_distributivity".into(),
578            property: "karpal_proof::IsRightDistributive".into(),
579            origin_summary: "demo".into(),
580            declaration_start_line: 14,
581            declaration_end_line: 16,
582        };
583
584        assert_eq!(parsed.theorem_diagnostics(&theorem).len(), 1);
585        assert!(parsed.has_theorem_failure(&theorem));
586        assert!(!parsed.has_theorem_failure(&other));
587    }
588}