Skip to main content

provable_contracts/
proof_status.rs

1//! Proof status report — cross-contract proof level assessment.
2//!
3//! Computes a hierarchical proof level (L1–L5) for each contract and
4//! aggregates them into kernel equivalence classes that mirror the
5//! `KernelOp` classification from apr-model-qa-playbook.
6//!
7//! Output is consumed by `pv proof-status` (text/JSON) and by the
8//! playbook's `ProofBonus` MQS integration.
9
10use std::collections::BTreeMap;
11use std::fmt;
12
13use serde::{Deserialize, Serialize};
14
15use crate::binding::{BindingRegistry, ImplStatus};
16use crate::schema::Contract;
17
18// ── Proof level hierarchy ─────────────────────────────────────────
19
20/// Hierarchical proof assurance level.
21///
22/// Each level subsumes the ones below it:
23/// - **L1** — Contract YAML exists with equations
24/// - **L2** — Property tested (falsification tests cover obligations)
25/// - **L3** — Kani bounded-model-checked
26/// - **L4** — Lean 4 theorem proved
27/// - **L5** — L4 + all bindings verified as implemented
28#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
29pub enum ProofLevel {
30    /// Contract YAML exists with equations
31    L1,
32    /// Property tested via falsification tests
33    L2,
34    /// Kani bounded-model-checked
35    L3,
36    /// Lean 4 theorem proved
37    L4,
38    /// Lean proved and all bindings verified
39    L5,
40}
41
42impl fmt::Display for ProofLevel {
43    /// Format the proof level as its string label (L1 through L5)
44    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45        let s = match self {
46            Self::L1 => "L1",
47            Self::L2 => "L2",
48            Self::L3 => "L3",
49            Self::L4 => "L4",
50            Self::L5 => "L5",
51        };
52        write!(f, "{s}")
53    }
54}
55
56// ── Per-contract status ───────────────────────────────────────────
57
58/// Proof status for a single contract.
59#[derive(Debug, Clone, Serialize, Deserialize)]
60pub struct ContractProofStatus {
61    /// Contract file stem (e.g. "softmax-kernel-v1")
62    pub stem: String,
63    /// Computed hierarchical proof level
64    pub proof_level: ProofLevel,
65    /// Number of proof obligations in the contract
66    pub obligations: u32,
67    /// Number of falsification tests defined
68    pub falsification_tests: u32,
69    /// Number of Kani bounded-model-checking harnesses
70    pub kani_harnesses: u32,
71    /// Number of obligations proved in Lean 4
72    pub lean_proved: u32,
73    /// Number of bindings with `implemented` status
74    pub bindings_implemented: u32,
75    /// Total number of equation bindings
76    pub bindings_total: u32,
77}
78
79// ── Kernel class summary ──────────────────────────────────────────
80
81/// Summary of proof status for a kernel equivalence class.
82#[derive(Debug, Clone, Serialize, Deserialize)]
83pub struct KernelClassSummary {
84    /// Kernel class identifier (A through E)
85    pub label: String,
86    /// Human-readable description of the kernel combination
87    pub description: String,
88    /// Contract stems belonging to this class
89    pub contract_stems: Vec<String>,
90    /// Lowest proof level among class members
91    pub min_proof_level: ProofLevel,
92    /// Whether all class members have full binding coverage
93    pub all_bound: bool,
94}
95
96// ── Full report ───────────────────────────────────────────────────
97
98/// Top-level proof status report, serializable to JSON.
99#[derive(Debug, Clone, Serialize, Deserialize)]
100pub struct ProofStatusReport {
101    /// Report schema version for forward compatibility
102    pub schema_version: String,
103    /// Unix epoch timestamp when the report was generated
104    pub timestamp: String,
105    /// Per-contract proof status entries
106    pub contracts: Vec<ContractProofStatus>,
107    /// Kernel equivalence class summaries
108    pub kernel_classes: Vec<KernelClassSummary>,
109    /// Aggregate totals across all contracts
110    pub totals: ProofStatusTotals,
111}
112
113/// Aggregate totals across all contracts.
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct ProofStatusTotals {
116    /// Total number of contracts analyzed
117    pub contracts: u32,
118    /// Sum of proof obligations across all contracts
119    pub obligations: u32,
120    /// Sum of falsification tests across all contracts
121    pub falsification_tests: u32,
122    /// Sum of Kani harnesses across all contracts
123    pub kani_harnesses: u32,
124    /// Sum of Lean-proved obligations across all contracts
125    pub lean_proved: u32,
126    /// Sum of implemented bindings across all contracts
127    pub bindings_implemented: u32,
128    /// Sum of total bindings across all contracts
129    pub bindings_total: u32,
130}
131
132// ── Kernel class → contract stem mapping ──────────────────────────
133
134/// Static mapping from kernel equivalence class to contract stems.
135///
136/// Mirrors the `KernelOp` classification from `apr-model-qa-playbook`:
137/// - **A** — GQA + `RMSNorm` + `SiLU` + `SwiGLU` + `RoPE` (Llama/Mistral)
138/// - **B** — MHA + `LayerNorm` + GELU + `AbsPos` (GPT-2/BERT)
139/// - **C** — MHA + `LayerNorm` + GELU + `ALiBi` (BLOOM/MPT)
140/// - **D** — `LayerNorm` + GELU + `SiLU` + GQA (Gemma)
141/// - **E** — `RMSNorm` + `SwiGLU` + GQA (Qwen)
142fn kernel_class_map() -> Vec<(&'static str, &'static str, &'static [&'static str])> {
143    vec![
144        (
145            "A",
146            "GQA+RMSNorm+SiLU+SwiGLU+RoPE",
147            &[
148                "rmsnorm-kernel-v1",
149                "silu-kernel-v1",
150                "swiglu-kernel-v1",
151                "rope-kernel-v1",
152                "gqa-kernel-v1",
153                "softmax-kernel-v1",
154                "matmul-kernel-v1",
155            ],
156        ),
157        (
158            "B",
159            "MHA+LayerNorm+GELU+AbsPos",
160            &[
161                "layernorm-kernel-v1",
162                "gelu-kernel-v1",
163                "attention-kernel-v1",
164                "softmax-kernel-v1",
165                "matmul-kernel-v1",
166                "absolute-position-v1",
167            ],
168        ),
169        (
170            "C",
171            "MHA+LayerNorm+GELU+ALiBi",
172            &[
173                "layernorm-kernel-v1",
174                "gelu-kernel-v1",
175                "attention-kernel-v1",
176                "softmax-kernel-v1",
177                "alibi-kernel-v1",
178                "matmul-kernel-v1",
179            ],
180        ),
181        (
182            "D",
183            "LayerNorm+GELU+SiLU+GQA",
184            &[
185                "layernorm-kernel-v1",
186                "gelu-kernel-v1",
187                "silu-kernel-v1",
188                "gqa-kernel-v1",
189                "softmax-kernel-v1",
190                "matmul-kernel-v1",
191            ],
192        ),
193        (
194            "E",
195            "RMSNorm+SwiGLU+GQA",
196            &[
197                "rmsnorm-kernel-v1",
198                "swiglu-kernel-v1",
199                "gqa-kernel-v1",
200                "softmax-kernel-v1",
201                "matmul-kernel-v1",
202            ],
203        ),
204    ]
205}
206
207// ── Core computation ──────────────────────────────────────────────
208
209/// Returns `true` when Lean theorems exist for this contract.
210fn is_lean_proved(contract: &Contract) -> bool {
211    let yaml_proved = contract
212        .verification_summary
213        .as_ref()
214        .is_some_and(|vs| vs.total_obligations > 0 && vs.l4_lean_proved == vs.total_obligations);
215    if yaml_proved {
216        return true;
217    }
218    // Fallback: scan for actual Lean theorem files
219    count_lean_theorems_for_contract(contract) > 0
220}
221
222/// Returns `true` when all bindings are implemented.
223fn is_fully_bound(binding_status: Option<(u32, u32)>) -> bool {
224    binding_status.is_some_and(|(implemented, total)| total > 0 && implemented == total)
225}
226
227/// Compute the proof level for a single contract.
228///
229/// Derivation rules (highest matching level wins):
230/// - **L5**: all Lean proved AND all bindings implemented
231/// - **L4**: all Lean proved (`verification_summary.l4_lean_proved == total`)
232/// - **L3**: has Kani harnesses AND falsification tests cover obligations
233/// - **L2**: falsification tests count >= obligations count
234/// - **L1**: contract exists with equations
235#[allow(clippy::cast_possible_truncation)]
236pub fn compute_proof_level(contract: &Contract, binding_status: Option<(u32, u32)>) -> ProofLevel {
237    let total_obligations = contract.proof_obligations.len() as u32;
238    let ft_count = contract.falsification_tests.len() as u32;
239    let kani_count = contract.kani_harnesses.len() as u32;
240
241    // Check L4/L5: Lean proved
242    if is_lean_proved(contract) {
243        return if is_fully_bound(binding_status) {
244            ProofLevel::L5
245        } else {
246            ProofLevel::L4
247        };
248    }
249
250    // Check L3: Kani + falsification
251    let has_tests = ft_count >= total_obligations && total_obligations > 0;
252    if kani_count > 0 && has_tests {
253        return ProofLevel::L3;
254    }
255
256    // Check L2: falsification tests cover obligations
257    if has_tests {
258        return ProofLevel::L2;
259    }
260
261    // L1: contract exists with equations
262    ProofLevel::L1
263}
264
265/// Build a set of all sorry-free Lean theorem names from the Theorems/ directory.
266/// Scans once, caches the result in a thread-local for repeated calls.
267fn lean_theorem_names() -> &'static std::collections::HashSet<String> {
268    use std::sync::OnceLock;
269    static CACHE: OnceLock<std::collections::HashSet<String>> = OnceLock::new();
270    CACHE.get_or_init(|| {
271        let mut names = std::collections::HashSet::new();
272        for base in &["lean", "../provable-contracts/lean"] {
273            let search_dir = std::path::Path::new(base).join("ProvableContracts/Theorems");
274            if !search_dir.exists() {
275                continue;
276            }
277            // Walk all domain dirs and collect theorem names
278            if let Ok(domains) = std::fs::read_dir(&search_dir) {
279                for domain_entry in domains.flatten() {
280                    if !domain_entry.path().is_dir() {
281                        continue;
282                    }
283                    let domain_name = domain_entry.file_name().to_string_lossy().to_string();
284                    if let Ok(files) = std::fs::read_dir(domain_entry.path()) {
285                        for file in files.flatten() {
286                            let path = file.path();
287                            if path.extension().is_some_and(|e| e == "lean") {
288                                if let Ok(content) = std::fs::read_to_string(&path) {
289                                    if !content.contains("sorry") {
290                                        let stem = path
291                                            .file_stem()
292                                            .unwrap_or_default()
293                                            .to_string_lossy()
294                                            .to_string();
295                                        // Register domain, stem, and namespace forms
296                                        names.insert(format!("Theorems.{domain_name}"));
297                                        names.insert(domain_name.clone());
298                                        names.insert(domain_name.to_lowercase());
299                                        names.insert(format!("Theorems.{stem}"));
300                                        names.insert(stem.clone());
301                                        names.insert(stem.to_lowercase());
302                                        // Extract theorem names from content
303                                        // e.g., "theorem relu_nonneg" → "Relu"
304                                        for line in content.lines() {
305                                            if let Some(pos) = line.find("theorem ") {
306                                                let rest = &line[pos + 8..];
307                                                let tname: String = rest
308                                                    .chars()
309                                                    .take_while(|c| {
310                                                        c.is_alphanumeric() || *c == '_'
311                                                    })
312                                                    .collect();
313                                                if !tname.is_empty() {
314                                                    // CamelCase the theorem name for matching
315                                                    let camel: String = tname
316                                                        .split('_')
317                                                        .map(|s| {
318                                                            let mut c = s.chars();
319                                                            match c.next() {
320                                                                None => String::new(),
321                                                                Some(f) => f
322                                                                    .to_uppercase()
323                                                                    .chain(c)
324                                                                    .collect(),
325                                                            }
326                                                        })
327                                                        .collect();
328                                                    names.insert(format!("Theorems.{camel}"));
329                                                    names.insert(camel.clone());
330                                                    // Also register first CamelCase word
331                                                    // e.g., "ReluNonneg" → "Relu"
332                                                    let first_word: String = camel
333                                                        .chars()
334                                                        .enumerate()
335                                                        .take_while(|(i, c)| {
336                                                            *i == 0 || !c.is_uppercase()
337                                                        })
338                                                        .map(|(_, c)| c)
339                                                        .collect();
340                                                    if first_word.len() >= 3 {
341                                                        names.insert(format!(
342                                                            "Theorems.{first_word}"
343                                                        ));
344                                                        names.insert(first_word);
345                                                    }
346                                                }
347                                            }
348                                        }
349                                    }
350                                }
351                            }
352                        }
353                    }
354                }
355            }
356            if !names.is_empty() {
357                break;
358            }
359        }
360        names
361    })
362}
363
364/// Count Lean theorems for a contract by matching `lean_theorem` refs against
365/// sorry-free `.lean` files in the Theorems/ directory.
366fn count_lean_theorems_for_contract(contract: &Contract) -> u32 {
367    let theorems = lean_theorem_names();
368    let mut count = 0u32;
369    for eq in contract.equations.values() {
370        if let Some(ref theorem_ref) = eq.lean_theorem {
371            let name = theorem_ref.trim().trim_matches('"');
372            // Try exact match, then without prefix, then lowercase
373            if theorems.contains(name)
374                || theorems.contains(name.strip_prefix("Theorems.").unwrap_or(name))
375                || theorems.contains(&name.to_lowercase())
376            {
377                count += 1;
378            }
379        }
380    }
381    count
382}
383
384/// Build a complete proof status report.
385///
386/// `contracts` is a list of `(stem, &Contract)` pairs.
387/// `binding` is an optional binding registry for binding coverage.
388/// `include_classes` controls whether kernel class summaries are generated.
389#[allow(clippy::cast_possible_truncation)]
390pub fn proof_status_report(
391    contracts: &[(String, &Contract)],
392    binding: Option<&BindingRegistry>,
393    include_classes: bool,
394) -> ProofStatusReport {
395    let mut statuses = Vec::new();
396    let mut totals = ProofStatusTotals {
397        contracts: contracts.len() as u32,
398        obligations: 0,
399        falsification_tests: 0,
400        kani_harnesses: 0,
401        lean_proved: 0,
402        bindings_implemented: 0,
403        bindings_total: 0,
404    };
405
406    for (stem, contract) in contracts {
407        let contract_file = format!("{stem}.yaml");
408
409        let obligations = contract.proof_obligations.len() as u32;
410        let ft_count = contract.falsification_tests.len() as u32;
411        let kani_count = contract.kani_harnesses.len() as u32;
412        // First try YAML self-reported count, then scan actual Lean files
413        let lean_proved = contract
414            .verification_summary
415            .as_ref()
416            .map_or(0, |vs| vs.l4_lean_proved);
417        let lean_proved = if lean_proved == 0 {
418            count_lean_theorems_for_contract(contract)
419        } else {
420            lean_proved
421        };
422
423        // Count bindings for this contract
424        let (b_impl, b_total) = if let Some(reg) = binding {
425            count_bindings(&contract_file, contract, reg)
426        } else {
427            (0, contract.equations.len() as u32)
428        };
429
430        let binding_status = if binding.is_some() {
431            Some((b_impl, b_total))
432        } else {
433            None
434        };
435
436        let proof_level = compute_proof_level(contract, binding_status);
437
438        totals.obligations += obligations;
439        totals.falsification_tests += ft_count;
440        totals.kani_harnesses += kani_count;
441        totals.lean_proved += lean_proved;
442        totals.bindings_implemented += b_impl;
443        totals.bindings_total += b_total;
444
445        statuses.push(ContractProofStatus {
446            stem: stem.clone(),
447            proof_level,
448            obligations,
449            falsification_tests: ft_count,
450            kani_harnesses: kani_count,
451            lean_proved,
452            bindings_implemented: b_impl,
453            bindings_total: b_total,
454        });
455    }
456
457    // Build kernel class summaries
458    let kernel_classes = if include_classes {
459        build_kernel_classes(&statuses)
460    } else {
461        Vec::new()
462    };
463
464    let timestamp = current_timestamp();
465
466    ProofStatusReport {
467        schema_version: "1.0.0".to_string(),
468        timestamp,
469        contracts: statuses,
470        kernel_classes,
471        totals,
472    }
473}
474
475/// Format a proof status report as human-readable text.
476pub fn format_text(report: &ProofStatusReport) -> String {
477    let mut out = String::new();
478
479    out.push_str(&format!(
480        "Proof Status ({} contracts)\n\n",
481        report.totals.contracts
482    ));
483
484    out.push_str(&format!(
485        "  {:<35} {:>5} {:>6} {:>5} {:>4} {:>4} {:>9}\n",
486        "Contract", "Level", "Obligs", "Tests", "Kani", "Lean", "Bindings"
487    ));
488    out.push_str(&format!("  {}\n", "─".repeat(72)));
489
490    for c in &report.contracts {
491        out.push_str(&format!(
492            "  {:<35} {:>5} {:>6} {:>5} {:>4} {:>4} {:>4}/{:<4}\n",
493            truncate(&c.stem, 35),
494            c.proof_level,
495            c.obligations,
496            c.falsification_tests,
497            c.kani_harnesses,
498            c.lean_proved,
499            c.bindings_implemented,
500            c.bindings_total,
501        ));
502    }
503
504    if !report.kernel_classes.is_empty() {
505        out.push_str("\nKernel Classes:\n");
506        for kc in &report.kernel_classes {
507            let bound_str = if kc.all_bound { "all bound" } else { "gaps" };
508            out.push_str(&format!(
509                "  {} ({}): min={}, {} contracts, {}\n",
510                kc.label,
511                kc.description,
512                kc.min_proof_level,
513                kc.contract_stems.len(),
514                bound_str,
515            ));
516        }
517    }
518
519    out.push_str(&format!(
520        "\nTotals: {} obligations, {} tests, {} kani, {} lean proved, {}/{} bound\n",
521        report.totals.obligations,
522        report.totals.falsification_tests,
523        report.totals.kani_harnesses,
524        report.totals.lean_proved,
525        report.totals.bindings_implemented,
526        report.totals.bindings_total,
527    ));
528
529    out
530}
531
532// ── Internal helpers ──────────────────────────────────────────────
533
534/// Count implemented vs total bindings for a contract in the registry
535#[allow(clippy::cast_possible_truncation)]
536pub(crate) fn count_bindings(
537    contract_file: &str,
538    contract: &Contract,
539    binding: &BindingRegistry,
540) -> (u32, u32) {
541    let total = contract.equations.len() as u32;
542    let implemented = binding
543        .bindings_for(contract_file)
544        .iter()
545        .filter(|b| b.status == ImplStatus::Implemented)
546        .count() as u32;
547    (implemented, total)
548}
549
550/// Build kernel equivalence class summaries from per-contract statuses
551fn build_kernel_classes(statuses: &[ContractProofStatus]) -> Vec<KernelClassSummary> {
552    let status_map: BTreeMap<&str, &ContractProofStatus> =
553        statuses.iter().map(|s| (s.stem.as_str(), s)).collect();
554
555    kernel_class_map()
556        .into_iter()
557        .map(|(label, desc, stems)| {
558            let found_stems: Vec<String> = stems
559                .iter()
560                .filter(|s| status_map.contains_key(**s))
561                .map(|s| (*s).to_string())
562                .collect();
563
564            let min_level = found_stems
565                .iter()
566                .filter_map(|s| status_map.get(s.as_str()))
567                .map(|c| c.proof_level)
568                .min()
569                .unwrap_or(ProofLevel::L1);
570
571            let all_bound = !found_stems.is_empty()
572                && found_stems.iter().all(|s| {
573                    status_map.get(s.as_str()).is_some_and(|c| {
574                        c.bindings_total > 0 && c.bindings_implemented == c.bindings_total
575                    })
576                });
577
578            KernelClassSummary {
579                label: label.to_string(),
580                description: desc.to_string(),
581                contract_stems: found_stems,
582                min_proof_level: min_level,
583                all_bound,
584            }
585        })
586        .collect()
587}
588
589/// Truncate a string to at most `max` bytes for column alignment
590fn truncate(s: &str, max: usize) -> &str {
591    if s.len() > max { &s[..max] } else { s }
592}
593
594/// Generate an ISO-8601-style Unix epoch timestamp string
595fn current_timestamp() -> String {
596    // Use a simple ISO-8601 timestamp without external deps.
597    // In production this would use chrono or time crate.
598    // For now we use std::time for a Unix epoch string.
599    let duration = std::time::SystemTime::now()
600        .duration_since(std::time::UNIX_EPOCH)
601        .unwrap_or_default();
602    format!("{}Z", duration.as_secs())
603}
604
605#[cfg(test)]
606#[path = "proof_status_tests.rs"]
607mod tests;