oxiz_proof/
checker.rs

1//! Proof checking infrastructure.
2//!
3//! This module provides validation and verification of proof steps,
4//! ensuring that proof derivations are sound.
5//!
6//! ## Features
7//!
8//! - **Syntactic checks**: Validate proof structure (premises exist, etc.)
9//! - **Rule validation**: Check that rule applications are well-formed
10//! - **Extensible**: Support for custom rule validators
11//!
12//! ## Example
13//!
14//! ```
15//! use oxiz_proof::checker::{ProofChecker, CheckResult};
16//! use oxiz_proof::theory::{TheoryProof, TheoryRule};
17//!
18//! let mut proof = TheoryProof::new();
19//! proof.refl("x");
20//!
21//! let mut checker = ProofChecker::new();
22//! let result = checker.check_theory_proof(&proof);
23//! assert!(result.is_valid());
24//! ```
25
26use crate::alethe::{AletheProof, AletheRule, AletheStep};
27use crate::theory::{TheoryProof, TheoryRule, TheoryStepId};
28use std::collections::HashSet;
29use std::fmt;
30
31/// Result of checking a proof step
32#[derive(Debug, Clone)]
33pub enum CheckResult {
34    /// The proof is valid
35    Valid,
36    /// The proof has an error at a specific step
37    Invalid {
38        /// Step index where the error occurred
39        step: u32,
40        /// Description of the error
41        error: CheckError,
42    },
43    /// Multiple errors were found
44    MultipleErrors(Vec<(u32, CheckError)>),
45}
46
47impl CheckResult {
48    /// Check if the result indicates a valid proof
49    #[must_use]
50    pub fn is_valid(&self) -> bool {
51        matches!(self, Self::Valid)
52    }
53
54    /// Get the error if there is one
55    #[must_use]
56    pub fn error(&self) -> Option<&CheckError> {
57        match self {
58            Self::Invalid { error, .. } => Some(error),
59            _ => None,
60        }
61    }
62}
63
64impl fmt::Display for CheckResult {
65    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
66        match self {
67            Self::Valid => write!(f, "✓ Proof is valid"),
68            Self::Invalid { step, error } => {
69                writeln!(f, "✗ Proof is invalid")?;
70                writeln!(f, "  Step: {}", step)?;
71                writeln!(f, "  [{:?}] {}", error.severity(), error)?;
72                if let Some(suggestion) = error.suggestion() {
73                    writeln!(f, "  Suggestion: {}", suggestion)?;
74                }
75                Ok(())
76            }
77            Self::MultipleErrors(errors) => {
78                writeln!(f, "✗ Proof has {} error(s):", errors.len())?;
79                for (step, error) in errors {
80                    writeln!(f, "\n  Step {}:", step)?;
81                    writeln!(f, "    [{:?}] {}", error.severity(), error)?;
82                    if let Some(suggestion) = error.suggestion() {
83                        writeln!(f, "    Suggestion: {}", suggestion)?;
84                    }
85                }
86                Ok(())
87            }
88        }
89    }
90}
91
92/// Types of proof checking errors
93#[derive(Debug, Clone, PartialEq, Eq)]
94pub enum CheckError {
95    /// A referenced premise doesn't exist
96    MissingPremise(u32),
97    /// Wrong number of premises for the rule
98    WrongPremiseCount { expected: usize, got: usize },
99    /// Wrong number of arguments for the rule
100    WrongArgumentCount { expected: usize, got: usize },
101    /// Rule is not applicable
102    RuleNotApplicable(String),
103    /// Conclusion doesn't follow from premises
104    InvalidConclusion(String),
105    /// Cyclic dependency in proof
106    CyclicDependency,
107    /// Empty proof
108    EmptyProof,
109    /// Malformed term in proof
110    MalformedTerm(String),
111    /// Unknown rule
112    UnknownRule(String),
113    /// Custom error
114    Custom(String),
115}
116
117impl fmt::Display for CheckError {
118    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
119        match self {
120            Self::MissingPremise(id) => {
121                write!(
122                    f,
123                    "Missing premise: step {} does not exist or has not been defined yet. \
124                     Premises must be defined before they are referenced.",
125                    id
126                )
127            }
128            Self::WrongPremiseCount { expected, got } => {
129                write!(
130                    f,
131                    "Wrong premise count: rule requires {} premise(s), but {} {} provided. \
132                     Check the rule definition for the correct number of premises.",
133                    expected,
134                    got,
135                    if *got == 1 { "was" } else { "were" }
136                )
137            }
138            Self::WrongArgumentCount { expected, got } => {
139                write!(
140                    f,
141                    "Wrong argument count: rule expects {} argument(s), but {} {} provided. \
142                     Ensure all required arguments are supplied.",
143                    expected,
144                    got,
145                    if *got == 1 { "was" } else { "were" }
146                )
147            }
148            Self::RuleNotApplicable(msg) => {
149                write!(
150                    f,
151                    "Rule not applicable: {}. \
152                     Verify that the rule's preconditions are met.",
153                    msg
154                )
155            }
156            Self::InvalidConclusion(msg) => {
157                write!(
158                    f,
159                    "Invalid conclusion: {}. \
160                     The conclusion does not follow from the premises using the specified rule.",
161                    msg
162                )
163            }
164            Self::CyclicDependency => {
165                write!(
166                    f,
167                    "Cyclic dependency detected in proof structure. \
168                     A proof step cannot depend on itself (directly or indirectly). \
169                     Check for circular references in premise chains."
170                )
171            }
172            Self::EmptyProof => {
173                write!(
174                    f,
175                    "Empty proof: no proof steps provided. \
176                     A valid proof must contain at least one step."
177                )
178            }
179            Self::MalformedTerm(msg) => {
180                write!(
181                    f,
182                    "Malformed term: {}. \
183                     Check for syntax errors or invalid term structure.",
184                    msg
185                )
186            }
187            Self::UnknownRule(name) => {
188                write!(
189                    f,
190                    "Unknown rule: '{}'. \
191                     This rule is not recognized by the proof checker. \
192                     Verify the rule name is spelled correctly.",
193                    name
194                )
195            }
196            Self::Custom(msg) => write!(f, "{}", msg),
197        }
198    }
199}
200
201impl std::error::Error for CheckError {}
202
203impl CheckError {
204    /// Get a suggestion for fixing this error
205    #[must_use]
206    pub fn suggestion(&self) -> Option<&str> {
207        match self {
208            Self::MissingPremise(_) => {
209                Some("Ensure all premise steps are added to the proof before referencing them.")
210            }
211            Self::WrongPremiseCount { .. } => {
212                Some("Consult the rule documentation for the correct number of premises.")
213            }
214            Self::WrongArgumentCount { .. } => {
215                Some("Review the rule definition to determine which arguments are required.")
216            }
217            Self::RuleNotApplicable(_) => {
218                Some("Check that the premise types match what the rule expects.")
219            }
220            Self::InvalidConclusion(_) => {
221                Some("Verify that the rule is being applied correctly to the given premises.")
222            }
223            Self::CyclicDependency => {
224                Some("Reorganize proof steps to eliminate circular dependencies.")
225            }
226            Self::EmptyProof => Some("Add at least one axiom or assumption to the proof."),
227            Self::MalformedTerm(_) => Some("Check the term syntax against the expected format."),
228            Self::UnknownRule(_) => {
229                Some("Use a standard proof rule or define a custom rule handler.")
230            }
231            Self::Custom(_) => None,
232        }
233    }
234
235    /// Get the severity level of this error
236    #[must_use]
237    pub fn severity(&self) -> ErrorSeverity {
238        match self {
239            Self::CyclicDependency | Self::EmptyProof => ErrorSeverity::Critical,
240            Self::MissingPremise(_) | Self::InvalidConclusion(_) | Self::UnknownRule(_) => {
241                ErrorSeverity::Error
242            }
243            Self::WrongPremiseCount { .. }
244            | Self::WrongArgumentCount { .. }
245            | Self::RuleNotApplicable(_)
246            | Self::MalformedTerm(_) => ErrorSeverity::Warning,
247            Self::Custom(_) => ErrorSeverity::Error,
248        }
249    }
250}
251
252/// Error severity level
253#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
254pub enum ErrorSeverity {
255    /// Warning - proof may be acceptable
256    Warning,
257    /// Error - proof is invalid
258    Error,
259    /// Critical - proof structure is fundamentally broken
260    Critical,
261}
262
263impl fmt::Display for ErrorSeverity {
264    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
265        match self {
266            Self::Warning => write!(f, "WARNING"),
267            Self::Error => write!(f, "ERROR"),
268            Self::Critical => write!(f, "CRITICAL"),
269        }
270    }
271}
272
273/// Configuration for proof checking
274#[derive(Debug, Clone, Default)]
275pub struct CheckerConfig {
276    /// Whether to continue checking after the first error
277    pub continue_on_error: bool,
278    /// Whether to verify conclusion content (not just structure)
279    pub verify_conclusions: bool,
280    /// Whether to allow cyclic dependencies (for some proof formats)
281    pub allow_cycles: bool,
282}
283
284/// Proof checker for verifying proof derivations
285#[derive(Debug, Default)]
286pub struct ProofChecker {
287    /// Configuration
288    config: CheckerConfig,
289    /// Collected errors
290    errors: Vec<(u32, CheckError)>,
291    /// Validated step IDs (for cycle detection)
292    validated: HashSet<u32>,
293    /// Currently being validated (for cycle detection)
294    in_progress: HashSet<u32>,
295}
296
297impl ProofChecker {
298    /// Create a new proof checker with default configuration
299    #[must_use]
300    pub fn new() -> Self {
301        Self::default()
302    }
303
304    /// Create a proof checker with custom configuration
305    #[must_use]
306    pub fn with_config(config: CheckerConfig) -> Self {
307        Self {
308            config,
309            errors: Vec::new(),
310            validated: HashSet::new(),
311            in_progress: HashSet::new(),
312        }
313    }
314
315    /// Reset the checker state
316    pub fn reset(&mut self) {
317        self.errors.clear();
318        self.validated.clear();
319        self.in_progress.clear();
320    }
321
322    /// Check a theory proof
323    pub fn check_theory_proof(&mut self, proof: &TheoryProof) -> CheckResult {
324        self.reset();
325
326        if proof.is_empty() {
327            return CheckResult::Invalid {
328                step: 0,
329                error: CheckError::EmptyProof,
330            };
331        }
332
333        // Check each step
334        for step in proof.steps() {
335            if let Err(error) = self.check_theory_step(proof, step.id) {
336                if self.config.continue_on_error {
337                    self.errors.push((step.id.0, error));
338                } else {
339                    return CheckResult::Invalid {
340                        step: step.id.0,
341                        error,
342                    };
343                }
344            }
345        }
346
347        if self.errors.is_empty() {
348            CheckResult::Valid
349        } else {
350            CheckResult::MultipleErrors(std::mem::take(&mut self.errors))
351        }
352    }
353
354    /// Check a single theory proof step
355    fn check_theory_step(
356        &mut self,
357        proof: &TheoryProof,
358        step_id: TheoryStepId,
359    ) -> Result<(), CheckError> {
360        // Cycle detection
361        if !self.config.allow_cycles {
362            if self.in_progress.contains(&step_id.0) {
363                return Err(CheckError::CyclicDependency);
364            }
365            if self.validated.contains(&step_id.0) {
366                return Ok(());
367            }
368            self.in_progress.insert(step_id.0);
369        }
370
371        let step = proof
372            .get_step(step_id)
373            .ok_or(CheckError::MissingPremise(step_id.0))?;
374
375        // Check premises exist
376        for premise_id in &step.premises {
377            if proof.get_step(*premise_id).is_none() {
378                return Err(CheckError::MissingPremise(premise_id.0));
379            }
380
381            // Recursively check premises
382            if !self.config.allow_cycles {
383                self.check_theory_step(proof, *premise_id)?;
384            }
385        }
386
387        // Check rule-specific requirements
388        self.check_theory_rule(&step.rule, step.premises.len(), step.args.len())?;
389
390        // Mark as validated
391        if !self.config.allow_cycles {
392            self.in_progress.remove(&step_id.0);
393            self.validated.insert(step_id.0);
394        }
395
396        Ok(())
397    }
398
399    /// Check rule-specific requirements for theory proofs
400    fn check_theory_rule(
401        &self,
402        rule: &TheoryRule,
403        premise_count: usize,
404        arg_count: usize,
405    ) -> Result<(), CheckError> {
406        match rule {
407            // Rules with no premises
408            TheoryRule::Refl if premise_count != 0 => {
409                return Err(CheckError::WrongPremiseCount {
410                    expected: 0,
411                    got: premise_count,
412                });
413            }
414
415            // Rules with exactly one premise
416            TheoryRule::Symm if premise_count != 1 => {
417                return Err(CheckError::WrongPremiseCount {
418                    expected: 1,
419                    got: premise_count,
420                });
421            }
422
423            // Rules with exactly two premises
424            TheoryRule::Trans if premise_count != 2 => {
425                return Err(CheckError::WrongPremiseCount {
426                    expected: 2,
427                    got: premise_count,
428                });
429            }
430
431            // Rules with at least one premise (congruence needs arg equalities)
432            TheoryRule::Cong => {
433                // Congruence can have zero premises for nullary functions
434            }
435
436            // Farkas lemma needs at least 2 premises
437            TheoryRule::LaGeneric if premise_count < 2 => {
438                return Err(CheckError::WrongPremiseCount {
439                    expected: 2,
440                    got: premise_count,
441                });
442            }
443
444            // Array read-write-same is an axiom
445            TheoryRule::ArrReadWrite1 if premise_count != 0 => {
446                return Err(CheckError::WrongPremiseCount {
447                    expected: 0,
448                    got: premise_count,
449                });
450            }
451
452            // Array read-write-different needs proof of i ≠ j
453            TheoryRule::ArrReadWrite2 if premise_count != 1 => {
454                return Err(CheckError::WrongPremiseCount {
455                    expected: 1,
456                    got: premise_count,
457                });
458            }
459
460            // LaMult needs coefficient argument
461            TheoryRule::LaMult if arg_count < 1 => {
462                return Err(CheckError::WrongArgumentCount {
463                    expected: 1,
464                    got: arg_count,
465                });
466            }
467
468            // Other rules - flexible checking
469            _ => {}
470        }
471
472        Ok(())
473    }
474
475    /// Check an Alethe proof
476    pub fn check_alethe_proof(&mut self, proof: &AletheProof) -> CheckResult {
477        self.reset();
478
479        if proof.is_empty() {
480            return CheckResult::Invalid {
481                step: 0,
482                error: CheckError::EmptyProof,
483            };
484        }
485
486        let steps = proof.steps();
487        let mut step_indices: HashSet<u32> = HashSet::new();
488
489        // First pass: collect all step indices
490        for step in steps {
491            match step {
492                AletheStep::Assume { index, .. } => {
493                    step_indices.insert(*index);
494                }
495                AletheStep::Step { index, .. } => {
496                    step_indices.insert(*index);
497                }
498                AletheStep::Anchor { step: index, .. } => {
499                    step_indices.insert(*index);
500                }
501                AletheStep::DefineFun { .. } => {}
502            }
503        }
504
505        // Second pass: check each step
506        for (idx, step) in steps.iter().enumerate() {
507            if let Err(error) = self.check_alethe_step(step, &step_indices) {
508                if self.config.continue_on_error {
509                    self.errors.push((idx as u32, error));
510                } else {
511                    return CheckResult::Invalid {
512                        step: idx as u32,
513                        error,
514                    };
515                }
516            }
517        }
518
519        if self.errors.is_empty() {
520            CheckResult::Valid
521        } else {
522            CheckResult::MultipleErrors(std::mem::take(&mut self.errors))
523        }
524    }
525
526    /// Check a single Alethe proof step
527    fn check_alethe_step(
528        &self,
529        step: &AletheStep,
530        step_indices: &HashSet<u32>,
531    ) -> Result<(), CheckError> {
532        match step {
533            AletheStep::Assume { .. } => {
534                // Assumptions don't need checking
535                Ok(())
536            }
537
538            AletheStep::Step { rule, premises, .. } => {
539                // Check all premises exist
540                for premise in premises {
541                    if !step_indices.contains(premise) {
542                        return Err(CheckError::MissingPremise(*premise));
543                    }
544                }
545
546                // Check rule-specific requirements
547                self.check_alethe_rule(rule, premises.len())
548            }
549
550            AletheStep::Anchor { .. } => {
551                // Anchors don't need checking
552                Ok(())
553            }
554
555            AletheStep::DefineFun { .. } => {
556                // Definitions don't need checking
557                Ok(())
558            }
559        }
560    }
561
562    /// Check rule-specific requirements for Alethe proofs
563    fn check_alethe_rule(&self, rule: &AletheRule, premise_count: usize) -> Result<(), CheckError> {
564        match rule {
565            // Resolution needs at least 2 premises
566            AletheRule::Resolution if premise_count < 2 => {
567                return Err(CheckError::WrongPremiseCount {
568                    expected: 2,
569                    got: premise_count,
570                });
571            }
572
573            // Reflexivity is an axiom
574            AletheRule::Refl if premise_count != 0 => {
575                return Err(CheckError::WrongPremiseCount {
576                    expected: 0,
577                    got: premise_count,
578                });
579            }
580
581            // Transitivity needs at least 2 premises
582            AletheRule::Trans if premise_count < 2 => {
583                return Err(CheckError::WrongPremiseCount {
584                    expected: 2,
585                    got: premise_count,
586                });
587            }
588
589            // Other rules - flexible checking
590            _ => {}
591        }
592
593        Ok(())
594    }
595}
596
597/// Trait for types that can be checked for validity
598pub trait Checkable {
599    /// Check if the proof is valid
600    fn check(&self) -> CheckResult;
601
602    /// Check using a custom checker configuration
603    fn check_with_config(&self, config: CheckerConfig) -> CheckResult;
604}
605
606impl Checkable for TheoryProof {
607    fn check(&self) -> CheckResult {
608        ProofChecker::new().check_theory_proof(self)
609    }
610
611    fn check_with_config(&self, config: CheckerConfig) -> CheckResult {
612        ProofChecker::with_config(config).check_theory_proof(self)
613    }
614}
615
616impl Checkable for AletheProof {
617    fn check(&self) -> CheckResult {
618        ProofChecker::new().check_alethe_proof(self)
619    }
620
621    fn check_with_config(&self, config: CheckerConfig) -> CheckResult {
622        ProofChecker::with_config(config).check_alethe_proof(self)
623    }
624}
625
626#[cfg(test)]
627mod tests {
628    use super::*;
629    #[allow(unused_imports)]
630    use crate::theory::ProofTerm;
631
632    #[test]
633    fn test_check_result_is_valid() {
634        assert!(CheckResult::Valid.is_valid());
635        assert!(
636            !CheckResult::Invalid {
637                step: 0,
638                error: CheckError::EmptyProof
639            }
640            .is_valid()
641        );
642    }
643
644    #[test]
645    fn test_check_error_display() {
646        let err = CheckError::MissingPremise(5);
647        let msg = format!("{}", err);
648        assert!(msg.contains("5"));
649        assert!(msg.contains("does not exist"));
650
651        let err = CheckError::WrongPremiseCount {
652            expected: 2,
653            got: 1,
654        };
655        let msg = format!("{}", err);
656        assert!(msg.contains("requires 2"));
657        assert!(msg.contains("1 was provided"));
658    }
659
660    #[test]
661    fn test_theory_proof_empty() {
662        let proof = TheoryProof::new();
663        let result = proof.check();
664        assert!(!result.is_valid());
665        assert!(matches!(result.error(), Some(CheckError::EmptyProof)));
666    }
667
668    #[test]
669    fn test_theory_proof_valid_refl() {
670        let mut proof = TheoryProof::new();
671        proof.refl("x");
672
673        let result = proof.check();
674        assert!(result.is_valid());
675    }
676
677    #[test]
678    fn test_theory_proof_valid_transitivity() {
679        let mut proof = TheoryProof::new();
680        let s1 = proof.add_axiom(TheoryRule::Custom("assert".into()), "(= a b)");
681        let s2 = proof.add_axiom(TheoryRule::Custom("assert".into()), "(= b c)");
682        proof.trans(s1, s2, "a", "c");
683
684        let result = proof.check();
685        assert!(result.is_valid());
686    }
687
688    #[test]
689    fn test_theory_proof_invalid_trans_premises() {
690        let mut proof = TheoryProof::new();
691        let s1 = proof.add_axiom(TheoryRule::Custom("assert".into()), "(= a b)");
692        // Trans with only 1 premise should fail
693        proof.add_step(TheoryRule::Trans, vec![s1], "(= a c)");
694
695        let result = proof.check();
696        assert!(!result.is_valid());
697    }
698
699    #[test]
700    fn test_theory_proof_missing_premise() {
701        let mut proof = TheoryProof::new();
702        // Reference a non-existent premise
703        proof.add_step(
704            TheoryRule::Trans,
705            vec![TheoryStepId(99), TheoryStepId(100)],
706            "(= a c)",
707        );
708
709        let result = proof.check();
710        assert!(!result.is_valid());
711        assert!(matches!(
712            result.error(),
713            Some(CheckError::MissingPremise(_))
714        ));
715    }
716
717    #[test]
718    fn test_alethe_proof_empty() {
719        let proof = AletheProof::new();
720        let result = proof.check();
721        assert!(!result.is_valid());
722    }
723
724    #[test]
725    fn test_alethe_proof_valid() {
726        let mut proof = AletheProof::new();
727        proof.assume("p");
728        proof.step_simple(vec![], AletheRule::Refl);
729
730        let result = proof.check();
731        assert!(result.is_valid());
732    }
733
734    #[test]
735    fn test_checker_continue_on_error() {
736        let mut proof = TheoryProof::new();
737        // Multiple invalid steps
738        proof.add_step(TheoryRule::Trans, vec![TheoryStepId(99)], "(= a b)");
739        proof.add_step(TheoryRule::Trans, vec![TheoryStepId(100)], "(= c d)");
740
741        let config = CheckerConfig {
742            continue_on_error: true,
743            ..Default::default()
744        };
745
746        let result = proof.check_with_config(config);
747        assert!(matches!(result, CheckResult::MultipleErrors(_)));
748    }
749
750    #[test]
751    fn test_checker_refl_with_premises_fails() {
752        let mut proof = TheoryProof::new();
753        let s1 = proof.add_axiom(TheoryRule::Custom("assert".into()), "(= a b)");
754        // Refl should have no premises
755        proof.add_step(TheoryRule::Refl, vec![s1], "(= x x)");
756
757        let result = proof.check();
758        assert!(!result.is_valid());
759    }
760
761    #[test]
762    fn test_checker_farkas_needs_premises() {
763        let mut proof = TheoryProof::new();
764        // Farkas with only 1 premise should fail
765        let s1 = proof.add_axiom(TheoryRule::Custom("bound".into()), "(>= x 0)");
766        proof.add_step(TheoryRule::LaGeneric, vec![s1], "false");
767
768        let result = proof.check();
769        assert!(!result.is_valid());
770    }
771
772    #[test]
773    fn test_checker_arr_read_write_1_axiom() {
774        let mut proof = TheoryProof::new();
775        // ArrReadWrite1 is an axiom (no premises)
776        proof.add_axiom(TheoryRule::ArrReadWrite1, "(= (select (store a i v) i) v)");
777
778        let result = proof.check();
779        assert!(result.is_valid());
780    }
781
782    #[test]
783    fn test_checker_arr_read_write_2_needs_premise() {
784        let mut proof = TheoryProof::new();
785        // ArrReadWrite2 needs proof of i ≠ j
786        proof.add_axiom(
787            TheoryRule::ArrReadWrite2,
788            "(= (select (store a i v) j) (select a j))",
789        );
790
791        let result = proof.check();
792        assert!(!result.is_valid());
793    }
794}
oxiz_proof/checker.rs

oxiz_proof/
checker.rs