oxiz_proof/
checker.rs

1//! Proof checking infrastructure.
2//!
3//! This module provides validation and verification of proof steps,
4//! ensuring that proof derivations are sound.
5//!
6//! ## Features
7//!
8//! - **Syntactic checks**: Validate proof structure (premises exist, etc.)
9//! - **Rule validation**: Check that rule applications are well-formed
10//! - **Extensible**: Support for custom rule validators
11//!
12//! ## Example
13//!
14//! ```
15//! use oxiz_proof::checker::{ProofChecker, CheckResult};
16//! use oxiz_proof::theory::{TheoryProof, TheoryRule};
17//!
18//! let mut proof = TheoryProof::new();
19//! proof.refl("x");
20//!
21//! let mut checker = ProofChecker::new();
22//! let result = checker.check_theory_proof(&proof);
23//! assert!(result.is_valid());
24//! ```
25
26use crate::alethe::{AletheProof, AletheRule, AletheStep};
27use crate::theory::{TheoryProof, TheoryRule, TheoryStepId};
28use std::collections::HashSet;
29use std::fmt;
30
31/// Result of checking a proof step
32#[derive(Debug, Clone)]
33pub enum CheckResult {
34    /// The proof is valid
35    Valid,
36    /// The proof has an error at a specific step
37    Invalid {
38        /// Step index where the error occurred
39        step: u32,
40        /// Description of the error
41        error: CheckError,
42    },
43    /// Multiple errors were found
44    MultipleErrors(Vec<(u32, CheckError)>),
45}
46
47impl CheckResult {
48    /// Check if the result indicates a valid proof
49    #[must_use]
50    pub fn is_valid(&self) -> bool {
51        matches!(self, Self::Valid)
52    }
53
54    /// Get the error if there is one
55    #[must_use]
56    pub fn error(&self) -> Option<&CheckError> {
57        match self {
58            Self::Invalid { error, .. } => Some(error),
59            _ => None,
60        }
61    }
62}
63
64impl fmt::Display for CheckResult {
65    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
66        match self {
67            Self::Valid => write!(f, "✓ Proof is valid"),
68            Self::Invalid { step, error } => {
69                writeln!(f, "✗ Proof is invalid")?;
70                writeln!(f, "  Step: {}", step)?;
71                writeln!(f, "  [{:?}] {}", error.severity(), error)?;
72                if let Some(suggestion) = error.suggestion() {
73                    writeln!(f, "  Suggestion: {}", suggestion)?;
74                }
75                Ok(())
76            }
77            Self::MultipleErrors(errors) => {
78                writeln!(f, "✗ Proof has {} error(s):", errors.len())?;
79                for (step, error) in errors {
80                    writeln!(f, "\n  Step {}:", step)?;
81                    writeln!(f, "    [{:?}] {}", error.severity(), error)?;
82                    if let Some(suggestion) = error.suggestion() {
83                        writeln!(f, "    Suggestion: {}", suggestion)?;
84                    }
85                }
86                Ok(())
87            }
88        }
89    }
90}
91
92/// Types of proof checking errors
93#[derive(Debug, Clone, PartialEq, Eq)]
94pub enum CheckError {
95    /// A referenced premise doesn't exist
96    MissingPremise(u32),
97    /// Wrong number of premises for the rule
98    WrongPremiseCount { expected: usize, got: usize },
99    /// Wrong number of arguments for the rule
100    WrongArgumentCount { expected: usize, got: usize },
101    /// Rule is not applicable
102    RuleNotApplicable(String),
103    /// Conclusion doesn't follow from premises
104    InvalidConclusion(String),
105    /// Cyclic dependency in proof
106    CyclicDependency,
107    /// Empty proof
108    EmptyProof,
109    /// Malformed term in proof
110    MalformedTerm(String),
111    /// Unknown rule
112    UnknownRule(String),
113    /// Custom error
114    Custom(String),
115}
116
117impl fmt::Display for CheckError {
118    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
119        match self {
120            Self::MissingPremise(id) => {
121                write!(
122                    f,
123                    "Missing premise: step {} does not exist or has not been defined yet. \
124                     Premises must be defined before they are referenced.",
125                    id
126                )
127            }
128            Self::WrongPremiseCount { expected, got } => {
129                write!(
130                    f,
131                    "Wrong premise count: rule requires {} premise(s), but {} {} provided. \
132                     Check the rule definition for the correct number of premises.",
133                    expected,
134                    got,
135                    if *got == 1 { "was" } else { "were" }
136                )
137            }
138            Self::WrongArgumentCount { expected, got } => {
139                write!(
140                    f,
141                    "Wrong argument count: rule expects {} argument(s), but {} {} provided. \
142                     Ensure all required arguments are supplied.",
143                    expected,
144                    got,
145                    if *got == 1 { "was" } else { "were" }
146                )
147            }
148            Self::RuleNotApplicable(msg) => {
149                write!(
150                    f,
151                    "Rule not applicable: {}. \
152                     Verify that the rule's preconditions are met.",
153                    msg
154                )
155            }
156            Self::InvalidConclusion(msg) => {
157                write!(
158                    f,
159                    "Invalid conclusion: {}. \
160                     The conclusion does not follow from the premises using the specified rule.",
161                    msg
162                )
163            }
164            Self::CyclicDependency => {
165                write!(
166                    f,
167                    "Cyclic dependency detected in proof structure. \
168                     A proof step cannot depend on itself (directly or indirectly). \
169                     Check for circular references in premise chains."
170                )
171            }
172            Self::EmptyProof => {
173                write!(
174                    f,
175                    "Empty proof: no proof steps provided. \
176                     A valid proof must contain at least one step."
177                )
178            }
179            Self::MalformedTerm(msg) => {
180                write!(
181                    f,
182                    "Malformed term: {}. \
183                     Check for syntax errors or invalid term structure.",
184                    msg
185                )
186            }
187            Self::UnknownRule(name) => {
188                write!(
189                    f,
190                    "Unknown rule: '{}'. \
191                     This rule is not recognized by the proof checker. \
192                     Verify the rule name is spelled correctly.",
193                    name
194                )
195            }
196            Self::Custom(msg) => write!(f, "{}", msg),
197        }
198    }
199}
200
201impl std::error::Error for CheckError {}
202
203impl CheckError {
204    /// Get a suggestion for fixing this error
205    #[must_use]
206    pub fn suggestion(&self) -> Option<&str> {
207        match self {
208            Self::MissingPremise(_) => {
209                Some("Ensure all premise steps are added to the proof before referencing them.")
210            }
211            Self::WrongPremiseCount { .. } => {
212                Some("Consult the rule documentation for the correct number of premises.")
213            }
214            Self::WrongArgumentCount { .. } => {
215                Some("Review the rule definition to determine which arguments are required.")
216            }
217            Self::RuleNotApplicable(_) => {
218                Some("Check that the premise types match what the rule expects.")
219            }
220            Self::InvalidConclusion(_) => {
221                Some("Verify that the rule is being applied correctly to the given premises.")
222            }
223            Self::CyclicDependency => {
224                Some("Reorganize proof steps to eliminate circular dependencies.")
225            }
226            Self::EmptyProof => Some("Add at least one axiom or assumption to the proof."),
227            Self::MalformedTerm(_) => Some("Check the term syntax against the expected format."),
228            Self::UnknownRule(_) => {
229                Some("Use a standard proof rule or define a custom rule handler.")
230            }
231            Self::Custom(_) => None,
232        }
233    }
234
235    /// Get the severity level of this error
236    #[must_use]
237    pub fn severity(&self) -> ErrorSeverity {
238        match self {
239            Self::CyclicDependency | Self::EmptyProof => ErrorSeverity::Critical,
240            Self::MissingPremise(_) | Self::InvalidConclusion(_) | Self::UnknownRule(_) => {
241                ErrorSeverity::Error
242            }
243            Self::WrongPremiseCount { .. }
244            | Self::WrongArgumentCount { .. }
245            | Self::RuleNotApplicable(_)
246            | Self::MalformedTerm(_) => ErrorSeverity::Warning,
247            Self::Custom(_) => ErrorSeverity::Error,
248        }
249    }
250}
251
252/// Error severity level
253#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
254pub enum ErrorSeverity {
255    /// Warning - proof may be acceptable
256    Warning,
257    /// Error - proof is invalid
258    Error,
259    /// Critical - proof structure is fundamentally broken
260    Critical,
261}
262
263impl fmt::Display for ErrorSeverity {
264    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
265        match self {
266            Self::Warning => write!(f, "WARNING"),
267            Self::Error => write!(f, "ERROR"),
268            Self::Critical => write!(f, "CRITICAL"),
269        }
270    }
271}
272
273/// Configuration for proof checking
274#[derive(Debug, Clone, Default)]
275pub struct CheckerConfig {
276    /// Whether to continue checking after the first error
277    pub continue_on_error: bool,
278    /// Whether to verify conclusion content (not just structure)
279    pub verify_conclusions: bool,
280    /// Whether to allow cyclic dependencies (for some proof formats)
281    pub allow_cycles: bool,
282}
283
284/// Proof checker for verifying proof derivations
285#[derive(Debug, Default)]
286pub struct ProofChecker {
287    /// Configuration
288    config: CheckerConfig,
289    /// Collected errors
290    errors: Vec<(u32, CheckError)>,
291    /// Validated step IDs (for cycle detection)
292    validated: HashSet<u32>,
293    /// Currently being validated (for cycle detection)
294    in_progress: HashSet<u32>,
295}
296
297impl ProofChecker {
298    /// Create a new proof checker with default configuration
299    #[must_use]
300    pub fn new() -> Self {
301        Self::default()
302    }
303
304    /// Create a proof checker with custom configuration
305    #[must_use]
306    pub fn with_config(config: CheckerConfig) -> Self {
307        Self {
308            config,
309            errors: Vec::new(),
310            validated: HashSet::new(),
311            in_progress: HashSet::new(),
312        }
313    }
314
315    /// Reset the checker state
316    pub fn reset(&mut self) {
317        self.errors.clear();
318        self.validated.clear();
319        self.in_progress.clear();
320    }
321
322    /// Check a theory proof
323    pub fn check_theory_proof(&mut self, proof: &TheoryProof) -> CheckResult {
324        self.reset();
325
326        if proof.is_empty() {
327            return CheckResult::Invalid {
328                step: 0,
329                error: CheckError::EmptyProof,
330            };
331        }
332
333        // Check each step
334        for step in proof.steps() {
335            if let Err(error) = self.check_theory_step(proof, step.id) {
336                if self.config.continue_on_error {
337                    self.errors.push((step.id.0, error));
338                } else {
339                    return CheckResult::Invalid {
340                        step: step.id.0,
341                        error,
342                    };
343                }
344            }
345        }
346
347        if self.errors.is_empty() {
348            CheckResult::Valid
349        } else {
350            CheckResult::MultipleErrors(std::mem::take(&mut self.errors))
351        }
352    }
353
354    /// Check a single theory proof step
355    fn check_theory_step(
356        &mut self,
357        proof: &TheoryProof,
358        step_id: TheoryStepId,
359    ) -> Result<(), CheckError> {
360        // Cycle detection
361        if !self.config.allow_cycles {
362            if self.in_progress.contains(&step_id.0) {
363                return Err(CheckError::CyclicDependency);
364            }
365            if self.validated.contains(&step_id.0) {
366                return Ok(());
367            }
368            self.in_progress.insert(step_id.0);
369        }
370
371        let step = proof
372            .get_step(step_id)
373            .ok_or(CheckError::MissingPremise(step_id.0))?;
374
375        // Check premises exist
376        for premise_id in &step.premises {
377            if proof.get_step(*premise_id).is_none() {
378                return Err(CheckError::MissingPremise(premise_id.0));
379            }
380
381            // Recursively check premises
382            if !self.config.allow_cycles {
383                self.check_theory_step(proof, *premise_id)?;
384            }
385        }
386
387        // Check rule-specific requirements
388        self.check_theory_rule(&step.rule, step.premises.len(), step.args.len())?;
389
390        // Mark as validated
391        if !self.config.allow_cycles {
392            self.in_progress.remove(&step_id.0);
393            self.validated.insert(step_id.0);
394        }
395
396        Ok(())
397    }
398
399    /// Check rule-specific requirements for theory proofs
400    fn check_theory_rule(
401        &self,
402        rule: &TheoryRule,
403        premise_count: usize,
404        arg_count: usize,
405    ) -> Result<(), CheckError> {
406        match rule {
407            // Rules with no premises
408            TheoryRule::Refl => {
409                if premise_count != 0 {
410                    return Err(CheckError::WrongPremiseCount {
411                        expected: 0,
412                        got: premise_count,
413                    });
414                }
415            }
416
417            // Rules with exactly one premise
418            TheoryRule::Symm => {
419                if premise_count != 1 {
420                    return Err(CheckError::WrongPremiseCount {
421                        expected: 1,
422                        got: premise_count,
423                    });
424                }
425            }
426
427            // Rules with exactly two premises
428            TheoryRule::Trans => {
429                if premise_count != 2 {
430                    return Err(CheckError::WrongPremiseCount {
431                        expected: 2,
432                        got: premise_count,
433                    });
434                }
435            }
436
437            // Rules with at least one premise (congruence needs arg equalities)
438            TheoryRule::Cong => {
439                // Congruence can have zero premises for nullary functions
440            }
441
442            // Farkas lemma needs at least 2 premises
443            TheoryRule::LaGeneric => {
444                if premise_count < 2 {
445                    return Err(CheckError::WrongPremiseCount {
446                        expected: 2,
447                        got: premise_count,
448                    });
449                }
450            }
451
452            // Array read-write-same is an axiom
453            TheoryRule::ArrReadWrite1 => {
454                if premise_count != 0 {
455                    return Err(CheckError::WrongPremiseCount {
456                        expected: 0,
457                        got: premise_count,
458                    });
459                }
460            }
461
462            // Array read-write-different needs proof of i ≠ j
463            TheoryRule::ArrReadWrite2 => {
464                if premise_count != 1 {
465                    return Err(CheckError::WrongPremiseCount {
466                        expected: 1,
467                        got: premise_count,
468                    });
469                }
470            }
471
472            // LaMult needs coefficient argument
473            TheoryRule::LaMult => {
474                if arg_count < 1 {
475                    return Err(CheckError::WrongArgumentCount {
476                        expected: 1,
477                        got: arg_count,
478                    });
479                }
480            }
481
482            // Other rules - flexible checking
483            _ => {}
484        }
485
486        Ok(())
487    }
488
489    /// Check an Alethe proof
490    pub fn check_alethe_proof(&mut self, proof: &AletheProof) -> CheckResult {
491        self.reset();
492
493        if proof.is_empty() {
494            return CheckResult::Invalid {
495                step: 0,
496                error: CheckError::EmptyProof,
497            };
498        }
499
500        let steps = proof.steps();
501        let mut step_indices: HashSet<u32> = HashSet::new();
502
503        // First pass: collect all step indices
504        for step in steps {
505            match step {
506                AletheStep::Assume { index, .. } => {
507                    step_indices.insert(*index);
508                }
509                AletheStep::Step { index, .. } => {
510                    step_indices.insert(*index);
511                }
512                AletheStep::Anchor { step: index, .. } => {
513                    step_indices.insert(*index);
514                }
515                AletheStep::DefineFun { .. } => {}
516            }
517        }
518
519        // Second pass: check each step
520        for (idx, step) in steps.iter().enumerate() {
521            if let Err(error) = self.check_alethe_step(step, &step_indices) {
522                if self.config.continue_on_error {
523                    self.errors.push((idx as u32, error));
524                } else {
525                    return CheckResult::Invalid {
526                        step: idx as u32,
527                        error,
528                    };
529                }
530            }
531        }
532
533        if self.errors.is_empty() {
534            CheckResult::Valid
535        } else {
536            CheckResult::MultipleErrors(std::mem::take(&mut self.errors))
537        }
538    }
539
540    /// Check a single Alethe proof step
541    fn check_alethe_step(
542        &self,
543        step: &AletheStep,
544        step_indices: &HashSet<u32>,
545    ) -> Result<(), CheckError> {
546        match step {
547            AletheStep::Assume { .. } => {
548                // Assumptions don't need checking
549                Ok(())
550            }
551
552            AletheStep::Step { rule, premises, .. } => {
553                // Check all premises exist
554                for premise in premises {
555                    if !step_indices.contains(premise) {
556                        return Err(CheckError::MissingPremise(*premise));
557                    }
558                }
559
560                // Check rule-specific requirements
561                self.check_alethe_rule(rule, premises.len())
562            }
563
564            AletheStep::Anchor { .. } => {
565                // Anchors don't need checking
566                Ok(())
567            }
568
569            AletheStep::DefineFun { .. } => {
570                // Definitions don't need checking
571                Ok(())
572            }
573        }
574    }
575
576    /// Check rule-specific requirements for Alethe proofs
577    fn check_alethe_rule(&self, rule: &AletheRule, premise_count: usize) -> Result<(), CheckError> {
578        match rule {
579            // Resolution needs at least 2 premises
580            AletheRule::Resolution => {
581                if premise_count < 2 {
582                    return Err(CheckError::WrongPremiseCount {
583                        expected: 2,
584                        got: premise_count,
585                    });
586                }
587            }
588
589            // Reflexivity is an axiom
590            AletheRule::Refl => {
591                if premise_count != 0 {
592                    return Err(CheckError::WrongPremiseCount {
593                        expected: 0,
594                        got: premise_count,
595                    });
596                }
597            }
598
599            // Transitivity needs at least 2 premises
600            AletheRule::Trans => {
601                if premise_count < 2 {
602                    return Err(CheckError::WrongPremiseCount {
603                        expected: 2,
604                        got: premise_count,
605                    });
606                }
607            }
608
609            // Other rules - flexible checking
610            _ => {}
611        }
612
613        Ok(())
614    }
615}
616
617/// Trait for types that can be checked for validity
618pub trait Checkable {
619    /// Check if the proof is valid
620    fn check(&self) -> CheckResult;
621
622    /// Check using a custom checker configuration
623    fn check_with_config(&self, config: CheckerConfig) -> CheckResult;
624}
625
626impl Checkable for TheoryProof {
627    fn check(&self) -> CheckResult {
628        ProofChecker::new().check_theory_proof(self)
629    }
630
631    fn check_with_config(&self, config: CheckerConfig) -> CheckResult {
632        ProofChecker::with_config(config).check_theory_proof(self)
633    }
634}
635
636impl Checkable for AletheProof {
637    fn check(&self) -> CheckResult {
638        ProofChecker::new().check_alethe_proof(self)
639    }
640
641    fn check_with_config(&self, config: CheckerConfig) -> CheckResult {
642        ProofChecker::with_config(config).check_alethe_proof(self)
643    }
644}
645
646#[cfg(test)]
647mod tests {
648    use super::*;
649    #[allow(unused_imports)]
650    use crate::theory::ProofTerm;
651
652    #[test]
653    fn test_check_result_is_valid() {
654        assert!(CheckResult::Valid.is_valid());
655        assert!(
656            !CheckResult::Invalid {
657                step: 0,
658                error: CheckError::EmptyProof
659            }
660            .is_valid()
661        );
662    }
663
664    #[test]
665    fn test_check_error_display() {
666        let err = CheckError::MissingPremise(5);
667        let msg = format!("{}", err);
668        assert!(msg.contains("5"));
669        assert!(msg.contains("does not exist"));
670
671        let err = CheckError::WrongPremiseCount {
672            expected: 2,
673            got: 1,
674        };
675        let msg = format!("{}", err);
676        assert!(msg.contains("requires 2"));
677        assert!(msg.contains("1 was provided"));
678    }
679
680    #[test]
681    fn test_theory_proof_empty() {
682        let proof = TheoryProof::new();
683        let result = proof.check();
684        assert!(!result.is_valid());
685        assert!(matches!(result.error(), Some(CheckError::EmptyProof)));
686    }
687
688    #[test]
689    fn test_theory_proof_valid_refl() {
690        let mut proof = TheoryProof::new();
691        proof.refl("x");
692
693        let result = proof.check();
694        assert!(result.is_valid());
695    }
696
697    #[test]
698    fn test_theory_proof_valid_transitivity() {
699        let mut proof = TheoryProof::new();
700        let s1 = proof.add_axiom(TheoryRule::Custom("assert".into()), "(= a b)");
701        let s2 = proof.add_axiom(TheoryRule::Custom("assert".into()), "(= b c)");
702        proof.trans(s1, s2, "a", "c");
703
704        let result = proof.check();
705        assert!(result.is_valid());
706    }
707
708    #[test]
709    fn test_theory_proof_invalid_trans_premises() {
710        let mut proof = TheoryProof::new();
711        let s1 = proof.add_axiom(TheoryRule::Custom("assert".into()), "(= a b)");
712        // Trans with only 1 premise should fail
713        proof.add_step(TheoryRule::Trans, vec![s1], "(= a c)");
714
715        let result = proof.check();
716        assert!(!result.is_valid());
717    }
718
719    #[test]
720    fn test_theory_proof_missing_premise() {
721        let mut proof = TheoryProof::new();
722        // Reference a non-existent premise
723        proof.add_step(
724            TheoryRule::Trans,
725            vec![TheoryStepId(99), TheoryStepId(100)],
726            "(= a c)",
727        );
728
729        let result = proof.check();
730        assert!(!result.is_valid());
731        assert!(matches!(
732            result.error(),
733            Some(CheckError::MissingPremise(_))
734        ));
735    }
736
737    #[test]
738    fn test_alethe_proof_empty() {
739        let proof = AletheProof::new();
740        let result = proof.check();
741        assert!(!result.is_valid());
742    }
743
744    #[test]
745    fn test_alethe_proof_valid() {
746        let mut proof = AletheProof::new();
747        proof.assume("p");
748        proof.step_simple(vec![], AletheRule::Refl);
749
750        let result = proof.check();
751        assert!(result.is_valid());
752    }
753
754    #[test]
755    fn test_checker_continue_on_error() {
756        let mut proof = TheoryProof::new();
757        // Multiple invalid steps
758        proof.add_step(TheoryRule::Trans, vec![TheoryStepId(99)], "(= a b)");
759        proof.add_step(TheoryRule::Trans, vec![TheoryStepId(100)], "(= c d)");
760
761        let config = CheckerConfig {
762            continue_on_error: true,
763            ..Default::default()
764        };
765
766        let result = proof.check_with_config(config);
767        assert!(matches!(result, CheckResult::MultipleErrors(_)));
768    }
769
770    #[test]
771    fn test_checker_refl_with_premises_fails() {
772        let mut proof = TheoryProof::new();
773        let s1 = proof.add_axiom(TheoryRule::Custom("assert".into()), "(= a b)");
774        // Refl should have no premises
775        proof.add_step(TheoryRule::Refl, vec![s1], "(= x x)");
776
777        let result = proof.check();
778        assert!(!result.is_valid());
779    }
780
781    #[test]
782    fn test_checker_farkas_needs_premises() {
783        let mut proof = TheoryProof::new();
784        // Farkas with only 1 premise should fail
785        let s1 = proof.add_axiom(TheoryRule::Custom("bound".into()), "(>= x 0)");
786        proof.add_step(TheoryRule::LaGeneric, vec![s1], "false");
787
788        let result = proof.check();
789        assert!(!result.is_valid());
790    }
791
792    #[test]
793    fn test_checker_arr_read_write_1_axiom() {
794        let mut proof = TheoryProof::new();
795        // ArrReadWrite1 is an axiom (no premises)
796        proof.add_axiom(TheoryRule::ArrReadWrite1, "(= (select (store a i v) i) v)");
797
798        let result = proof.check();
799        assert!(result.is_valid());
800    }
801
802    #[test]
803    fn test_checker_arr_read_write_2_needs_premise() {
804        let mut proof = TheoryProof::new();
805        // ArrReadWrite2 needs proof of i ≠ j
806        proof.add_axiom(
807            TheoryRule::ArrReadWrite2,
808            "(= (select (store a i v) j) (select a j))",
809        );
810
811        let result = proof.check();
812        assert!(!result.is_valid());
813    }
814}