batuta/oracle/rag/
falsification.rs

1//! Popperian Falsification Test Suite for Scalar Int8 Rescoring Retriever
2//!
3//! Implements the 100-point falsification checklist from retriever-spec.md
4//! Following Toyota Way principles with Jidoka stop-on-error validation.
5//!
6//! # Sections
7//!
8//! - QA: Quantization Accuracy (15 items)
9//! - RA: Retrieval Accuracy (15 items)
10//! - PF: Performance (15 items)
11//! - NC: Numerical Correctness (15 items)
12//! - SR: Safety & Robustness (10 items)
13//! - AI: API & Integration (10 items)
14//! - JG: Jidoka Gates (10 items)
15//! - DR: Documentation & Reproducibility (10 items)
16
17#[allow(unused_imports)]
18use super::quantization::*;
19
20// ============================================================================
21// Falsification Result Types
22// ============================================================================
23
24/// Result of a falsification test
25#[derive(Debug, Clone)]
26pub struct FalsificationResult {
27    /// Test ID (e.g., "QA-01")
28    pub id: String,
29    /// Test name
30    pub name: String,
31    /// Whether the claim was falsified (true = FAIL, false = PASS)
32    pub falsified: bool,
33    /// Evidence collected
34    pub evidence: Vec<String>,
35    /// TPS Principle applied
36    pub tps_principle: TpsPrinciple,
37    /// Severity level
38    pub severity: Severity,
39}
40
/// Toyota Production System (TPS) principle that a checklist item is
/// associated with.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TpsPrinciple {
    /// Jidoka: stop on error.
    Jidoka,
    /// Poka-Yoke: mistake-proofing.
    PokaYoke,
    /// Heijunka: load leveling.
    Heijunka,
    /// Kaizen: continuous improvement.
    Kaizen,
    /// Genchi Genbutsu: go and see.
    GenchiGenbutsu,
    /// Muda: waste elimination.
    Muda,
    /// Muri: overload prevention.
    Muri,
    /// Mura: variance reduction.
    Mura,
}
61
/// How serious a falsification failure is.
///
/// Variants are declared from most to least severe. Because `Ord` is
/// derived, that declaration order is also the comparison order:
/// `Critical < Major < Minor < Informational`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Invalidates core claims.
    Critical,
    /// Significantly weakens validity.
    Major,
    /// Edge-case or boundary issue.
    Minor,
    /// Clarification needed.
    Informational,
}
74
75impl FalsificationResult {
76    pub fn pass(id: &str, name: &str, tps: TpsPrinciple, severity: Severity) -> Self {
77        Self {
78            id: id.to_string(),
79            name: name.to_string(),
80            falsified: false,
81            evidence: vec![],
82            tps_principle: tps,
83            severity,
84        }
85    }
86
87    pub fn fail(
88        id: &str,
89        name: &str,
90        tps: TpsPrinciple,
91        severity: Severity,
92        evidence: Vec<String>,
93    ) -> Self {
94        Self {
95            id: id.to_string(),
96            name: name.to_string(),
97            falsified: true,
98            evidence,
99            tps_principle: tps,
100            severity,
101        }
102    }
103
104    pub fn with_evidence(mut self, evidence: &str) -> Self {
105        self.evidence.push(evidence.to_string());
106        self
107    }
108}
109
110/// Falsification suite summary
111#[derive(Debug, Clone)]
112pub struct FalsificationSummary {
113    pub total: usize,
114    pub passed: usize,
115    pub failed: usize,
116    pub score: f64,
117    pub grade: Grade,
118    pub results: Vec<FalsificationResult>,
119}
120
/// Overall grade bands for a falsification run, keyed on the pass
/// percentage.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Grade {
    /// 95-100%: Toyota Standard.
    ToyotaStandard,
    /// 85-94%: Kaizen Required.
    KaizenRequired,
    /// 70-84%: Andon Warning.
    AndonWarning,
    /// Below 70%: Stop the Line.
    StopTheLine,
}
132
133impl FalsificationSummary {
134    pub fn new(results: Vec<FalsificationResult>) -> Self {
135        let total = results.len();
136        let passed = results.iter().filter(|r| !r.falsified).count();
137        let failed = total - passed;
138        let score = if total > 0 { (passed as f64 / total as f64) * 100.0 } else { 0.0 };
139        let grade = match score as u32 {
140            95..=100 => Grade::ToyotaStandard,
141            85..=94 => Grade::KaizenRequired,
142            70..=84 => Grade::AndonWarning,
143            _ => Grade::StopTheLine,
144        };
145
146        Self { total, passed, failed, score, grade, results }
147    }
148}
149
150// ============================================================================
151// Shared Test Helpers
152// ============================================================================
153// ============================================================================
154// Run Full Falsification Suite
155// ============================================================================
156
157/// Run all falsification tests and return summary
158pub fn run_falsification_suite() -> FalsificationSummary {
159    // This would collect results from all test modules
160    // For now, return a placeholder
161    let results = vec![
162        FalsificationResult::pass(
163            "QA-01",
164            "Quantization Error Bound",
165            TpsPrinciple::PokaYoke,
166            Severity::Critical,
167        ),
168        FalsificationResult::pass(
169            "QA-02",
170            "Symmetric Quantization",
171            TpsPrinciple::GenchiGenbutsu,
172            Severity::Major,
173        ),
174        // ... more results would be collected from actual test runs
175    ];
176
177    FalsificationSummary::new(results)
178}
179
180#[cfg(test)]
181#[path = "falsification_tests.rs"]
182mod tests;
batuta/oracle/rag/falsification.rs

batuta/oracle/rag/
falsification.rs