batuta/oracle/rag/falsification.rs
1//! Popperian Falsification Test Suite for Scalar Int8 Rescoring Retriever
2//!
3//! Implements the 100-point falsification checklist from retriever-spec.md
4//! Following Toyota Way principles with Jidoka stop-on-error validation.
5//!
6//! # Sections
7//!
8//! - QA: Quantization Accuracy (15 items)
9//! - RA: Retrieval Accuracy (15 items)
10//! - PF: Performance (15 items)
11//! - NC: Numerical Correctness (15 items)
12//! - SR: Safety & Robustness (10 items)
13//! - AI: API & Integration (10 items)
14//! - JG: Jidoka Gates (10 items)
15//! - DR: Documentation & Reproducibility (10 items)
16
17#[allow(unused_imports)]
18use super::quantization::*;
19
20// ============================================================================
21// Falsification Result Types
22// ============================================================================
23
24/// Result of a falsification test
25#[derive(Debug, Clone)]
26pub struct FalsificationResult {
27 /// Test ID (e.g., "QA-01")
28 pub id: String,
29 /// Test name
30 pub name: String,
31 /// Whether the claim was falsified (true = FAIL, false = PASS)
32 pub falsified: bool,
33 /// Evidence collected
34 pub evidence: Vec<String>,
35 /// TPS Principle applied
36 pub tps_principle: TpsPrinciple,
37 /// Severity level
38 pub severity: Severity,
39}
40
/// Toyota Production System (TPS) principle that a falsification check is
/// tagged with.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TpsPrinciple {
    /// Jidoka: stop on error.
    Jidoka,
    /// Poka-Yoke: mistake-proofing.
    PokaYoke,
    /// Heijunka: load leveling.
    Heijunka,
    /// Kaizen: continuous improvement.
    Kaizen,
    /// Genchi Genbutsu: go and see.
    GenchiGenbutsu,
    /// Muda: waste elimination.
    Muda,
    /// Muri: overload prevention.
    Muri,
    /// Mura: variance reduction.
    Mura,
}
61
/// How serious a falsification failure is.
///
/// The derived ordering follows declaration order, so `Critical` compares as
/// the *smallest* value and `Informational` as the largest. Do not reorder the
/// variants without checking every comparison and sort that relies on this.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Invalidates core claims.
    Critical,
    /// Significantly weakens validity.
    Major,
    /// Edge case or boundary issue.
    Minor,
    /// Clarification needed.
    Informational,
}
74
75impl FalsificationResult {
76 pub fn pass(id: &str, name: &str, tps: TpsPrinciple, severity: Severity) -> Self {
77 Self {
78 id: id.to_string(),
79 name: name.to_string(),
80 falsified: false,
81 evidence: vec![],
82 tps_principle: tps,
83 severity,
84 }
85 }
86
87 pub fn fail(
88 id: &str,
89 name: &str,
90 tps: TpsPrinciple,
91 severity: Severity,
92 evidence: Vec<String>,
93 ) -> Self {
94 Self {
95 id: id.to_string(),
96 name: name.to_string(),
97 falsified: true,
98 evidence,
99 tps_principle: tps,
100 severity,
101 }
102 }
103
104 pub fn with_evidence(mut self, evidence: &str) -> Self {
105 self.evidence.push(evidence.to_string());
106 self
107 }
108}
109
110/// Falsification suite summary
111#[derive(Debug, Clone)]
112pub struct FalsificationSummary {
113 pub total: usize,
114 pub passed: usize,
115 pub failed: usize,
116 pub score: f64,
117 pub grade: Grade,
118 pub results: Vec<FalsificationResult>,
119}
120
/// Grade band assigned to a falsification run according to its percentage
/// score.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Grade {
    /// Toyota Standard: score of 95-100%.
    ToyotaStandard,
    /// Kaizen Required: score of 85-94%.
    KaizenRequired,
    /// Andon Warning: score of 70-84%.
    AndonWarning,
    /// Stop the Line: score below 70%.
    StopTheLine,
}
132
133impl FalsificationSummary {
134 pub fn new(results: Vec<FalsificationResult>) -> Self {
135 let total = results.len();
136 let passed = results.iter().filter(|r| !r.falsified).count();
137 let failed = total - passed;
138 let score = if total > 0 { (passed as f64 / total as f64) * 100.0 } else { 0.0 };
139 let grade = match score as u32 {
140 95..=100 => Grade::ToyotaStandard,
141 85..=94 => Grade::KaizenRequired,
142 70..=84 => Grade::AndonWarning,
143 _ => Grade::StopTheLine,
144 };
145
146 Self { total, passed, failed, score, grade, results }
147 }
148}
149
150// ============================================================================
151// Shared Test Helpers
152// ============================================================================
153// ============================================================================
154// Run Full Falsification Suite
155// ============================================================================
156
157/// Run all falsification tests and return summary
158pub fn run_falsification_suite() -> FalsificationSummary {
159 // This would collect results from all test modules
160 // For now, return a placeholder
161 let results = vec![
162 FalsificationResult::pass(
163 "QA-01",
164 "Quantization Error Bound",
165 TpsPrinciple::PokaYoke,
166 Severity::Critical,
167 ),
168 FalsificationResult::pass(
169 "QA-02",
170 "Symmetric Quantization",
171 TpsPrinciple::GenchiGenbutsu,
172 Severity::Major,
173 ),
174 // ... more results would be collected from actual test runs
175 ];
176
177 FalsificationSummary::new(results)
178}
179
180#[cfg(test)]
181#[path = "falsification_tests.rs"]
182mod tests;