batuta/falsification/
mod.rs

1//! Popperian Falsification Checklist Implementation
2//!
3//! Implements the 108-item Sovereign AI Assurance Protocol checklist.
4//! Each checklist item is a falsifiable claim with explicit rejection criteria.
5//!
6//! # Toyota Way Integration
7//!
8//! - **Jidoka**: Automated gates stop pipeline on failure
9//! - **Genchi Genbutsu**: Evidence-based verification
10//! - **Kaizen**: Continuous improvement via metrics
11//!
12//! # Severity Levels
13//!
14//! - **Critical**: Project FAIL - blocks release
15//! - **Major**: Requires remediation before release
16//! - **Minor**: Documented limitation
17//! - **Info**: Clarification needed
18//!
19//! # Implemented Sections (108 Items Total)
20//!
21//! - Section 1: Sovereign Data Governance (SDG-01 to SDG-15) - 15 items
22//! - Section 2: ML Technical Debt Prevention (MTD-01 to MTD-10) - 10 items
23//! - Section 3: Hypothesis-Driven Development (HDD-01 to EDD-03) - 13 items
24//! - Section 4: Numerical Reproducibility (NR-01 to NR-15) - 15 items
25//! - Section 5: Performance & Waste Elimination (PW-01 to PW-15) - 15 items
26//! - Section 6: Safety & Formal Verification (SF-01 to SF-10) - 9 items
27//! - Section 7: Jidoka Automated Gates (JA-01 to JA-12) - 9 items
28//! - Section 8: Model Cards & Auditability (MA-01 to MA-10) - 10 items
29//! - Section 9: Cross-Platform & API (CP-01 to CP-05) - 5 items
30//! - Section 10: Architectural Invariants (AI-01 to AI-05) - 5 items CRITICAL
31
32mod auditors;
33mod cross_platform;
34pub(crate) mod helpers;
35mod hypothesis_driven;
36mod invariants;
37mod jidoka;
38mod model_cards;
39mod numerical_reproducibility;
40mod performance_waste;
41mod safety;
42mod sovereign_data;
43mod technical_debt;
44mod types;
45
46pub use auditors::*;
47pub use cross_platform::evaluate_all as evaluate_cross_platform;
48pub use hypothesis_driven::evaluate_all as evaluate_hypothesis_driven;
49pub use invariants::*;
50pub use jidoka::evaluate_all as evaluate_jidoka;
51pub use model_cards::evaluate_all as evaluate_model_cards;
52pub use numerical_reproducibility::evaluate_all as evaluate_numerical_reproducibility;
53pub use performance_waste::evaluate_all as evaluate_performance_waste;
54pub use safety::evaluate_all as evaluate_safety;
55pub use sovereign_data::evaluate_all as evaluate_sovereign_data;
56pub use technical_debt::evaluate_all as evaluate_technical_debt;
57pub use types::*;
58
59use std::path::Path;
60
61/// Run the complete falsification checklist against a project.
62pub fn evaluate_project(project_path: &Path) -> ChecklistResult {
63    let mut result = ChecklistResult::new(project_path);
64
65    // Section 1: Sovereign Data Governance (15 items)
66    let sovereign_results = sovereign_data::evaluate_all(project_path);
67    result.add_section("Sovereign Data Governance", sovereign_results);
68
69    // Section 2: ML Technical Debt Prevention (10 items)
70    let debt_results = technical_debt::evaluate_all(project_path);
71    result.add_section("ML Technical Debt Prevention", debt_results);
72
73    // Section 3: Hypothesis-Driven Development (13 items)
74    let hdd_results = hypothesis_driven::evaluate_all(project_path);
75    result.add_section("Hypothesis-Driven Development", hdd_results);
76
77    // Section 4: Numerical Reproducibility (15 items)
78    let nr_results = numerical_reproducibility::evaluate_all(project_path);
79    result.add_section("Numerical Reproducibility", nr_results);
80
81    // Section 5: Performance & Waste Elimination (15 items)
82    let pw_results = performance_waste::evaluate_all(project_path);
83    result.add_section("Performance & Waste Elimination", pw_results);
84
85    // Section 6: Safety & Formal Verification (9 items)
86    let safety_results = safety::evaluate_all(project_path);
87    result.add_section("Safety & Formal Verification", safety_results);
88
89    // Section 7: Jidoka Automated Gates (9 items)
90    let jidoka_results = jidoka::evaluate_all(project_path);
91    result.add_section("Jidoka Automated Gates", jidoka_results);
92
93    // Section 8: Model Cards & Auditability (10 items)
94    let mc_results = model_cards::evaluate_all(project_path);
95    result.add_section("Model Cards & Auditability", mc_results);
96
97    // Section 9: Cross-Platform & API (5 items)
98    let cp_results = cross_platform::evaluate_all(project_path);
99    result.add_section("Cross-Platform & API", cp_results);
100
101    // Section 10: Architectural Invariants (CRITICAL - 5 items)
102    let invariant_results = invariants::evaluate_all(project_path);
103    result.add_section("Architectural Invariants", invariant_results);
104
105    // Calculate final score
106    result.finalize();
107
108    result
109}
110
111/// Run only the critical architectural invariants.
112pub fn evaluate_critical_only(project_path: &Path) -> ChecklistResult {
113    let mut result = ChecklistResult::new(project_path);
114
115    let invariant_results = invariants::evaluate_all(project_path);
116    result.add_section("Architectural Invariants", invariant_results);
117
118    result.finalize();
119    result
120}
121
122#[cfg(test)]
123mod tests {
124    use super::*;
125    use std::path::PathBuf;
126
127    // =========================================================================
128    // FALS-MOD-001: Evaluate Project
129    // =========================================================================
130
131    #[test]
132    fn test_fals_001_evaluate_project_returns_result() {
133        let path = PathBuf::from(".");
134        let result = evaluate_project(&path);
135        assert!(!result.sections.is_empty());
136    }
137
138    #[test]
139    fn test_fals_002_critical_only_returns_invariants() {
140        let path = PathBuf::from(".");
141        let result = evaluate_critical_only(&path);
142        assert!(result.sections.contains_key("Architectural Invariants"));
143    }
144
145    // =========================================================================
146    // FALS-INT-001: Integration Tests - Full Checklist on batuta
147    // =========================================================================
148
149    #[test]
150    fn test_fals_int_001_batuta_passes_critical_invariants() {
151        // batuta itself should pass the critical invariants
152        let path = PathBuf::from(".");
153        let result = evaluate_critical_only(&path);
154
155        // Verify no critical failures using an imperative loop
156        // (avoids creating an LLVM closure region that would be uncovered
157        // when no items pass the filter)
158        let mut critical_count = 0;
159        for items in result.sections.values() {
160            for item in items {
161                if item.is_critical_failure() {
162                    critical_count += 1;
163                }
164            }
165        }
166
167        assert!(!result.has_critical_failure, "batuta has {critical_count} critical failure(s)",);
168    }
169
170    #[test]
171    fn test_fals_int_002_batuta_achieves_kaizen_grade() {
172        // batuta should achieve at least Kaizen Required grade
173        let path = PathBuf::from(".");
174        let result = evaluate_critical_only(&path);
175
176        assert!(
177            result.grade.passes(),
178            "Expected Kaizen Required or better, got {} ({:.1}%)",
179            result.grade,
180            result.score
181        );
182    }
183
184    #[test]
185    fn test_fals_int_003_all_items_have_tps_principle() {
186        let path = PathBuf::from(".");
187        let result = evaluate_critical_only(&path);
188
189        for (section, items) in &result.sections {
190            for item in items {
191                assert!(
192                    !item.tps_principle.is_empty(),
193                    "Item {}.{} missing TPS principle",
194                    section,
195                    item.id
196                );
197            }
198        }
199    }
200
201    #[test]
202    fn test_fals_int_004_result_serializes_to_json() {
203        let path = PathBuf::from(".");
204        let result = evaluate_critical_only(&path);
205
206        let json = serde_json::to_string(&result);
207        // Eagerly evaluate error for coverage (avoids lazy assert! format closure)
208        let serialize_err = json.as_ref().err().map(|e| format!("{e:?}"));
209        assert!(json.is_ok(), "Failed to serialize result: {serialize_err:?}");
210
211        // Verify deserialize roundtrip
212        let json_str = json.expect("unexpected failure");
213        let parsed: Result<ChecklistResult, _> = serde_json::from_str(&json_str);
214        let parse_err = parsed.as_ref().err().map(|e| format!("{e:?}"));
215        assert!(parsed.is_ok(), "Failed to deserialize result: {parse_err:?}");
216    }
217
218    #[test]
219    fn test_fals_int_005_result_summary_format() {
220        let path = PathBuf::from(".");
221        let result = evaluate_critical_only(&path);
222
223        let summary = result.summary();
224        // Summary should contain grade, score, and pass count
225        assert!(summary.contains('%'), "Summary missing percentage: {}", summary);
226        assert!(
227            summary.contains("passed") || summary.contains("RELEASE"),
228            "Summary missing status: {}",
229            summary
230        );
231    }
232
233    // =========================================================================
234    // FALS-INT-010: Edge Cases
235    // =========================================================================
236
237    #[test]
238    fn test_fals_int_010_nonexistent_project() {
239        let path = PathBuf::from("/nonexistent/project/path");
240        let result = evaluate_critical_only(&path);
241
242        // Should not panic, should return result with failures
243        assert!(result.total_items > 0);
244    }
245
246    #[test]
247    fn test_fals_int_011_empty_directory() {
248        // Create a temp dir for testing
249        let temp_dir = std::env::temp_dir().join("batuta_test_empty");
250        let _ = std::fs::create_dir_all(&temp_dir);
251
252        let result = evaluate_critical_only(&temp_dir);
253
254        // Should not panic, should return result
255        assert!(result.total_items > 0);
256
257        let _ = std::fs::remove_dir(&temp_dir);
258    }
259
260    // =========================================================================
261    // Additional Coverage Tests
262    // =========================================================================
263
264    #[test]
265    fn test_fals_mod_evaluate_project_all_sections() {
266        let path = PathBuf::from(".");
267        let result = evaluate_project(&path);
268
269        // Should have all 10 sections
270        assert!(result.sections.len() >= 10);
271        assert!(result.sections.contains_key("Sovereign Data Governance"));
272        assert!(result.sections.contains_key("ML Technical Debt Prevention"));
273        assert!(result.sections.contains_key("Hypothesis-Driven Development"));
274        assert!(result.sections.contains_key("Numerical Reproducibility"));
275        assert!(result.sections.contains_key("Performance & Waste Elimination"));
276        assert!(result.sections.contains_key("Safety & Formal Verification"));
277        assert!(result.sections.contains_key("Jidoka Automated Gates"));
278        assert!(result.sections.contains_key("Model Cards & Auditability"));
279        assert!(result.sections.contains_key("Cross-Platform & API"));
280        assert!(result.sections.contains_key("Architectural Invariants"));
281    }
282
283    #[test]
284    fn test_fals_mod_result_finalize_counts() {
285        let path = PathBuf::from(".");
286        let result = evaluate_project(&path);
287
288        // Total items should be sum of all section items
289        let expected_total: usize = result.sections.values().map(|v| v.len()).sum();
290        assert_eq!(result.total_items, expected_total);
291
292        // Passed + failed + other <= total
293        assert!(result.passed_items + result.failed_items <= result.total_items);
294    }
295
296    #[test]
297    fn test_fals_mod_result_score_range() {
298        let path = PathBuf::from(".");
299        let result = evaluate_project(&path);
300
301        // Score should be between 0 and 100
302        assert!(result.score >= 0.0);
303        assert!(result.score <= 100.0);
304    }
305
306    #[test]
307    fn test_fals_mod_result_passes_method() {
308        let path = PathBuf::from(".");
309        let result = evaluate_critical_only(&path);
310
311        // passes() should be consistent with grade.passes()
312        assert_eq!(result.passes(), result.grade.passes() && !result.has_critical_failure);
313    }
314
315    #[test]
316    fn test_fals_mod_evaluate_project_nonexistent() {
317        let path = PathBuf::from("/nonexistent/project");
318        let result = evaluate_project(&path);
319        // Should not panic, should have all 10 sections
320        assert!(result.sections.len() >= 10);
321        assert!(result.total_items > 0);
322    }
323
324    #[test]
325    fn test_fals_mod_evaluate_project_temp_dir() {
326        let temp_dir = std::env::temp_dir().join("batuta_fals_mod_temp");
327        let _ = std::fs::create_dir_all(&temp_dir);
328        let result = evaluate_project(&temp_dir);
329        assert!(result.sections.len() >= 10);
330        // Score may vary but should be in valid range
331        assert!(result.score >= 0.0 && result.score <= 100.0);
332        let _ = std::fs::remove_dir(&temp_dir);
333    }
334
335    #[test]
336    fn test_fals_mod_critical_only_nonexistent() {
337        let path = PathBuf::from("/nonexistent/path/xyz");
338        let result = evaluate_critical_only(&path);
339        assert!(result.sections.contains_key("Architectural Invariants"));
340        // Score may be low but should not panic
341        assert!(result.score >= 0.0);
342    }
343
344    #[test]
345    fn test_fals_mod_critical_failure_format_chain() {
346        // Exercise the .map closure in the critical failure iterator chain
347        // by constructing a result that has critical failures, so the
348        // filter passes items through to the map closure.
349        let mut result = ChecklistResult::new(std::path::Path::new("/test"));
350        result.add_section(
351            "Test",
352            vec![
353                CheckItem::new("CF-01", "Critical Test", "Critical claim")
354                    .with_severity(Severity::Critical)
355                    .with_tps("Jidoka")
356                    .fail("Critical failure reason"),
357                CheckItem::new("OK-01", "Pass Test", "Pass claim").with_tps("Kaizen").pass(),
358            ],
359        );
360        result.finalize();
361
362        // Exercise the same iterator chain used in test_fals_int_001
363        let critical_failures: Vec<_> = result
364            .sections
365            .values()
366            .flat_map(|items| items.iter())
367            .filter(|i| i.is_critical_failure())
368            .map(|i| format!("{}: {}", i.id, i.rejection_reason.as_deref().unwrap_or("")))
369            .collect();
370
371        assert_eq!(critical_failures.len(), 1);
372        assert!(critical_failures[0].contains("CF-01"));
373        assert!(critical_failures[0].contains("Critical failure reason"));
374    }
375}
batuta/falsification/mod.rs

batuta/falsification/
mod.rs