Skip to main content

batuta/falsification/
numerical_reproducibility.rs

1//! Section 4: Numerical Reproducibility (NR-01 to NR-15)
2//!
3//! IEEE 754 compliance, reference implementation parity, numerical stability.
4//!
5//! # TPS Principles
6//!
7//! - **Jidoka**: Automatic compliance verification
8//! - **Genchi Genbutsu**: Verify on actual hardware
9//! - **Baseline comparison**: Reference parity
10
11use super::types::{CheckItem, Evidence, EvidenceType, Severity};
12use std::path::Path;
13use std::time::Instant;
14
15/// Evaluate all Numerical Reproducibility checks.
16pub fn evaluate_all(project_path: &Path) -> Vec<CheckItem> {
17    vec![
18        check_ieee754_compliance(project_path),
19        check_cross_platform_determinism(project_path),
20        check_numpy_parity(project_path),
21        check_sklearn_parity(project_path),
22        check_linalg_accuracy(project_path),
23        check_kahan_summation(project_path),
24        check_rng_quality(project_path),
25        check_quantization_bounds(project_path),
26        check_gradient_correctness(project_path),
27        check_tokenizer_parity(project_path),
28        check_attention_correctness(project_path),
29        check_loss_accuracy(project_path),
30        check_optimizer_state(project_path),
31        check_normalization_correctness(project_path),
32        check_matmul_stability(project_path),
33    ]
34}
35
36/// NR-01: IEEE 754 Floating-Point Compliance
37pub fn check_ieee754_compliance(project_path: &Path) -> CheckItem {
38    let start = Instant::now();
39    let mut item = CheckItem::new(
40        "NR-01",
41        "IEEE 754 Floating-Point Compliance",
42        "SIMD operations produce IEEE 754-compliant results",
43    )
44    .with_severity(Severity::Major)
45    .with_tps("Jidoka — automatic compliance verification");
46
47    let has_fp_tests =
48        check_for_pattern(project_path, &["ieee754", "floating_point", "ulp", "f32", "f64"]);
49    let has_special_cases =
50        check_for_pattern(project_path, &["NaN", "Inf", "subnormal", "denormal"]);
51
52    item = item.with_evidence(Evidence {
53        evidence_type: EvidenceType::StaticAnalysis,
54        description: format!(
55            "IEEE754: fp_tests={}, special_cases={}",
56            has_fp_tests, has_special_cases
57        ),
58        data: None,
59        files: Vec::new(),
60    });
61
62    if has_fp_tests && has_special_cases {
63        item = item.pass();
64    } else if has_fp_tests {
65        item = item.partial("FP testing (verify special cases)");
66    } else {
67        item = item.partial("No explicit IEEE 754 testing");
68    }
69
70    item.finish_timed(start)
71}
72
73/// NR-02: Cross-Platform Numerical Determinism
74pub fn check_cross_platform_determinism(project_path: &Path) -> CheckItem {
75    let start = Instant::now();
76    let mut item = CheckItem::new(
77        "NR-02",
78        "Cross-Platform Numerical Determinism",
79        "Identical inputs produce identical outputs across platforms",
80    )
81    .with_severity(Severity::Major)
82    .with_tps("Genchi Genbutsu — verify on actual hardware");
83
84    let has_platform_tests = check_ci_matrix(project_path, &["ubuntu", "macos", "windows"]);
85    let has_arch_tests = check_for_pattern(project_path, &["x86_64", "aarch64", "arm64", "wasm32"]);
86
87    item = item.with_evidence(Evidence {
88        evidence_type: EvidenceType::StaticAnalysis,
89        description: format!(
90            "Determinism: platform_ci={}, arch_tests={}",
91            has_platform_tests, has_arch_tests
92        ),
93        data: None,
94        files: Vec::new(),
95    });
96
97    if has_platform_tests && has_arch_tests {
98        item = item.pass();
99    } else if has_platform_tests {
100        item = item.partial("Multi-platform CI (verify determinism)");
101    } else {
102        item = item.partial("Single platform testing");
103    }
104
105    item.finish_timed(start)
106}
107
108/// NR-03: NumPy Reference Parity
109pub fn check_numpy_parity(project_path: &Path) -> CheckItem {
110    let start = Instant::now();
111    let mut item = CheckItem::new(
112        "NR-03",
113        "NumPy Reference Parity",
114        "Operations match NumPy within documented epsilon",
115    )
116    .with_severity(Severity::Major)
117    .with_tps("Baseline comparison");
118
119    let has_numpy_tests = check_for_pattern(project_path, &["numpy", "NumPy", "np."]);
120    let has_golden_tests = check_for_pattern(project_path, &["golden", "reference", "expected"]);
121
122    item = item.with_evidence(Evidence {
123        evidence_type: EvidenceType::StaticAnalysis,
124        description: format!(
125            "NumPy parity: tests={}, golden={}",
126            has_numpy_tests, has_golden_tests
127        ),
128        data: None,
129        files: Vec::new(),
130    });
131
132    let is_numeric = check_for_pattern(project_path, &["ndarray", "tensor", "matrix"]);
133    if !is_numeric || has_numpy_tests || has_golden_tests {
134        item = item.pass();
135    } else {
136        item = item.partial("Numeric code without reference parity tests");
137    }
138
139    item.finish_timed(start)
140}
141
142/// NR-04: scikit-learn Algorithm Parity
143pub fn check_sklearn_parity(project_path: &Path) -> CheckItem {
144    let start = Instant::now();
145    let mut item = CheckItem::new(
146        "NR-04",
147        "scikit-learn Algorithm Parity",
148        "ML algorithms produce statistically equivalent results",
149    )
150    .with_severity(Severity::Major)
151    .with_tps("Scientific validation");
152
153    let has_sklearn_tests = check_for_pattern(
154        project_path,
155        &["sklearn", "scikit-learn", "RandomForest", "LinearRegression"],
156    );
157
158    item = item.with_evidence(Evidence {
159        evidence_type: EvidenceType::StaticAnalysis,
160        description: format!("sklearn parity: tests={}", has_sklearn_tests),
161        data: None,
162        files: Vec::new(),
163    });
164
165    let is_ml = check_for_pattern(project_path, &["classifier", "regressor", "clustering"]);
166    if !is_ml || has_sklearn_tests {
167        item = item.pass();
168    } else {
169        item = item.partial("ML code without sklearn parity tests");
170    }
171
172    item.finish_timed(start)
173}
174
175/// NR-05: Linear Algebra Decomposition Accuracy
176pub fn check_linalg_accuracy(project_path: &Path) -> CheckItem {
177    let start = Instant::now();
178    let mut item = CheckItem::new(
179        "NR-05",
180        "Linear Algebra Decomposition Accuracy",
181        "Decompositions meet LAPACK standards",
182    )
183    .with_severity(Severity::Major)
184    .with_tps("Reference baseline");
185
186    let has_decomp = check_for_pattern(project_path, &["cholesky", "svd", "qr", "lu", "eigen"]);
187    let has_accuracy_tests =
188        check_for_pattern(project_path, &["reconstruction", "residual", "1e-12"]);
189
190    item = item.with_evidence(Evidence {
191        evidence_type: EvidenceType::StaticAnalysis,
192        description: format!(
193            "LinAlg: decomp={}, accuracy_tests={}",
194            has_decomp, has_accuracy_tests
195        ),
196        data: None,
197        files: Vec::new(),
198    });
199
200    if !has_decomp || has_accuracy_tests {
201        item = item.pass();
202    } else {
203        item = item.partial("Decompositions without accuracy verification");
204    }
205
206    item.finish_timed(start)
207}
208
209/// NR-06: Kahan Summation Implementation
210pub fn check_kahan_summation(project_path: &Path) -> CheckItem {
211    let start = Instant::now();
212    let mut item = CheckItem::new(
213        "NR-06",
214        "Kahan Summation Implementation",
215        "Summation uses compensated algorithm",
216    )
217    .with_severity(Severity::Minor)
218    .with_tps("Quality built-in");
219
220    let has_kahan = check_for_pattern(project_path, &["kahan", "compensated", "pairwise_sum"]);
221
222    item = item.with_evidence(Evidence {
223        evidence_type: EvidenceType::StaticAnalysis,
224        description: format!("Kahan summation: impl={}", has_kahan),
225        data: None,
226        files: Vec::new(),
227    });
228
229    let does_summation = check_for_pattern(project_path, &["sum()", ".sum()", "reduce"]);
230    if !does_summation || has_kahan {
231        item = item.pass();
232    } else {
233        item = item.partial("Summation without compensated algorithm");
234    }
235
236    item.finish_timed(start)
237}
238
239/// NR-07: RNG Statistical Quality
240pub fn check_rng_quality(project_path: &Path) -> CheckItem {
241    let start = Instant::now();
242    let mut item =
243        CheckItem::new("NR-07", "RNG Statistical Quality", "RNG passes NIST statistical tests")
244            .with_severity(Severity::Major)
245            .with_tps("Formal verification");
246
247    let has_quality_rng = check_for_pattern(project_path, &["ChaCha", "Pcg", "Xorshift", "StdRng"]);
248    let has_rng_tests = check_for_pattern(project_path, &["nist", "diehard", "statistical_test"]);
249
250    item = item.with_evidence(Evidence {
251        evidence_type: EvidenceType::StaticAnalysis,
252        description: format!(
253            "RNG quality: quality_impl={}, tests={}",
254            has_quality_rng, has_rng_tests
255        ),
256        data: None,
257        files: Vec::new(),
258    });
259
260    let uses_rng = check_for_pattern(project_path, &["rand::", "Rng", "random"]);
261    if !uses_rng || has_quality_rng {
262        item = item.pass();
263    } else {
264        item = item.partial("RNG without quality verification");
265    }
266
267    item.finish_timed(start)
268}
269
270/// NR-08: Quantization Error Bounds
271pub fn check_quantization_bounds(project_path: &Path) -> CheckItem {
272    let start = Instant::now();
273    let mut item = CheckItem::new(
274        "NR-08",
275        "Quantization Error Bounds",
276        "Quantization maintains accuracy within bounds",
277    )
278    .with_severity(Severity::Major)
279    .with_tps("Documented tradeoffs");
280
281    let has_quant = check_for_pattern(project_path, &["quantize", "q4_0", "q8_0", "int8", "bnb"]);
282    let has_error_bounds =
283        check_for_pattern(project_path, &["perplexity", "error_bound", "quality_loss"]);
284
285    item = item.with_evidence(Evidence {
286        evidence_type: EvidenceType::StaticAnalysis,
287        description: format!("Quantization: impl={}, bounds={}", has_quant, has_error_bounds),
288        data: None,
289        files: Vec::new(),
290    });
291
292    if !has_quant || has_error_bounds {
293        item = item.pass();
294    } else {
295        item = item.partial("Quantization without error bounds");
296    }
297
298    item.finish_timed(start)
299}
300
301/// NR-09: Gradient Computation Correctness
302pub fn check_gradient_correctness(project_path: &Path) -> CheckItem {
303    let start = Instant::now();
304    let mut item = CheckItem::new(
305        "NR-09",
306        "Gradient Computation Correctness",
307        "Autograd produces correct gradients",
308    )
309    .with_severity(Severity::Critical)
310    .with_tps("Mathematical correctness");
311
312    let has_autograd =
313        check_for_pattern(project_path, &["autograd", "backward", "gradient", "grad"]);
314    let has_grad_check =
315        check_for_pattern(project_path, &["finite_difference", "grad_check", "numerical_gradient"]);
316
317    item = item.with_evidence(Evidence {
318        evidence_type: EvidenceType::StaticAnalysis,
319        description: format!("Gradients: autograd={}, check={}", has_autograd, has_grad_check),
320        data: None,
321        files: Vec::new(),
322    });
323
324    if !has_autograd || has_grad_check {
325        item = item.pass();
326    } else {
327        item = item.partial("Autograd without numerical verification");
328    }
329
330    item.finish_timed(start)
331}
332
333/// NR-10: Tokenization Parity
334pub fn check_tokenizer_parity(project_path: &Path) -> CheckItem {
335    let start = Instant::now();
336    let mut item =
337        CheckItem::new("NR-10", "Tokenization Parity", "Tokenizer matches HuggingFace output")
338            .with_severity(Severity::Major)
339            .with_tps("Reference baseline");
340
341    let has_tokenizer =
342        check_for_pattern(project_path, &["tokenizer", "Tokenizer", "bpe", "sentencepiece"]);
343    let has_parity_tests =
344        check_for_pattern(project_path, &["huggingface", "transformers", "token_ids"]);
345
346    item = item.with_evidence(Evidence {
347        evidence_type: EvidenceType::StaticAnalysis,
348        description: format!("Tokenizer: impl={}, parity={}", has_tokenizer, has_parity_tests),
349        data: None,
350        files: Vec::new(),
351    });
352
353    if !has_tokenizer || has_parity_tests {
354        item = item.pass();
355    } else {
356        item = item.partial("Tokenizer without parity tests");
357    }
358
359    item.finish_timed(start)
360}
361
362/// NR-11: Attention Mechanism Correctness
363pub fn check_attention_correctness(project_path: &Path) -> CheckItem {
364    let start = Instant::now();
365    let mut item = CheckItem::new(
366        "NR-11",
367        "Attention Mechanism Correctness",
368        "Attention computes softmax(QK^T/√d)V correctly",
369    )
370    .with_severity(Severity::Critical)
371    .with_tps("Mathematical specification");
372
373    let has_attention =
374        check_for_pattern(project_path, &["attention", "Attention", "sdpa", "multi_head"]);
375    let has_correctness_tests =
376        check_for_pattern(project_path, &["attention_test", "softmax_sum", "causal_mask"]);
377
378    item = item.with_evidence(Evidence {
379        evidence_type: EvidenceType::StaticAnalysis,
380        description: format!("Attention: impl={}, tests={}", has_attention, has_correctness_tests),
381        data: None,
382        files: Vec::new(),
383    });
384
385    if !has_attention || has_correctness_tests {
386        item = item.pass();
387    } else {
388        item = item.partial("Attention without correctness verification");
389    }
390
391    item.finish_timed(start)
392}
393
394/// NR-12: Loss Function Accuracy
395pub fn check_loss_accuracy(project_path: &Path) -> CheckItem {
396    let start = Instant::now();
397    let mut item = CheckItem::new(
398        "NR-12",
399        "Loss Function Accuracy",
400        "Loss functions match reference implementations",
401    )
402    .with_severity(Severity::Major)
403    .with_tps("Baseline comparison");
404
405    let has_loss = check_for_pattern(project_path, &["loss", "Loss", "cross_entropy", "mse"]);
406    let has_accuracy_tests =
407        check_for_pattern(project_path, &["loss_test", "reference_loss", "expected_loss"]);
408
409    item = item.with_evidence(Evidence {
410        evidence_type: EvidenceType::StaticAnalysis,
411        description: format!("Loss: impl={}, tests={}", has_loss, has_accuracy_tests),
412        data: None,
413        files: Vec::new(),
414    });
415
416    if !has_loss || has_accuracy_tests {
417        item = item.pass();
418    } else {
419        item = item.partial("Loss functions without accuracy tests");
420    }
421
422    item.finish_timed(start)
423}
424
425/// NR-13: Optimizer State Correctness
426pub fn check_optimizer_state(project_path: &Path) -> CheckItem {
427    let start = Instant::now();
428    let mut item = CheckItem::new(
429        "NR-13",
430        "Optimizer State Correctness",
431        "Optimizers maintain correct state updates",
432    )
433    .with_severity(Severity::Major)
434    .with_tps("Step-by-step verification");
435
436    let has_optimizer = check_for_pattern(project_path, &["optimizer", "Optimizer", "adam", "sgd"]);
437    let has_state_tests =
438        check_for_pattern(project_path, &["optimizer_test", "state_update", "momentum"]);
439
440    item = item.with_evidence(Evidence {
441        evidence_type: EvidenceType::StaticAnalysis,
442        description: format!("Optimizer: impl={}, tests={}", has_optimizer, has_state_tests),
443        data: None,
444        files: Vec::new(),
445    });
446
447    if !has_optimizer || has_state_tests {
448        item = item.pass();
449    } else {
450        item = item.partial("Optimizer without state verification");
451    }
452
453    item.finish_timed(start)
454}
455
456/// NR-14: Normalization Layer Correctness
457pub fn check_normalization_correctness(project_path: &Path) -> CheckItem {
458    let start = Instant::now();
459    let mut item = CheckItem::new(
460        "NR-14",
461        "Normalization Layer Correctness",
462        "Norm layers produce correct outputs",
463    )
464    .with_severity(Severity::Major)
465    .with_tps("Statistical verification");
466
467    let has_norm =
468        check_for_pattern(project_path, &["BatchNorm", "LayerNorm", "RMSNorm", "normalize"]);
469    let has_norm_tests =
470        check_for_pattern(project_path, &["norm_test", "mean_zero", "variance_one"]);
471
472    item = item.with_evidence(Evidence {
473        evidence_type: EvidenceType::StaticAnalysis,
474        description: format!("Normalization: impl={}, tests={}", has_norm, has_norm_tests),
475        data: None,
476        files: Vec::new(),
477    });
478
479    if !has_norm || has_norm_tests {
480        item = item.pass();
481    } else {
482        item = item.partial("Normalization without correctness tests");
483    }
484
485    item.finish_timed(start)
486}
487
488/// NR-15: Matrix Multiplication Numerical Stability
489pub fn check_matmul_stability(project_path: &Path) -> CheckItem {
490    let start = Instant::now();
491    let mut item = CheckItem::new(
492        "NR-15",
493        "Matrix Multiplication Stability",
494        "Matmul handles ill-conditioned matrices",
495    )
496    .with_severity(Severity::Major)
497    .with_tps("Graceful degradation");
498
499    let has_matmul = check_for_pattern(project_path, &["matmul", "gemm", "dot"]);
500    let has_stability_tests =
501        check_for_pattern(project_path, &["condition_number", "ill_conditioned", "stability"]);
502
503    item = item.with_evidence(Evidence {
504        evidence_type: EvidenceType::StaticAnalysis,
505        description: format!(
506            "Matmul stability: impl={}, tests={}",
507            has_matmul, has_stability_tests
508        ),
509        data: None,
510        files: Vec::new(),
511    });
512
513    if !has_matmul || has_stability_tests {
514        item = item.pass();
515    } else {
516        item = item.partial("Matmul without stability verification");
517    }
518
519    item.finish_timed(start)
520}
521
// Helper functions

/// Delegates to `super::helpers::source_contains_pattern`; presumably true
/// when any of `patterns` occurs in the project's sources — exact matching
/// semantics (file set, substring vs. token) are defined in the helpers
/// module. NOTE(review): several checks rely on short patterns like "f32"
/// or "lu"; confirm the helper does not over-match — TODO verify.
fn check_for_pattern(project_path: &Path, patterns: &[&str]) -> bool {
    super::helpers::source_contains_pattern(project_path, patterns)
}
526
527fn check_ci_matrix(project_path: &Path, platforms: &[&str]) -> bool {
528    super::helpers::ci_platform_count(project_path, platforms) >= 2
529}
530
531#[cfg(test)]
532mod tests {
533    use super::*;
534    use std::path::PathBuf;
535
536    #[test]
537    fn test_evaluate_all_returns_15_items() {
538        let path = PathBuf::from(".");
539        let items = evaluate_all(&path);
540        assert_eq!(items.len(), 15);
541    }
542
543    #[test]
544    fn test_all_items_have_tps_principle() {
545        let path = PathBuf::from(".");
546        for item in evaluate_all(&path) {
547            assert!(!item.tps_principle.is_empty(), "Item {} missing TPS", item.id);
548        }
549    }
550
551    #[test]
552    fn test_all_items_have_evidence() {
553        let path = PathBuf::from(".");
554        for item in evaluate_all(&path) {
555            assert!(!item.evidence.is_empty(), "Item {} missing evidence", item.id);
556        }
557    }
558
559    #[test]
560    fn test_check_ieee754_compliance() {
561        let path = PathBuf::from(".");
562        let item = check_ieee754_compliance(&path);
563        assert_eq!(item.id, "NR-01");
564        assert!(item.name.contains("IEEE 754"));
565    }
566
567    #[test]
568    fn test_check_cross_platform_determinism() {
569        let path = PathBuf::from(".");
570        let item = check_cross_platform_determinism(&path);
571        assert_eq!(item.id, "NR-02");
572        assert!(item.name.contains("Cross-Platform"));
573    }
574
575    #[test]
576    fn test_check_numpy_parity() {
577        let path = PathBuf::from(".");
578        let item = check_numpy_parity(&path);
579        assert_eq!(item.id, "NR-03");
580        assert!(item.name.contains("NumPy"));
581    }
582
583    #[test]
584    fn test_check_sklearn_parity() {
585        let path = PathBuf::from(".");
586        let item = check_sklearn_parity(&path);
587        assert_eq!(item.id, "NR-04");
588        assert!(item.name.contains("scikit-learn"));
589    }
590
591    #[test]
592    fn test_check_linalg_accuracy() {
593        let path = PathBuf::from(".");
594        let item = check_linalg_accuracy(&path);
595        assert_eq!(item.id, "NR-05");
596    }
597
598    #[test]
599    fn test_check_kahan_summation() {
600        let path = PathBuf::from(".");
601        let item = check_kahan_summation(&path);
602        assert_eq!(item.id, "NR-06");
603    }
604
605    #[test]
606    fn test_check_rng_quality() {
607        let path = PathBuf::from(".");
608        let item = check_rng_quality(&path);
609        assert_eq!(item.id, "NR-07");
610    }
611
612    #[test]
613    fn test_check_quantization_bounds() {
614        let path = PathBuf::from(".");
615        let item = check_quantization_bounds(&path);
616        assert_eq!(item.id, "NR-08");
617    }
618
619    #[test]
620    fn test_check_gradient_correctness() {
621        let path = PathBuf::from(".");
622        let item = check_gradient_correctness(&path);
623        assert_eq!(item.id, "NR-09");
624    }
625
626    #[test]
627    fn test_check_tokenizer_parity() {
628        let path = PathBuf::from(".");
629        let item = check_tokenizer_parity(&path);
630        assert_eq!(item.id, "NR-10");
631    }
632
633    #[test]
634    fn test_check_attention_correctness() {
635        let path = PathBuf::from(".");
636        let item = check_attention_correctness(&path);
637        assert_eq!(item.id, "NR-11");
638    }
639
640    #[test]
641    fn test_check_loss_accuracy() {
642        let path = PathBuf::from(".");
643        let item = check_loss_accuracy(&path);
644        assert_eq!(item.id, "NR-12");
645    }
646
647    #[test]
648    fn test_check_optimizer_state() {
649        let path = PathBuf::from(".");
650        let item = check_optimizer_state(&path);
651        assert_eq!(item.id, "NR-13");
652    }
653
654    #[test]
655    fn test_check_normalization_correctness() {
656        let path = PathBuf::from(".");
657        let item = check_normalization_correctness(&path);
658        assert_eq!(item.id, "NR-14");
659    }
660
661    #[test]
662    fn test_check_matmul_stability() {
663        let path = PathBuf::from(".");
664        let item = check_matmul_stability(&path);
665        assert_eq!(item.id, "NR-15");
666    }
667
668    #[test]
669    fn test_all_items_have_severity() {
670        let path = PathBuf::from(".");
671        for item in evaluate_all(&path) {
672            // Severity is set via with_severity
673            assert!(
674                !item.tps_principle.is_empty(),
675                "Item {} should have TPS principle set along with severity",
676                item.id
677            );
678        }
679    }
680
681    #[test]
682    fn test_check_for_pattern_helper() {
683        let path = PathBuf::from(".");
684        // This tests the pattern matching helper
685        let has_rust = check_for_pattern(&path, &["Cargo.toml", "lib.rs"]);
686        // Should find patterns in a rust project
687        let _ = has_rust; // Validates check_for_pattern runs without panic
688    }
689
690    // =====================================================================
691    // Coverage: empty project paths to exercise alternate branches
692    // =====================================================================
693
694    /// Use an empty temp directory to ensure check_for_pattern returns false.
695    fn empty_dir() -> tempfile::TempDir {
696        tempfile::TempDir::new().expect("Failed to create temp dir")
697    }
698
699    #[test]
700    fn test_ieee754_no_features_present() {
701        let dir = empty_dir();
702        let item = check_ieee754_compliance(dir.path());
703        assert_eq!(item.id, "NR-01");
704        // Neither fp tests nor special cases → partial "No explicit IEEE 754 testing"
705        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
706        assert!(item
707            .rejection_reason
708            .as_ref()
709            .expect("unexpected failure")
710            .contains("No explicit IEEE 754"));
711    }
712
713    #[test]
714    fn test_ieee754_fp_tests_only() {
715        let dir = empty_dir();
716        // Create a fake source file with fp test patterns but no special cases
717        let src_dir = dir.path().join("src");
718        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
719        std::fs::write(src_dir.join("test.rs"), "fn test_ieee754() { let x: f32 = 1.0; }")
720            .expect("unexpected failure");
721        let item = check_ieee754_compliance(dir.path());
722        assert_eq!(item.id, "NR-01");
723        // Has fp_tests but not special_cases → partial "FP testing (verify special cases)"
724        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
725        assert!(item
726            .rejection_reason
727            .as_ref()
728            .expect("unexpected failure")
729            .contains("verify special cases"));
730    }
731
732    #[test]
733    fn test_ieee754_both_present() {
734        let dir = empty_dir();
735        let src_dir = dir.path().join("src");
736        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
737        std::fs::write(
738            src_dir.join("test.rs"),
739            "fn test_ieee754() { let x: f64 = f64::NaN; let y = f64::INFINITY; }",
740        )
741        .expect("unexpected failure");
742        let item = check_ieee754_compliance(dir.path());
743        assert_eq!(item.id, "NR-01");
744        assert_eq!(item.status, super::super::types::CheckStatus::Pass);
745    }
746
747    #[test]
748    fn test_cross_platform_no_features() {
749        let dir = empty_dir();
750        let item = check_cross_platform_determinism(dir.path());
751        assert_eq!(item.id, "NR-02");
752        // No platform or arch tests → partial "Single platform testing"
753        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
754        assert!(item
755            .rejection_reason
756            .as_ref()
757            .expect("unexpected failure")
758            .contains("Single platform"));
759    }
760
761    #[test]
762    fn test_cross_platform_platform_ci_only() {
763        let dir = empty_dir();
764        // Create CI file with platforms but no arch in source
765        let ci_dir = dir.path().join(".github").join("workflows");
766        std::fs::create_dir_all(&ci_dir).expect("mkdir failed");
767        std::fs::write(ci_dir.join("ci.yml"), "os: [ubuntu-latest, macos-latest, windows-latest]")
768            .expect("unexpected failure");
769        let item = check_cross_platform_determinism(dir.path());
770        assert_eq!(item.id, "NR-02");
771        // has_platform_tests but not has_arch_tests → partial
772        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
773        assert!(item
774            .rejection_reason
775            .as_ref()
776            .expect("unexpected failure")
777            .contains("Multi-platform CI"));
778    }
779
780    #[test]
781    fn test_cross_platform_both_present() {
782        let dir = empty_dir();
783        let ci_dir = dir.path().join(".github").join("workflows");
784        std::fs::create_dir_all(&ci_dir).expect("mkdir failed");
785        std::fs::write(ci_dir.join("ci.yml"), "os: [ubuntu-latest, macos-latest, windows-latest]")
786            .expect("unexpected failure");
787        let src_dir = dir.path().join("src");
788        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
789        std::fs::write(
790            src_dir.join("arch.rs"),
791            "cfg(target_arch = \"x86_64\") cfg(target_arch = \"aarch64\")",
792        )
793        .expect("unexpected failure");
794        let item = check_cross_platform_determinism(dir.path());
795        assert_eq!(item.id, "NR-02");
796        assert_eq!(item.status, super::super::types::CheckStatus::Pass);
797    }
798
799    #[test]
800    fn test_numpy_parity_numeric_no_tests() {
801        let dir = empty_dir();
802        let src_dir = dir.path().join("src");
803        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
804        // Has numeric code but no numpy/golden tests
805        std::fs::write(
806            src_dir.join("numeric.rs"),
807            "fn matmul(tensor: &[f32], matrix: &[f32]) -> Vec<f32> { vec![] }",
808        )
809        .expect("unexpected failure");
810        let item = check_numpy_parity(dir.path());
811        assert_eq!(item.id, "NR-03");
812        // is_numeric && !has_numpy_tests && !has_golden_tests → partial
813        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
814        assert!(item
815            .rejection_reason
816            .as_ref()
817            .expect("unexpected failure")
818            .contains("Numeric code without reference parity"));
819    }
820
821    #[test]
822    fn test_sklearn_parity_ml_no_tests() {
823        let dir = empty_dir();
824        let src_dir = dir.path().join("src");
825        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
826        std::fs::write(src_dir.join("ml.rs"), "struct KnnClassifier { } fn classifier() {}")
827            .expect("unexpected failure");
828        let item = check_sklearn_parity(dir.path());
829        assert_eq!(item.id, "NR-04");
830        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
831        assert!(item
832            .rejection_reason
833            .as_ref()
834            .expect("unexpected failure")
835            .contains("ML code without sklearn parity"));
836    }
837
838    #[test]
839    fn test_linalg_decomp_no_tests() {
840        let dir = empty_dir();
841        let src_dir = dir.path().join("src");
842        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
843        std::fs::write(
844            src_dir.join("linalg.rs"),
845            "fn cholesky_decompose(m: &Mat) -> Mat { todo!() }",
846        )
847        .expect("unexpected failure");
848        let item = check_linalg_accuracy(dir.path());
849        assert_eq!(item.id, "NR-05");
850        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
851        assert!(item
852            .rejection_reason
853            .as_ref()
854            .expect("unexpected failure")
855            .contains("Decompositions without accuracy"));
856    }
857
858    #[test]
859    fn test_kahan_summation_needed() {
860        let dir = empty_dir();
861        let src_dir = dir.path().join("src");
862        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
863        std::fs::write(src_dir.join("math.rs"), "fn total(v: &[f64]) -> f64 { v.iter().sum() }")
864            .expect("unexpected failure");
865        let item = check_kahan_summation(dir.path());
866        assert_eq!(item.id, "NR-06");
867        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
868        assert!(item
869            .rejection_reason
870            .as_ref()
871            .expect("unexpected failure")
872            .contains("Summation without compensated"));
873    }
874
875    #[test]
876    fn test_rng_quality_uses_rng_no_quality() {
877        let dir = empty_dir();
878        let src_dir = dir.path().join("src");
879        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
880        std::fs::write(
881            src_dir.join("rng.rs"),
882            "use rand::Rng; fn random_val() { let mut rng = rand::thread_rng(); }",
883        )
884        .expect("unexpected failure");
885        let item = check_rng_quality(dir.path());
886        assert_eq!(item.id, "NR-07");
887        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
888        assert!(item
889            .rejection_reason
890            .as_ref()
891            .expect("unexpected failure")
892            .contains("RNG without quality"));
893    }
894
895    #[test]
896    fn test_quantization_no_bounds() {
897        let dir = empty_dir();
898        let src_dir = dir.path().join("src");
899        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
900        std::fs::write(
901            src_dir.join("quant.rs"),
902            "fn quantize_to_int8(data: &[f32]) -> Vec<i8> { vec![] }",
903        )
904        .expect("unexpected failure");
905        let item = check_quantization_bounds(dir.path());
906        assert_eq!(item.id, "NR-08");
907        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
908        assert!(item
909            .rejection_reason
910            .as_ref()
911            .expect("unexpected failure")
912            .contains("Quantization without error bounds"));
913    }
914
915    #[test]
916    fn test_gradient_autograd_no_check() {
917        let dir = empty_dir();
918        let src_dir = dir.path().join("src");
919        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
920        std::fs::write(
921            src_dir.join("grad.rs"),
922            "fn backward(grad: &Tensor) {} fn autograd_engine() {}",
923        )
924        .expect("unexpected failure");
925        let item = check_gradient_correctness(dir.path());
926        assert_eq!(item.id, "NR-09");
927        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
928        assert!(item
929            .rejection_reason
930            .as_ref()
931            .expect("unexpected failure")
932            .contains("Autograd without numerical verification"));
933    }
934
935    #[test]
936    fn test_tokenizer_no_parity() {
937        let dir = empty_dir();
938        let src_dir = dir.path().join("src");
939        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
940        std::fs::write(
941            src_dir.join("tok.rs"),
942            "struct Tokenizer { vocab: Vec<String> } fn bpe_encode() {}",
943        )
944        .expect("unexpected failure");
945        let item = check_tokenizer_parity(dir.path());
946        assert_eq!(item.id, "NR-10");
947        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
948        assert!(item
949            .rejection_reason
950            .as_ref()
951            .expect("unexpected failure")
952            .contains("Tokenizer without parity"));
953    }
954
955    #[test]
956    fn test_attention_no_correctness() {
957        let dir = empty_dir();
958        let src_dir = dir.path().join("src");
959        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
960        std::fs::write(
961            src_dir.join("attn.rs"),
962            "fn multi_head_attention(q: &[f32], k: &[f32], v: &[f32]) -> Vec<f32> { vec![] }",
963        )
964        .expect("unexpected failure");
965        let item = check_attention_correctness(dir.path());
966        assert_eq!(item.id, "NR-11");
967        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
968        assert!(item
969            .rejection_reason
970            .as_ref()
971            .expect("unexpected failure")
972            .contains("Attention without correctness"));
973    }
974
975    #[test]
976    fn test_loss_no_accuracy() {
977        let dir = empty_dir();
978        let src_dir = dir.path().join("src");
979        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
980        std::fs::write(
981            src_dir.join("loss_fn.rs"),
982            "fn cross_entropy(pred: &[f32], target: &[f32]) -> f32 { 0.0 }",
983        )
984        .expect("unexpected failure");
985        let item = check_loss_accuracy(dir.path());
986        assert_eq!(item.id, "NR-12");
987        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
988        assert!(item
989            .rejection_reason
990            .as_ref()
991            .expect("unexpected failure")
992            .contains("Loss functions without accuracy"));
993    }
994
995    #[test]
996    fn test_optimizer_no_state_tests() {
997        let dir = empty_dir();
998        let src_dir = dir.path().join("src");
999        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
1000        std::fs::write(
1001            src_dir.join("optim.rs"),
1002            "struct AdamOptimizer { lr: f64 } fn sgd_step() {}",
1003        )
1004        .expect("unexpected failure");
1005        let item = check_optimizer_state(dir.path());
1006        assert_eq!(item.id, "NR-13");
1007        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
1008        assert!(item
1009            .rejection_reason
1010            .as_ref()
1011            .expect("unexpected failure")
1012            .contains("Optimizer without state verification"));
1013    }
1014
1015    #[test]
1016    fn test_normalization_no_correctness() {
1017        let dir = empty_dir();
1018        let src_dir = dir.path().join("src");
1019        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
1020        std::fs::write(
1021            src_dir.join("norm.rs"),
1022            "fn LayerNorm(x: &[f32]) -> Vec<f32> { vec![] } fn RMSNorm() {}",
1023        )
1024        .expect("unexpected failure");
1025        let item = check_normalization_correctness(dir.path());
1026        assert_eq!(item.id, "NR-14");
1027        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
1028        assert!(item
1029            .rejection_reason
1030            .as_ref()
1031            .expect("unexpected failure")
1032            .contains("Normalization without correctness"));
1033    }
1034
1035    #[test]
1036    fn test_matmul_no_stability() {
1037        let dir = empty_dir();
1038        let src_dir = dir.path().join("src");
1039        std::fs::create_dir_all(&src_dir).expect("mkdir failed");
1040        std::fs::write(
1041            src_dir.join("matmul.rs"),
1042            "fn gemm(a: &[f32], b: &[f32], c: &mut [f32]) { /* matmul */ }",
1043        )
1044        .expect("unexpected failure");
1045        let item = check_matmul_stability(dir.path());
1046        assert_eq!(item.id, "NR-15");
1047        assert_eq!(item.status, super::super::types::CheckStatus::Partial);
1048        assert!(item
1049            .rejection_reason
1050            .as_ref()
1051            .expect("unexpected failure")
1052            .contains("Matmul without stability"));
1053    }
1054
    #[test]
    fn test_evaluate_all_empty_project() {
        // Smoke test: even an empty project yields all 15 NR checks
        // (NR-01..NR-15), each with at least one piece of evidence.
        let dir = empty_dir();
        let items = evaluate_all(dir.path());
        assert_eq!(items.len(), 15);
        // Every item must attach evidence (duration is not asserted here).
        for item in &items {
            assert!(!item.evidence.is_empty(), "Item {} missing evidence", item.id);
        }
    }
1065
1066    #[test]
1067    fn test_check_ci_matrix_helper() {
1068        let dir = empty_dir();
1069        let count = check_ci_matrix(dir.path(), &["ubuntu", "macos"]);
1070        assert!(!count);
1071    }
1072}