Skip to main content

simular/falsification/
mod.rs

1//! Popperian falsification framework.
2//!
3//! Implements Karl Popper's philosophy of science:
4//! - Every hypothesis must be falsifiable
//! - Null hypothesis significance testing (NHST)
6//! - Robustness metrics via Signal Temporal Logic
7//!
8//! # Demarcation Criterion
9//!
10//! A theory T is scientific iff there exists some observation O
11//! that could refute T.
12
13use serde::{Deserialize, Serialize};
14
/// Result of null hypothesis significance testing.
///
/// Each variant records the decision taken about H₀ together with the
/// statistics that accompany that decision. Both variants carry a `p_value`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NHSTResult {
    /// H₀ rejected: evidence supports falsification.
    Rejected {
        /// Statistical p-value.
        p_value: f64,
        /// Effect size (Cohen's d or similar).
        effect_size: f64,
        /// Test statistic.
        test_statistic: f64,
    },
    /// Failed to reject H₀: model corroborated (not proven).
    NotRejected {
        /// Statistical p-value.
        p_value: f64,
        /// Statistical power.
        power: f64,
    },
}
35
36impl NHSTResult {
37    /// Check if the null hypothesis was rejected.
38    #[must_use]
39    pub const fn is_rejected(&self) -> bool {
40        matches!(self, Self::Rejected { .. })
41    }
42
43    /// Get the p-value.
44    #[must_use]
45    pub const fn p_value(&self) -> f64 {
46        match self {
47            Self::Rejected { p_value, .. } | Self::NotRejected { p_value, .. } => *p_value,
48        }
49    }
50}
51
/// Predictions generated by a hypothesis.
///
/// Values and their uncertainties live in two parallel vectors;
/// [`Predictions::add`] appends one entry to each.
#[derive(Debug, Clone, Default)]
pub struct Predictions {
    /// Predicted values.
    pub values: Vec<f64>,
    /// Prediction uncertainties.
    pub uncertainties: Vec<f64>,
}

impl Predictions {
    /// Returns a collection that holds no predictions yet.
    #[must_use]
    pub fn new() -> Self {
        Self {
            values: Vec::new(),
            uncertainties: Vec::new(),
        }
    }

    /// Appends one predicted `value` together with its `uncertainty`.
    pub fn add(&mut self, value: f64, uncertainty: f64) {
        self.uncertainties.push(uncertainty);
        self.values.push(value);
    }

    /// Number of stored predictions, i.e. the length of `values`.
    #[must_use]
    pub fn len(&self) -> usize {
        self.values.len()
    }

    /// Returns `true` when `values` holds no entries.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
86
/// Observations to test against predictions.
///
/// Observed values and their measurement uncertainties live in two parallel
/// vectors; [`Observations::add`] appends one entry to each.
#[derive(Debug, Clone, Default)]
pub struct Observations {
    /// Observed values.
    pub values: Vec<f64>,
    /// Measurement uncertainties.
    pub uncertainties: Vec<f64>,
}

impl Observations {
    /// Returns a collection that holds no observations yet.
    #[must_use]
    pub fn new() -> Self {
        Self {
            values: Vec::new(),
            uncertainties: Vec::new(),
        }
    }

    /// Appends one observed `value` together with its `uncertainty`.
    pub fn add(&mut self, value: f64, uncertainty: f64) {
        self.uncertainties.push(uncertainty);
        self.values.push(value);
    }

    /// Number of stored observations, i.e. the length of `values`.
    #[must_use]
    pub fn len(&self) -> usize {
        self.values.len()
    }

    /// Returns `true` when `values` holds no entries.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
121
/// Falsification criteria defining what would refute a hypothesis.
///
/// A criterion is the comparison `metric <operator> threshold`;
/// [`FalsificationCriteria::check`] evaluates it for a concrete metric value.
#[derive(Debug, Clone)]
pub struct FalsificationCriteria {
    /// Metric name.
    pub metric: String,
    /// Comparison operator.
    pub operator: ComparisonOp,
    /// Threshold value.
    pub threshold: f64,
}

/// Comparison operator for falsification criteria.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComparisonOp {
    /// Less than.
    Lt,
    /// Less than or equal.
    Le,
    /// Greater than.
    Gt,
    /// Greater than or equal.
    Ge,
    /// Equal (within tolerance).
    Eq,
    /// Not equal.
    Ne,
}

impl FalsificationCriteria {
    /// Check if a value satisfies the criterion.
    ///
    /// Evaluates `value <operator> threshold`.
    ///
    /// * `Eq` holds when the two numbers are exactly equal (this covers two
    ///   equal infinities) or differ by less than `f64::EPSILON`.
    /// * `Ne` is the exact complement of `Eq`, so exactly one of the two holds
    ///   for every input; in particular a NaN `value` is "not equal".
    /// * The ordering operators use the plain IEEE-754 comparisons and are
    ///   therefore `false` whenever either side is NaN.
    #[must_use]
    pub fn check(&self, value: f64) -> bool {
        match self.operator {
            ComparisonOp::Lt => value < self.threshold,
            ComparisonOp::Le => value <= self.threshold,
            ComparisonOp::Gt => value > self.threshold,
            ComparisonOp::Ge => value >= self.threshold,
            ComparisonOp::Eq => self.is_within_tolerance(value),
            ComparisonOp::Ne => !self.is_within_tolerance(value),
        }
    }

    /// Returns `true` when `value` equals the threshold within `f64::EPSILON`.
    // Exact comparison is intentional: `inf - inf` is NaN, so the difference
    // test alone would report two equal infinities as unequal.
    #[allow(clippy::float_cmp)]
    fn is_within_tolerance(&self, value: f64) -> bool {
        value == self.threshold || (value - self.threshold).abs() < f64::EPSILON
    }

    /// Create a "less than" criterion.
    ///
    /// The criterion is satisfied by values strictly below `threshold`.
    #[must_use]
    pub fn less_than(metric: impl Into<String>, threshold: f64) -> Self {
        Self {
            metric: metric.into(),
            operator: ComparisonOp::Lt,
            threshold,
        }
    }

    /// Create a "greater than" criterion.
    ///
    /// The criterion is satisfied by values strictly above `threshold`.
    #[must_use]
    pub fn greater_than(metric: impl Into<String>, threshold: f64) -> Self {
        Self {
            metric: metric.into(),
            operator: ComparisonOp::Gt,
            threshold,
        }
    }
}
184
/// Trait for falsifiable hypotheses.
///
/// Implements Popper's demarcation criterion: a hypothesis is scientific
/// only if it can be falsified by observation.
///
/// An implementor supplies four things: predictions derived from a state,
/// the criteria that describe a refutation, a signed robustness margin, and
/// a significance test that compares predictions with observations.
pub trait FalsifiableHypothesis {
    /// Type of state this hypothesis operates on.
    type State;

    /// Generate testable predictions from state.
    fn predict(&self, state: &Self::State) -> Predictions;

    /// Define what would falsify this hypothesis.
    fn falsification_criteria(&self) -> Vec<FalsificationCriteria>;

    /// Compute robustness degree (distance to falsification).
    ///
    /// Positive values indicate satisfaction with margin.
    /// Negative values indicate violation.
    /// Zero indicates boundary (maximally falsifiable).
    fn robustness(&self, state: &Self::State) -> f64;

    /// Perform null hypothesis significance test.
    ///
    /// Compares `predictions` with `observations` at the given
    /// `significance` level and reports the decision as an [`NHSTResult`].
    fn null_hypothesis_test(
        &self,
        predictions: &Predictions,
        observations: &Observations,
        significance: f64,
    ) -> NHSTResult;
}
214
215/// Statistical test functions.
216pub mod stats {
217    use super::NHSTResult;
218
219    /// Compute mean of values.
220    #[must_use]
221    pub fn mean(values: &[f64]) -> f64 {
222        if values.is_empty() {
223            return 0.0;
224        }
225        values.iter().sum::<f64>() / values.len() as f64
226    }
227
228    /// Compute variance of values.
229    #[must_use]
230    pub fn variance(values: &[f64]) -> f64 {
231        if values.len() < 2 {
232            return 0.0;
233        }
234        let m = mean(values);
235        let sum_sq: f64 = values.iter().map(|x| (x - m).powi(2)).sum();
236        sum_sq / (values.len() - 1) as f64
237    }
238
239    /// Compute standard deviation.
240    #[must_use]
241    pub fn std_dev(values: &[f64]) -> f64 {
242        variance(values).sqrt()
243    }
244
245    /// Compute standard error of the mean.
246    #[must_use]
247    pub fn std_error(values: &[f64]) -> f64 {
248        if values.is_empty() {
249            return 0.0;
250        }
251        std_dev(values) / (values.len() as f64).sqrt()
252    }
253
254    /// Perform one-sample t-test.
255    ///
256    /// Tests H₀: μ = μ₀ against H₁: μ ≠ μ₀.
257    #[must_use]
258    pub fn one_sample_t_test(values: &[f64], mu_0: f64, significance: f64) -> NHSTResult {
259        if values.len() < 2 {
260            return NHSTResult::NotRejected {
261                p_value: 1.0,
262                power: 0.0,
263            };
264        }
265
266        let n = values.len() as f64;
267        let sample_mean = mean(values);
268        let sample_std = std_dev(values);
269
270        if sample_std < f64::EPSILON {
271            // No variance - can't perform test
272            return NHSTResult::NotRejected {
273                p_value: 1.0,
274                power: 0.0,
275            };
276        }
277
278        let t_stat = (sample_mean - mu_0) / (sample_std / n.sqrt());
279        let _df = n - 1.0; // degrees of freedom (used for exact t-distribution)
280
281        // Approximate p-value using normal distribution for large n
282        let p_value = 2.0 * (1.0 - normal_cdf(t_stat.abs()));
283
284        // Effect size (Cohen's d)
285        let effect_size = (sample_mean - mu_0) / sample_std;
286
287        if p_value < significance {
288            NHSTResult::Rejected {
289                p_value,
290                effect_size: effect_size.abs(),
291                test_statistic: t_stat,
292            }
293        } else {
294            // Approximate power (simplified)
295            let power = 1.0 - normal_cdf(1.96 - effect_size.abs() * n.sqrt());
296            NHSTResult::NotRejected {
297                p_value,
298                power: power.clamp(0.0, 1.0),
299            }
300        }
301    }
302
303    /// Perform two-sample t-test.
304    ///
305    /// Tests H₀: μ₁ = μ₂ against H₁: μ₁ ≠ μ₂.
306    #[must_use]
307    pub fn two_sample_t_test(sample1: &[f64], sample2: &[f64], significance: f64) -> NHSTResult {
308        if sample1.len() < 2 || sample2.len() < 2 {
309            return NHSTResult::NotRejected {
310                p_value: 1.0,
311                power: 0.0,
312            };
313        }
314
315        let n1 = sample1.len() as f64;
316        let n2 = sample2.len() as f64;
317
318        let mean1 = mean(sample1);
319        let mean2 = mean(sample2);
320
321        let var1 = variance(sample1);
322        let var2 = variance(sample2);
323
324        // Pooled standard error
325        let se = (var1 / n1 + var2 / n2).sqrt();
326
327        if se < f64::EPSILON {
328            return NHSTResult::NotRejected {
329                p_value: 1.0,
330                power: 0.0,
331            };
332        }
333
334        let t_stat = (mean1 - mean2) / se;
335
336        // Approximate p-value
337        let p_value = 2.0 * (1.0 - normal_cdf(t_stat.abs()));
338
339        // Effect size
340        let pooled_std = ((var1 + var2) / 2.0).sqrt();
341        let effect_size = if pooled_std > f64::EPSILON {
342            (mean1 - mean2) / pooled_std
343        } else {
344            0.0
345        };
346
347        if p_value < significance {
348            NHSTResult::Rejected {
349                p_value,
350                effect_size: effect_size.abs(),
351                test_statistic: t_stat,
352            }
353        } else {
354            NHSTResult::NotRejected {
355                p_value,
356                power: 0.5, // Simplified
357            }
358        }
359    }
360
361    /// Chi-square goodness of fit test.
362    #[must_use]
363    pub fn chi_square_test(observed: &[f64], expected: &[f64], significance: f64) -> NHSTResult {
364        if observed.len() != expected.len() || observed.is_empty() {
365            return NHSTResult::NotRejected {
366                p_value: 1.0,
367                power: 0.0,
368            };
369        }
370
371        let chi_sq: f64 = observed
372            .iter()
373            .zip(expected)
374            .filter(|(_, &e)| e > f64::EPSILON)
375            .map(|(&o, &e)| (o - e).powi(2) / e)
376            .sum();
377
378        let df = (observed.len() - 1) as f64;
379
380        // Approximate p-value (simplified)
381        let p_value = 1.0 - chi_square_cdf(chi_sq, df);
382
383        if p_value < significance {
384            NHSTResult::Rejected {
385                p_value,
386                effect_size: (chi_sq / df).sqrt(), // Cramér's V approximation
387                test_statistic: chi_sq,
388            }
389        } else {
390            NHSTResult::NotRejected {
391                p_value,
392                power: 0.5,
393            }
394        }
395    }
396
397    /// Standard normal CDF approximation (private).
398    fn normal_cdf(x: f64) -> f64 {
399        0.5 * (1.0 + erf(x / std::f64::consts::SQRT_2))
400    }
401
402    /// Standard normal CDF approximation (public).
403    #[must_use]
404    pub fn normal_cdf_pub(x: f64) -> f64 {
405        normal_cdf(x)
406    }
407
408    /// Chi-square CDF approximation.
409    fn chi_square_cdf(x: f64, df: f64) -> f64 {
410        if x <= 0.0 {
411            return 0.0;
412        }
413        // Wilson-Hilferty approximation
414        let z = (x / df).powf(1.0 / 3.0) - (1.0 - 2.0 / (9.0 * df));
415        let se = (2.0 / (9.0 * df)).sqrt();
416        normal_cdf(z / se)
417    }
418
419    /// Error function approximation.
420    fn erf(x: f64) -> f64 {
421        // Approximation with max error 1.5e-7
422        let a1 = 0.254_829_592;
423        let a2 = -0.284_496_736;
424        let a3 = 1.421_413_741;
425        let a4 = -1.453_152_027;
426        let a5 = 1.061_405_429;
427        let p = 0.327_591_1;
428
429        let sign = if x < 0.0 { -1.0 } else { 1.0 };
430        let x = x.abs();
431
432        let t = 1.0 / (1.0 + p * x);
433        let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
434
435        sign * y
436    }
437}
438
439// ============================================================================
440// Nullification Test Framework (Appendix F)
441// ============================================================================
442
/// Result of a nullification test.
///
/// Bundles the decision about H₀ with the statistics behind it and the raw
/// observations the statistics were computed from.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NullificationResult {
    /// Hypothesis identifier.
    pub hypothesis_id: String,
    /// Whether H₀ was rejected.
    pub rejected: bool,
    /// Statistical p-value.
    pub p_value: f64,
    /// 95% confidence interval, stored as `(lower, upper)`.
    pub confidence_interval: (f64, f64),
    /// Effect size (Cohen's d).
    pub effect_size: f64,
    /// Observations collected.
    pub observations: Vec<f64>,
}
459
460impl NullificationResult {
461    /// Format result following Princeton reporting standards.
462    #[must_use]
463    pub fn report(&self) -> String {
464        format!(
465            "{}: {} (p={:.4}, 95% CI [{:.4}, {:.4}], d={:.2})",
466            self.hypothesis_id,
467            if self.rejected {
468                "REJECTED"
469            } else {
470                "NOT REJECTED"
471            },
472            self.p_value,
473            self.confidence_interval.0,
474            self.confidence_interval.1,
475            self.effect_size,
476        )
477    }
478}
479
480/// Popperian nullification test for ML simulation.
481///
482/// Following Princeton methodology: minimum 5 runs, bootstrap CI.
483#[derive(Debug, Clone)]
484pub struct NullificationTest {
485    /// Null hypothesis identifier.
486    pub hypothesis_id: String,
487    /// Number of independent runs (minimum 5 per Princeton methodology).
488    pub n_runs: usize,
489    /// Significance level (α = 0.05 standard).
490    pub alpha: f64,
491    /// Number of bootstrap samples for CI.
492    pub bootstrap_samples: usize,
493    /// Expected value under H₀ (for one-sample tests).
494    pub expected_value: f64,
495}
496
497impl NullificationTest {
498    /// Create a new nullification test.
499    #[must_use]
500    pub fn new(hypothesis_id: impl Into<String>) -> Self {
501        Self {
502            hypothesis_id: hypothesis_id.into(),
503            n_runs: 5,
504            alpha: 0.05,
505            bootstrap_samples: 10_000,
506            expected_value: 0.0,
507        }
508    }
509
510    /// Set number of runs (minimum 5).
511    #[must_use]
512    pub fn with_runs(mut self, n_runs: usize) -> Self {
513        self.n_runs = n_runs.max(5);
514        self
515    }
516
517    /// Set significance level.
518    #[must_use]
519    pub fn with_alpha(mut self, alpha: f64) -> Self {
520        self.alpha = alpha.clamp(0.001, 0.1);
521        self
522    }
523
524    /// Set expected value under H₀.
525    #[must_use]
526    pub fn with_expected(mut self, expected: f64) -> Self {
527        self.expected_value = expected;
528        self
529    }
530
531    /// Execute nullification test with a test function.
532    ///
533    /// The `test_fn` is called `n_runs` times and should return
534    /// the test statistic for each run.
535    pub fn execute<F>(&self, test_fn: F) -> NullificationResult
536    where
537        F: Fn() -> f64,
538    {
539        // Collect observations
540        let observations: Vec<f64> = (0..self.n_runs).map(|_| test_fn()).collect();
541
542        // Compute bootstrap 95% CI
543        let ci = self.bootstrap_ci(&observations);
544
545        // Compute p-value and effect size
546        let (p_value, effect_size) = self.compute_stats(&observations);
547
548        NullificationResult {
549            hypothesis_id: self.hypothesis_id.clone(),
550            rejected: p_value < self.alpha,
551            p_value,
552            confidence_interval: ci,
553            effect_size,
554            observations,
555        }
556    }
557
558    /// Execute nullification test with provided observations.
559    #[must_use]
560    pub fn evaluate(&self, observations: &[f64]) -> NullificationResult {
561        let ci = self.bootstrap_ci(observations);
562        let (p_value, effect_size) = self.compute_stats(observations);
563
564        NullificationResult {
565            hypothesis_id: self.hypothesis_id.clone(),
566            rejected: p_value < self.alpha,
567            p_value,
568            confidence_interval: ci,
569            effect_size,
570            observations: observations.to_vec(),
571        }
572    }
573
574    /// Compute bootstrap confidence interval.
575    fn bootstrap_ci(&self, observations: &[f64]) -> (f64, f64) {
576        if observations.is_empty() {
577            return (0.0, 0.0);
578        }
579
580        // Simple bootstrap using resampling
581        let n = observations.len();
582        let mut bootstrap_means = Vec::with_capacity(self.bootstrap_samples.min(1000));
583
584        // Use a simple LCG for bootstrap sampling (deterministic)
585        let mut lcg_state = 12345_u64;
586        for _ in 0..self.bootstrap_samples.min(1000) {
587            let mut sum = 0.0;
588            for _ in 0..n {
589                // LCG: next = (a * state + c) mod m
590                lcg_state = lcg_state
591                    .wrapping_mul(6_364_136_223_846_793_005)
592                    .wrapping_add(1);
593                let idx = (lcg_state as usize) % n;
594                sum += observations[idx];
595            }
596            bootstrap_means.push(sum / n as f64);
597        }
598
599        bootstrap_means.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
600
601        // 95% CI: 2.5th and 97.5th percentiles
602        let lower_idx = (bootstrap_means.len() as f64 * 0.025) as usize;
603        let upper_idx = (bootstrap_means.len() as f64 * 0.975) as usize;
604
605        let lower = bootstrap_means.get(lower_idx).copied().unwrap_or(0.0);
606        let upper = bootstrap_means
607            .get(upper_idx.min(bootstrap_means.len() - 1))
608            .copied()
609            .unwrap_or(0.0);
610
611        (lower, upper)
612    }
613
614    /// Compute p-value and effect size.
615    fn compute_stats(&self, observations: &[f64]) -> (f64, f64) {
616        if observations.len() < 2 {
617            return (1.0, 0.0);
618        }
619
620        let mean = stats::mean(observations);
621        let std = stats::std_dev(observations);
622        let n = observations.len() as f64;
623
624        if std < f64::EPSILON {
625            // No variance
626            let p_value = if (mean - self.expected_value).abs() < f64::EPSILON {
627                1.0 // Exactly at expected
628            } else {
629                0.0 // Different from expected
630            };
631            return (p_value, 0.0);
632        }
633
634        // t-statistic
635        let t_stat = (mean - self.expected_value) / (std / n.sqrt());
636
637        // Approximate two-tailed p-value
638        let p_value = 2.0 * (1.0 - stats::normal_cdf_pub(t_stat.abs()));
639
640        // Cohen's d
641        let effect_size = (mean - self.expected_value).abs() / std;
642
643        (p_value.clamp(0.0, 1.0), effect_size)
644    }
645}
646
/// Report containing multiple nullification test results.
///
/// Results are kept in the order they were added; `summary` holds the
/// running counts and overall status for them.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct NullificationReport {
    /// Individual test results.
    pub results: Vec<NullificationResult>,
    /// Summary statistics.
    pub summary: ReportSummary,
}
655
/// Summary of nullification report.
///
/// The derived `Default` starts with all counts at zero and an empty status
/// string.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ReportSummary {
    /// Total number of tests.
    pub total_tests: usize,
    /// Number of rejected hypotheses.
    pub rejected: usize,
    /// Number of not-rejected hypotheses.
    pub not_rejected: usize,
    /// Overall status.
    pub status: String,
}
668
669impl NullificationReport {
670    /// Create new empty report.
671    #[must_use]
672    pub fn new() -> Self {
673        Self::default()
674    }
675
676    /// Add a test result.
677    pub fn add(&mut self, result: NullificationResult) {
678        if result.rejected {
679            self.summary.rejected += 1;
680        } else {
681            self.summary.not_rejected += 1;
682        }
683        self.summary.total_tests += 1;
684        self.results.push(result);
685
686        // Update status
687        self.summary.status = if self.summary.rejected == 0 {
688            "PASS".to_string()
689        } else if self.summary.rejected == self.summary.total_tests {
690            "FAIL".to_string()
691        } else {
692            "PARTIAL_PASS".to_string()
693        };
694    }
695
696    /// Get full report as string.
697    #[must_use]
698    pub fn full_report(&self) -> String {
699        use std::fmt::Write;
700
701        let mut report = String::new();
702        report.push_str("=== Nullification Report ===\n\n");
703
704        for result in &self.results {
705            report.push_str(&result.report());
706            report.push('\n');
707        }
708
709        let _ = write!(
710            report,
711            "\nSummary: {} tests, {} rejected, {} not rejected\n",
712            self.summary.total_tests, self.summary.rejected, self.summary.not_rejected
713        );
714        let _ = writeln!(report, "Status: {}", self.summary.status);
715
716        report
717    }
718}
719
720/// ML-specific nullification hypotheses.
721pub mod ml_hypotheses {
722    use super::NullificationTest;
723
724    /// H₀-TRAIN-01: Training converges identically across runs with same seed.
725    #[must_use]
726    pub fn training_determinism() -> NullificationTest {
727        NullificationTest::new("H0-TRAIN-01").with_expected(0.0) // Expected bitwise difference = 0
728    }
729
730    /// H₀-TRAIN-02: Loss monotonically decreases (no spikes).
731    #[must_use]
732    pub fn loss_stability(threshold_sigma: f64) -> NullificationTest {
733        NullificationTest::new("H0-TRAIN-02").with_expected(threshold_sigma) // Z-score threshold
734    }
735
736    /// H₀-TRAIN-03: Gradient norms remain bounded.
737    #[must_use]
738    pub fn gradient_bounded(max_norm: f64) -> NullificationTest {
739        NullificationTest::new("H0-TRAIN-03").with_expected(max_norm)
740    }
741
742    /// H₀-TRAIN-05: Model parameters remain finite.
743    #[must_use]
744    pub fn params_finite() -> NullificationTest {
745        NullificationTest::new("H0-TRAIN-05").with_expected(0.0) // Expected NaN count = 0
746    }
747
748    /// H₀-PRED-01: Predictions are deterministic for temperature=0.
749    #[must_use]
750    pub fn prediction_determinism() -> NullificationTest {
751        NullificationTest::new("H0-PRED-01").with_expected(0.0) // Expected variance = 0
752    }
753
754    /// H₀-PRED-03: Latency within specified SLA.
755    #[must_use]
756    pub fn latency_sla(sla_ms: f64) -> NullificationTest {
757        NullificationTest::new("H0-PRED-03").with_expected(sla_ms)
758    }
759
760    /// H₀-MULTI-01: Accuracy >= oracle baseline.
761    #[must_use]
762    pub fn accuracy_baseline(baseline: f64) -> NullificationTest {
763        NullificationTest::new("H0-MULTI-01").with_expected(baseline)
764    }
765
766    /// H₀-MULTI-05: Results are statistically significant (5 runs, t-test).
767    #[must_use]
768    pub fn statistical_significance() -> NullificationTest {
769        NullificationTest::new("H0-MULTI-05")
770            .with_runs(5)
771            .with_alpha(0.05)
772    }
773}
774
775// ============================================================================
776// Energy Conservation Hypothesis
777// ============================================================================
778
779/// Energy conservation hypothesis.
780///
781/// Tests whether total energy is conserved within tolerance.
782#[derive(Debug, Clone)]
783pub struct EnergyConservationHypothesis {
784    /// Initial energy.
785    pub initial_energy: f64,
786    /// Tolerance for energy drift.
787    pub tolerance: f64,
788}
789
790impl EnergyConservationHypothesis {
791    /// Create a new energy conservation hypothesis.
792    #[must_use]
793    pub fn new(initial_energy: f64, tolerance: f64) -> Self {
794        Self {
795            initial_energy,
796            tolerance,
797        }
798    }
799}
800
801impl FalsifiableHypothesis for EnergyConservationHypothesis {
802    type State = f64; // Current energy
803
804    fn predict(&self, _state: &Self::State) -> Predictions {
805        let mut predictions = Predictions::new();
806        predictions.add(self.initial_energy, self.tolerance);
807        predictions
808    }
809
810    fn falsification_criteria(&self) -> Vec<FalsificationCriteria> {
811        vec![FalsificationCriteria::less_than(
812            "energy_drift",
813            self.tolerance,
814        )]
815    }
816
817    fn robustness(&self, state: &Self::State) -> f64 {
818        let drift =
819            (state - self.initial_energy).abs() / self.initial_energy.abs().max(f64::EPSILON);
820        self.tolerance - drift
821    }
822
823    fn null_hypothesis_test(
824        &self,
825        predictions: &Predictions,
826        observations: &Observations,
827        significance: f64,
828    ) -> NHSTResult {
829        if predictions.is_empty() || observations.is_empty() {
830            return NHSTResult::NotRejected {
831                p_value: 1.0,
832                power: 0.0,
833            };
834        }
835
836        stats::two_sample_t_test(&predictions.values, &observations.values, significance)
837    }
838}
839
840#[cfg(test)]
841mod tests {
842    use super::stats::*;
843    use super::*;
844
845    // -------------------------------------------------------------------------
846    // NullificationTest Tests
847    // -------------------------------------------------------------------------
848
849    #[test]
850    fn test_nullification_test_new() {
851        let test = NullificationTest::new("H0-TEST-01");
852        assert_eq!(test.hypothesis_id, "H0-TEST-01");
853        assert_eq!(test.n_runs, 5);
854        assert!((test.alpha - 0.05).abs() < f64::EPSILON);
855    }
856
857    #[test]
858    fn test_nullification_test_with_runs() {
859        let test = NullificationTest::new("H0-TEST").with_runs(10);
860        assert_eq!(test.n_runs, 10);
861
862        // Should enforce minimum of 5
863        let test = NullificationTest::new("H0-TEST").with_runs(3);
864        assert_eq!(test.n_runs, 5);
865    }
866
867    #[test]
868    fn test_nullification_test_execute() {
869        let test = NullificationTest::new("H0-TEST").with_expected(0.0);
870        let result = test.execute(|| 0.0);
871        assert!(!result.rejected); // All zeros = no difference from expected
872    }
873
874    #[test]
875    fn test_nullification_test_execute_reject() {
876        let test = NullificationTest::new("H0-TEST").with_expected(0.0);
877        let result = test.execute(|| 100.0); // All 100s, clearly different from 0
878        assert!(result.rejected);
879    }
880
881    #[test]
882    fn test_nullification_test_evaluate() {
883        let test = NullificationTest::new("H0-TEST").with_expected(0.0);
884        let observations = vec![0.1, 0.05, -0.05, 0.02, -0.01];
885        let result = test.evaluate(&observations);
886        assert!(!result.rejected); // Small values close to 0
887    }
888
889    #[test]
890    fn test_nullification_result_report() {
891        let result = NullificationResult {
892            hypothesis_id: "H0-TEST".to_string(),
893            rejected: true,
894            p_value: 0.01,
895            confidence_interval: (0.5, 1.5),
896            effect_size: 0.8,
897            observations: vec![1.0],
898        };
899        let report = result.report();
900        assert!(report.contains("H0-TEST"));
901        assert!(report.contains("REJECTED"));
902        assert!(report.contains("0.0100"));
903    }
904
905    #[test]
906    fn test_nullification_report() {
907        let mut report = NullificationReport::new();
908
909        report.add(NullificationResult {
910            hypothesis_id: "H0-1".to_string(),
911            rejected: false,
912            p_value: 0.1,
913            confidence_interval: (0.0, 1.0),
914            effect_size: 0.1,
915            observations: vec![],
916        });
917
918        report.add(NullificationResult {
919            hypothesis_id: "H0-2".to_string(),
920            rejected: true,
921            p_value: 0.01,
922            confidence_interval: (0.0, 1.0),
923            effect_size: 0.8,
924            observations: vec![],
925        });
926
927        assert_eq!(report.summary.total_tests, 2);
928        assert_eq!(report.summary.rejected, 1);
929        assert_eq!(report.summary.not_rejected, 1);
930        assert_eq!(report.summary.status, "PARTIAL_PASS");
931    }
932
933    #[test]
934    fn test_nullification_report_full() {
935        let mut report = NullificationReport::new();
936        report.add(NullificationResult {
937            hypothesis_id: "H0-PASS".to_string(),
938            rejected: false,
939            p_value: 0.5,
940            confidence_interval: (0.0, 1.0),
941            effect_size: 0.1,
942            observations: vec![1.0, 2.0],
943        });
944
945        let text = report.full_report();
946        assert!(text.contains("Nullification Report"));
947        assert!(text.contains("H0-PASS"));
948        assert!(text.contains("NOT REJECTED"));
949    }
950
951    #[test]
952    fn test_ml_hypotheses() {
953        let test = ml_hypotheses::training_determinism();
954        assert_eq!(test.hypothesis_id, "H0-TRAIN-01");
955
956        let test = ml_hypotheses::loss_stability(3.0);
957        assert_eq!(test.hypothesis_id, "H0-TRAIN-02");
958
959        let test = ml_hypotheses::prediction_determinism();
960        assert_eq!(test.hypothesis_id, "H0-PRED-01");
961
962        let test = ml_hypotheses::accuracy_baseline(0.9);
963        assert!((test.expected_value - 0.9).abs() < f64::EPSILON);
964    }
965
966    // -------------------------------------------------------------------------
967    // Original Tests
968    // -------------------------------------------------------------------------
969
970    #[test]
971    fn test_nhst_result() {
972        let rejected = NHSTResult::Rejected {
973            p_value: 0.01,
974            effect_size: 0.5,
975            test_statistic: 2.5,
976        };
977        assert!(rejected.is_rejected());
978        assert!((rejected.p_value() - 0.01).abs() < f64::EPSILON);
979
980        let not_rejected = NHSTResult::NotRejected {
981            p_value: 0.1,
982            power: 0.8,
983        };
984        assert!(!not_rejected.is_rejected());
985    }
986
987    #[test]
988    fn test_predictions_observations() {
989        let mut pred = Predictions::new();
990        pred.add(1.0, 0.1);
991        pred.add(2.0, 0.2);
992        assert_eq!(pred.len(), 2);
993        assert!(!pred.is_empty());
994
995        let mut obs = Observations::new();
996        obs.add(1.1, 0.05);
997        assert_eq!(obs.len(), 1);
998    }
999
1000    #[test]
1001    fn test_falsification_criteria() {
1002        let crit = FalsificationCriteria::less_than("energy_drift", 0.01);
1003        assert!(crit.check(0.005));
1004        assert!(!crit.check(0.02));
1005
1006        let crit = FalsificationCriteria::greater_than("power", 0.8);
1007        assert!(crit.check(0.9));
1008        assert!(!crit.check(0.7));
1009    }
1010
1011    #[test]
1012    fn test_stats_mean() {
1013        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
1014        assert!((mean(&values) - 3.0).abs() < f64::EPSILON);
1015
1016        let empty: Vec<f64> = vec![];
1017        assert!((mean(&empty) - 0.0).abs() < f64::EPSILON);
1018    }
1019
1020    #[test]
1021    fn test_stats_variance() {
1022        let values = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
1023        let var = variance(&values);
1024        assert!((var - 4.571_428_571_428_571).abs() < 0.001);
1025    }
1026
1027    #[test]
1028    fn test_stats_std_dev() {
1029        let values = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
1030        let sd = std_dev(&values);
1031        assert!((sd - 2.138).abs() < 0.01);
1032    }
1033
1034    #[test]
1035    fn test_one_sample_t_test() {
1036        // Test with values clearly different from null hypothesis
1037        let values = vec![5.0, 5.1, 4.9, 5.2, 4.8, 5.0, 5.1, 4.9];
1038        let result = one_sample_t_test(&values, 0.0, 0.05);
1039        assert!(result.is_rejected());
1040
1041        // Test with values close to null hypothesis
1042        let values = vec![0.1, -0.1, 0.05, -0.05, 0.0, 0.02, -0.02, 0.01];
1043        let result = one_sample_t_test(&values, 0.0, 0.05);
1044        assert!(!result.is_rejected());
1045    }
1046
1047    #[test]
1048    fn test_two_sample_t_test() {
1049        // Clearly different samples
1050        let sample1 = vec![10.0, 11.0, 12.0, 10.5, 11.5];
1051        let sample2 = vec![1.0, 2.0, 1.5, 2.5, 1.2];
1052        let result = two_sample_t_test(&sample1, &sample2, 0.05);
1053        assert!(result.is_rejected());
1054
1055        // Similar samples
1056        let sample1 = vec![5.0, 5.1, 4.9, 5.2, 4.8];
1057        let sample2 = vec![5.05, 5.15, 4.95, 5.1, 4.85];
1058        let result = two_sample_t_test(&sample1, &sample2, 0.05);
1059        assert!(!result.is_rejected());
1060    }
1061
1062    #[test]
1063    fn test_energy_conservation_hypothesis() {
1064        let hypothesis = EnergyConservationHypothesis::new(100.0, 0.01);
1065
1066        // Test robustness
1067        let robustness_good = hypothesis.robustness(&100.5); // 0.5% drift
1068        assert!(robustness_good > 0.0);
1069
1070        let robustness_bad = hypothesis.robustness(&110.0); // 10% drift
1071        assert!(robustness_bad < 0.0);
1072
1073        // Test falsification criteria
1074        let criteria = hypothesis.falsification_criteria();
1075        assert_eq!(criteria.len(), 1);
1076        assert_eq!(criteria[0].metric, "energy_drift");
1077    }
1078
1079    // -------------------------------------------------------------------------
1080    // Additional Tests for Coverage
1081    // -------------------------------------------------------------------------
1082
1083    #[test]
1084    fn test_falsification_criteria_all_operators() {
1085        // Test Le (less than or equal)
1086        let crit = FalsificationCriteria {
1087            metric: "test".to_string(),
1088            operator: ComparisonOp::Le,
1089            threshold: 10.0,
1090        };
1091        assert!(crit.check(9.0));
1092        assert!(crit.check(10.0));
1093        assert!(!crit.check(11.0));
1094
1095        // Test Ge (greater than or equal)
1096        let crit = FalsificationCriteria {
1097            metric: "test".to_string(),
1098            operator: ComparisonOp::Ge,
1099            threshold: 10.0,
1100        };
1101        assert!(!crit.check(9.0));
1102        assert!(crit.check(10.0));
1103        assert!(crit.check(11.0));
1104
1105        // Test Eq (equal within epsilon)
1106        let crit = FalsificationCriteria {
1107            metric: "test".to_string(),
1108            operator: ComparisonOp::Eq,
1109            threshold: 10.0,
1110        };
1111        assert!(crit.check(10.0));
1112        assert!(!crit.check(10.1));
1113
1114        // Test Ne (not equal)
1115        let crit = FalsificationCriteria {
1116            metric: "test".to_string(),
1117            operator: ComparisonOp::Ne,
1118            threshold: 10.0,
1119        };
1120        assert!(!crit.check(10.0));
1121        assert!(crit.check(10.1));
1122    }
1123
1124    #[test]
1125    fn test_stats_std_error() {
1126        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
1127        let se = std_error(&values);
1128        assert!(se > 0.0);
1129
1130        let empty: Vec<f64> = vec![];
1131        assert!((std_error(&empty) - 0.0).abs() < f64::EPSILON);
1132    }
1133
1134    #[test]
1135    fn test_stats_chi_square_test() {
1136        // Test chi-square with clearly different distributions
1137        let observed = vec![10.0, 20.0, 30.0, 40.0];
1138        let expected = vec![25.0, 25.0, 25.0, 25.0];
1139        let result = chi_square_test(&observed, &expected, 0.05);
1140        assert!(result.is_rejected());
1141
1142        // Test chi-square with similar distributions
1143        let observed = vec![24.0, 26.0, 24.0, 26.0];
1144        let expected = vec![25.0, 25.0, 25.0, 25.0];
1145        let result = chi_square_test(&observed, &expected, 0.05);
1146        assert!(!result.is_rejected());
1147    }
1148
1149    #[test]
1150    fn test_chi_square_test_edge_cases() {
1151        // Mismatched lengths
1152        let result = chi_square_test(&[1.0, 2.0], &[1.0], 0.05);
1153        assert!(!result.is_rejected());
1154        assert!((result.p_value() - 1.0).abs() < f64::EPSILON);
1155
1156        // Empty arrays
1157        let result = chi_square_test(&[], &[], 0.05);
1158        assert!(!result.is_rejected());
1159    }
1160
1161    #[test]
1162    fn test_stats_normal_cdf_pub() {
1163        // Test at various points
1164        assert!((normal_cdf_pub(0.0) - 0.5).abs() < 0.01);
1165        assert!(normal_cdf_pub(3.0) > 0.99);
1166        assert!(normal_cdf_pub(-3.0) < 0.01);
1167    }
1168
1169    #[test]
1170    fn test_ml_hypotheses_all() {
1171        let test = ml_hypotheses::gradient_bounded(10.0);
1172        assert_eq!(test.hypothesis_id, "H0-TRAIN-03");
1173        assert!((test.expected_value - 10.0).abs() < f64::EPSILON);
1174
1175        let test = ml_hypotheses::params_finite();
1176        assert_eq!(test.hypothesis_id, "H0-TRAIN-05");
1177
1178        let test = ml_hypotheses::latency_sla(100.0);
1179        assert_eq!(test.hypothesis_id, "H0-PRED-03");
1180        assert!((test.expected_value - 100.0).abs() < f64::EPSILON);
1181
1182        let test = ml_hypotheses::statistical_significance();
1183        assert_eq!(test.hypothesis_id, "H0-MULTI-05");
1184        assert_eq!(test.n_runs, 5);
1185        assert!((test.alpha - 0.05).abs() < f64::EPSILON);
1186    }
1187
1188    #[test]
1189    fn test_nullification_test_with_alpha() {
1190        let test = NullificationTest::new("H0-TEST").with_alpha(0.01);
1191        assert!((test.alpha - 0.01).abs() < f64::EPSILON);
1192
1193        // Test clamping
1194        let test = NullificationTest::new("H0-TEST").with_alpha(0.0001); // Below minimum
1195        assert!((test.alpha - 0.001).abs() < f64::EPSILON);
1196
1197        let test = NullificationTest::new("H0-TEST").with_alpha(0.5); // Above maximum
1198        assert!((test.alpha - 0.1).abs() < f64::EPSILON);
1199    }
1200
1201    #[test]
1202    fn test_nullification_report_all_pass() {
1203        let mut report = NullificationReport::new();
1204        report.add(NullificationResult {
1205            hypothesis_id: "H0-1".to_string(),
1206            rejected: false,
1207            p_value: 0.5,
1208            confidence_interval: (0.0, 1.0),
1209            effect_size: 0.1,
1210            observations: vec![],
1211        });
1212        assert_eq!(report.summary.status, "PASS");
1213    }
1214
1215    #[test]
1216    fn test_nullification_report_all_fail() {
1217        let mut report = NullificationReport::new();
1218        report.add(NullificationResult {
1219            hypothesis_id: "H0-1".to_string(),
1220            rejected: true,
1221            p_value: 0.01,
1222            confidence_interval: (0.0, 1.0),
1223            effect_size: 0.8,
1224            observations: vec![],
1225        });
1226        assert_eq!(report.summary.status, "FAIL");
1227    }
1228
1229    #[test]
1230    fn test_bootstrap_ci_empty() {
1231        let test = NullificationTest::new("H0-TEST");
1232        let result = test.evaluate(&[]);
1233        assert!((result.confidence_interval.0 - 0.0).abs() < f64::EPSILON);
1234        assert!((result.confidence_interval.1 - 0.0).abs() < f64::EPSILON);
1235    }
1236
1237    #[test]
1238    fn test_compute_stats_single_value() {
1239        let test = NullificationTest::new("H0-TEST").with_expected(0.0);
1240        let result = test.evaluate(&[1.0]);
1241        assert!((result.p_value - 1.0).abs() < f64::EPSILON);
1242    }
1243
1244    #[test]
1245    fn test_compute_stats_no_variance() {
1246        let test = NullificationTest::new("H0-TEST").with_expected(5.0);
1247        let result = test.evaluate(&[5.0, 5.0, 5.0, 5.0, 5.0]);
1248        // All same values = expected, p_value should be 1.0
1249        assert!((result.p_value - 1.0).abs() < f64::EPSILON);
1250
1251        // Different from expected, all same values
1252        let test = NullificationTest::new("H0-TEST").with_expected(0.0);
1253        let result = test.evaluate(&[10.0, 10.0, 10.0, 10.0, 10.0]);
1254        assert!((result.p_value - 0.0).abs() < f64::EPSILON);
1255    }
1256
1257    #[test]
1258    fn test_one_sample_t_test_edge_cases() {
1259        // Single value
1260        let result = one_sample_t_test(&[5.0], 0.0, 0.05);
1261        assert!(!result.is_rejected());
1262        assert!((result.p_value() - 1.0).abs() < f64::EPSILON);
1263
1264        // No variance
1265        let result = one_sample_t_test(&[5.0, 5.0, 5.0, 5.0, 5.0], 5.0, 0.05);
1266        assert!(!result.is_rejected());
1267    }
1268
1269    #[test]
1270    fn test_two_sample_t_test_edge_cases() {
1271        // Single value in each sample
1272        let result = two_sample_t_test(&[5.0], &[10.0], 0.05);
1273        assert!(!result.is_rejected());
1274
1275        // No variance in both samples (different values)
1276        let result = two_sample_t_test(
1277            &[5.0, 5.0, 5.0, 5.0, 5.0],
1278            &[10.0, 10.0, 10.0, 10.0, 10.0],
1279            0.05,
1280        );
1281        assert!(!result.is_rejected()); // se is near zero
1282    }
1283
1284    #[test]
1285    fn test_energy_conservation_hypothesis_predict() {
1286        let hypothesis = EnergyConservationHypothesis::new(100.0, 0.01);
1287        let predictions = hypothesis.predict(&100.0);
1288        assert_eq!(predictions.len(), 1);
1289        assert!((predictions.values[0] - 100.0).abs() < f64::EPSILON);
1290    }
1291
1292    #[test]
1293    fn test_energy_conservation_hypothesis_null_test() {
1294        let hypothesis = EnergyConservationHypothesis::new(100.0, 0.01);
1295
1296        // Empty predictions
1297        let pred = Predictions::new();
1298        let obs = Observations::new();
1299        let result = hypothesis.null_hypothesis_test(&pred, &obs, 0.05);
1300        assert!(!result.is_rejected());
1301
1302        // With data
1303        let mut pred = Predictions::new();
1304        pred.add(100.0, 0.1);
1305        pred.add(100.0, 0.1);
1306        pred.add(100.0, 0.1);
1307
1308        let mut obs = Observations::new();
1309        obs.add(100.5, 0.1);
1310        obs.add(99.5, 0.1);
1311        obs.add(100.2, 0.1);
1312
1313        let result = hypothesis.null_hypothesis_test(&pred, &obs, 0.05);
1314        // Similar values, shouldn't reject
1315        assert!(!result.is_rejected());
1316    }
1317
1318    #[test]
1319    fn test_predictions_default() {
1320        let pred = Predictions::default();
1321        assert!(pred.is_empty());
1322        assert_eq!(pred.len(), 0);
1323    }
1324
1325    #[test]
1326    fn test_observations_default() {
1327        let obs = Observations::default();
1328        assert!(obs.is_empty());
1329        assert_eq!(obs.len(), 0);
1330    }
1331
1332    #[test]
1333    fn test_falsification_criteria_debug() {
1334        let crit = FalsificationCriteria::less_than("test", 0.01);
1335        let debug = format!("{:?}", crit);
1336        assert!(debug.contains("test"));
1337    }
1338
1339    #[test]
1340    fn test_comparison_op_eq() {
1341        assert_eq!(ComparisonOp::Lt, ComparisonOp::Lt);
1342        assert_ne!(ComparisonOp::Lt, ComparisonOp::Gt);
1343    }
1344
1345    #[test]
1346    fn test_nhst_result_clone() {
1347        let result = NHSTResult::Rejected {
1348            p_value: 0.01,
1349            effect_size: 0.5,
1350            test_statistic: 2.5,
1351        };
1352        let cloned = result.clone();
1353        assert!(cloned.is_rejected());
1354    }
1355
1356    #[test]
1357    fn test_nullification_result_clone() {
1358        let result = NullificationResult {
1359            hypothesis_id: "H0-TEST".to_string(),
1360            rejected: true,
1361            p_value: 0.01,
1362            confidence_interval: (0.5, 1.5),
1363            effect_size: 0.8,
1364            observations: vec![1.0, 2.0],
1365        };
1366        let cloned = result.clone();
1367        assert_eq!(cloned.hypothesis_id, "H0-TEST");
1368        assert!(cloned.rejected);
1369    }
1370
1371    #[test]
1372    fn test_energy_conservation_clone() {
1373        let hypothesis = EnergyConservationHypothesis::new(100.0, 0.01);
1374        let cloned = hypothesis.clone();
1375        assert!((cloned.initial_energy - 100.0).abs() < f64::EPSILON);
1376    }
1377}
1378
/// Property-based tests for the statistics helpers and the energy
/// conservation hypothesis.
#[cfg(test)]
mod proptests {
    use super::stats::*;
    use super::*;
    use proptest::prelude::*;

    proptest! {
        /// Falsification: a sample made of one repeated value has that value as its mean.
        #[test]
        fn prop_mean_constant(value in -1000.0f64..1000.0, n in 1usize..100) {
            let repeated = vec![value; n];
            let average = mean(&repeated);
            prop_assert!((average - value).abs() < 1e-10);
        }

        /// Falsification: a sample made of one repeated value has zero variance.
        #[test]
        fn prop_variance_constant(value in -1000.0f64..1000.0, n in 2usize..100) {
            let repeated = vec![value; n];
            let spread = variance(&repeated);
            prop_assert!(spread.abs() < 1e-10);
        }

        /// Falsification: robustness is positive while the drift stays below tolerance.
        #[test]
        fn prop_robustness_sign(
            initial in 1.0f64..1000.0,
            tolerance in 0.001f64..0.1,
            drift_factor in 0.0f64..0.2,
        ) {
            let conservation = EnergyConservationHypothesis::new(initial, tolerance);
            let drifted_energy = initial * (1.0 + drift_factor);
            let margin = conservation.robustness(&drifted_energy);

            // Only the satisfying side is checked: at drift_factor == tolerance
            // the robustness is approximately zero, and larger drifts are not
            // asserted here.
            if drift_factor < tolerance {
                prop_assert!(margin > 0.0,
                    "Expected positive robustness for drift {} < tolerance {}",
                    drift_factor, tolerance);
            }
        }
    }
}