jugar_probar/coverage/
hypotheses.rs

//! Coverage Hypotheses for Popperian Falsification
//!
//! Per spec §6: Popperian Falsification Methodology
//!
//! Following Popper, every coverage claim must be falsifiable:
//! "A theory is scientific if and only if there exists some observation
//! that could refute it."
8
/// Settings that control how a nullification test is carried out.
#[derive(Debug, Clone)]
pub struct NullificationConfig {
    /// How many independent runs to perform (Princeton methodology: minimum 5)
    pub runs: usize,
    /// Significance level α (0.05 is the standard choice)
    pub alpha: f64,
}

impl NullificationConfig {
    /// Build a configuration from an explicit run count and significance level.
    ///
    /// The values are stored as given; no validation is performed.
    #[must_use]
    pub fn new(runs: usize, alpha: f64) -> Self {
        NullificationConfig { runs, alpha }
    }

    /// Princeton-standard configuration: 5 runs at α = 0.05.
    #[must_use]
    pub fn princeton() -> Self {
        Self::new(5, 0.05)
    }
}

impl Default for NullificationConfig {
    /// The default configuration is the Princeton standard.
    fn default() -> Self {
        NullificationConfig::princeton()
    }
}
40
/// Outcome of testing one hypothesis against observed data.
#[derive(Debug, Clone)]
pub struct NullificationResult {
    /// Identifier of the hypothesis that was tested (e.g., "H0-COV-01")
    pub hypothesis_name: String,
    /// `true` when the hypothesis was rejected
    pub rejected: bool,
    /// p-value produced by the statistical test
    pub p_value: f64,
    /// Effect size (Cohen's d)
    pub effect_size: f64,
    /// 95% confidence interval as `(lower, upper)`
    pub confidence_interval: (f64, f64),
}

impl NullificationResult {
    /// Returns `true` when the p-value is strictly below the fixed α = 0.05 level.
    #[must_use]
    pub fn is_significant(&self) -> bool {
        self.p_value < 0.05
    }

    /// Render the result as a single human-readable line.
    ///
    /// The line contains the hypothesis name, the verdict, the p-value
    /// (3 decimals), the confidence interval bounds (1 decimal each) and the
    /// effect size (2 decimals).
    #[must_use]
    pub fn report(&self) -> String {
        let (ci_lower, ci_upper) = self.confidence_interval;
        let verdict = if self.rejected { "REJECTED" } else { "NOT REJECTED" };

        format!(
            "{}: {} (p={:.3}, 95% CI [{:.1}, {:.1}], d={:.2})",
            self.hypothesis_name, verdict, self.p_value, ci_lower, ci_upper, self.effect_size
        )
    }
}
82
83/// Coverage hypothesis types
84#[derive(Debug, Clone)]
85pub enum CoverageHypothesis {
86    /// H₀-COV-01: Coverage is deterministic across runs
87    Determinism,
88    /// H₀-COV-02: All reachable blocks are covered (threshold %)
89    Completeness {
90        /// Expected coverage percentage
91        threshold: f64,
92    },
93    /// H₀-COV-03: No coverage regression from baseline
94    NoRegression {
95        /// Baseline coverage percentage
96        baseline: f64,
97    },
98    /// H₀-COV-04: Coverage correlates with mutation score
99    MutationCorrelation {
100        /// Expected correlation coefficient
101        expected_r: f64,
102    },
103}
104
105impl CoverageHypothesis {
106    /// Create a determinism hypothesis
107    #[must_use]
108    pub fn determinism() -> Self {
109        Self::Determinism
110    }
111
112    /// Create a completeness hypothesis
113    #[must_use]
114    pub fn completeness(threshold: f64) -> Self {
115        Self::Completeness { threshold }
116    }
117
118    /// Create a no-regression hypothesis
119    #[must_use]
120    pub fn no_regression(baseline: f64) -> Self {
121        Self::NoRegression { baseline }
122    }
123
124    /// Create a mutation correlation hypothesis
125    #[must_use]
126    pub fn mutation_correlation(expected_r: f64) -> Self {
127        Self::MutationCorrelation { expected_r }
128    }
129
130    /// Get the hypothesis name
131    #[must_use]
132    pub fn name(&self) -> &'static str {
133        match self {
134            Self::Determinism => "H0-COV-01",
135            Self::Completeness { .. } => "H0-COV-02",
136            Self::NoRegression { .. } => "H0-COV-03",
137            Self::MutationCorrelation { .. } => "H0-COV-04",
138        }
139    }
140
141    /// Evaluate the hypothesis against observed data
142    ///
143    /// Returns a nullification result indicating whether the hypothesis
144    /// should be rejected.
145    #[must_use]
146    pub fn evaluate(&self, observations: &[f64]) -> NullificationResult {
147        if observations.is_empty() {
148            return NullificationResult {
149                hypothesis_name: self.name().to_string(),
150                rejected: true,
151                p_value: 0.0,
152                effect_size: f64::INFINITY,
153                confidence_interval: (0.0, 0.0),
154            };
155        }
156
157        match self {
158            Self::Determinism => self.evaluate_determinism(observations),
159            Self::Completeness { threshold } => {
160                self.evaluate_completeness(observations, *threshold)
161            }
162            Self::NoRegression { baseline } => self.evaluate_no_regression(observations, *baseline),
163            Self::MutationCorrelation { expected_r } => {
164                self.evaluate_mutation_correlation(observations, *expected_r)
165            }
166        }
167    }
168
169    /// Evaluate determinism: variance should be zero
170    fn evaluate_determinism(&self, observations: &[f64]) -> NullificationResult {
171        let mean = observations.iter().sum::<f64>() / observations.len() as f64;
172        let variance = observations.iter().map(|x| (x - mean).powi(2)).sum::<f64>()
173            / observations.len() as f64;
174
175        // Reject if variance is significantly different from zero
176        let rejected = variance > 0.01; // Tolerance for floating point
177        let p_value = if rejected { 0.01 } else { 0.5 };
178
179        NullificationResult {
180            hypothesis_name: self.name().to_string(),
181            rejected,
182            p_value,
183            effect_size: variance.sqrt(),
184            confidence_interval: (mean - 2.0 * variance.sqrt(), mean + 2.0 * variance.sqrt()),
185        }
186    }
187
188    /// Evaluate completeness: mean should exceed threshold
189    fn evaluate_completeness(&self, observations: &[f64], threshold: f64) -> NullificationResult {
190        let mean = observations.iter().sum::<f64>() / observations.len() as f64;
191        let std_dev = (observations.iter().map(|x| (x - mean).powi(2)).sum::<f64>()
192            / observations.len() as f64)
193            .sqrt();
194
195        // One-sample t-test against threshold
196        let t_stat = (mean - threshold) / (std_dev / (observations.len() as f64).sqrt());
197
198        // Simplified p-value calculation (reject if mean < threshold significantly)
199        let rejected = mean < threshold;
200        let p_value = if rejected { 0.01 } else { 0.5 };
201
202        let margin = 1.96 * std_dev / (observations.len() as f64).sqrt();
203        NullificationResult {
204            hypothesis_name: self.name().to_string(),
205            rejected,
206            p_value,
207            effect_size: t_stat.abs(),
208            confidence_interval: (mean - margin, mean + margin),
209        }
210    }
211
212    /// Evaluate no regression: mean should be >= baseline
213    fn evaluate_no_regression(&self, observations: &[f64], baseline: f64) -> NullificationResult {
214        let mean = observations.iter().sum::<f64>() / observations.len() as f64;
215        let std_dev = (observations.iter().map(|x| (x - mean).powi(2)).sum::<f64>()
216            / observations.len() as f64)
217            .sqrt();
218
219        // Reject if mean is significantly below baseline
220        let rejected = mean < baseline;
221        let p_value = if rejected { 0.01 } else { 0.5 };
222
223        let effect_size = if std_dev > 0.0 {
224            (baseline - mean) / std_dev
225        } else {
226            0.0
227        };
228
229        let margin = 1.96 * std_dev / (observations.len() as f64).sqrt();
230        NullificationResult {
231            hypothesis_name: self.name().to_string(),
232            rejected,
233            p_value,
234            effect_size,
235            confidence_interval: (mean - margin, mean + margin),
236        }
237    }
238
239    /// Evaluate mutation correlation (simplified Pearson-r estimation)
240    ///
241    /// Uses coverage as a proxy for mutation correlation. A more sophisticated
242    /// implementation could integrate with cargo-mutants to calculate true
243    /// correlation between coverage and mutation kill rates.
244    fn evaluate_mutation_correlation(
245        &self,
246        observations: &[f64],
247        expected_r: f64,
248    ) -> NullificationResult {
249        // Simplified correlation estimation: coverage → mutation correlation
250        // Rationale: Higher coverage generally correlates with higher mutation kill rates
251        let mean = observations.iter().sum::<f64>() / observations.len() as f64;
252        let std_dev = (observations.iter().map(|x| (x - mean).powi(2)).sum::<f64>()
253            / observations.len() as f64)
254            .sqrt();
255
256        // Simplified: assume correlation is proportional to coverage
257        let estimated_r = mean / 100.0;
258        let rejected = estimated_r < expected_r;
259        let p_value = if rejected { 0.01 } else { 0.5 };
260
261        let margin = 1.96 * std_dev / (observations.len() as f64).sqrt();
262        NullificationResult {
263            hypothesis_name: self.name().to_string(),
264            rejected,
265            p_value,
266            effect_size: (expected_r - estimated_r).abs(),
267            confidence_interval: (mean - margin, mean + margin),
268        }
269    }
270}
jugar_probar/coverage/hypotheses.rs

jugar_probar/coverage/
hypotheses.rs