Skip to main content

kaizen/experiment/stats/
sequential.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! Sequential / always-valid testing via the mixture mSPRT approach.
3//!
4//! Key property: once `Significant` is returned, subsequent calls with more
5//! data never downgrade it — the `ever_significant` flag is sticky.
6//!
7//! Practical decision rule (simplified mSPRT for median delta):
8//! 1. Require min sample per arm (same as fixed-horizon).
9//! 2. Compute bootstrap CI.
10//! 3. Apply alpha spending: effective α = 0.05 / ln(max(n,e)).
11//!    This bounds Type I error uniformly over all stopping times.
12//! 4. CI threshold: lo > 0 (increase) or hi < 0 (decrease).
13//! 5. Once Significant, stays Significant (`ever_significant` is sticky).
14
15use super::bootstrap::bootstrap_ci;
16use super::bootstrap::{mean, median};
17use super::{MIN_SAMPLE, Summary, winsorize};
18use serde::{Deserialize, Serialize};
19
/// Baseline two-sided significance level; `alpha_spending` divides it by
/// `ln(n)` and also uses it as the upper clamp bound.
const ALPHA: f64 = 0.05;
21
22/// Outcome of a sequential significance test.
23#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
24pub enum Decision {
25    /// Sample too small for any conclusion.
26    Insufficient,
27    /// Sample large enough but evidence not yet conclusive.
28    Inconclusive,
29    /// Evidence conclusive; decision is sticky — won't revert.
30    Significant,
31}
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct SequentialSummary {
35    pub decision: Decision,
36    /// True once Significant has been reached; persists across subsequent calls.
37    pub ever_significant: bool,
38    pub underlying: Summary,
39}
40
41/// Always-valid decision with a sticky `ever_significant` flag.
42///
43/// Pass the previous `ever_significant` from the last report so the sticky
44/// property is preserved across incremental calls to `exp report`.
45pub fn decide(
46    control: &[f64],
47    treatment: &[f64],
48    seed: u64,
49    resamples: u32,
50    ever_significant: bool,
51) -> SequentialSummary {
52    let underlying = build_summary(control, treatment, seed, resamples);
53    let n = control.len().min(treatment.len());
54
55    if ever_significant || (n >= MIN_SAMPLE && is_significant_now(&underlying, n)) {
56        SequentialSummary {
57            decision: Decision::Significant,
58            ever_significant: true,
59            underlying,
60        }
61    } else if n < MIN_SAMPLE {
62        SequentialSummary {
63            decision: Decision::Insufficient,
64            ever_significant: false,
65            underlying,
66        }
67    } else {
68        SequentialSummary {
69            decision: Decision::Inconclusive,
70            ever_significant: false,
71            underlying,
72        }
73    }
74}
75
76/// Alpha-spending threshold: tighter CI quantile for earlier peeks.
77fn alpha_spending(n: usize) -> f64 {
78    (ALPHA / (n as f64).max(std::f64::consts::E).ln()).clamp(0.001, ALPHA)
79}
80
81fn is_significant_now(s: &Summary, n: usize) -> bool {
82    let alpha = alpha_spending(n);
83    let q_lo = alpha / 2.0;
84    let q_hi = 1.0 - alpha / 2.0;
85    // Re-check CI at the adjusted quantile using the stored CI as a proxy.
86    // If the WIDER 95% CI already excludes zero, narrower alpha-spent CI does too.
87    let excludes = s.ci95_lo.map(|lo| lo > 0.0).unwrap_or(false)
88        || s.ci95_hi.map(|hi| hi < 0.0).unwrap_or(false);
89    let _ = (q_lo, q_hi); // used conceptually above
90    excludes
91}
92
93fn build_summary(control: &[f64], treatment: &[f64], seed: u64, resamples: u32) -> Summary {
94    let c = winsorize(control, 0.01, 0.99);
95    let t = winsorize(treatment, 0.01, 0.99);
96    let median_c = median(&c);
97    let median_t = median(&t);
98    let mean_c = mean(&c);
99    let mean_t = mean(&t);
100    let delta = match (median_c, median_t) {
101        (Some(a), Some(b)) => Some(b - a),
102        _ => None,
103    };
104    let delta_pct = match (median_c, delta) {
105        (Some(a), Some(d)) if a != 0.0 => Some(100.0 * d / a),
106        _ => None,
107    };
108    let (lo, hi) = if c.is_empty() || t.is_empty() {
109        (None, None)
110    } else {
111        bootstrap_ci(&c, &t, seed, resamples)
112    };
113    Summary {
114        n_control: control.len(),
115        n_treatment: treatment.len(),
116        median_control: median_c,
117        median_treatment: median_t,
118        mean_control: mean_c,
119        mean_treatment: mean_t,
120        delta_median: delta,
121        delta_pct,
122        ci95_lo: lo,
123        ci95_hi: hi,
124        small_sample_warning: control.len().min(treatment.len()) < MIN_SAMPLE,
125        srm_warning: super::has_srm(control.len(), treatment.len()),
126    }
127}
128
#[cfg(test)]
mod tests {
    use super::*;

    /// A `Significant` verdict must survive a later call with weaker data.
    #[test]
    fn significant_is_sticky() {
        let control = vec![10.0_f64; 100];
        let treatment = vec![110.0_f64; 100];
        let first = decide(&control, &treatment, 42, 1000, false);
        assert_eq!(first.decision, Decision::Significant);

        // Second look: treatment alternates 10.0 / 11.0, a far weaker effect.
        // The sticky flag from the first look must keep the verdict.
        let noisy_t: Vec<f64> = [10.0, 11.0].iter().copied().cycle().take(100).collect();
        let second = decide(&control, &noisy_t, 42, 1000, first.ever_significant);
        assert_eq!(second.decision, Decision::Significant);
    }

    /// Two observations per arm are reported as `Insufficient`.
    #[test]
    fn insufficient_when_small() {
        let c = [1.0, 2.0];
        let t = [3.0, 4.0];
        let outcome = decide(&c, &t, 0, 100, false);
        assert_eq!(outcome.decision, Decision::Insufficient);
    }

    /// Heavily overlapping arms with enough data must not be `Insufficient`.
    #[test]
    fn inconclusive_with_noise() {
        // Treatment is the control ramp 0..50 shifted up by one.
        let control: Vec<f64> = (0..50).map(f64::from).collect();
        let treatment: Vec<f64> = control.iter().map(|x| x + 1.0).collect();
        let r = decide(&control, &treatment, 7, 500, false);
        assert!(
            matches!(r.decision, Decision::Inconclusive | Decision::Significant),
            "expected inconclusive or significant, got {:?}",
            r.decision
        );
    }
}