scirs2_stats/
bootstrap.rs

1//! Bootstrap resampling methods for statistical inference
2//!
3//! This module provides focused implementations of the most important bootstrap
4//! methods for constructing confidence intervals and performing statistical tests.
5//!
6//! ## Methods provided
7//!
8//! - **Percentile bootstrap CI**: Simple and widely used
9//! - **BCa bootstrap**: Bias-corrected and accelerated (Efron 1987)
10//! - **Parametric bootstrap**: Resample from a fitted parametric model
11//! - **Block bootstrap**: For dependent / time series data
12
13use crate::error::{StatsError, StatsResult};
14use scirs2_core::ndarray::{Array1, ArrayView1};
15use scirs2_core::numeric::{Float, NumCast};
16
17// ---------------------------------------------------------------------------
18// Helpers
19// ---------------------------------------------------------------------------
20
21/// Draw `n` samples with replacement from `data` using a simple LCG RNG.
22/// Returns a new `Array1<F>`.
23fn resample_with_replacement<F: Float + NumCast>(data: &[F], rng_state: &mut u64) -> Vec<F> {
24    let n = data.len();
25    let mut out = Vec::with_capacity(n);
26    for _ in 0..n {
27        let idx = lcg_next(rng_state) as usize % n;
28        out.push(data[idx]);
29    }
30    out
31}
32
33/// Simple LCG pseudo-random number generator (period 2^64).
34/// Returns a positive u64 and updates state in place.
35fn lcg_next(state: &mut u64) -> u64 {
36    // Knuth MMIX LCG parameters
37    *state = state
38        .wrapping_mul(6_364_136_223_846_793_005)
39        .wrapping_add(1_442_695_040_888_963_407);
40    *state >> 1 // make positive
41}
42
43fn sorted_f64_vec(v: &[f64]) -> Vec<f64> {
44    let mut s = v.to_vec();
45    s.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
46    s
47}
48
49fn quantile_f64(sorted: &[f64], q: f64) -> f64 {
50    if sorted.is_empty() {
51        return 0.0;
52    }
53    if sorted.len() == 1 {
54        return sorted[0];
55    }
56    let idx = q * (sorted.len() as f64 - 1.0);
57    let lo = idx.floor() as usize;
58    let hi = idx.ceil() as usize;
59    let frac = idx - lo as f64;
60    sorted[lo.min(sorted.len() - 1)] * (1.0 - frac) + sorted[hi.min(sorted.len() - 1)] * frac
61}
62
63/// Standard normal CDF (Abramowitz & Stegun approximation).
64fn norm_cdf(x: f64) -> f64 {
65    if x < -8.0 {
66        return 0.0;
67    }
68    if x > 8.0 {
69        return 1.0;
70    }
71    let t = 1.0 / (1.0 + 0.231_641_9 * x.abs());
72    let d = 0.398_942_28 * (-0.5 * x * x).exp();
73    let p = t
74        * (0.319_381_530
75            + t * (-0.356_563_782
76                + t * (1.781_477_937 + t * (-1.821_255_978 + t * 1.330_274_429))));
77    if x >= 0.0 {
78        1.0 - d * p
79    } else {
80        d * p
81    }
82}
83
84/// Standard normal quantile (probit).
85fn norm_ppf(p: f64) -> f64 {
86    if p <= 0.0 {
87        return f64::NEG_INFINITY;
88    }
89    if p >= 1.0 {
90        return f64::INFINITY;
91    }
92    if (p - 0.5).abs() < f64::EPSILON {
93        return 0.0;
94    }
95    let (sign, pp) = if p < 0.5 { (-1.0, p) } else { (1.0, 1.0 - p) };
96    let t = (-2.0 * pp.ln()).sqrt();
97    let c0 = 2.515_517;
98    let c1 = 0.802_853;
99    let c2 = 0.010_328;
100    let d1 = 1.432_788;
101    let d2 = 0.189_269;
102    let d3 = 0.001_308;
103    sign * (t - (c0 + t * (c1 + t * c2)) / (1.0 + t * (d1 + t * (d2 + t * d3))))
104}
105
106// ===========================================================================
107// Result types
108// ===========================================================================
109
110/// Bootstrap confidence interval result.
111#[derive(Debug, Clone)]
112pub struct BootstrapCI<F> {
113    /// Point estimate (statistic applied to the original sample)
114    pub estimate: F,
115    /// Lower bound of the confidence interval
116    pub ci_lower: F,
117    /// Upper bound of the confidence interval
118    pub ci_upper: F,
119    /// Confidence level (e.g. 0.95)
120    pub confidence_level: f64,
121    /// Standard error estimate (std dev of bootstrap distribution)
122    pub standard_error: F,
123    /// Bootstrap distribution (all bootstrap replicates)
124    pub replicates: Vec<F>,
125}
126
127/// BCa bootstrap result (extends BootstrapCI with bias/acceleration info).
128#[derive(Debug, Clone)]
129pub struct BcaBootstrapResult<F> {
130    /// The confidence interval
131    pub ci: BootstrapCI<F>,
132    /// Bias correction factor (z0)
133    pub bias_correction: f64,
134    /// Acceleration factor (a)
135    pub acceleration: f64,
136}
137
138// ===========================================================================
139// Percentile bootstrap
140// ===========================================================================
141
142/// Compute a percentile bootstrap confidence interval.
143///
144/// The percentile method (Efron 1979) is the simplest bootstrap CI method.
145/// It takes the alpha/2 and 1-alpha/2 quantiles of the bootstrap distribution
146/// as the CI bounds.
147///
148/// # Arguments
149///
150/// * `x` - Input data
151/// * `statistic` - A function that computes the statistic of interest from a slice
152/// * `n_bootstrap` - Number of bootstrap resamples (default 1000)
153/// * `confidence_level` - Confidence level (default 0.95)
154/// * `seed` - Optional random seed
155///
156/// # Returns
157///
158/// A `BootstrapCI` with the confidence interval and bootstrap distribution.
159///
160/// # Examples
161///
162/// ```
163/// use scirs2_core::ndarray::array;
164/// use scirs2_stats::percentile_bootstrap;
165///
166/// let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
167/// let result = percentile_bootstrap(
168///     &data.view(),
169///     |s| s.iter().sum::<f64>() / s.len() as f64,  // mean
170///     Some(2000),
171///     Some(0.95),
172///     Some(42),
173/// ).expect("bootstrap failed");
174/// assert!(result.ci_lower < 5.5);
175/// assert!(result.ci_upper > 5.5);
176/// ```
177pub fn percentile_bootstrap<F, S>(
178    x: &ArrayView1<F>,
179    statistic: S,
180    n_bootstrap: Option<usize>,
181    confidence_level: Option<f64>,
182    seed: Option<u64>,
183) -> StatsResult<BootstrapCI<F>>
184where
185    F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
186    S: Fn(&[f64]) -> f64,
187{
188    if x.is_empty() {
189        return Err(StatsError::InvalidArgument(
190            "Input array cannot be empty".to_string(),
191        ));
192    }
193
194    let n_boot = n_bootstrap.unwrap_or(1000);
195    let conf = confidence_level.unwrap_or(0.95);
196    if conf <= 0.0 || conf >= 1.0 {
197        return Err(StatsError::InvalidArgument(
198            "confidence_level must be in (0, 1)".to_string(),
199        ));
200    }
201
202    let data_f64: Vec<f64> = x
203        .iter()
204        .map(|v| <f64 as NumCast>::from(*v).unwrap_or(0.0))
205        .collect();
206
207    let theta_hat = statistic(&data_f64);
208
209    let mut rng_state = seed.unwrap_or(12345);
210    let mut replicates_f64 = Vec::with_capacity(n_boot);
211
212    for _ in 0..n_boot {
213        let sample = resample_with_replacement(&data_f64, &mut rng_state);
214        replicates_f64.push(statistic(&sample));
215    }
216
217    let sorted_reps = sorted_f64_vec(&replicates_f64);
218    let alpha = 1.0 - conf;
219    let ci_lower_f64 = quantile_f64(&sorted_reps, alpha / 2.0);
220    let ci_upper_f64 = quantile_f64(&sorted_reps, 1.0 - alpha / 2.0);
221
222    // Standard error
223    let mean_rep = replicates_f64.iter().sum::<f64>() / replicates_f64.len() as f64;
224    let var_rep = replicates_f64
225        .iter()
226        .map(|&r| (r - mean_rep) * (r - mean_rep))
227        .sum::<f64>()
228        / (replicates_f64.len() as f64 - 1.0);
229    let se = var_rep.sqrt();
230
231    let replicates: Result<Vec<F>, _> = replicates_f64
232        .iter()
233        .map(|&v| F::from(v).ok_or_else(|| StatsError::ComputationError("cast".into())))
234        .collect();
235
236    Ok(BootstrapCI {
237        estimate: F::from(theta_hat).ok_or_else(|| StatsError::ComputationError("cast".into()))?,
238        ci_lower: F::from(ci_lower_f64)
239            .ok_or_else(|| StatsError::ComputationError("cast".into()))?,
240        ci_upper: F::from(ci_upper_f64)
241            .ok_or_else(|| StatsError::ComputationError("cast".into()))?,
242        confidence_level: conf,
243        standard_error: F::from(se).ok_or_else(|| StatsError::ComputationError("cast".into()))?,
244        replicates: replicates?,
245    })
246}
247
248// ===========================================================================
249// BCa bootstrap
250// ===========================================================================
251
252/// Compute a BCa (bias-corrected and accelerated) bootstrap confidence interval.
253///
254/// The BCa method (Efron 1987) adjusts the percentile bootstrap CI for both
255/// bias and skewness in the bootstrap distribution. It uses:
256///
257/// 1. **Bias correction (z0)**: the proportion of bootstrap replicates less
258///    than the original estimate, converted to a z-score.
259/// 2. **Acceleration (a)**: estimated from the jackknife influence values.
260///
261/// BCa intervals are second-order accurate and generally preferred over
262/// simple percentile intervals.
263///
264/// # Arguments
265///
266/// * `x` - Input data
267/// * `statistic` - Function computing the statistic from a slice
268/// * `n_bootstrap` - Number of bootstrap resamples (default 2000)
269/// * `confidence_level` - Confidence level (default 0.95)
270/// * `seed` - Optional random seed
271///
272/// # Returns
273///
274/// A `BcaBootstrapResult` with the CI, bias correction, and acceleration.
275///
276/// # Examples
277///
278/// ```
279/// use scirs2_core::ndarray::array;
280/// use scirs2_stats::bca_bootstrap;
281///
282/// let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
283/// let result = bca_bootstrap(
284///     &data.view(),
285///     |s| s.iter().sum::<f64>() / s.len() as f64,
286///     Some(2000),
287///     Some(0.95),
288///     Some(42),
289/// ).expect("BCa failed");
290/// assert!(result.ci.ci_lower < 5.5);
291/// assert!(result.ci.ci_upper > 5.5);
292/// ```
293pub fn bca_bootstrap<F, S>(
294    x: &ArrayView1<F>,
295    statistic: S,
296    n_bootstrap: Option<usize>,
297    confidence_level: Option<f64>,
298    seed: Option<u64>,
299) -> StatsResult<BcaBootstrapResult<F>>
300where
301    F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
302    S: Fn(&[f64]) -> f64,
303{
304    if x.is_empty() {
305        return Err(StatsError::InvalidArgument(
306            "Input array cannot be empty".to_string(),
307        ));
308    }
309    let n = x.len();
310    if n < 2 {
311        return Err(StatsError::InvalidArgument(
312            "Need at least 2 observations for BCa bootstrap".to_string(),
313        ));
314    }
315
316    let n_boot = n_bootstrap.unwrap_or(2000);
317    let conf = confidence_level.unwrap_or(0.95);
318    if conf <= 0.0 || conf >= 1.0 {
319        return Err(StatsError::InvalidArgument(
320            "confidence_level must be in (0, 1)".to_string(),
321        ));
322    }
323
324    let data_f64: Vec<f64> = x
325        .iter()
326        .map(|v| <f64 as NumCast>::from(*v).unwrap_or(0.0))
327        .collect();
328
329    let theta_hat = statistic(&data_f64);
330
331    // Generate bootstrap replicates
332    let mut rng_state = seed.unwrap_or(12345);
333    let mut replicates_f64 = Vec::with_capacity(n_boot);
334    for _ in 0..n_boot {
335        let sample = resample_with_replacement(&data_f64, &mut rng_state);
336        replicates_f64.push(statistic(&sample));
337    }
338
339    // Bias correction: z0 = Phi^{-1}(proportion of replicates < theta_hat)
340    let prop_less =
341        replicates_f64.iter().filter(|&&r| r < theta_hat).count() as f64 / n_boot as f64;
342    let z0 = norm_ppf(prop_less.max(0.001).min(0.999));
343
344    // Acceleration: a = sum(d_i^3) / (6 * (sum(d_i^2))^{3/2})
345    // where d_i = theta_hat_dot - theta_hat_jack_i (jackknife influence values)
346    let mut jackknife_vals = Vec::with_capacity(n);
347    for i in 0..n {
348        let jack_sample: Vec<f64> = data_f64
349            .iter()
350            .enumerate()
351            .filter_map(|(j, &v)| if j != i { Some(v) } else { None })
352            .collect();
353        jackknife_vals.push(statistic(&jack_sample));
354    }
355
356    let theta_dot = jackknife_vals.iter().sum::<f64>() / n as f64;
357    let d: Vec<f64> = jackknife_vals.iter().map(|&t| theta_dot - t).collect();
358
359    let sum_d2: f64 = d.iter().map(|&di| di * di).sum();
360    let sum_d3: f64 = d.iter().map(|&di| di * di * di).sum();
361
362    let acceleration = if sum_d2 > f64::EPSILON {
363        sum_d3 / (6.0 * sum_d2.powf(1.5))
364    } else {
365        0.0
366    };
367
368    // Adjusted quantiles
369    let alpha = 1.0 - conf;
370    let z_alpha_lower = norm_ppf(alpha / 2.0);
371    let z_alpha_upper = norm_ppf(1.0 - alpha / 2.0);
372
373    let adjusted_lower = {
374        let num = z0 + z_alpha_lower;
375        let denom = 1.0 - acceleration * num;
376        if denom.abs() > f64::EPSILON {
377            norm_cdf(z0 + num / denom)
378        } else {
379            alpha / 2.0
380        }
381    };
382
383    let adjusted_upper = {
384        let num = z0 + z_alpha_upper;
385        let denom = 1.0 - acceleration * num;
386        if denom.abs() > f64::EPSILON {
387            norm_cdf(z0 + num / denom)
388        } else {
389            1.0 - alpha / 2.0
390        }
391    };
392
393    let sorted_reps = sorted_f64_vec(&replicates_f64);
394    let ci_lower_f64 = quantile_f64(&sorted_reps, adjusted_lower.max(0.0).min(1.0));
395    let ci_upper_f64 = quantile_f64(&sorted_reps, adjusted_upper.max(0.0).min(1.0));
396
397    // Standard error
398    let mean_rep = replicates_f64.iter().sum::<f64>() / replicates_f64.len() as f64;
399    let var_rep = replicates_f64
400        .iter()
401        .map(|&r| (r - mean_rep) * (r - mean_rep))
402        .sum::<f64>()
403        / (replicates_f64.len() as f64 - 1.0);
404    let se = var_rep.sqrt();
405
406    let replicates: Result<Vec<F>, _> = replicates_f64
407        .iter()
408        .map(|&v| F::from(v).ok_or_else(|| StatsError::ComputationError("cast".into())))
409        .collect();
410
411    Ok(BcaBootstrapResult {
412        ci: BootstrapCI {
413            estimate: F::from(theta_hat)
414                .ok_or_else(|| StatsError::ComputationError("cast".into()))?,
415            ci_lower: F::from(ci_lower_f64)
416                .ok_or_else(|| StatsError::ComputationError("cast".into()))?,
417            ci_upper: F::from(ci_upper_f64)
418                .ok_or_else(|| StatsError::ComputationError("cast".into()))?,
419            confidence_level: conf,
420            standard_error: F::from(se)
421                .ok_or_else(|| StatsError::ComputationError("cast".into()))?,
422            replicates: replicates?,
423        },
424        bias_correction: z0,
425        acceleration,
426    })
427}
428
429// ===========================================================================
430// Parametric bootstrap
431// ===========================================================================
432
433/// Compute a parametric bootstrap confidence interval.
434///
435/// Instead of resampling the data directly, the parametric bootstrap fits a
436/// parametric model to the data and then draws samples from that model.
437/// This version assumes a normal model: the mean and standard deviation are
438/// estimated from the data, and new samples are drawn from N(mean, sd^2).
439///
440/// # Arguments
441///
442/// * `x` - Input data
443/// * `statistic` - Function computing the statistic from a slice
444/// * `n_bootstrap` - Number of bootstrap resamples (default 1000)
445/// * `confidence_level` - Confidence level (default 0.95)
446/// * `seed` - Optional random seed
447///
448/// # Returns
449///
450/// A `BootstrapCI` with the confidence interval.
451///
452/// # Examples
453///
454/// ```
455/// use scirs2_core::ndarray::array;
456/// use scirs2_stats::parametric_bootstrap;
457///
458/// let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
459/// let result = parametric_bootstrap(
460///     &data.view(),
461///     |s| s.iter().sum::<f64>() / s.len() as f64,
462///     Some(1000),
463///     Some(0.95),
464///     Some(42),
465/// ).expect("parametric bootstrap failed");
466/// assert!(result.ci_lower < 5.5);
467/// assert!(result.ci_upper > 5.5);
468/// ```
469pub fn parametric_bootstrap<F, S>(
470    x: &ArrayView1<F>,
471    statistic: S,
472    n_bootstrap: Option<usize>,
473    confidence_level: Option<f64>,
474    seed: Option<u64>,
475) -> StatsResult<BootstrapCI<F>>
476where
477    F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
478    S: Fn(&[f64]) -> f64,
479{
480    if x.is_empty() {
481        return Err(StatsError::InvalidArgument(
482            "Input array cannot be empty".to_string(),
483        ));
484    }
485    let n = x.len();
486    if n < 2 {
487        return Err(StatsError::InvalidArgument(
488            "Need at least 2 observations for parametric bootstrap".to_string(),
489        ));
490    }
491
492    let n_boot = n_bootstrap.unwrap_or(1000);
493    let conf = confidence_level.unwrap_or(0.95);
494
495    let data_f64: Vec<f64> = x
496        .iter()
497        .map(|v| <f64 as NumCast>::from(*v).unwrap_or(0.0))
498        .collect();
499
500    let theta_hat = statistic(&data_f64);
501
502    // Fit normal model: estimate mean and std dev
503    let sample_mean = data_f64.iter().sum::<f64>() / n as f64;
504    let sample_var = data_f64
505        .iter()
506        .map(|&v| (v - sample_mean) * (v - sample_mean))
507        .sum::<f64>()
508        / (n as f64 - 1.0);
509    let sample_sd = sample_var.sqrt();
510
511    // Generate parametric bootstrap samples using Box-Muller transform
512    let mut rng_state = seed.unwrap_or(12345);
513    let mut replicates_f64 = Vec::with_capacity(n_boot);
514
515    for _ in 0..n_boot {
516        let mut sample = Vec::with_capacity(n);
517        let mut i = 0;
518        while i < n {
519            // Box-Muller
520            let u1 = (lcg_next(&mut rng_state) as f64) / (u64::MAX as f64 / 2.0);
521            let u2 = (lcg_next(&mut rng_state) as f64) / (u64::MAX as f64 / 2.0);
522            let u1 = u1.max(1e-300); // avoid log(0)
523            let r = (-2.0 * u1.ln()).sqrt();
524            let theta = 2.0 * std::f64::consts::PI * u2;
525
526            let z1 = r * theta.cos();
527            sample.push(sample_mean + sample_sd * z1);
528            i += 1;
529
530            if i < n {
531                let z2 = r * theta.sin();
532                sample.push(sample_mean + sample_sd * z2);
533                i += 1;
534            }
535        }
536        replicates_f64.push(statistic(&sample));
537    }
538
539    let sorted_reps = sorted_f64_vec(&replicates_f64);
540    let alpha = 1.0 - conf;
541    let ci_lower_f64 = quantile_f64(&sorted_reps, alpha / 2.0);
542    let ci_upper_f64 = quantile_f64(&sorted_reps, 1.0 - alpha / 2.0);
543
544    let mean_rep = replicates_f64.iter().sum::<f64>() / replicates_f64.len() as f64;
545    let var_rep = replicates_f64
546        .iter()
547        .map(|&r| (r - mean_rep) * (r - mean_rep))
548        .sum::<f64>()
549        / (replicates_f64.len() as f64 - 1.0);
550    let se = var_rep.sqrt();
551
552    let replicates: Result<Vec<F>, _> = replicates_f64
553        .iter()
554        .map(|&v| F::from(v).ok_or_else(|| StatsError::ComputationError("cast".into())))
555        .collect();
556
557    Ok(BootstrapCI {
558        estimate: F::from(theta_hat).ok_or_else(|| StatsError::ComputationError("cast".into()))?,
559        ci_lower: F::from(ci_lower_f64)
560            .ok_or_else(|| StatsError::ComputationError("cast".into()))?,
561        ci_upper: F::from(ci_upper_f64)
562            .ok_or_else(|| StatsError::ComputationError("cast".into()))?,
563        confidence_level: conf,
564        standard_error: F::from(se).ok_or_else(|| StatsError::ComputationError("cast".into()))?,
565        replicates: replicates?,
566    })
567}
568
569// ===========================================================================
570// Block bootstrap (for time series)
571// ===========================================================================
572
573/// Compute a block bootstrap confidence interval for dependent data.
574///
575/// The moving block bootstrap (Kunsch 1989, Liu & Singh 1992) divides the
576/// time series into overlapping blocks of a fixed length and resamples entire
577/// blocks. This preserves the serial dependence within each block.
578///
579/// # Arguments
580///
581/// * `x` - Input time series data
582/// * `statistic` - Function computing the statistic from a slice
583/// * `block_length` - Length of each block (default: floor(n^(1/3)))
584/// * `n_bootstrap` - Number of bootstrap resamples (default 1000)
585/// * `confidence_level` - Confidence level (default 0.95)
586/// * `seed` - Optional random seed
587///
588/// # Returns
589///
590/// A `BootstrapCI` with the confidence interval.
591///
592/// # Examples
593///
594/// ```
595/// use scirs2_core::ndarray::array;
596/// use scirs2_stats::block_bootstrap_ci;
597///
598/// // Simulated time series
599/// let data = array![1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5,
600///                    6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.0, 10.5];
601/// let result = block_bootstrap_ci(
602///     &data.view(),
603///     |s| s.iter().sum::<f64>() / s.len() as f64,
604///     None,       // auto block length
605///     Some(1000),
606///     Some(0.95),
607///     Some(42),
608/// ).expect("block bootstrap failed");
609/// assert!(result.ci_lower < result.ci_upper);
610/// ```
611pub fn block_bootstrap_ci<F, S>(
612    x: &ArrayView1<F>,
613    statistic: S,
614    block_length: Option<usize>,
615    n_bootstrap: Option<usize>,
616    confidence_level: Option<f64>,
617    seed: Option<u64>,
618) -> StatsResult<BootstrapCI<F>>
619where
620    F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
621    S: Fn(&[f64]) -> f64,
622{
623    let n = x.len();
624    if n < 3 {
625        return Err(StatsError::InvalidArgument(
626            "Need at least 3 observations for block bootstrap".to_string(),
627        ));
628    }
629
630    let block_len = block_length.unwrap_or_else(|| {
631        let auto = (n as f64).powf(1.0 / 3.0).ceil() as usize;
632        auto.max(1).min(n)
633    });
634
635    if block_len == 0 || block_len > n {
636        return Err(StatsError::InvalidArgument(format!(
637            "block_length must be between 1 and {} (n)",
638            n
639        )));
640    }
641
642    let n_boot = n_bootstrap.unwrap_or(1000);
643    let conf = confidence_level.unwrap_or(0.95);
644
645    let data_f64: Vec<f64> = x
646        .iter()
647        .map(|v| <f64 as NumCast>::from(*v).unwrap_or(0.0))
648        .collect();
649
650    let theta_hat = statistic(&data_f64);
651
652    // Number of possible starting positions for overlapping blocks
653    let n_blocks_possible = n - block_len + 1;
654    // Number of blocks needed to fill n observations
655    let n_blocks_needed = (n as f64 / block_len as f64).ceil() as usize;
656
657    let mut rng_state = seed.unwrap_or(12345);
658    let mut replicates_f64 = Vec::with_capacity(n_boot);
659
660    for _ in 0..n_boot {
661        let mut sample = Vec::with_capacity(n_blocks_needed * block_len);
662
663        for _ in 0..n_blocks_needed {
664            let start = lcg_next(&mut rng_state) as usize % n_blocks_possible;
665            for j in 0..block_len {
666                sample.push(data_f64[start + j]);
667            }
668        }
669
670        // Trim to exactly n observations
671        sample.truncate(n);
672
673        replicates_f64.push(statistic(&sample));
674    }
675
676    let sorted_reps = sorted_f64_vec(&replicates_f64);
677    let alpha = 1.0 - conf;
678    let ci_lower_f64 = quantile_f64(&sorted_reps, alpha / 2.0);
679    let ci_upper_f64 = quantile_f64(&sorted_reps, 1.0 - alpha / 2.0);
680
681    let mean_rep = replicates_f64.iter().sum::<f64>() / replicates_f64.len() as f64;
682    let var_rep = replicates_f64
683        .iter()
684        .map(|&r| (r - mean_rep) * (r - mean_rep))
685        .sum::<f64>()
686        / (replicates_f64.len() as f64 - 1.0);
687    let se = var_rep.sqrt();
688
689    let replicates: Result<Vec<F>, _> = replicates_f64
690        .iter()
691        .map(|&v| F::from(v).ok_or_else(|| StatsError::ComputationError("cast".into())))
692        .collect();
693
694    Ok(BootstrapCI {
695        estimate: F::from(theta_hat).ok_or_else(|| StatsError::ComputationError("cast".into()))?,
696        ci_lower: F::from(ci_lower_f64)
697            .ok_or_else(|| StatsError::ComputationError("cast".into()))?,
698        ci_upper: F::from(ci_upper_f64)
699            .ok_or_else(|| StatsError::ComputationError("cast".into()))?,
700        confidence_level: conf,
701        standard_error: F::from(se).ok_or_else(|| StatsError::ComputationError("cast".into()))?,
702        replicates: replicates?,
703    })
704}
705
706// ===========================================================================
707// Tests
708// ===========================================================================
709
710#[cfg(test)]
711mod tests {
712    use super::*;
713    use scirs2_core::ndarray::{array, Array1};
714
715    fn sample_mean(data: &[f64]) -> f64 {
716        if data.is_empty() {
717            return 0.0;
718        }
719        data.iter().sum::<f64>() / data.len() as f64
720    }
721
722    fn sample_median(data: &[f64]) -> f64 {
723        let mut s = data.to_vec();
724        s.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
725        let n = s.len();
726        if n == 0 {
727            return 0.0;
728        }
729        if n % 2 == 0 {
730            (s[n / 2 - 1] + s[n / 2]) / 2.0
731        } else {
732            s[n / 2]
733        }
734    }
735
736    // -------------------------------------------------------------------
737    // Percentile bootstrap
738    // -------------------------------------------------------------------
739
740    #[test]
741    fn test_percentile_bootstrap_mean() {
742        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
743        let result =
744            percentile_bootstrap(&data.view(), sample_mean, Some(2000), Some(0.95), Some(42))
745                .expect("should succeed");
746        let lower: f64 = NumCast::from(result.ci_lower).expect("cast");
747        let upper: f64 = NumCast::from(result.ci_upper).expect("cast");
748        // True mean is 5.5; CI should contain it
749        assert!(lower < 5.5);
750        assert!(upper > 5.5);
751    }
752
753    #[test]
754    fn test_percentile_bootstrap_median() {
755        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
756        let result = percentile_bootstrap(
757            &data.view(),
758            sample_median,
759            Some(2000),
760            Some(0.95),
761            Some(42),
762        )
763        .expect("should succeed");
764        let lower: f64 = NumCast::from(result.ci_lower).expect("cast");
765        let upper: f64 = NumCast::from(result.ci_upper).expect("cast");
766        assert!(lower < upper);
767    }
768
769    #[test]
770    fn test_percentile_bootstrap_se() {
771        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
772        let result =
773            percentile_bootstrap(&data.view(), sample_mean, Some(1000), Some(0.95), Some(42))
774                .expect("should succeed");
775        let se: f64 = NumCast::from(result.standard_error).expect("cast");
776        assert!(se > 0.0);
777        assert!(se < 3.0); // SE of mean should be roughly sqrt(var/n) ~ 0.96
778    }
779
780    #[test]
781    fn test_percentile_bootstrap_replicates_count() {
782        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
783        let n_boot = 500;
784        let result = percentile_bootstrap(&data.view(), sample_mean, Some(n_boot), None, Some(42))
785            .expect("should succeed");
786        assert_eq!(result.replicates.len(), n_boot);
787    }
788
789    #[test]
790    fn test_percentile_bootstrap_empty_error() {
791        let data = Array1::<f64>::zeros(0);
792        assert!(percentile_bootstrap(&data.view(), sample_mean, None, None, None).is_err());
793    }
794
795    #[test]
796    fn test_percentile_bootstrap_single_element() {
797        let data = array![5.0];
798        let result = percentile_bootstrap(&data.view(), sample_mean, Some(100), None, Some(42))
799            .expect("should succeed");
800        // All resamples are [5.0], so CI should be [5, 5]
801        let lower: f64 = NumCast::from(result.ci_lower).expect("cast");
802        let upper: f64 = NumCast::from(result.ci_upper).expect("cast");
803        assert!((lower - 5.0).abs() < 1e-10);
804        assert!((upper - 5.0).abs() < 1e-10);
805    }
806
807    // -------------------------------------------------------------------
808    // BCa bootstrap
809    // -------------------------------------------------------------------
810
811    #[test]
812    fn test_bca_bootstrap_mean() {
813        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
814        let result = bca_bootstrap(&data.view(), sample_mean, Some(2000), Some(0.95), Some(42))
815            .expect("should succeed");
816        let lower: f64 = NumCast::from(result.ci.ci_lower).expect("cast");
817        let upper: f64 = NumCast::from(result.ci.ci_upper).expect("cast");
818        assert!(lower < 5.5);
819        assert!(upper > 5.5);
820    }
821
822    #[test]
823    fn test_bca_bootstrap_bias_correction() {
824        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
825        let result = bca_bootstrap(&data.view(), sample_mean, Some(2000), Some(0.95), Some(42))
826            .expect("should succeed");
827        // z0 should be near 0 for symmetric distribution and mean statistic
828        assert!(result.bias_correction.abs() < 1.0);
829    }
830
831    #[test]
832    fn test_bca_bootstrap_acceleration() {
833        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
834        let result = bca_bootstrap(&data.view(), sample_mean, Some(1000), Some(0.95), Some(42))
835            .expect("should succeed");
836        // Acceleration should be small for mean of uniform-ish data
837        assert!(result.acceleration.abs() < 0.5);
838    }
839
840    #[test]
841    fn test_bca_bootstrap_skewed_data() {
842        // Skewed data: BCa should adjust
843        let data = array![1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 10.0];
844        let result = bca_bootstrap(&data.view(), sample_mean, Some(2000), Some(0.95), Some(42))
845            .expect("should succeed");
846        let lower: f64 = NumCast::from(result.ci.ci_lower).expect("cast");
847        let upper: f64 = NumCast::from(result.ci.ci_upper).expect("cast");
848        assert!(lower < upper);
849    }
850
851    #[test]
852    fn test_bca_bootstrap_empty_error() {
853        let data = Array1::<f64>::zeros(0);
854        assert!(bca_bootstrap(&data.view(), sample_mean, None, None, None).is_err());
855    }
856
857    #[test]
858    fn test_bca_bootstrap_single_error() {
859        let data = array![5.0];
860        assert!(bca_bootstrap(&data.view(), sample_mean, None, None, None).is_err());
861    }
862
863    // -------------------------------------------------------------------
864    // Parametric bootstrap
865    // -------------------------------------------------------------------
866
867    #[test]
868    fn test_parametric_bootstrap_mean() {
869        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
870        let result =
871            parametric_bootstrap(&data.view(), sample_mean, Some(2000), Some(0.95), Some(42))
872                .expect("should succeed");
873        let lower: f64 = NumCast::from(result.ci_lower).expect("cast");
874        let upper: f64 = NumCast::from(result.ci_upper).expect("cast");
875        assert!(lower < 5.5);
876        assert!(upper > 5.5);
877    }
878
879    #[test]
880    fn test_parametric_bootstrap_ci_width() {
881        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
882        let ci_90 =
883            parametric_bootstrap(&data.view(), sample_mean, Some(2000), Some(0.90), Some(42))
884                .expect("90%");
885        let ci_99 =
886            parametric_bootstrap(&data.view(), sample_mean, Some(2000), Some(0.99), Some(42))
887                .expect("99%");
888
889        let width_90: f64 = ci_90.ci_upper - ci_90.ci_lower;
890        let width_99: f64 = ci_99.ci_upper - ci_99.ci_lower;
891        // 99% CI should be wider than 90% CI
892        assert!(width_99 > width_90);
893    }
894
895    #[test]
896    fn test_parametric_bootstrap_se_positive() {
897        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
898        let result = parametric_bootstrap(&data.view(), sample_mean, Some(500), None, Some(42))
899            .expect("should succeed");
900        let se: f64 = NumCast::from(result.standard_error).expect("cast");
901        assert!(se > 0.0);
902    }
903
904    #[test]
905    fn test_parametric_bootstrap_empty_error() {
906        let data = Array1::<f64>::zeros(0);
907        assert!(parametric_bootstrap(&data.view(), sample_mean, None, None, None).is_err());
908    }
909
910    #[test]
911    fn test_parametric_bootstrap_single_error() {
912        let data = array![5.0];
913        assert!(parametric_bootstrap(&data.view(), sample_mean, None, None, None).is_err());
914    }
915
916    // -------------------------------------------------------------------
917    // Block bootstrap
918    // -------------------------------------------------------------------
919
920    #[test]
921    fn test_block_bootstrap_basic() {
922        let data = array![
923            1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0,
924            9.5, 10.0, 10.5
925        ];
926        let result = block_bootstrap_ci(
927            &data.view(),
928            sample_mean,
929            None,
930            Some(1000),
931            Some(0.95),
932            Some(42),
933        )
934        .expect("should succeed");
935        let lower: f64 = NumCast::from(result.ci_lower).expect("cast");
936        let upper: f64 = NumCast::from(result.ci_upper).expect("cast");
937        assert!(lower < upper);
938    }
939
940    #[test]
941    fn test_block_bootstrap_custom_block_length() {
942        let data =
943            array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0];
944        let result = block_bootstrap_ci(
945            &data.view(),
946            sample_mean,
947            Some(3),
948            Some(500),
949            Some(0.95),
950            Some(42),
951        )
952        .expect("should succeed");
953        let lower: f64 = NumCast::from(result.ci_lower).expect("cast");
954        let upper: f64 = NumCast::from(result.ci_upper).expect("cast");
955        assert!(lower < upper);
956    }
957
958    #[test]
959    fn test_block_bootstrap_block_length_1() {
960        // Block length 1 should behave like ordinary bootstrap
961        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
962        let result = block_bootstrap_ci(
963            &data.view(),
964            sample_mean,
965            Some(1),
966            Some(500),
967            Some(0.95),
968            Some(42),
969        )
970        .expect("should succeed");
971        assert_eq!(result.replicates.len(), 500);
972    }
973
974    #[test]
975    fn test_block_bootstrap_replicates() {
976        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
977        let n_boot = 300;
978        let result = block_bootstrap_ci(
979            &data.view(),
980            sample_mean,
981            Some(2),
982            Some(n_boot),
983            None,
984            Some(42),
985        )
986        .expect("should succeed");
987        assert_eq!(result.replicates.len(), n_boot);
988    }
989
990    #[test]
991    fn test_block_bootstrap_too_small_error() {
992        let data = array![1.0, 2.0];
993        assert!(block_bootstrap_ci(&data.view(), sample_mean, None, None, None, None).is_err());
994    }
995
996    #[test]
997    fn test_block_bootstrap_invalid_block_length() {
998        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
999        assert!(block_bootstrap_ci(&data.view(), sample_mean, Some(0), None, None, None).is_err());
1000        assert!(block_bootstrap_ci(&data.view(), sample_mean, Some(6), None, None, None).is_err());
1001    }
1002
1003    // -------------------------------------------------------------------
1004    // Helper tests
1005    // -------------------------------------------------------------------
1006
1007    #[test]
1008    fn test_norm_cdf_ppf_roundtrip() {
1009        for &p in &[0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99] {
1010            let z = norm_ppf(p);
1011            let p_back = norm_cdf(z);
1012            assert!(
1013                (p - p_back).abs() < 0.02,
1014                "roundtrip failed for p={}: z={}, p_back={}",
1015                p,
1016                z,
1017                p_back
1018            );
1019        }
1020    }
1021
1022    #[test]
1023    fn test_lcg_different_seeds() {
1024        let mut s1 = 1u64;
1025        let mut s2 = 2u64;
1026        let v1 = lcg_next(&mut s1);
1027        let v2 = lcg_next(&mut s2);
1028        assert_ne!(v1, v2);
1029    }
1030}
scirs2_stats/bootstrap.rs

scirs2_stats/
bootstrap.rs