use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array1, ArrayView1};
use scirs2_core::numeric::{Float, FromPrimitive, NumCast, One, Zero};
use scirs2_core::{parallel_ops::*, random::prelude::*, simd_ops::SimdUnifiedOps, validation::*};
use std::collections::HashMap;
use std::marker::PhantomData;
/// Configuration for [`AdvancedBootstrapProcessor`].
#[derive(Debug, Clone)]
pub struct AdvancedBootstrapConfig {
/// Number of bootstrap resamples to draw.
pub n_bootstrap: usize,
/// RNG seed for reproducible runs; `None` seeds from the thread RNG.
pub seed: Option<u64>,
/// Resampling strategy (basic, stratified, block, Bayesian, wild, ...).
pub bootstrap_type: BootstrapType,
/// Whether to use parallel execution for large workloads.
pub parallel: bool,
/// Confidence level for interval estimates (e.g. 0.95).
pub confidence_level: f64,
/// Block length for block bootstraps; `None` uses a rule-of-thumb length.
pub block_length: Option<usize>,
/// Whether to compute the bias-corrected interval.
pub bias_correction: bool,
/// Whether to compute the bias-corrected & accelerated interval.
pub acceleration_correction: bool,
/// Upper bound on worker threads; `None` leaves it to the runtime.
pub max_threads: Option<usize>,
}
impl Default for AdvancedBootstrapConfig {
fn default() -> Self {
Self {
n_bootstrap: 1000,
seed: None,
bootstrap_type: BootstrapType::Basic,
parallel: true,
confidence_level: 0.95,
block_length: None,
bias_correction: true,
acceleration_correction: true,
max_threads: None,
}
}
}
/// Resampling strategy used by the processor.
#[derive(Debug, Clone, PartialEq)]
pub enum BootstrapType {
/// Plain i.i.d. resampling with replacement.
Basic,
/// Resample independently within each stratum; `strata[i]` labels `data[i]`.
Stratified {
strata: Vec<usize>,
},
/// Block resampling for dependent (time-series) data.
Block {
block_type: BlockType,
},
/// Dirichlet-weight (Rubin-style) reweighting instead of resampling.
Bayesian,
/// Multiply each observation by an i.i.d. random multiplier.
Wild {
distribution: WildDistribution,
},
/// Draw fresh samples from a fitted parametric distribution.
Parametric {
distribution_params: ParametricBootstrapParams,
},
/// Each observation appears equally often across all resamples combined.
Balanced,
}
/// Variant of block bootstrap for dependent data.
#[derive(Debug, Clone, PartialEq)]
pub enum BlockType {
/// Overlapping fixed-length blocks.
Moving,
/// Blocks may wrap around the end of the series.
Circular,
/// Disjoint fixed-length blocks.
NonOverlapping,
/// Random (geometric) block lengths with the given expected length.
Stationary {
expected_length: f64,
},
/// Fixed-length blocks weighted by a taper window.
Tapered {
taper_function: TaperFunction,
},
}
/// Window shape applied to tapered blocks (peak at the block center).
#[derive(Debug, Clone, PartialEq)]
pub enum TaperFunction {
/// Triangular window.
Linear,
/// Raised-cosine (Hann-like) window.
Cosine,
/// Exponential decay away from the block center.
Exponential { decay_rate: f64 },
}
/// Multiplier distribution for the wild bootstrap.
#[derive(Debug, Clone, PartialEq)]
pub enum WildDistribution {
/// +/-1 with equal probability.
Rademacher,
/// Mammen's two-point distribution based on the golden ratio.
Mammen,
/// Standard normal multipliers.
Normal,
/// +1 with probability `prob_positive`, otherwise -1.
TwoPoint { prob_positive: f64 },
}
/// Parameters of the fitted distribution used by the parametric bootstrap.
#[derive(Debug, Clone, PartialEq)]
pub enum ParametricBootstrapParams {
Normal { mean: f64, std: f64 },
Exponential { rate: f64 },
Gamma { shape: f64, scale: f64 },
Beta { alpha: f64, beta: f64 },
/// Named custom distribution; currently rejected at run time
/// (see `parametric_bootstrap`).
Custom {
name: String,
params: HashMap<String, f64>,
},
}
/// Full result of a bootstrap run.
#[derive(Debug, Clone)]
pub struct AdvancedBootstrapResult<F> {
/// The statistic evaluated on each resample.
pub bootstrap_samples: Array1<F>,
/// The statistic evaluated on the original data.
pub original_statistic: F,
/// Mean of the bootstrap distribution.
pub bootstrap_mean: F,
/// Standard deviation of the bootstrap distribution.
pub standard_error: F,
/// `bootstrap_mean - original_statistic`.
pub bias: F,
/// The various interval estimates.
pub confidence_intervals: BootstrapConfidenceIntervals<F>,
/// Which resampling strategy produced this result.
pub method: BootstrapType,
/// Number of resamples that completed successfully.
pub n_successful: usize,
/// Only populated for block bootstraps (dependence-adjusted n).
pub effective_samplesize: Option<usize>,
/// Distribution/quality/convergence diagnostics.
pub diagnostics: BootstrapDiagnostics<F>,
}
/// Confidence interval estimates; each entry is `(lower, upper)`.
#[derive(Debug, Clone)]
pub struct BootstrapConfidenceIntervals<F> {
/// Percentile interval from the sorted bootstrap distribution.
pub percentile: (F, F),
/// Basic (reflected) interval: `2*theta_hat - percentile`.
pub basic: (F, F),
/// Bias-corrected interval (present when enabled in the config).
pub bias_corrected: Option<(F, F)>,
/// BCa interval (present when enabled in the config).
pub bias_corrected_accelerated: Option<(F, F)>,
/// Studentized interval; not currently computed (always `None`).
pub studentized: Option<(F, F)>,
}
/// Diagnostic bundle attached to every bootstrap result.
#[derive(Debug, Clone)]
pub struct BootstrapDiagnostics<F> {
/// Shape statistics of the bootstrap distribution.
pub distribution_stats: BootstrapDistributionStats<F>,
/// Monte Carlo quality indicators.
pub quality_metrics: QualityMetrics<F>,
/// Heuristic convergence information.
pub convergence_info: ConvergenceInfo<F>,
/// Extra, method-specific values (currently left empty).
pub method_specific: HashMap<String, F>,
}
/// Shape statistics of the bootstrap distribution.
#[derive(Debug, Clone)]
pub struct BootstrapDistributionStats<F> {
pub skewness: F,
/// Excess kurtosis (normal distribution -> 0).
pub kurtosis: F,
/// Placeholder; currently always zero (see `compute_distribution_stats`).
pub jarque_bera: F,
/// Placeholder; currently always zero (see `compute_distribution_stats`).
pub anderson_darling: F,
pub min_value: F,
pub max_value: F,
}
/// Monte Carlo quality indicators for a bootstrap run.
#[derive(Debug, Clone)]
pub struct QualityMetrics<F> {
/// Monte Carlo standard error of the bootstrap mean.
pub mc_standard_error: F,
/// Nominal coverage; currently echoes the configured confidence level.
pub coverage_probability: F,
/// Relative efficiency; not currently computed (always `None`).
pub efficiency: Option<F>,
/// Stability score; currently a fixed placeholder of one.
pub stability: F,
}
/// Heuristic convergence information for a bootstrap run.
#[derive(Debug, Clone)]
pub struct ConvergenceInfo<F> {
/// True when at least 100 resamples were drawn (simple heuristic).
pub converged: bool,
/// Sample count at which convergence was declared, if any.
pub convergence_samplesize: Option<usize>,
/// Placeholder; currently a fixed value of one.
pub mean_stability: F,
/// Placeholder; currently a fixed value of one.
pub variance_stability: F,
}
/// Stateful bootstrap engine: owns the configuration and the RNG used by
/// the sequential resampling paths. `F` is the element/statistic type.
pub struct AdvancedBootstrapProcessor<F> {
config: AdvancedBootstrapConfig,
// RNG for the sequential code paths; parallel paths use per-task RNGs.
rng: StdRng,
// Ties the generic parameter to the struct without storing an F.
_phantom: PhantomData<F>,
}
impl<F> AdvancedBootstrapProcessor<F>
where
F: Float
+ NumCast
+ SimdUnifiedOps
+ Zero
+ One
+ FromPrimitive
+ Copy
+ Send
+ Sync
+ std::fmt::Display,
{
pub fn new(config: AdvancedBootstrapConfig) -> Self {
let rng = match config.seed {
Some(seed) => StdRng::seed_from_u64(seed),
None => StdRng::from_rng(&mut thread_rng()),
};
Self {
config,
rng,
_phantom: PhantomData,
}
}
/// Run the configured bootstrap on `data` and return the full result
/// (samples, intervals, diagnostics).
///
/// # Errors
/// Returns an error for non-finite or empty input, or if `statistic_fn`
/// fails on any resample.
pub fn bootstrap<T>(
&mut self,
data: &ArrayView1<F>,
statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
) -> StatsResult<AdvancedBootstrapResult<F>>
where
T: Into<F> + Copy + Send + Sync,
{
checkarray_finite(data, "data")?;
if data.is_empty() {
return Err(StatsError::InvalidArgument(
"Data cannot be empty".to_string(),
));
}
// Statistic on the original data; also the reference point for bias.
let original_statistic = statistic_fn(data)?.into();
// Clone so the match can move variant payloads while `self` stays borrowable.
let bootstrap_type = self.config.bootstrap_type.clone();
let bootstrap_samples = match bootstrap_type {
BootstrapType::Basic => self.basic_bootstrap(data, statistic_fn)?,
BootstrapType::Stratified { strata } => {
self.stratified_bootstrap(data, &strata, statistic_fn)?
}
BootstrapType::Block { block_type } => {
self.block_bootstrap(data, &block_type, statistic_fn)?
}
BootstrapType::Bayesian => self.bayesian_bootstrap(data, statistic_fn)?,
BootstrapType::Wild { distribution } => {
self.wild_bootstrap(data, &distribution, statistic_fn)?
}
BootstrapType::Parametric {
distribution_params,
} => self.parametric_bootstrap(data, &distribution_params, statistic_fn)?,
BootstrapType::Balanced => self.balanced_bootstrap(data, statistic_fn)?,
};
let bootstrap_mean = self.compute_mean(&bootstrap_samples);
let standard_error = self.compute_std(&bootstrap_samples);
let bias = bootstrap_mean - original_statistic;
let confidence_intervals = self.compute_confidence_intervals(
&bootstrap_samples,
original_statistic,
standard_error,
)?;
let diagnostics = self.compute_diagnostics(&bootstrap_samples, original_statistic)?;
// Only block bootstraps report a dependence-adjusted sample size.
let effective_samplesize = match &self.config.bootstrap_type {
BootstrapType::Block { .. } => Some(self.compute_effective_samplesize(data.len())),
_ => None,
};
Ok(AdvancedBootstrapResult {
bootstrap_samples,
original_statistic,
bootstrap_mean,
standard_error,
bias,
confidence_intervals,
method: self.config.bootstrap_type.clone(),
// Errors above propagate, so every requested resample succeeded.
n_successful: self.config.n_bootstrap,
effective_samplesize,
diagnostics,
})
}
fn basic_bootstrap<T>(
&mut self,
data: &ArrayView1<F>,
statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
) -> StatsResult<Array1<F>>
where
T: Into<F> + Copy + Send + Sync,
{
let n = data.len();
let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
if self.config.n_bootstrap >= 64 && n >= 32 {
return self.basic_bootstrap_simd_ultra(data, statistic_fn);
}
if self.config.parallel && self.config.n_bootstrap > 100 {
let samples: Result<Vec<_>, _> = (0..self.config.n_bootstrap)
.into_par_iter()
.map(|_| {
let mut local_rng = { StdRng::from_rng(&mut thread_rng()) };
let mut resample = Array1::zeros(n);
for i in 0..n {
let idx = local_rng.random_range(0..n);
resample[i] = data[idx];
}
statistic_fn(&resample.view()).map(|s| s.into())
})
.collect();
let sample_values = samples?;
for (i, value) in sample_values.into_iter().enumerate() {
bootstrap_samples[i] = value;
}
} else {
for i in 0..self.config.n_bootstrap {
let mut resample = Array1::zeros(n);
for j in 0..n {
let idx = self.rng.random_range(0..n);
resample[j] = data[idx];
}
bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
}
}
Ok(bootstrap_samples)
}
/// Chunked basic bootstrap used for large workloads.
///
/// Resamples are processed in batches sized to the widest available
/// vector unit. The previous version round-tripped every value through
/// f64 -> f32 -> f64 on AVX2-capable hosts, silently degrading precision
/// for f64 data while still gathering scalar-by-scalar; values are now
/// gathered directly in `F`, which also removes the per-iteration
/// rebuild of the converted data buffer and the unused scratch vectors.
fn basic_bootstrap_simd_ultra<T>(
    &mut self,
    data: &ArrayView1<F>,
    statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
) -> StatsResult<Array1<F>>
where
    T: Into<F> + Copy + Send + Sync,
{
    use scirs2_core::simd_ops::PlatformCapabilities;
    let capabilities = PlatformCapabilities::detect();
    let n: usize = data.len();
    let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
    // Batch size tuned to the widest detected vector unit.
    let chunk_size: usize = if capabilities.has_avx512() {
        16
    } else if capabilities.has_avx2() {
        8
    } else {
        4
    };
    let num_chunks: usize = self.config.n_bootstrap.div_ceil(chunk_size);
    if self.config.parallel && num_chunks > 1 {
        // One parallel task per chunk; each task owns its RNG, so this
        // path is not reproducible from `config.seed`.
        let chunk_results: Result<Vec<Vec<F>>, StatsError> = (0..num_chunks)
            .into_par_iter()
            .map(|chunk_idx| {
                let start_bootstrap = chunk_idx * chunk_size;
                let end_bootstrap =
                    std::cmp::min(start_bootstrap + chunk_size, self.config.n_bootstrap);
                let current_chunk_size = end_bootstrap - start_bootstrap;
                let mut local_rng = StdRng::from_rng(&mut thread_rng());
                let mut chunk_statistics = Vec::with_capacity(current_chunk_size);
                // One reusable resample buffer per task.
                let mut resample = Array1::zeros(n);
                for _ in 0..current_chunk_size {
                    for i in 0..n {
                        let idx = local_rng.random_range(0..n);
                        resample[i] = data[idx];
                    }
                    chunk_statistics.push(statistic_fn(&resample.view())?.into());
                }
                Ok::<Vec<F>, StatsError>(chunk_statistics)
            })
            .collect();
        // Chunks come back in order; flatten into the output array.
        let mut result_idx = 0;
        for chunk_result in chunk_results? {
            for statistic in chunk_result {
                if result_idx < self.config.n_bootstrap {
                    bootstrap_samples[result_idx] = statistic;
                    result_idx += 1;
                }
            }
        }
    } else {
        // Sequential path: draws indices from self.rng in bootstrap-major
        // order, preserving seeded reproducibility.
        let mut resample = Array1::zeros(n);
        for bootstrap_idx in 0..self.config.n_bootstrap {
            for i in 0..n {
                let idx = self.rng.random_range(0..n);
                resample[i] = data[idx];
            }
            bootstrap_samples[bootstrap_idx] = statistic_fn(&resample.view())?.into();
        }
    }
    Ok(bootstrap_samples)
}
/// Stratified bootstrap: resample independently within each stratum so
/// every stratum keeps its original size in each resample.
///
/// Resampled values are laid out stratum-by-stratum (sorted by stratum
/// label), not in the original data order, so only order-invariant
/// statistics (mean, variance, quantiles, ...) are safe here.
///
/// # Errors
/// Returns `DimensionMismatch` if `strata` is not the same length as `data`.
fn stratified_bootstrap<T>(
    &mut self,
    data: &ArrayView1<F>,
    strata: &[usize],
    statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
) -> StatsResult<Array1<F>>
where
    T: Into<F> + Copy + Send + Sync,
{
    if data.len() != strata.len() {
        return Err(StatsError::DimensionMismatch(
            "Data and strata must have same length".to_string(),
        ));
    }
    let mut strata_groups: HashMap<usize, Vec<(usize, F)>> = HashMap::new();
    for (i, (&value, &stratum)) in data.iter().zip(strata.iter()).enumerate() {
        strata_groups.entry(stratum).or_default().push((i, value));
    }
    // Iterate strata in sorted key order: HashMap iteration order is
    // nondeterministic, which previously made even seeded runs
    // irreproducible (RNG draws were consumed in a random group order).
    let mut stratum_keys: Vec<usize> = strata_groups.keys().copied().collect();
    stratum_keys.sort_unstable();
    let n = data.len();
    let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
    for i in 0..self.config.n_bootstrap {
        let mut resample = Array1::zeros(n);
        let mut resample_idx = 0;
        for key in &stratum_keys {
            let groupdata = &strata_groups[key];
            let groupsize = groupdata.len();
            for _ in 0..groupsize {
                let idx = self.rng.random_range(0..groupsize);
                resample[resample_idx] = groupdata[idx].1;
                resample_idx += 1;
            }
        }
        bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
    }
    Ok(bootstrap_samples)
}
/// Dispatch to the requested block-bootstrap variant, generating one
/// resample per iteration and evaluating the statistic on it.
///
/// The block length comes from the config or, when unset, from the
/// n^(1/3) rule of thumb; it must be strictly smaller than the data
/// length.
///
/// # Errors
/// Returns `InvalidArgument` if the block length is not less than `n`.
fn block_bootstrap<T>(
&mut self,
data: &ArrayView1<F>,
block_type: &BlockType,
statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
) -> StatsResult<Array1<F>>
where
T: Into<F> + Copy + Send + Sync,
{
let n = data.len();
let block_length = self
.config
.block_length
.unwrap_or_else(|| self.optimal_block_length(n));
if block_length >= n {
return Err(StatsError::InvalidArgument(
"Block length must be less than data length".to_string(),
));
}
let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
for i in 0..self.config.n_bootstrap {
// The stationary variant draws its own (random) block lengths and
// ignores `block_length`.
let resample = match block_type {
BlockType::Moving => self.moving_blockbootstrap(data, block_length)?,
BlockType::Circular => self.circular_blockbootstrap(data, block_length)?,
BlockType::NonOverlapping => {
self.non_overlapping_blockbootstrap(data, block_length)?
}
BlockType::Stationary { expected_length } => {
self.stationarybootstrap(data, *expected_length)?
}
BlockType::Tapered { taper_function } => {
self.tapered_blockbootstrap(data, block_length, taper_function)?
}
};
bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
}
Ok(bootstrap_samples)
}
/// Moving-block bootstrap: fill a length-`n` resample with randomly
/// chosen overlapping blocks of `block_length` consecutive observations.
/// The caller (`block_bootstrap`) guarantees `block_length < n`.
fn moving_blockbootstrap(
    &mut self,
    data: &ArrayView1<F>,
    block_length: usize,
) -> StatsResult<Array1<F>> {
    let n = data.len();
    let n_blocks = n.div_ceil(block_length);
    let mut resample = Array1::zeros(n);
    let mut pos = 0;
    for _ in 0..n_blocks {
        if pos >= n {
            break;
        }
        // Valid block starts are 0..=(n - block_length) inclusive; the
        // previous exclusive upper bound could never select the block
        // ending at the last observation.
        let start_idx = self.rng.random_range(0..(n - block_length + 1));
        let copy_length = std::cmp::min(block_length, n - pos);
        for i in 0..copy_length {
            resample[pos + i] = data[start_idx + i];
        }
        pos += copy_length;
    }
    Ok(resample)
}
/// Circular-block bootstrap: blocks may start anywhere and wrap around
/// the end of the series, so every observation has equal selection
/// probability.
fn circular_blockbootstrap(
    &mut self,
    data: &ArrayView1<F>,
    block_length: usize,
) -> StatsResult<Array1<F>> {
    let n = data.len();
    let mut resample = Array1::zeros(n);
    let mut pos = 0;
    // Append wrap-around blocks until the resample is full; the final
    // block is truncated to fit.
    while pos < n {
        let start = self.rng.random_range(0..n);
        let take = std::cmp::min(block_length, n - pos);
        for offset in 0..take {
            resample[pos + offset] = data[(start + offset) % n];
        }
        pos += take;
    }
    Ok(resample)
}
/// Non-overlapping block bootstrap: partition the series into disjoint
/// blocks (plus a shorter remainder block, if any), then fill the
/// resample with randomly chosen blocks, truncating the last one.
fn non_overlapping_blockbootstrap(
&mut self,
data: &ArrayView1<F>,
block_length: usize,
) -> StatsResult<Array1<F>> {
let n = data.len();
let n_complete_blocks = n / block_length;
let remainder = n % block_length;
let mut blocks = Vec::new();
for i in 0..n_complete_blocks {
let start = i * block_length;
let end = start + block_length;
blocks.push(data.slice(scirs2_core::ndarray::s![start..end]).to_owned());
}
// Keep the trailing partial block so every observation can appear.
if remainder > 0 {
let start = n_complete_blocks * block_length;
blocks.push(data.slice(scirs2_core::ndarray::s![start..]).to_owned());
}
let mut resample = Array1::zeros(n);
let mut pos = 0;
while pos < n {
let block_idx = self.rng.random_range(0..blocks.len());
let block = &blocks[block_idx];
let copy_length = std::cmp::min(block.len(), n - pos);
for i in 0..copy_length {
resample[pos + i] = block[i];
}
pos += copy_length;
}
Ok(resample)
}
/// Stationary bootstrap: blocks have geometrically distributed random
/// lengths with mean `expected_length`, and wrap circularly around the
/// end of the series.
///
/// NOTE(review): `expected_length` is assumed to be >= 1; values below 1
/// degenerate to length-1 blocks and non-positive values are not
/// validated — confirm whether the caller guarantees this.
fn stationarybootstrap(
&mut self,
data: &ArrayView1<F>,
expected_length: f64,
) -> StatsResult<Array1<F>> {
let n = data.len();
// Continuation probability: each extra element occurs with prob 1 - p.
let p = 1.0 / expected_length; let mut resample = Array1::zeros(n);
let mut pos = 0;
while pos < n {
let start_idx = self.rng.random_range(0..n);
// Grow the block geometrically, capped so it fits the remainder.
let mut block_length = 1;
while self.rng.random::<f64>() > p && block_length < n - pos {
block_length += 1;
}
for i in 0..block_length {
if pos + i >= n {
break;
}
// Circular indexing: blocks may wrap past the end of the data.
let idx = (start_idx + i) % n;
resample[pos + i] = data[idx];
}
pos += block_length;
}
Ok(resample)
}
/// Tapered-block bootstrap: like the moving-block variant, but each
/// copied value is scaled by a taper weight that peaks at the block
/// center. The caller (`block_bootstrap`) guarantees `block_length < n`.
fn tapered_blockbootstrap(
    &mut self,
    data: &ArrayView1<F>,
    block_length: usize,
    taper_function: &TaperFunction,
) -> StatsResult<Array1<F>> {
    let n = data.len();
    let mut resample = Array1::zeros(n);
    let n_blocks = n.div_ceil(block_length);
    let mut pos = 0;
    for _ in 0..n_blocks {
        if pos >= n {
            break;
        }
        // Include the last admissible block start (off-by-one fix: the
        // previous exclusive bound skipped start n - block_length).
        let start_idx = self.rng.random_range(0..(n - block_length + 1));
        let copy_length = std::cmp::min(block_length, n - pos);
        for i in 0..copy_length {
            let weight = self.compute_taper_weight(i, copy_length, taper_function);
            // Each slot is written exactly once, so a plain assignment
            // replaces the previous add-to-zero; pos + i < n holds by
            // construction of copy_length.
            resample[pos + i] =
                data[start_idx + i] * F::from(weight).expect("Failed to convert to float");
        }
        pos += copy_length;
    }
    Ok(resample)
}
/// Taper weight for position `position` within a block of `block_length`,
/// using a normalized coordinate t in [0, 1].
fn compute_taper_weight(
    &self,
    position: usize,
    block_length: usize,
    taper_function: &TaperFunction,
) -> f64 {
    // A single-element block has no taper. The previous code divided by
    // zero here (block_length - 1 == 0) and produced NaN weights.
    if block_length <= 1 {
        return 1.0;
    }
    let t = position as f64 / (block_length - 1) as f64;
    match taper_function {
        // Triangular window: 0 at the edges, 1 at the center.
        TaperFunction::Linear => {
            if t <= 0.5 {
                2.0 * t
            } else {
                2.0 * (1.0 - t)
            }
        }
        // Raised-cosine (Hann-like) window.
        TaperFunction::Cosine => 0.5 * (1.0 - (std::f64::consts::PI * t).cos()),
        // Exponential decay away from the block center.
        TaperFunction::Exponential { decay_rate } => {
            let distance_from_center = (t - 0.5).abs();
            (-decay_rate * distance_from_center).exp()
        }
    }
}
fn bayesian_bootstrap<T>(
&mut self,
data: &ArrayView1<F>,
statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
) -> StatsResult<Array1<F>>
where
T: Into<F> + Copy + Send + Sync,
{
let n = data.len();
let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
for i in 0..self.config.n_bootstrap {
let mut weights = Array1::zeros(n);
let mut weight_sum = F::zero();
for j in 0..n {
let exp_sample = -self.rng.random::<f64>().ln(); weights[j] = F::from(exp_sample).expect("Failed to convert to float");
weight_sum = weight_sum + weights[j];
}
for j in 0..n {
weights[j] = weights[j] / weight_sum;
}
let mut resample = Array1::zeros(n);
for j in 0..n {
resample[j] =
data[j] * weights[j] * F::from(n).expect("Failed to convert to float");
}
bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
}
Ok(bootstrap_samples)
}
fn wild_bootstrap<T>(
&mut self,
data: &ArrayView1<F>,
distribution: &WildDistribution,
statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
) -> StatsResult<Array1<F>>
where
T: Into<F> + Copy + Send + Sync,
{
let n = data.len();
let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
for i in 0..self.config.n_bootstrap {
let mut resample = Array1::zeros(n);
for j in 0..n {
let multiplier = match distribution {
WildDistribution::Rademacher => {
if self.rng.random::<f64>() < 0.5 {
-1.0
} else {
1.0
}
}
WildDistribution::Mammen => {
let _golden_ratio = (1.0 + 5.0_f64.sqrt()) / 2.0;
let p = (5.0_f64.sqrt() + 1.0) / (2.0 * 5.0_f64.sqrt());
if self.rng.random::<f64>() < p {
-(5.0_f64.sqrt() - 1.0) / 2.0
} else {
(5.0_f64.sqrt() + 1.0) / 2.0
}
}
WildDistribution::Normal => {
let u1 = self.rng.random::<f64>();
let u2 = self.rng.random::<f64>();
(-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos()
}
WildDistribution::TwoPoint { prob_positive } => {
if self.rng.random::<f64>() < *prob_positive {
1.0
} else {
-1.0
}
}
};
resample[j] = data[j] * F::from(multiplier).expect("Failed to convert to float");
}
bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
}
Ok(bootstrap_samples)
}
/// Parametric bootstrap: draw each resample from the fitted parametric
/// distribution (the observed `data` is only used for its length) and
/// evaluate the statistic on it.
///
/// # Errors
/// Returns `InvalidArgument` for the `Custom` variant, which is not
/// implemented.
fn parametric_bootstrap<T>(
&mut self,
data: &ArrayView1<F>,
distribution_params: &ParametricBootstrapParams,
statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
) -> StatsResult<Array1<F>>
where
T: Into<F> + Copy + Send + Sync,
{
let n = data.len();
let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
for i in 0..self.config.n_bootstrap {
let resample = match distribution_params {
ParametricBootstrapParams::Normal { mean, std } => {
self.generate_normal_sample(n, *mean, *std)?
}
ParametricBootstrapParams::Exponential { rate } => {
self.generate_exponential_sample(n, *rate)?
}
ParametricBootstrapParams::Gamma { shape, scale } => {
self.generate_gamma_sample(n, *shape, *scale)?
}
ParametricBootstrapParams::Beta { alpha, beta } => {
self.generate_beta_sample(n, *alpha, *beta)?
}
ParametricBootstrapParams::Custom { name, .. } => {
return Err(StatsError::InvalidArgument(format!(
"Custom distribution '{}' not implemented",
name
)));
}
};
bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
}
Ok(bootstrap_samples)
}
/// Balanced bootstrap: every observation appears exactly `n_bootstrap`
/// times across the whole set of resamples. Concatenate `n_bootstrap`
/// copies of the index set, shuffle, and slice into resamples.
fn balanced_bootstrap<T>(
    &mut self,
    data: &ArrayView1<F>,
    statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
) -> StatsResult<Array1<F>>
where
    T: Into<F> + Copy + Send + Sync,
{
    let n = data.len();
    let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
    let total_samples = self.config.n_bootstrap * n;
    let mut all_indices = Vec::with_capacity(total_samples);
    for _ in 0..self.config.n_bootstrap {
        all_indices.extend(0..n);
    }
    // Fisher-Yates shuffle. The inclusive bound `0..(i + 1)` is required
    // for an unbiased permutation; the previous exclusive bound `0..i`
    // was Sattolo's algorithm, which never leaves an element in place
    // and only generates cyclic permutations.
    for i in (1..all_indices.len()).rev() {
        let j = self.rng.random_range(0..(i + 1));
        all_indices.swap(i, j);
    }
    for i in 0..self.config.n_bootstrap {
        let mut resample = Array1::zeros(n);
        let start_idx = i * n;
        for j in 0..n {
            resample[j] = data[all_indices[start_idx + j]];
        }
        bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
    }
    Ok(bootstrap_samples)
}
fn generate_normal_sample(&mut self, n: usize, mean: f64, std: f64) -> StatsResult<Array1<F>> {
let mut sample = Array1::zeros(n);
for i in 0..n {
let u1 = self.rng.random::<f64>();
let u2 = self.rng.random::<f64>();
let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos();
sample[i] = F::from(mean + std * z).expect("Failed to convert to float");
}
Ok(sample)
}
fn generate_exponential_sample(&mut self, n: usize, rate: f64) -> StatsResult<Array1<F>> {
let mut sample = Array1::zeros(n);
for i in 0..n {
let u = self.rng.random::<f64>();
let x = -u.ln() / rate;
sample[i] = F::from(x).expect("Failed to convert to float");
}
Ok(sample)
}
fn generate_gamma_sample(
&mut self,
n: usize,
shape: f64,
scale: f64,
) -> StatsResult<Array1<F>> {
let mut sample = Array1::zeros(n);
for i in 0..n {
let mean = shape * scale;
let std = (shape * scale * scale).sqrt();
let u1 = self.rng.random::<f64>();
let u2 = self.rng.random::<f64>();
let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos();
sample[i] = F::from((mean + std * z).max(0.0)).expect("Operation failed");
}
Ok(sample)
}
/// Draw `n` values in [0, 1] intended as Beta(alpha, beta) samples.
///
/// NOTE(review): this resembles Johnk's algorithm (x = U^(1/alpha),
/// y = V^(1/beta), return x / (x + y)) but omits the rejection step that
/// requires x + y <= 1, so the output distribution is only approximately
/// Beta — confirm whether exact sampling is required here.
fn generate_beta_sample(&mut self, n: usize, alpha: f64, beta: f64) -> StatsResult<Array1<F>> {
let mut sample = Array1::zeros(n);
for i in 0..n {
let x = self.rng.random::<f64>().powf(1.0 / alpha);
let y = self.rng.random::<f64>().powf(1.0 / beta);
let value = x / (x + y);
sample[i] = F::from(value).expect("Failed to convert to float");
}
Ok(sample)
}
/// Rule-of-thumb block length n^(1/3), clamped into [1, n/4].
fn optimal_block_length(&self, n: usize) -> usize {
    let rule_of_thumb = (n as f64).powf(1.0 / 3.0).ceil() as usize;
    rule_of_thumb.min(n / 4).max(1)
}
/// Dependence-adjusted sample size for block bootstraps: shrink `n` by a
/// first-order factor that grows with the block length.
fn compute_effective_samplesize(&self, n: usize) -> usize {
    let block_length = self
        .config
        .block_length
        .unwrap_or_else(|| self.optimal_block_length(n));
    let shrink_factor = 1.0 - (block_length as f64 - 1.0) / (2.0 * n as f64);
    (n as f64 * shrink_factor).ceil() as usize
}
/// Build percentile, basic, and (optionally) bias-corrected intervals
/// from the sorted bootstrap distribution.
///
/// NOTE(review): the "accelerated" interval currently just copies the
/// bias-corrected one (no acceleration constant is computed), and the
/// studentized interval is never filled in.
fn compute_confidence_intervals(
&self,
bootstrap_samples: &Array1<F>,
original_statistic: F,
_standard_error: F,
) -> StatsResult<BootstrapConfidenceIntervals<F>> {
let alpha = 1.0 - self.config.confidence_level;
let lower_percentile = alpha / 2.0;
let upper_percentile = 1.0 - alpha / 2.0;
// Sort a copy; NaNs compare as equal and stay where they land.
let mut sorted_samples = bootstrap_samples.to_vec();
sorted_samples.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted_samples.len();
// Quantile positions: floor for the lower tail, ceil - 1 for the upper.
let lower_idx = ((n as f64) * lower_percentile).floor() as usize;
let upper_idx = ((n as f64) * upper_percentile).ceil() as usize - 1;
let percentile = (
sorted_samples[lower_idx],
sorted_samples[upper_idx.min(n - 1)],
);
// Basic interval reflects the percentile bounds around the original
// statistic (note the swapped tails).
let basic = (
F::from(2.0).expect("Failed to convert constant to float") * original_statistic
- sorted_samples[upper_idx.min(n - 1)],
F::from(2.0).expect("Failed to convert constant to float") * original_statistic
- sorted_samples[lower_idx],
);
// Shift the percentile interval by the estimated bias when enabled.
let bias_corrected = if self.config.bias_correction {
let bias_correction =
self.compute_bias_correction(bootstrap_samples, original_statistic);
Some((
percentile.0 + bias_correction,
percentile.1 + bias_correction,
))
} else {
None
};
let bias_corrected_accelerated = if self.config.acceleration_correction {
bias_corrected
} else {
None
};
Ok(BootstrapConfidenceIntervals {
percentile,
basic,
bias_corrected,
bias_corrected_accelerated,
studentized: None, })
}
/// Bias estimate: mean of the bootstrap distribution minus the original
/// statistic. (The quantile-based z0 correction of full BCa is not
/// implemented; the previous version computed the below-statistic
/// proportion and then discarded it.)
fn compute_bias_correction(&self, bootstrap_samples: &Array1<F>, originalstatistic: F) -> F {
    self.compute_mean(bootstrap_samples) - originalstatistic
}
/// Assemble the diagnostic bundle for a finished run; `method_specific`
/// is left empty here.
fn compute_diagnostics(
    &self,
    bootstrap_samples: &Array1<F>,
    original_statistic: F,
) -> StatsResult<BootstrapDiagnostics<F>> {
    Ok(BootstrapDiagnostics {
        distribution_stats: self.compute_distribution_stats(bootstrap_samples)?,
        quality_metrics: self.compute_quality_metrics(bootstrap_samples, original_statistic)?,
        convergence_info: self.compute_convergence_info(bootstrap_samples)?,
        method_specific: HashMap::new(),
    })
}
/// Shape statistics of the bootstrap distribution: moment-based skewness
/// and excess kurtosis (n denominator over the sample std), plus min/max.
///
/// NOTE(review): `jarque_bera` and `anderson_darling` are placeholders
/// and are always reported as zero.
fn compute_distribution_stats(
&self,
samples: &Array1<F>,
) -> StatsResult<BootstrapDistributionStats<F>> {
let mean = self.compute_mean(samples);
let std = self.compute_std(samples);
// Degenerate (zero-variance) samples get zero skewness/kurtosis.
let skewness = if std > F::zero() {
let skew_sum = samples
.iter()
.map(|&x| {
let z = (x - mean) / std;
z * z * z
})
.fold(F::zero(), |acc, x| acc + x);
skew_sum / F::from(samples.len()).expect("Operation failed")
} else {
F::zero()
};
// Excess kurtosis: subtract 3 so a normal distribution maps to zero.
let kurtosis = if std > F::zero() {
let kurt_sum = samples
.iter()
.map(|&x| {
let z = (x - mean) / std;
z * z * z * z
})
.fold(F::zero(), |acc, x| acc + x);
kurt_sum / F::from(samples.len()).expect("Operation failed")
- F::from(3.0).expect("Failed to convert constant to float")
} else {
F::zero()
};
let min_value = samples.iter().copied().fold(F::infinity(), F::min);
let max_value = samples.iter().copied().fold(F::neg_infinity(), F::max);
Ok(BootstrapDistributionStats {
skewness,
kurtosis,
jarque_bera: F::zero(), anderson_darling: F::zero(), min_value,
max_value,
})
}
/// Monte Carlo quality metrics: standard error of the bootstrap mean
/// (std / sqrt(B)); `coverage_probability` simply echoes the configured
/// confidence level, and `efficiency`/`stability` are placeholders.
fn compute_quality_metrics(
&self,
samples: &Array1<F>,
_original_statistic: F,
) -> StatsResult<QualityMetrics<F>> {
let std_error = self.compute_std(samples);
let mc_std_error =
std_error / F::from((samples.len() as f64).sqrt()).expect("Operation failed");
Ok(QualityMetrics {
mc_standard_error: mc_std_error,
coverage_probability: F::from(self.config.confidence_level)
.expect("Failed to convert to float"),
efficiency: None, stability: F::one(), })
}
/// Heuristic convergence check: at least 100 resamples counts as
/// converged. Stability fields are fixed placeholders.
fn compute_convergence_info(&self, samples: &Array1<F>) -> StatsResult<ConvergenceInfo<F>> {
    let sample_count = samples.len();
    let converged = sample_count >= 100;
    Ok(ConvergenceInfo {
        converged,
        convergence_samplesize: converged.then_some(sample_count),
        mean_stability: F::one(),
        variance_stability: F::one(),
    })
}
/// Arithmetic mean; an empty array maps to zero rather than NaN.
fn compute_mean(&self, data: &Array1<F>) -> F {
    match data.len() {
        0 => F::zero(),
        len => data.sum() / F::from(len).expect("Operation failed"),
    }
}
/// Sample standard deviation (n - 1 denominator); fewer than two points
/// yields zero.
fn compute_std(&self, data: &Array1<F>) -> F {
    let len = data.len();
    if len < 2 {
        return F::zero();
    }
    let mean = self.compute_mean(data);
    let sum_sq = data
        .iter()
        .fold(F::zero(), |acc, &x| acc + (x - mean) * (x - mean));
    (sum_sq / F::from(len - 1).expect("Operation failed")).sqrt()
}
}
/// Convenience wrapper: run a stratified bootstrap with the supplied (or
/// default) configuration. `strata[i]` labels `data[i]`.
#[allow(dead_code)]
pub fn stratified_bootstrap<F, T>(
    data: &ArrayView1<F>,
    strata: &[usize],
    statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
    config: Option<AdvancedBootstrapConfig>,
) -> StatsResult<AdvancedBootstrapResult<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + FromPrimitive
        + Copy
        + Send
        + Sync
        + std::fmt::Display,
    T: Into<F> + Copy + Send + Sync,
{
    let mut cfg = config.unwrap_or_default();
    cfg.bootstrap_type = BootstrapType::Stratified {
        strata: strata.to_vec(),
    };
    AdvancedBootstrapProcessor::new(cfg).bootstrap(data, statistic_fn)
}
/// Convenience wrapper: run a block bootstrap of the given variant with
/// the supplied (or default) configuration.
#[allow(dead_code)]
pub fn block_bootstrap<F, T>(
    data: &ArrayView1<F>,
    block_type: BlockType,
    statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
    config: Option<AdvancedBootstrapConfig>,
) -> StatsResult<AdvancedBootstrapResult<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + FromPrimitive
        + Copy
        + Send
        + Sync
        + std::fmt::Display,
    T: Into<F> + Copy + Send + Sync,
{
    let mut cfg = config.unwrap_or_default();
    cfg.bootstrap_type = BootstrapType::Block { block_type };
    AdvancedBootstrapProcessor::new(cfg).bootstrap(data, statistic_fn)
}
#[allow(dead_code)]
pub fn moving_block_bootstrap<F, T>(
data: &ArrayView1<F>,
statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
block_length: Option<usize>,
n_bootstrap: Option<usize>,
) -> StatsResult<AdvancedBootstrapResult<F>>
where
F: Float
+ NumCast
+ SimdUnifiedOps
+ Zero
+ One
+ FromPrimitive
+ Copy
+ Send
+ Sync
+ std::fmt::Display,
T: Into<F> + Copy + Send + Sync,
{
let mut config = AdvancedBootstrapConfig::default();
config.bootstrap_type = BootstrapType::Block {
block_type: BlockType::Moving,
};
config.block_length = block_length;
config.n_bootstrap = n_bootstrap.unwrap_or(1000);
let mut processor = AdvancedBootstrapProcessor::new(config);
processor.bootstrap(data, statistic_fn)
}
#[allow(dead_code)]
pub fn circular_block_bootstrap<F, T>(
data: &ArrayView1<F>,
statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
block_length: Option<usize>,
n_bootstrap: Option<usize>,
) -> StatsResult<AdvancedBootstrapResult<F>>
where
F: Float
+ NumCast
+ SimdUnifiedOps
+ Zero
+ One
+ FromPrimitive
+ Copy
+ Send
+ Sync
+ std::fmt::Display,
T: Into<F> + Copy + Send + Sync,
{
let mut config = AdvancedBootstrapConfig::default();
config.bootstrap_type = BootstrapType::Block {
block_type: BlockType::Circular,
};
config.block_length = block_length;
config.n_bootstrap = n_bootstrap.unwrap_or(1000);
let mut processor = AdvancedBootstrapProcessor::new(config);
processor.bootstrap(data, statistic_fn)
}
#[allow(dead_code)]
pub fn stationary_bootstrap<F, T>(
data: &ArrayView1<F>,
statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
expected_block_length: f64,
n_bootstrap: Option<usize>,
) -> StatsResult<AdvancedBootstrapResult<F>>
where
F: Float
+ NumCast
+ SimdUnifiedOps
+ Zero
+ One
+ FromPrimitive
+ Copy
+ Send
+ Sync
+ std::fmt::Display,
T: Into<F> + Copy + Send + Sync,
{
let mut config = AdvancedBootstrapConfig::default();
config.bootstrap_type = BootstrapType::Block {
block_type: BlockType::Stationary {
expected_length: expected_block_length,
},
};
config.n_bootstrap = n_bootstrap.unwrap_or(1000);
let mut processor = AdvancedBootstrapProcessor::new(config);
processor.bootstrap(data, statistic_fn)
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::array;
// Basic bootstrap: seeded run completes all resamples and reproduces
// the sample mean as the original statistic.
#[test]
fn test_basicbootstrap() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
let mean_fn = |x: &ArrayView1<f64>| -> StatsResult<f64> { Ok(x.sum() / x.len() as f64) };
let config = AdvancedBootstrapConfig {
n_bootstrap: 100,
seed: Some(42),
..Default::default()
};
let mut processor = AdvancedBootstrapProcessor::new(config);
let result = processor
.bootstrap(&data.view(), mean_fn)
.expect("Operation failed");
assert_eq!(result.n_successful, 100);
assert!(result.bootstrap_samples.len() == 100);
assert!((result.original_statistic - 3.0).abs() < 1e-10);
}
// Stratified bootstrap via the free-function wrapper: three strata of
// two observations each; checks completion and method tagging.
#[test]
fn test_stratifiedbootstrap() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
let strata = vec![0, 0, 1, 1, 2, 2]; let mean_fn = |x: &ArrayView1<f64>| -> StatsResult<f64> { Ok(x.sum() / x.len() as f64) };
let result = stratified_bootstrap(
&data.view(),
&strata,
mean_fn,
Some(AdvancedBootstrapConfig {
n_bootstrap: 50,
seed: Some(123),
..Default::default()
}),
)
.expect("Operation failed");
assert_eq!(result.n_successful, 50);
assert!(matches!(result.method, BootstrapType::Stratified { .. }));
}
// Moving-block bootstrap: block bootstraps should also report an
// effective sample size.
#[test]
fn test_moving_blockbootstrap() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
let mean_fn = |x: &ArrayView1<f64>| -> StatsResult<f64> { Ok(x.sum() / x.len() as f64) };
let result = moving_block_bootstrap(
&data.view(),
mean_fn,
Some(3), Some(50), )
.expect("Operation failed");
assert_eq!(result.n_successful, 50);
assert!(result.effective_samplesize.is_some());
}
// Circular-block bootstrap wrapper completes all requested resamples.
#[test]
fn test_circular_blockbootstrap() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
let mean_fn = |x: &ArrayView1<f64>| -> StatsResult<f64> { Ok(x.sum() / x.len() as f64) };
let result = circular_block_bootstrap(&data.view(), mean_fn, Some(2), Some(30))
.expect("Operation failed");
assert_eq!(result.n_successful, 30);
}
// Interval sanity: lower bounds never exceed upper bounds.
#[test]
fn test_confidence_intervals() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
let mean_fn = |x: &ArrayView1<f64>| -> StatsResult<f64> { Ok(x.sum() / x.len() as f64) };
let config = AdvancedBootstrapConfig {
n_bootstrap: 200,
confidence_level: 0.95,
seed: Some(42),
..Default::default()
};
let mut processor = AdvancedBootstrapProcessor::new(config);
let result = processor
.bootstrap(&data.view(), mean_fn)
.expect("Operation failed");
let ci = &result.confidence_intervals;
assert!(ci.percentile.0 <= ci.percentile.1);
assert!(ci.basic.0 <= ci.basic.1);
}
// Diagnostics sanity with a variance statistic: 100 resamples count as
// converged, and min <= max over the bootstrap distribution.
#[test]
fn test_bootstrap_diagnostics() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
let var_fn = |x: &ArrayView1<f64>| -> StatsResult<f64> {
let mean = x.sum() / x.len() as f64;
let var = x.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / (x.len() - 1) as f64;
Ok(var)
};
let config = AdvancedBootstrapConfig {
n_bootstrap: 100,
seed: Some(456),
..Default::default()
};
let mut processor = AdvancedBootstrapProcessor::new(config);
let result = processor
.bootstrap(&data.view(), var_fn)
.expect("Operation failed");
assert!(result.diagnostics.convergence_info.converged);
assert!(
result.diagnostics.distribution_stats.min_value
<= result.diagnostics.distribution_stats.max_value
);
}
}