1use crate::error::{StatsError, StatsResult};
8use scirs2_core::ndarray::{Array1, ArrayView1};
9use scirs2_core::numeric::{Float, FromPrimitive, NumCast, One, Zero};
10use scirs2_core::{parallel_ops::*, random::prelude::*, simd_ops::SimdUnifiedOps, validation::*};
11use std::collections::HashMap;
12use std::marker::PhantomData;
13
/// Configuration for advanced bootstrap resampling.
#[derive(Debug, Clone)]
pub struct AdvancedBootstrapConfig {
    /// Number of bootstrap resamples to draw.
    pub n_bootstrap: usize,
    /// Optional RNG seed for reproducible resampling; `None` seeds from the thread RNG.
    pub seed: Option<u64>,
    /// Resampling scheme to use (basic, stratified, block, ...).
    pub bootstrap_type: BootstrapType,
    /// Whether to parallelize resampling across threads.
    pub parallel: bool,
    /// Confidence level for the reported intervals (e.g. 0.95).
    pub confidence_level: f64,
    /// Block length for block bootstraps; `None` selects a data-driven default.
    pub block_length: Option<usize>,
    /// Whether to compute bias-corrected confidence intervals.
    pub bias_correction: bool,
    /// Whether to additionally report an accelerated (BCa-style) interval.
    pub acceleration_correction: bool,
    /// Upper bound on worker threads; `None` uses the pool default.
    /// NOTE(review): not consulted by the code visible here — confirm it is honored elsewhere.
    pub max_threads: Option<usize>,
}
36
impl Default for AdvancedBootstrapConfig {
    /// Sensible defaults: 1000 basic-bootstrap resamples, parallel execution,
    /// 95% confidence level, both bias corrections enabled.
    fn default() -> Self {
        Self {
            n_bootstrap: 1000,
            seed: None,
            bootstrap_type: BootstrapType::Basic,
            parallel: true,
            confidence_level: 0.95,
            block_length: None, // data-driven default chosen at run time
            bias_correction: true,
            acceleration_correction: true,
            max_threads: None, // use the thread pool's default
        }
    }
}
52
/// Resampling scheme used to generate bootstrap replicates.
#[derive(Debug, Clone, PartialEq)]
pub enum BootstrapType {
    /// Plain i.i.d. resampling with replacement.
    Basic,
    /// Resampling within strata, preserving per-stratum sample sizes.
    Stratified {
        /// Stratum label for each observation (same length as the data).
        strata: Vec<usize>,
    },
    /// Block resampling for dependent (time-series) data.
    Block {
        /// How blocks are formed and combined.
        block_type: BlockType,
    },
    /// Bayesian bootstrap: Dirichlet-weighted data instead of index resampling.
    Bayesian,
    /// Wild bootstrap: multiply observations by random mean-zero weights.
    Wild {
        /// Distribution of the random multipliers.
        distribution: WildDistribution,
    },
    /// Draw resamples from a fitted parametric distribution.
    Parametric {
        /// Parameters of the generating distribution.
        distribution_params: ParametricBootstrapParams,
    },
    /// Balanced bootstrap: each observation appears equally often overall.
    Balanced,
}
83
/// Block-construction strategy for block bootstraps.
#[derive(Debug, Clone, PartialEq)]
pub enum BlockType {
    /// Overlapping blocks of fixed length (moving-block bootstrap).
    Moving,
    /// Fixed-length blocks that may wrap around the series end.
    Circular,
    /// Disjoint consecutive blocks.
    NonOverlapping,
    /// Geometrically distributed block lengths (stationary bootstrap).
    Stationary {
        /// Mean block length of the geometric distribution.
        expected_length: f64,
    },
    /// Fixed-length blocks with observations weighted by a taper window.
    Tapered {
        /// Shape of the taper applied within each block.
        taper_function: TaperFunction,
    },
}
104
/// Within-block weighting shape for the tapered block bootstrap.
#[derive(Debug, Clone, PartialEq)]
pub enum TaperFunction {
    /// Triangular weights peaking at the block centre.
    Linear,
    /// Cosine-based weights.
    Cosine,
    /// Weights decaying exponentially away from the block centre.
    Exponential { decay_rate: f64 },
}
115
/// Multiplier distribution for the wild bootstrap.
#[derive(Debug, Clone, PartialEq)]
pub enum WildDistribution {
    /// +1 or -1 with equal probability.
    Rademacher,
    /// Mammen's two-point distribution.
    Mammen,
    /// Standard normal multipliers.
    Normal,
    /// +1 with probability `prob_positive`, otherwise -1.
    TwoPoint { prob_positive: f64 },
}
128
/// Fitted distribution used by the parametric bootstrap.
#[derive(Debug, Clone, PartialEq)]
pub enum ParametricBootstrapParams {
    /// Normal with the given mean and standard deviation.
    Normal { mean: f64, std: f64 },
    /// Exponential with the given rate parameter.
    Exponential { rate: f64 },
    /// Gamma with shape and scale parameters.
    Gamma { shape: f64, scale: f64 },
    /// Beta with the given shape parameters.
    Beta { alpha: f64, beta: f64 },
    /// User-defined distribution; currently rejected at run time.
    Custom {
        /// Distribution name (used in the error message).
        name: String,
        /// Named distribution parameters.
        params: HashMap<String, f64>,
    },
}
148
/// Full output of an advanced bootstrap run.
#[derive(Debug, Clone)]
pub struct AdvancedBootstrapResult<F> {
    /// Statistic value for each bootstrap resample.
    pub bootstrap_samples: Array1<F>,
    /// Statistic evaluated on the original data.
    pub original_statistic: F,
    /// Mean of the bootstrap replicates.
    pub bootstrap_mean: F,
    /// Standard deviation of the replicates (bootstrap standard error).
    pub standard_error: F,
    /// Estimated bias: bootstrap mean minus the original statistic.
    pub bias: F,
    /// Confidence intervals computed from the replicates.
    pub confidence_intervals: BootstrapConfidenceIntervals<F>,
    /// The resampling scheme that produced these results.
    pub method: BootstrapType,
    /// Number of replicates that completed successfully.
    pub n_successful: usize,
    /// Effective sample size (reported for block bootstraps only).
    pub effective_samplesize: Option<usize>,
    /// Distribution, quality, and convergence diagnostics.
    pub diagnostics: BootstrapDiagnostics<F>,
}
173
/// Confidence intervals derived from the bootstrap distribution.
#[derive(Debug, Clone)]
pub struct BootstrapConfidenceIntervals<F> {
    /// Percentile interval (empirical quantiles of the replicates).
    pub percentile: (F, F),
    /// Basic (reflected) interval: 2*theta_hat minus the opposite quantiles.
    pub basic: (F, F),
    /// Bias-corrected interval, when enabled in the configuration.
    pub bias_corrected: Option<(F, F)>,
    /// Bias-corrected and accelerated (BCa-style) interval, when enabled.
    pub bias_corrected_accelerated: Option<(F, F)>,
    /// Studentized interval; not currently produced.
    pub studentized: Option<(F, F)>,
}
188
/// Diagnostics accompanying a bootstrap run.
#[derive(Debug, Clone)]
pub struct BootstrapDiagnostics<F> {
    /// Shape statistics of the bootstrap distribution.
    pub distribution_stats: BootstrapDistributionStats<F>,
    /// Monte-Carlo quality metrics.
    pub quality_metrics: QualityMetrics<F>,
    /// Convergence heuristics.
    pub convergence_info: ConvergenceInfo<F>,
    /// Extra metrics specific to the resampling scheme.
    pub method_specific: HashMap<String, F>,
}
201
/// Shape statistics of the bootstrap replicate distribution.
#[derive(Debug, Clone)]
pub struct BootstrapDistributionStats<F> {
    /// Sample skewness of the replicates.
    pub skewness: F,
    /// Excess kurtosis of the replicates.
    pub kurtosis: F,
    /// Jarque–Bera normality statistic.
    pub jarque_bera: F,
    /// Anderson–Darling statistic.
    /// NOTE(review): reported as a placeholder zero by the current code.
    pub anderson_darling: F,
    /// Smallest replicate value.
    pub min_value: F,
    /// Largest replicate value.
    pub max_value: F,
}
218
/// Monte-Carlo quality metrics for a bootstrap run.
#[derive(Debug, Clone)]
pub struct QualityMetrics<F> {
    /// Monte-Carlo standard error of the bootstrap mean.
    pub mc_standard_error: F,
    /// Coverage probability associated with the intervals
    /// (currently the nominal confidence level, not an estimate).
    pub coverage_probability: F,
    /// Relative efficiency versus a reference method, when available.
    pub efficiency: Option<F>,
    /// Stability score of the estimates.
    pub stability: F,
}
231
/// Convergence heuristics for a bootstrap run.
#[derive(Debug, Clone)]
pub struct ConvergenceInfo<F> {
    /// Whether the run is considered converged.
    pub converged: bool,
    /// Number of replicates at which convergence was declared, if any.
    pub convergence_samplesize: Option<usize>,
    /// Stability of the running mean.
    pub mean_stability: F,
    /// Stability of the running variance.
    pub variance_stability: F,
}
244
/// Stateful bootstrap engine: owns the configuration and the RNG used by the
/// sequential resampling paths.
pub struct AdvancedBootstrapProcessor<F> {
    // Resampling configuration.
    config: AdvancedBootstrapConfig,
    // RNG for sequential paths; parallel paths spawn thread-local RNGs.
    rng: StdRng,
    // Marks the element type `F` without storing a value of that type.
    _phantom: PhantomData<F>,
}
251
252impl<F> AdvancedBootstrapProcessor<F>
253where
254 F: Float
255 + NumCast
256 + SimdUnifiedOps
257 + Zero
258 + One
259 + FromPrimitive
260 + Copy
261 + Send
262 + Sync
263 + std::fmt::Display,
264{
265 pub fn new(config: AdvancedBootstrapConfig) -> Self {
267 let rng = match config.seed {
268 Some(seed) => StdRng::seed_from_u64(seed),
269 None => StdRng::from_rng(&mut thread_rng()),
270 };
271
272 Self {
273 config,
274 rng,
275 _phantom: PhantomData,
276 }
277 }
278
    /// Runs the configured bootstrap on `data` and returns the replicates
    /// together with confidence intervals and diagnostics.
    ///
    /// `statistic_fn` is evaluated once on the original data and once per
    /// bootstrap resample; any error it returns aborts the run.
    ///
    /// # Errors
    /// Returns an error if `data` is empty or contains non-finite values, or
    /// if the selected resampling scheme rejects its parameters.
    pub fn bootstrap<T>(
        &mut self,
        data: &ArrayView1<F>,
        statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
    ) -> StatsResult<AdvancedBootstrapResult<F>>
    where
        T: Into<F> + Copy + Send + Sync,
    {
        checkarray_finite(data, "data")?;

        if data.is_empty() {
            return Err(StatsError::InvalidArgument(
                "Data cannot be empty".to_string(),
            ));
        }

        // Reference value of the statistic on the original sample.
        let original_statistic = statistic_fn(data)?.into();

        // Cloned so the match does not hold a borrow of self.config across
        // the `&mut self` resampling calls below.
        let bootstrap_type = self.config.bootstrap_type.clone();
        let bootstrap_samples = match bootstrap_type {
            BootstrapType::Basic => self.basic_bootstrap(data, statistic_fn)?,
            BootstrapType::Stratified { strata } => {
                self.stratified_bootstrap(data, &strata, statistic_fn)?
            }
            BootstrapType::Block { block_type } => {
                self.block_bootstrap(data, &block_type, statistic_fn)?
            }
            BootstrapType::Bayesian => self.bayesian_bootstrap(data, statistic_fn)?,
            BootstrapType::Wild { distribution } => {
                self.wild_bootstrap(data, &distribution, statistic_fn)?
            }
            BootstrapType::Parametric {
                distribution_params,
            } => self.parametric_bootstrap(data, &distribution_params, statistic_fn)?,
            BootstrapType::Balanced => self.balanced_bootstrap(data, statistic_fn)?,
        };

        // Summary moments of the bootstrap distribution.
        let bootstrap_mean = self.compute_mean(&bootstrap_samples);
        let standard_error = self.compute_std(&bootstrap_samples);
        let bias = bootstrap_mean - original_statistic;

        let confidence_intervals = self.compute_confidence_intervals(
            &bootstrap_samples,
            original_statistic,
            standard_error,
        )?;

        let diagnostics = self.compute_diagnostics(&bootstrap_samples, original_statistic)?;

        // Effective sample size is only meaningful for dependent-data (block) schemes.
        let effective_samplesize = match &self.config.bootstrap_type {
            BootstrapType::Block { .. } => Some(self.compute_effective_samplesize(data.len())),
            _ => None,
        };

        Ok(AdvancedBootstrapResult {
            bootstrap_samples,
            original_statistic,
            bootstrap_mean,
            standard_error,
            bias,
            confidence_intervals,
            method: self.config.bootstrap_type.clone(),
            // Every replicate either succeeded or aborted the run via `?`,
            // so all n_bootstrap replicates were computed at this point.
            n_successful: self.config.n_bootstrap,
            effective_samplesize,
            diagnostics,
        })
    }
353
354 fn basic_bootstrap<T>(
356 &mut self,
357 data: &ArrayView1<F>,
358 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
359 ) -> StatsResult<Array1<F>>
360 where
361 T: Into<F> + Copy + Send + Sync,
362 {
363 let n = data.len();
364 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
365
366 if self.config.n_bootstrap >= 64 && n >= 32 {
368 return self.basic_bootstrap_simd_ultra(data, statistic_fn);
369 }
370
371 if self.config.parallel && self.config.n_bootstrap > 100 {
372 let samples: Result<Vec<_>, _> = (0..self.config.n_bootstrap)
374 .into_par_iter()
375 .map(|_| {
376 let mut local_rng = { StdRng::from_rng(&mut thread_rng()) };
377 let mut resample = Array1::zeros(n);
378
379 for i in 0..n {
380 let idx = local_rng.random_range(0..n);
381 resample[i] = data[idx];
382 }
383
384 statistic_fn(&resample.view()).map(|s| s.into())
385 })
386 .collect();
387
388 let sample_values = samples?;
389 for (i, value) in sample_values.into_iter().enumerate() {
390 bootstrap_samples[i] = value;
391 }
392 } else {
393 for i in 0..self.config.n_bootstrap {
395 let mut resample = Array1::zeros(n);
396
397 for j in 0..n {
398 let idx = self.rng.random_range(0..n);
399 resample[j] = data[idx];
400 }
401
402 bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
403 }
404 }
405
406 Ok(bootstrap_samples)
407 }
408
409 fn basic_bootstrap_simd_ultra<T>(
411 &mut self,
412 data: &ArrayView1<F>,
413 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
414 ) -> StatsResult<Array1<F>>
415 where
416 T: Into<F> + Copy + Send + Sync,
417 {
418 use scirs2_core::simd_ops::PlatformCapabilities;
419
420 let capabilities = PlatformCapabilities::detect();
421 let n: usize = data.len();
422 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
423
424 let chunk_size: usize = if capabilities.has_avx512() {
426 16
427 } else if capabilities.has_avx2() {
428 8
429 } else {
430 4
431 };
432 let num_chunks: usize = self.config.n_bootstrap.div_ceil(chunk_size);
433
434 let mut resample_indices = Vec::<usize>::with_capacity(chunk_size * n);
436 let mut resample_values = Vec::<F>::with_capacity(chunk_size * n);
437 let mut batch_statistics = Vec::with_capacity(chunk_size);
438
439 if self.config.parallel && num_chunks > 1 {
440 let chunk_results: Result<Vec<_>, _> = (0..num_chunks)
442 .into_par_iter()
443 .map(|chunk_idx| {
444 let start_bootstrap = chunk_idx * chunk_size;
445 let end_bootstrap =
446 std::cmp::min(start_bootstrap + chunk_size, self.config.n_bootstrap);
447 let current_chunk_size = end_bootstrap - start_bootstrap;
448
449 let mut local_rng = StdRng::from_rng(&mut thread_rng());
450 let mut chunk_statistics = Vec::with_capacity(current_chunk_size);
451
452 let mut local_indices = Vec::with_capacity(current_chunk_size * n);
454 for _ in 0..current_chunk_size {
455 for _ in 0..n {
456 local_indices.push(local_rng.random_range(0..n));
457 }
458 }
459
460 let mut local_values = Vec::with_capacity(current_chunk_size * n);
462 if capabilities.has_avx2() && n >= 8 {
463 for bootstrap_idx in 0..current_chunk_size {
465 let indices_start = bootstrap_idx * n;
466 let indices_slice = &local_indices[indices_start..indices_start + n];
467
468 let data_f32: Vec<f32> = data
470 .iter()
471 .map(|&x| x.to_f64().expect("Operation failed") as f32)
472 .collect();
473
474 let mut gathered_values = vec![0.0f32; n];
476 for (i, &idx) in indices_slice.iter().enumerate() {
477 gathered_values[i] = data_f32[idx];
478 }
479
480 local_values.extend(gathered_values);
481 }
482 } else {
483 for &idx in &local_indices {
485 local_values.push(data[idx].to_f64().expect("Operation failed") as f32);
486 }
487 }
488
489 for bootstrap_idx in 0..current_chunk_size {
491 let values_start = bootstrap_idx * n;
492 let values_slice = &local_values[values_start..values_start + n];
493
494 let mut resample = Array1::zeros(n);
496 for (i, &val) in values_slice.iter().enumerate() {
497 resample[i] = F::from(val as f64).expect("Failed to convert to float");
498 }
499
500 let statistic = statistic_fn(&resample.view())?.into();
501 chunk_statistics.push(statistic);
502 }
503
504 Ok::<Vec<F>, StatsError>(chunk_statistics)
505 })
506 .collect();
507
508 let all_chunk_results = chunk_results?;
509 let mut result_idx = 0;
510 for chunk_result in all_chunk_results {
511 for statistic in chunk_result {
512 if result_idx < self.config.n_bootstrap {
513 bootstrap_samples[result_idx] = statistic;
514 result_idx += 1;
515 }
516 }
517 }
518 } else {
519 for chunk_idx in 0..num_chunks {
521 let start_bootstrap = chunk_idx * chunk_size;
522 let end_bootstrap =
523 std::cmp::min(start_bootstrap + chunk_size, self.config.n_bootstrap);
524 let current_chunk_size = end_bootstrap - start_bootstrap;
525
526 if current_chunk_size == 0 {
527 break;
528 }
529
530 resample_indices.clear();
532 for _ in 0..current_chunk_size {
533 for _ in 0..n {
534 resample_indices.push(self.rng.random_range(0..n));
535 }
536 }
537
538 resample_values.clear();
540 if capabilities.has_avx2() && n >= 8 {
541 let data_f32: Vec<f32> = data
543 .iter()
544 .map(|&x| x.to_f64().expect("Operation failed") as f32)
545 .collect();
546
547 for bootstrap_idx in 0..current_chunk_size {
548 let indices_start = bootstrap_idx * n;
549 let indices_slice = &resample_indices[indices_start..indices_start + n];
550
551 for &idx in indices_slice {
552 resample_values
553 .push(F::from(data_f32[idx]).expect("Failed to convert to float"));
554 }
555 }
556 } else {
557 for &idx in &resample_indices {
559 resample_values.push(data[idx]);
560 }
561 }
562
563 batch_statistics.clear();
565 for bootstrap_idx in 0..current_chunk_size {
566 let values_start = bootstrap_idx * n;
567 let values_slice = &resample_values[values_start..values_start + n];
568
569 let mut resample = Array1::zeros(n);
571 for (i, &val) in values_slice.iter().enumerate() {
572 resample[i] = val;
573 }
574
575 let statistic = statistic_fn(&resample.view())?.into();
576 batch_statistics.push(statistic);
577 }
578
579 for (i, &statistic) in batch_statistics.iter().enumerate() {
581 let result_idx = start_bootstrap + i;
582 if result_idx < self.config.n_bootstrap {
583 bootstrap_samples[result_idx] = statistic;
584 }
585 }
586 }
587 }
588
589 Ok(bootstrap_samples)
590 }
591
    /// Stratified bootstrap: resamples with replacement *within* each stratum,
    /// so every resample preserves the original per-stratum sample sizes.
    ///
    /// `strata[i]` is the stratum label of `data[i]`.
    ///
    /// # Errors
    /// Returns `DimensionMismatch` if `data` and `strata` differ in length.
    fn stratified_bootstrap<T>(
        &mut self,
        data: &ArrayView1<F>,
        strata: &[usize],
        statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
    ) -> StatsResult<Array1<F>>
    where
        T: Into<F> + Copy + Send + Sync,
    {
        if data.len() != strata.len() {
            return Err(StatsError::DimensionMismatch(
                "Data and strata must have same length".to_string(),
            ));
        }

        // Group (original index, value) pairs by stratum label.
        let mut strata_groups: HashMap<usize, Vec<(usize, F)>> = HashMap::new();
        for (i, (&value, &stratum)) in data.iter().zip(strata.iter()).enumerate() {
            strata_groups.entry(stratum).or_default().push((i, value));
        }

        let n = data.len();
        let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);

        for i in 0..self.config.n_bootstrap {
            let mut resample = Array1::zeros(n);
            let mut resample_idx = 0;

            // NOTE(review): HashMap iteration order is unspecified, so the
            // stratum order inside `resample` varies between runs; this only
            // matters for order-sensitive statistics.
            for groupdata in strata_groups.values() {
                let groupsize = groupdata.len();

                // Draw `groupsize` observations with replacement from this stratum.
                for _ in 0..groupsize {
                    let idx = self.rng.random_range(0..groupsize);
                    resample[resample_idx] = groupdata[idx].1;
                    resample_idx += 1;
                }
            }

            bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
        }

        Ok(bootstrap_samples)
    }
637
    /// Block bootstrap dispatcher for dependent (time-series) data: builds each
    /// resample from contiguous blocks according to `block_type`, preserving
    /// short-range dependence within blocks.
    ///
    /// Uses `config.block_length` when set, otherwise a data-driven default.
    ///
    /// # Errors
    /// Returns `InvalidArgument` if the resolved block length is not smaller
    /// than the data length.
    fn block_bootstrap<T>(
        &mut self,
        data: &ArrayView1<F>,
        block_type: &BlockType,
        statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
    ) -> StatsResult<Array1<F>>
    where
        T: Into<F> + Copy + Send + Sync,
    {
        let n = data.len();
        let block_length = self
            .config
            .block_length
            .unwrap_or_else(|| self.optimal_block_length(n));

        if block_length >= n {
            return Err(StatsError::InvalidArgument(
                "Block length must be less than data length".to_string(),
            ));
        }

        let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);

        for i in 0..self.config.n_bootstrap {
            // One full-length resample per replicate, built block by block.
            let resample = match block_type {
                BlockType::Moving => self.moving_blockbootstrap(data, block_length)?,
                BlockType::Circular => self.circular_blockbootstrap(data, block_length)?,
                BlockType::NonOverlapping => {
                    self.non_overlapping_blockbootstrap(data, block_length)?
                }
                BlockType::Stationary { expected_length } => {
                    self.stationarybootstrap(data, *expected_length)?
                }
                BlockType::Tapered { taper_function } => {
                    self.tapered_blockbootstrap(data, block_length, taper_function)?
                }
            };

            bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
        }

        Ok(bootstrap_samples)
    }
682
683 fn moving_blockbootstrap(
685 &mut self,
686 data: &ArrayView1<F>,
687 block_length: usize,
688 ) -> StatsResult<Array1<F>> {
689 let n = data.len();
690 let n_blocks = n.div_ceil(block_length); let mut resample = Array1::zeros(n);
692 let mut pos = 0;
693
694 for _ in 0..n_blocks {
695 if pos >= n {
696 break;
697 }
698
699 let start_idx = self.rng.random_range(0..(n - block_length));
700 let copy_length = std::cmp::min(block_length, n - pos);
701
702 for i in 0..copy_length {
703 resample[pos + i] = data[start_idx + i];
704 }
705 pos += copy_length;
706 }
707
708 Ok(resample)
709 }
710
711 fn circular_blockbootstrap(
713 &mut self,
714 data: &ArrayView1<F>,
715 block_length: usize,
716 ) -> StatsResult<Array1<F>> {
717 let n = data.len();
718 let n_blocks = n.div_ceil(block_length);
719 let mut resample = Array1::zeros(n);
720 let mut pos = 0;
721
722 for _ in 0..n_blocks {
723 if pos >= n {
724 break;
725 }
726
727 let start_idx = self.rng.random_range(0..n);
728 let copy_length = std::cmp::min(block_length, n - pos);
729
730 for i in 0..copy_length {
731 let idx = (start_idx + i) % n; resample[pos + i] = data[idx];
733 }
734 pos += copy_length;
735 }
736
737 Ok(resample)
738 }
739
740 fn non_overlapping_blockbootstrap(
742 &mut self,
743 data: &ArrayView1<F>,
744 block_length: usize,
745 ) -> StatsResult<Array1<F>> {
746 let n = data.len();
747 let n_complete_blocks = n / block_length;
748 let remainder = n % block_length;
749
750 let mut blocks = Vec::new();
752 for i in 0..n_complete_blocks {
753 let start = i * block_length;
754 let end = start + block_length;
755 blocks.push(data.slice(scirs2_core::ndarray::s![start..end]).to_owned());
756 }
757
758 if remainder > 0 {
760 let start = n_complete_blocks * block_length;
761 blocks.push(data.slice(scirs2_core::ndarray::s![start..]).to_owned());
762 }
763
764 let mut resample = Array1::zeros(n);
766 let mut pos = 0;
767
768 while pos < n {
769 let block_idx = self.rng.random_range(0..blocks.len());
770 let block = &blocks[block_idx];
771 let copy_length = std::cmp::min(block.len(), n - pos);
772
773 for i in 0..copy_length {
774 resample[pos + i] = block[i];
775 }
776 pos += copy_length;
777 }
778
779 Ok(resample)
780 }
781
    /// Stationary bootstrap: blocks start at uniformly random positions and
    /// have geometrically distributed lengths with mean `expected_length`,
    /// wrapping circularly around the series end.
    fn stationarybootstrap(
        &mut self,
        data: &ArrayView1<F>,
        expected_length: f64,
    ) -> StatsResult<Array1<F>> {
        let n = data.len();
        // Per-step termination probability of the geometric block length.
        let p = 1.0 / expected_length;
        let mut resample = Array1::zeros(n);
        let mut pos = 0;

        while pos < n {
            let start_idx = self.rng.random_range(0..n);
            let mut block_length = 1;

            // Extend the block while the geometric draw continues and there is
            // still room left in the resample.
            while self.rng.random::<f64>() > p && block_length < n - pos {
                block_length += 1;
            }

            // Copy the block, wrapping circularly around the series end.
            for i in 0..block_length {
                if pos + i >= n {
                    break;
                }
                let idx = (start_idx + i) % n;
                resample[pos + i] = data[idx];
            }

            pos += block_length;
        }

        Ok(resample)
    }
816
817 fn tapered_blockbootstrap(
819 &mut self,
820 data: &ArrayView1<F>,
821 block_length: usize,
822 taper_function: &TaperFunction,
823 ) -> StatsResult<Array1<F>> {
824 let n = data.len();
825 let mut resample = Array1::zeros(n);
826 let n_blocks = n.div_ceil(block_length);
827 let mut pos = 0;
828
829 for _ in 0..n_blocks {
830 if pos >= n {
831 break;
832 }
833
834 let start_idx = self.rng.random_range(0..(n - block_length));
835 let copy_length = std::cmp::min(block_length, n - pos);
836
837 for i in 0..copy_length {
839 let weight = self.compute_taper_weight(i, copy_length, taper_function);
840 let value =
841 data[start_idx + i] * F::from(weight).expect("Failed to convert to float");
842
843 if pos + i < resample.len() {
844 resample[pos + i] = resample[pos + i] + value;
845 }
846 }
847 pos += copy_length;
848 }
849
850 Ok(resample)
851 }
852
853 fn compute_taper_weight(
855 &self,
856 position: usize,
857 block_length: usize,
858 taper_function: &TaperFunction,
859 ) -> f64 {
860 let t = position as f64 / (block_length - 1) as f64;
861
862 match taper_function {
863 TaperFunction::Linear => {
864 if t <= 0.5 {
865 2.0 * t
866 } else {
867 2.0 * (1.0 - t)
868 }
869 }
870 TaperFunction::Cosine => 0.5 * (1.0 - (std::f64::consts::PI * t).cos()),
871 TaperFunction::Exponential { decay_rate } => {
872 let distance_from_center = (t - 0.5).abs();
873 (-decay_rate * distance_from_center).exp()
874 }
875 }
876 }
877
878 fn bayesian_bootstrap<T>(
880 &mut self,
881 data: &ArrayView1<F>,
882 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
883 ) -> StatsResult<Array1<F>>
884 where
885 T: Into<F> + Copy + Send + Sync,
886 {
887 let n = data.len();
888 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
889
890 for i in 0..self.config.n_bootstrap {
891 let mut weights = Array1::zeros(n);
893 let mut weight_sum = F::zero();
894
895 for j in 0..n {
896 let exp_sample = -self.rng.random::<f64>().ln(); weights[j] = F::from(exp_sample).expect("Failed to convert to float");
898 weight_sum = weight_sum + weights[j];
899 }
900
901 for j in 0..n {
903 weights[j] = weights[j] / weight_sum;
904 }
905
906 let mut resample = Array1::zeros(n);
908 for j in 0..n {
909 resample[j] =
910 data[j] * weights[j] * F::from(n).expect("Failed to convert to float");
911 }
913
914 bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
915 }
916
917 Ok(bootstrap_samples)
918 }
919
920 fn wild_bootstrap<T>(
922 &mut self,
923 data: &ArrayView1<F>,
924 distribution: &WildDistribution,
925 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
926 ) -> StatsResult<Array1<F>>
927 where
928 T: Into<F> + Copy + Send + Sync,
929 {
930 let n = data.len();
931 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
932
933 for i in 0..self.config.n_bootstrap {
934 let mut resample = Array1::zeros(n);
935
936 for j in 0..n {
937 let multiplier = match distribution {
938 WildDistribution::Rademacher => {
939 if self.rng.random::<f64>() < 0.5 {
940 -1.0
941 } else {
942 1.0
943 }
944 }
945 WildDistribution::Mammen => {
946 let _golden_ratio = (1.0 + 5.0_f64.sqrt()) / 2.0;
947 let p = (5.0_f64.sqrt() + 1.0) / (2.0 * 5.0_f64.sqrt());
948 if self.rng.random::<f64>() < p {
949 -(5.0_f64.sqrt() - 1.0) / 2.0
950 } else {
951 (5.0_f64.sqrt() + 1.0) / 2.0
952 }
953 }
954 WildDistribution::Normal => {
955 let u1 = self.rng.random::<f64>();
957 let u2 = self.rng.random::<f64>();
958 (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos()
959 }
960 WildDistribution::TwoPoint { prob_positive } => {
961 if self.rng.random::<f64>() < *prob_positive {
962 1.0
963 } else {
964 -1.0
965 }
966 }
967 };
968
969 resample[j] = data[j] * F::from(multiplier).expect("Failed to convert to float");
970 }
971
972 bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
973 }
974
975 Ok(bootstrap_samples)
976 }
977
    /// Parametric bootstrap: each resample is drawn fresh from the fitted
    /// distribution (with the same size as `data`) rather than by resampling
    /// the data itself.
    ///
    /// # Errors
    /// Returns `InvalidArgument` for `Custom` distributions, which are not
    /// implemented.
    fn parametric_bootstrap<T>(
        &mut self,
        data: &ArrayView1<F>,
        distribution_params: &ParametricBootstrapParams,
        statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
    ) -> StatsResult<Array1<F>>
    where
        T: Into<F> + Copy + Send + Sync,
    {
        // Note: `data` is only used for its length; the values come from the
        // parametric generator.
        let n = data.len();
        let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);

        for i in 0..self.config.n_bootstrap {
            let resample = match distribution_params {
                ParametricBootstrapParams::Normal { mean, std } => {
                    self.generate_normal_sample(n, *mean, *std)?
                }
                ParametricBootstrapParams::Exponential { rate } => {
                    self.generate_exponential_sample(n, *rate)?
                }
                ParametricBootstrapParams::Gamma { shape, scale } => {
                    self.generate_gamma_sample(n, *shape, *scale)?
                }
                ParametricBootstrapParams::Beta { alpha, beta } => {
                    self.generate_beta_sample(n, *alpha, *beta)?
                }
                ParametricBootstrapParams::Custom { name, .. } => {
                    return Err(StatsError::InvalidArgument(format!(
                        "Custom distribution '{}' not implemented",
                        name
                    )));
                }
            };

            bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
        }

        Ok(bootstrap_samples)
    }
1018
1019 fn balanced_bootstrap<T>(
1021 &mut self,
1022 data: &ArrayView1<F>,
1023 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
1024 ) -> StatsResult<Array1<F>>
1025 where
1026 T: Into<F> + Copy + Send + Sync,
1027 {
1028 let n = data.len();
1029 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
1030
1031 let total_samples = self.config.n_bootstrap * n;
1033 let mut all_indices = Vec::with_capacity(total_samples);
1034
1035 for _ in 0..self.config.n_bootstrap {
1036 for i in 0..n {
1037 all_indices.push(i);
1038 }
1039 }
1040
1041 for i in (1..all_indices.len()).rev() {
1043 let j = self.rng.random_range(0..i);
1044 all_indices.swap(i, j);
1045 }
1046
1047 for i in 0..self.config.n_bootstrap {
1049 let mut resample = Array1::zeros(n);
1050 let start_idx = i * n;
1051
1052 for j in 0..n {
1053 let data_idx = all_indices[start_idx + j];
1054 resample[j] = data[data_idx];
1055 }
1056
1057 bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
1058 }
1059
1060 Ok(bootstrap_samples)
1061 }
1062
1063 fn generate_normal_sample(&mut self, n: usize, mean: f64, std: f64) -> StatsResult<Array1<F>> {
1065 let mut sample = Array1::zeros(n);
1066
1067 for i in 0..n {
1068 let u1 = self.rng.random::<f64>();
1069 let u2 = self.rng.random::<f64>();
1070 let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos();
1071 sample[i] = F::from(mean + std * z).expect("Failed to convert to float");
1072 }
1073
1074 Ok(sample)
1075 }
1076
1077 fn generate_exponential_sample(&mut self, n: usize, rate: f64) -> StatsResult<Array1<F>> {
1079 let mut sample = Array1::zeros(n);
1080
1081 for i in 0..n {
1082 let u = self.rng.random::<f64>();
1083 let x = -u.ln() / rate;
1084 sample[i] = F::from(x).expect("Failed to convert to float");
1085 }
1086
1087 Ok(sample)
1088 }
1089
1090 fn generate_gamma_sample(
1092 &mut self,
1093 n: usize,
1094 shape: f64,
1095 scale: f64,
1096 ) -> StatsResult<Array1<F>> {
1097 let mut sample = Array1::zeros(n);
1099
1100 for i in 0..n {
1101 let mean = shape * scale;
1103 let std = (shape * scale * scale).sqrt();
1104 let u1 = self.rng.random::<f64>();
1105 let u2 = self.rng.random::<f64>();
1106 let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos();
1107 sample[i] = F::from((mean + std * z).max(0.0)).expect("Operation failed");
1108 }
1109
1110 Ok(sample)
1111 }
1112
1113 fn generate_beta_sample(&mut self, n: usize, alpha: f64, beta: f64) -> StatsResult<Array1<F>> {
1115 let mut sample = Array1::zeros(n);
1117
1118 for i in 0..n {
1119 let x = self.rng.random::<f64>().powf(1.0 / alpha);
1120 let y = self.rng.random::<f64>().powf(1.0 / beta);
1121 let value = x / (x + y);
1122 sample[i] = F::from(value).expect("Failed to convert to float");
1123 }
1124
1125 Ok(sample)
1126 }
1127
1128 fn optimal_block_length(&self, n: usize) -> usize {
1130 let length = (n as f64).powf(1.0 / 3.0).ceil() as usize;
1132 std::cmp::max(1, std::cmp::min(length, n / 4))
1133 }
1134
1135 fn compute_effective_samplesize(&self, n: usize) -> usize {
1137 let block_length = self
1138 .config
1139 .block_length
1140 .unwrap_or_else(|| self.optimal_block_length(n));
1141
1142 let correlation_factor = 1.0 - (block_length as f64 - 1.0) / (2.0 * n as f64);
1144 (n as f64 * correlation_factor).ceil() as usize
1145 }
1146
    /// Builds percentile and basic confidence intervals from the sorted
    /// bootstrap replicates, plus optional bias-corrected variants.
    ///
    /// `_standard_error` is currently unused (reserved for studentized intervals).
    fn compute_confidence_intervals(
        &self,
        bootstrap_samples: &Array1<F>,
        original_statistic: F,
        _standard_error: F,
    ) -> StatsResult<BootstrapConfidenceIntervals<F>> {
        // Two-sided tail probabilities for the requested confidence level.
        let alpha = 1.0 - self.config.confidence_level;
        let lower_percentile = alpha / 2.0;
        let upper_percentile = 1.0 - alpha / 2.0;

        // Sort replicates; incomparable values (NaN) are treated as equal.
        let mut sorted_samples = bootstrap_samples.to_vec();
        sorted_samples.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        let n = sorted_samples.len();
        let lower_idx = ((n as f64) * lower_percentile).floor() as usize;
        let upper_idx = ((n as f64) * upper_percentile).ceil() as usize - 1;

        // Percentile interval: empirical quantiles of the replicates.
        let percentile = (
            sorted_samples[lower_idx],
            sorted_samples[upper_idx.min(n - 1)],
        );

        // Basic (reflected) interval: 2*theta_hat minus the opposite quantiles.
        let basic = (
            F::from(2.0).expect("Failed to convert constant to float") * original_statistic
                - sorted_samples[upper_idx.min(n - 1)],
            F::from(2.0).expect("Failed to convert constant to float") * original_statistic
                - sorted_samples[lower_idx],
        );

        // Shift the percentile interval by the estimated additive bias.
        // NOTE(review): this is a simple additive shift, not the z0-based
        // BC correction, and it shifts *toward* the bias — confirm the sign
        // and approximation are intended.
        let bias_corrected = if self.config.bias_correction {
            let bias_correction =
                self.compute_bias_correction(bootstrap_samples, original_statistic);
            Some((
                percentile.0 + bias_correction,
                percentile.1 + bias_correction,
            ))
        } else {
            None
        };

        // Acceleration correction not implemented: falls back to the
        // bias-corrected interval unchanged.
        let bias_corrected_accelerated = if self.config.acceleration_correction {
            bias_corrected
        } else {
            None
        };

        Ok(BootstrapConfidenceIntervals {
            percentile,
            basic,
            bias_corrected,
            bias_corrected_accelerated,
            studentized: None, // not implemented
        })
    }
1208
1209 fn compute_bias_correction(&self, bootstrap_samples: &Array1<F>, originalstatistic: F) -> F {
1211 let _count_below = bootstrap_samples
1212 .iter()
1213 .filter(|&&x| x < originalstatistic)
1214 .count();
1215
1216 let _proportion = _count_below as f64 / bootstrap_samples.len() as f64;
1217
1218 let bootstrap_mean = self.compute_mean(bootstrap_samples);
1220 bootstrap_mean - originalstatistic
1221 }
1222
1223 fn compute_diagnostics(
1225 &self,
1226 bootstrap_samples: &Array1<F>,
1227 original_statistic: F,
1228 ) -> StatsResult<BootstrapDiagnostics<F>> {
1229 let distribution_stats = self.compute_distribution_stats(bootstrap_samples)?;
1230 let quality_metrics =
1231 self.compute_quality_metrics(bootstrap_samples, original_statistic)?;
1232 let convergence_info = self.compute_convergence_info(bootstrap_samples)?;
1233 let method_specific = HashMap::new(); Ok(BootstrapDiagnostics {
1236 distribution_stats,
1237 quality_metrics,
1238 convergence_info,
1239 method_specific,
1240 })
1241 }
1242
1243 fn compute_distribution_stats(
1245 &self,
1246 samples: &Array1<F>,
1247 ) -> StatsResult<BootstrapDistributionStats<F>> {
1248 let mean = self.compute_mean(samples);
1249 let std = self.compute_std(samples);
1250
1251 let skewness = if std > F::zero() {
1253 let skew_sum = samples
1254 .iter()
1255 .map(|&x| {
1256 let z = (x - mean) / std;
1257 z * z * z
1258 })
1259 .fold(F::zero(), |acc, x| acc + x);
1260 skew_sum / F::from(samples.len()).expect("Operation failed")
1261 } else {
1262 F::zero()
1263 };
1264
1265 let kurtosis = if std > F::zero() {
1267 let kurt_sum = samples
1268 .iter()
1269 .map(|&x| {
1270 let z = (x - mean) / std;
1271 z * z * z * z
1272 })
1273 .fold(F::zero(), |acc, x| acc + x);
1274 kurt_sum / F::from(samples.len()).expect("Operation failed")
1275 - F::from(3.0).expect("Failed to convert constant to float")
1276 } else {
1277 F::zero()
1278 };
1279
1280 let min_value = samples.iter().copied().fold(F::infinity(), F::min);
1282 let max_value = samples.iter().copied().fold(F::neg_infinity(), F::max);
1283
1284 Ok(BootstrapDistributionStats {
1285 skewness,
1286 kurtosis,
1287 jarque_bera: F::zero(), anderson_darling: F::zero(), min_value,
1290 max_value,
1291 })
1292 }
1293
    /// Quality metrics for the bootstrap run.
    ///
    /// Only the Monte-Carlo standard error is computed from the replicates;
    /// the remaining fields are placeholders (nominal coverage, unit stability).
    fn compute_quality_metrics(
        &self,
        samples: &Array1<F>,
        _original_statistic: F,
    ) -> StatsResult<QualityMetrics<F>> {
        // Monte-Carlo SE of the bootstrap mean: sd(replicates) / sqrt(B).
        let std_error = self.compute_std(samples);
        let mc_std_error =
            std_error / F::from((samples.len() as f64).sqrt()).expect("Operation failed");

        Ok(QualityMetrics {
            mc_standard_error: mc_std_error,
            // Nominal level, not an estimated coverage probability.
            coverage_probability: F::from(self.config.confidence_level)
                .expect("Failed to convert to float"),
            efficiency: None,    // not estimated
            stability: F::one(), // placeholder constant
        })
    }
1312
    /// Convergence heuristics for the bootstrap run.
    ///
    /// NOTE(review): "converged" is simply `len >= 100`, and the stability
    /// fields are placeholder constants rather than values computed from the
    /// replicates — confirm whether a real running-moment check is intended.
    fn compute_convergence_info(&self, samples: &Array1<F>) -> StatsResult<ConvergenceInfo<F>> {
        let converged = samples.len() >= 100;
        Ok(ConvergenceInfo {
            converged,
            convergence_samplesize: if converged { Some(samples.len()) } else { None },
            mean_stability: F::one(),     // placeholder
            variance_stability: F::one(), // placeholder
        })
    }
1325
1326 fn compute_mean(&self, data: &Array1<F>) -> F {
1328 if data.is_empty() {
1329 F::zero()
1330 } else {
1331 data.sum() / F::from(data.len()).expect("Operation failed")
1332 }
1333 }
1334
1335 fn compute_std(&self, data: &Array1<F>) -> F {
1337 if data.len() <= 1 {
1338 return F::zero();
1339 }
1340
1341 let mean = self.compute_mean(data);
1342 let variance = data
1343 .iter()
1344 .map(|&x| (x - mean) * (x - mean))
1345 .fold(F::zero(), |acc, x| acc + x)
1346 / F::from(data.len() - 1).expect("Operation failed");
1347
1348 variance.sqrt()
1349 }
1350}
1351
1352#[allow(dead_code)]
1354pub fn stratified_bootstrap<F, T>(
1355 data: &ArrayView1<F>,
1356 strata: &[usize],
1357 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
1358 config: Option<AdvancedBootstrapConfig>,
1359) -> StatsResult<AdvancedBootstrapResult<F>>
1360where
1361 F: Float
1362 + NumCast
1363 + SimdUnifiedOps
1364 + Zero
1365 + One
1366 + FromPrimitive
1367 + Copy
1368 + Send
1369 + Sync
1370 + std::fmt::Display,
1371 T: Into<F> + Copy + Send + Sync,
1372{
1373 let mut config = config.unwrap_or_default();
1374 config.bootstrap_type = BootstrapType::Stratified {
1375 strata: strata.to_vec(),
1376 };
1377
1378 let mut processor = AdvancedBootstrapProcessor::new(config);
1379 processor.bootstrap(data, statistic_fn)
1380}
1381
1382#[allow(dead_code)]
1384pub fn block_bootstrap<F, T>(
1385 data: &ArrayView1<F>,
1386 block_type: BlockType,
1387 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
1388 config: Option<AdvancedBootstrapConfig>,
1389) -> StatsResult<AdvancedBootstrapResult<F>>
1390where
1391 F: Float
1392 + NumCast
1393 + SimdUnifiedOps
1394 + Zero
1395 + One
1396 + FromPrimitive
1397 + Copy
1398 + Send
1399 + Sync
1400 + std::fmt::Display,
1401 T: Into<F> + Copy + Send + Sync,
1402{
1403 let mut config = config.unwrap_or_default();
1404 config.bootstrap_type = BootstrapType::Block { block_type };
1405
1406 let mut processor = AdvancedBootstrapProcessor::new(config);
1407 processor.bootstrap(data, statistic_fn)
1408}
1409
1410#[allow(dead_code)]
1412pub fn moving_block_bootstrap<F, T>(
1413 data: &ArrayView1<F>,
1414 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
1415 block_length: Option<usize>,
1416 n_bootstrap: Option<usize>,
1417) -> StatsResult<AdvancedBootstrapResult<F>>
1418where
1419 F: Float
1420 + NumCast
1421 + SimdUnifiedOps
1422 + Zero
1423 + One
1424 + FromPrimitive
1425 + Copy
1426 + Send
1427 + Sync
1428 + std::fmt::Display,
1429 T: Into<F> + Copy + Send + Sync,
1430{
1431 let mut config = AdvancedBootstrapConfig::default();
1432 config.bootstrap_type = BootstrapType::Block {
1433 block_type: BlockType::Moving,
1434 };
1435 config.block_length = block_length;
1436 config.n_bootstrap = n_bootstrap.unwrap_or(1000);
1437
1438 let mut processor = AdvancedBootstrapProcessor::new(config);
1439 processor.bootstrap(data, statistic_fn)
1440}
1441
1442#[allow(dead_code)]
1444pub fn circular_block_bootstrap<F, T>(
1445 data: &ArrayView1<F>,
1446 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
1447 block_length: Option<usize>,
1448 n_bootstrap: Option<usize>,
1449) -> StatsResult<AdvancedBootstrapResult<F>>
1450where
1451 F: Float
1452 + NumCast
1453 + SimdUnifiedOps
1454 + Zero
1455 + One
1456 + FromPrimitive
1457 + Copy
1458 + Send
1459 + Sync
1460 + std::fmt::Display,
1461 T: Into<F> + Copy + Send + Sync,
1462{
1463 let mut config = AdvancedBootstrapConfig::default();
1464 config.bootstrap_type = BootstrapType::Block {
1465 block_type: BlockType::Circular,
1466 };
1467 config.block_length = block_length;
1468 config.n_bootstrap = n_bootstrap.unwrap_or(1000);
1469
1470 let mut processor = AdvancedBootstrapProcessor::new(config);
1471 processor.bootstrap(data, statistic_fn)
1472}
1473
1474#[allow(dead_code)]
1476pub fn stationary_bootstrap<F, T>(
1477 data: &ArrayView1<F>,
1478 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
1479 expected_block_length: f64,
1480 n_bootstrap: Option<usize>,
1481) -> StatsResult<AdvancedBootstrapResult<F>>
1482where
1483 F: Float
1484 + NumCast
1485 + SimdUnifiedOps
1486 + Zero
1487 + One
1488 + FromPrimitive
1489 + Copy
1490 + Send
1491 + Sync
1492 + std::fmt::Display,
1493 T: Into<F> + Copy + Send + Sync,
1494{
1495 let mut config = AdvancedBootstrapConfig::default();
1496 config.bootstrap_type = BootstrapType::Block {
1497 block_type: BlockType::Stationary {
1498 expected_length: expected_block_length,
1499 },
1500 };
1501 config.n_bootstrap = n_bootstrap.unwrap_or(1000);
1502
1503 let mut processor = AdvancedBootstrapProcessor::new(config);
1504 processor.bootstrap(data, statistic_fn)
1505}
1506
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// Sample-mean statistic shared by most tests below.
    fn mean_stat(x: &ArrayView1<f64>) -> StatsResult<f64> {
        Ok(x.sum() / x.len() as f64)
    }

    /// Unbiased (n - 1) sample-variance statistic.
    fn variance_stat(x: &ArrayView1<f64>) -> StatsResult<f64> {
        let mean = x.sum() / x.len() as f64;
        Ok(x.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / (x.len() - 1) as f64)
    }

    #[test]
    fn test_basicbootstrap() {
        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];

        let config = AdvancedBootstrapConfig {
            n_bootstrap: 100,
            seed: Some(42),
            ..Default::default()
        };
        let mut processor = AdvancedBootstrapProcessor::new(config);

        let result = processor
            .bootstrap(&data.view(), mean_stat)
            .expect("Operation failed");

        // All 100 resamples succeed and the point estimate equals the mean.
        assert_eq!(result.n_successful, 100);
        assert_eq!(result.bootstrap_samples.len(), 100);
        assert!((result.original_statistic - 3.0).abs() < 1e-10);
    }

    #[test]
    fn test_stratifiedbootstrap() {
        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        // Three strata of two observations each.
        let strata = vec![0, 0, 1, 1, 2, 2];

        let result = stratified_bootstrap(
            &data.view(),
            &strata,
            mean_stat,
            Some(AdvancedBootstrapConfig {
                n_bootstrap: 50,
                seed: Some(123),
                ..Default::default()
            }),
        )
        .expect("Operation failed");

        assert_eq!(result.n_successful, 50);
        assert!(matches!(result.method, BootstrapType::Stratified { .. }));
    }

    #[test]
    fn test_moving_blockbootstrap() {
        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];

        let result = moving_block_bootstrap(&data.view(), mean_stat, Some(3), Some(50))
            .expect("Operation failed");

        assert_eq!(result.n_successful, 50);
        assert!(result.effective_samplesize.is_some());
    }

    #[test]
    fn test_circular_blockbootstrap() {
        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];

        let result = circular_block_bootstrap(&data.view(), mean_stat, Some(2), Some(30))
            .expect("Operation failed");

        assert_eq!(result.n_successful, 30);
    }

    #[test]
    fn test_confidence_intervals() {
        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];

        let config = AdvancedBootstrapConfig {
            n_bootstrap: 200,
            confidence_level: 0.95,
            seed: Some(42),
            ..Default::default()
        };
        let mut processor = AdvancedBootstrapProcessor::new(config);

        let result = processor
            .bootstrap(&data.view(), mean_stat)
            .expect("Operation failed");

        // Interval bounds must be ordered (lower <= upper).
        let ci = &result.confidence_intervals;
        assert!(ci.percentile.0 <= ci.percentile.1);
        assert!(ci.basic.0 <= ci.basic.1);
    }

    #[test]
    fn test_bootstrap_diagnostics() {
        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];

        let config = AdvancedBootstrapConfig {
            n_bootstrap: 100,
            seed: Some(456),
            ..Default::default()
        };
        let mut processor = AdvancedBootstrapProcessor::new(config);

        let result = processor
            .bootstrap(&data.view(), variance_stat)
            .expect("Operation failed");

        assert!(result.diagnostics.convergence_info.converged);
        assert!(
            result.diagnostics.distribution_stats.min_value
                <= result.diagnostics.distribution_stats.max_value
        );
    }
}