1use crate::error::{StatsError, StatsResult};
8use scirs2_core::ndarray::{Array1, ArrayView1};
9use scirs2_core::numeric::{Float, FromPrimitive, NumCast, One, Zero};
10use scirs2_core::{parallel_ops::*, random::prelude::*, simd_ops::SimdUnifiedOps, validation::*};
11use std::collections::HashMap;
12use std::marker::PhantomData;
13
14#[derive(Debug, Clone)]
16pub struct AdvancedBootstrapConfig {
17 pub n_bootstrap: usize,
19 pub seed: Option<u64>,
21 pub bootstrap_type: BootstrapType,
23 pub parallel: bool,
25 pub confidence_level: f64,
27 pub block_length: Option<usize>,
29 pub bias_correction: bool,
31 pub acceleration_correction: bool,
33 pub max_threads: Option<usize>,
35}
36
37impl Default for AdvancedBootstrapConfig {
38 fn default() -> Self {
39 Self {
40 n_bootstrap: 1000,
41 seed: None,
42 bootstrap_type: BootstrapType::Basic,
43 parallel: true,
44 confidence_level: 0.95,
45 block_length: None,
46 bias_correction: true,
47 acceleration_correction: true,
48 max_threads: None,
49 }
50 }
51}
52
53#[derive(Debug, Clone, PartialEq)]
55pub enum BootstrapType {
56 Basic,
58 Stratified {
60 strata: Vec<usize>,
62 },
63 Block {
65 block_type: BlockType,
67 },
68 Bayesian,
70 Wild {
72 distribution: WildDistribution,
74 },
75 Parametric {
77 distribution_params: ParametricBootstrapParams,
79 },
80 Balanced,
82}
83
84#[derive(Debug, Clone, PartialEq)]
86pub enum BlockType {
87 Moving,
89 Circular,
91 NonOverlapping,
93 Stationary {
95 expected_length: f64,
97 },
98 Tapered {
100 taper_function: TaperFunction,
102 },
103}
104
105#[derive(Debug, Clone, PartialEq)]
107pub enum TaperFunction {
108 Linear,
110 Cosine,
112 Exponential { decay_rate: f64 },
114}
115
116#[derive(Debug, Clone, PartialEq)]
118pub enum WildDistribution {
119 Rademacher,
121 Mammen,
123 Normal,
125 TwoPoint { prob_positive: f64 },
127}
128
129#[derive(Debug, Clone, PartialEq)]
131pub enum ParametricBootstrapParams {
132 Normal { mean: f64, std: f64 },
134 Exponential { rate: f64 },
136 Gamma { shape: f64, scale: f64 },
138 Beta { alpha: f64, beta: f64 },
140 Custom {
142 name: String,
144 params: HashMap<String, f64>,
146 },
147}
148
149#[derive(Debug, Clone)]
151pub struct AdvancedBootstrapResult<F> {
152 pub bootstrap_samples: Array1<F>,
154 pub original_statistic: F,
156 pub bootstrap_mean: F,
158 pub standard_error: F,
160 pub bias: F,
162 pub confidence_intervals: BootstrapConfidenceIntervals<F>,
164 pub method: BootstrapType,
166 pub n_successful: usize,
168 pub effective_samplesize: Option<usize>,
170 pub diagnostics: BootstrapDiagnostics<F>,
172}
173
174#[derive(Debug, Clone)]
176pub struct BootstrapConfidenceIntervals<F> {
177 pub percentile: (F, F),
179 pub basic: (F, F),
181 pub bias_corrected: Option<(F, F)>,
183 pub bias_corrected_accelerated: Option<(F, F)>,
185 pub studentized: Option<(F, F)>,
187}
188
189#[derive(Debug, Clone)]
191pub struct BootstrapDiagnostics<F> {
192 pub distribution_stats: BootstrapDistributionStats<F>,
194 pub quality_metrics: QualityMetrics<F>,
196 pub convergence_info: ConvergenceInfo<F>,
198 pub method_specific: HashMap<String, F>,
200}
201
202#[derive(Debug, Clone)]
204pub struct BootstrapDistributionStats<F> {
205 pub skewness: F,
207 pub kurtosis: F,
209 pub jarque_bera: F,
211 pub anderson_darling: F,
213 pub min_value: F,
215 pub max_value: F,
217}
218
219#[derive(Debug, Clone)]
221pub struct QualityMetrics<F> {
222 pub mc_standard_error: F,
224 pub coverage_probability: F,
226 pub efficiency: Option<F>,
228 pub stability: F,
230}
231
232#[derive(Debug, Clone)]
234pub struct ConvergenceInfo<F> {
235 pub converged: bool,
237 pub convergence_samplesize: Option<usize>,
239 pub mean_stability: F,
241 pub variance_stability: F,
243}
244
245pub struct AdvancedBootstrapProcessor<F> {
247 config: AdvancedBootstrapConfig,
248 rng: StdRng,
249 _phantom: PhantomData<F>,
250}
251
252impl<F> AdvancedBootstrapProcessor<F>
253where
254 F: Float
255 + NumCast
256 + SimdUnifiedOps
257 + Zero
258 + One
259 + FromPrimitive
260 + Copy
261 + Send
262 + Sync
263 + std::fmt::Display,
264{
265 pub fn new(config: AdvancedBootstrapConfig) -> Self {
267 let rng = match config.seed {
268 Some(seed) => StdRng::seed_from_u64(seed),
269 None => StdRng::from_rng(&mut thread_rng()),
270 };
271
272 Self {
273 config,
274 rng,
275 _phantom: PhantomData,
276 }
277 }
278
279 pub fn bootstrap<T>(
281 &mut self,
282 data: &ArrayView1<F>,
283 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
284 ) -> StatsResult<AdvancedBootstrapResult<F>>
285 where
286 T: Into<F> + Copy + Send + Sync,
287 {
288 checkarray_finite(data, "data")?;
289
290 if data.is_empty() {
291 return Err(StatsError::InvalidArgument(
292 "Data cannot be empty".to_string(),
293 ));
294 }
295
296 let original_statistic = statistic_fn(data)?.into();
298
299 let bootstrap_type = self.config.bootstrap_type.clone();
301 let bootstrap_samples = match bootstrap_type {
302 BootstrapType::Basic => self.basic_bootstrap(data, statistic_fn)?,
303 BootstrapType::Stratified { strata } => {
304 self.stratified_bootstrap(data, &strata, statistic_fn)?
305 }
306 BootstrapType::Block { block_type } => {
307 self.block_bootstrap(data, &block_type, statistic_fn)?
308 }
309 BootstrapType::Bayesian => self.bayesian_bootstrap(data, statistic_fn)?,
310 BootstrapType::Wild { distribution } => {
311 self.wild_bootstrap(data, &distribution, statistic_fn)?
312 }
313 BootstrapType::Parametric {
314 distribution_params,
315 } => self.parametric_bootstrap(data, &distribution_params, statistic_fn)?,
316 BootstrapType::Balanced => self.balanced_bootstrap(data, statistic_fn)?,
317 };
318
319 let bootstrap_mean = self.compute_mean(&bootstrap_samples);
321 let standard_error = self.compute_std(&bootstrap_samples);
322 let bias = bootstrap_mean - original_statistic;
323
324 let confidence_intervals = self.compute_confidence_intervals(
326 &bootstrap_samples,
327 original_statistic,
328 standard_error,
329 )?;
330
331 let diagnostics = self.compute_diagnostics(&bootstrap_samples, original_statistic)?;
333
334 let effective_samplesize = match &self.config.bootstrap_type {
336 BootstrapType::Block { .. } => Some(self.compute_effective_samplesize(data.len())),
337 _ => None,
338 };
339
340 Ok(AdvancedBootstrapResult {
341 bootstrap_samples,
342 original_statistic,
343 bootstrap_mean,
344 standard_error,
345 bias,
346 confidence_intervals,
347 method: self.config.bootstrap_type.clone(),
348 n_successful: self.config.n_bootstrap,
349 effective_samplesize,
350 diagnostics,
351 })
352 }
353
354 fn basic_bootstrap<T>(
356 &mut self,
357 data: &ArrayView1<F>,
358 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
359 ) -> StatsResult<Array1<F>>
360 where
361 T: Into<F> + Copy + Send + Sync,
362 {
363 let n = data.len();
364 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
365
366 if self.config.n_bootstrap >= 64 && n >= 32 {
368 return self.basic_bootstrap_simd_ultra(data, statistic_fn);
369 }
370
371 if self.config.parallel && self.config.n_bootstrap > 100 {
372 let samples: Result<Vec<_>, _> = (0..self.config.n_bootstrap)
374 .into_par_iter()
375 .map(|_| {
376 let mut local_rng = { StdRng::from_rng(&mut thread_rng()) };
377 let mut resample = Array1::zeros(n);
378
379 for i in 0..n {
380 let idx = local_rng.gen_range(0..n);
381 resample[i] = data[idx];
382 }
383
384 statistic_fn(&resample.view()).map(|s| s.into())
385 })
386 .collect();
387
388 let sample_values = samples?;
389 for (i, value) in sample_values.into_iter().enumerate() {
390 bootstrap_samples[i] = value;
391 }
392 } else {
393 for i in 0..self.config.n_bootstrap {
395 let mut resample = Array1::zeros(n);
396
397 for j in 0..n {
398 let idx = self.rng.gen_range(0..n);
399 resample[j] = data[idx];
400 }
401
402 bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
403 }
404 }
405
406 Ok(bootstrap_samples)
407 }
408
409 fn basic_bootstrap_simd_ultra<T>(
411 &mut self,
412 data: &ArrayView1<F>,
413 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
414 ) -> StatsResult<Array1<F>>
415 where
416 T: Into<F> + Copy + Send + Sync,
417 {
418 use scirs2_core::simd_ops::PlatformCapabilities;
419
420 let capabilities = PlatformCapabilities::detect();
421 let n: usize = data.len();
422 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
423
424 let chunk_size: usize = if capabilities.has_avx512() {
426 16
427 } else if capabilities.has_avx2() {
428 8
429 } else {
430 4
431 };
432 let num_chunks: usize = self.config.n_bootstrap.div_ceil(chunk_size);
433
434 let mut resample_indices = Vec::<usize>::with_capacity(chunk_size * n);
436 let mut resample_values = Vec::<F>::with_capacity(chunk_size * n);
437 let mut batch_statistics = Vec::with_capacity(chunk_size);
438
439 if self.config.parallel && num_chunks > 1 {
440 let chunk_results: Result<Vec<_>, _> = (0..num_chunks)
442 .into_par_iter()
443 .map(|chunk_idx| {
444 let start_bootstrap = chunk_idx * chunk_size;
445 let end_bootstrap =
446 std::cmp::min(start_bootstrap + chunk_size, self.config.n_bootstrap);
447 let current_chunk_size = end_bootstrap - start_bootstrap;
448
449 let mut local_rng = StdRng::from_rng(&mut thread_rng());
450 let mut chunk_statistics = Vec::with_capacity(current_chunk_size);
451
452 let mut local_indices = Vec::with_capacity(current_chunk_size * n);
454 for _ in 0..current_chunk_size {
455 for _ in 0..n {
456 local_indices.push(local_rng.gen_range(0..n));
457 }
458 }
459
460 let mut local_values = Vec::with_capacity(current_chunk_size * n);
462 if capabilities.has_avx2() && n >= 8 {
463 for bootstrap_idx in 0..current_chunk_size {
465 let indices_start = bootstrap_idx * n;
466 let indices_slice = &local_indices[indices_start..indices_start + n];
467
468 let data_f32: Vec<f32> =
470 data.iter().map(|&x| x.to_f64().unwrap() as f32).collect();
471
472 let mut gathered_values = vec![0.0f32; n];
474 for (i, &idx) in indices_slice.iter().enumerate() {
475 gathered_values[i] = data_f32[idx];
476 }
477
478 local_values.extend(gathered_values);
479 }
480 } else {
481 for &idx in &local_indices {
483 local_values.push(data[idx].to_f64().unwrap() as f32);
484 }
485 }
486
487 for bootstrap_idx in 0..current_chunk_size {
489 let values_start = bootstrap_idx * n;
490 let values_slice = &local_values[values_start..values_start + n];
491
492 let mut resample = Array1::zeros(n);
494 for (i, &val) in values_slice.iter().enumerate() {
495 resample[i] = F::from(val as f64).unwrap();
496 }
497
498 let statistic = statistic_fn(&resample.view())?.into();
499 chunk_statistics.push(statistic);
500 }
501
502 Ok::<Vec<F>, StatsError>(chunk_statistics)
503 })
504 .collect();
505
506 let all_chunk_results = chunk_results?;
507 let mut result_idx = 0;
508 for chunk_result in all_chunk_results {
509 for statistic in chunk_result {
510 if result_idx < self.config.n_bootstrap {
511 bootstrap_samples[result_idx] = statistic;
512 result_idx += 1;
513 }
514 }
515 }
516 } else {
517 for chunk_idx in 0..num_chunks {
519 let start_bootstrap = chunk_idx * chunk_size;
520 let end_bootstrap =
521 std::cmp::min(start_bootstrap + chunk_size, self.config.n_bootstrap);
522 let current_chunk_size = end_bootstrap - start_bootstrap;
523
524 if current_chunk_size == 0 {
525 break;
526 }
527
528 resample_indices.clear();
530 for _ in 0..current_chunk_size {
531 for _ in 0..n {
532 resample_indices.push(self.rng.gen_range(0..n));
533 }
534 }
535
536 resample_values.clear();
538 if capabilities.has_avx2() && n >= 8 {
539 let data_f32: Vec<f32> =
541 data.iter().map(|&x| x.to_f64().unwrap() as f32).collect();
542
543 for bootstrap_idx in 0..current_chunk_size {
544 let indices_start = bootstrap_idx * n;
545 let indices_slice = &resample_indices[indices_start..indices_start + n];
546
547 for &idx in indices_slice {
548 resample_values.push(F::from(data_f32[idx]).unwrap());
549 }
550 }
551 } else {
552 for &idx in &resample_indices {
554 resample_values.push(data[idx]);
555 }
556 }
557
558 batch_statistics.clear();
560 for bootstrap_idx in 0..current_chunk_size {
561 let values_start = bootstrap_idx * n;
562 let values_slice = &resample_values[values_start..values_start + n];
563
564 let mut resample = Array1::zeros(n);
566 for (i, &val) in values_slice.iter().enumerate() {
567 resample[i] = val;
568 }
569
570 let statistic = statistic_fn(&resample.view())?.into();
571 batch_statistics.push(statistic);
572 }
573
574 for (i, &statistic) in batch_statistics.iter().enumerate() {
576 let result_idx = start_bootstrap + i;
577 if result_idx < self.config.n_bootstrap {
578 bootstrap_samples[result_idx] = statistic;
579 }
580 }
581 }
582 }
583
584 Ok(bootstrap_samples)
585 }
586
587 fn stratified_bootstrap<T>(
589 &mut self,
590 data: &ArrayView1<F>,
591 strata: &[usize],
592 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
593 ) -> StatsResult<Array1<F>>
594 where
595 T: Into<F> + Copy + Send + Sync,
596 {
597 if data.len() != strata.len() {
598 return Err(StatsError::DimensionMismatch(
599 "Data and strata must have same length".to_string(),
600 ));
601 }
602
603 let mut strata_groups: HashMap<usize, Vec<(usize, F)>> = HashMap::new();
605 for (i, (&value, &stratum)) in data.iter().zip(strata.iter()).enumerate() {
606 strata_groups.entry(stratum).or_default().push((i, value));
607 }
608
609 let n = data.len();
610 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
611
612 for i in 0..self.config.n_bootstrap {
613 let mut resample = Array1::zeros(n);
614 let mut resample_idx = 0;
615
616 for groupdata in strata_groups.values() {
618 let groupsize = groupdata.len();
619
620 for _ in 0..groupsize {
621 let idx = self.rng.gen_range(0..groupsize);
622 resample[resample_idx] = groupdata[idx].1;
623 resample_idx += 1;
624 }
625 }
626
627 bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
628 }
629
630 Ok(bootstrap_samples)
631 }
632
633 fn block_bootstrap<T>(
635 &mut self,
636 data: &ArrayView1<F>,
637 block_type: &BlockType,
638 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
639 ) -> StatsResult<Array1<F>>
640 where
641 T: Into<F> + Copy + Send + Sync,
642 {
643 let n = data.len();
644 let block_length = self
645 .config
646 .block_length
647 .unwrap_or_else(|| self.optimal_block_length(n));
648
649 if block_length >= n {
650 return Err(StatsError::InvalidArgument(
651 "Block length must be less than data length".to_string(),
652 ));
653 }
654
655 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
656
657 for i in 0..self.config.n_bootstrap {
658 let resample = match block_type {
659 BlockType::Moving => self.moving_blockbootstrap(data, block_length)?,
660 BlockType::Circular => self.circular_blockbootstrap(data, block_length)?,
661 BlockType::NonOverlapping => {
662 self.non_overlapping_blockbootstrap(data, block_length)?
663 }
664 BlockType::Stationary { expected_length } => {
665 self.stationarybootstrap(data, *expected_length)?
666 }
667 BlockType::Tapered { taper_function } => {
668 self.tapered_blockbootstrap(data, block_length, taper_function)?
669 }
670 };
671
672 bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
673 }
674
675 Ok(bootstrap_samples)
676 }
677
678 fn moving_blockbootstrap(
680 &mut self,
681 data: &ArrayView1<F>,
682 block_length: usize,
683 ) -> StatsResult<Array1<F>> {
684 let n = data.len();
685 let n_blocks = n.div_ceil(block_length); let mut resample = Array1::zeros(n);
687 let mut pos = 0;
688
689 for _ in 0..n_blocks {
690 if pos >= n {
691 break;
692 }
693
694 let start_idx = self.rng.gen_range(0..(n - block_length));
695 let copy_length = std::cmp::min(block_length, n - pos);
696
697 for i in 0..copy_length {
698 resample[pos + i] = data[start_idx + i];
699 }
700 pos += copy_length;
701 }
702
703 Ok(resample)
704 }
705
706 fn circular_blockbootstrap(
708 &mut self,
709 data: &ArrayView1<F>,
710 block_length: usize,
711 ) -> StatsResult<Array1<F>> {
712 let n = data.len();
713 let n_blocks = n.div_ceil(block_length);
714 let mut resample = Array1::zeros(n);
715 let mut pos = 0;
716
717 for _ in 0..n_blocks {
718 if pos >= n {
719 break;
720 }
721
722 let start_idx = self.rng.gen_range(0..n);
723 let copy_length = std::cmp::min(block_length, n - pos);
724
725 for i in 0..copy_length {
726 let idx = (start_idx + i) % n; resample[pos + i] = data[idx];
728 }
729 pos += copy_length;
730 }
731
732 Ok(resample)
733 }
734
735 fn non_overlapping_blockbootstrap(
737 &mut self,
738 data: &ArrayView1<F>,
739 block_length: usize,
740 ) -> StatsResult<Array1<F>> {
741 let n = data.len();
742 let n_complete_blocks = n / block_length;
743 let remainder = n % block_length;
744
745 let mut blocks = Vec::new();
747 for i in 0..n_complete_blocks {
748 let start = i * block_length;
749 let end = start + block_length;
750 blocks.push(data.slice(scirs2_core::ndarray::s![start..end]).to_owned());
751 }
752
753 if remainder > 0 {
755 let start = n_complete_blocks * block_length;
756 blocks.push(data.slice(scirs2_core::ndarray::s![start..]).to_owned());
757 }
758
759 let mut resample = Array1::zeros(n);
761 let mut pos = 0;
762
763 while pos < n {
764 let block_idx = self.rng.gen_range(0..blocks.len());
765 let block = &blocks[block_idx];
766 let copy_length = std::cmp::min(block.len(), n - pos);
767
768 for i in 0..copy_length {
769 resample[pos + i] = block[i];
770 }
771 pos += copy_length;
772 }
773
774 Ok(resample)
775 }
776
777 fn stationarybootstrap(
779 &mut self,
780 data: &ArrayView1<F>,
781 expected_length: f64,
782 ) -> StatsResult<Array1<F>> {
783 let n = data.len();
784 let p = 1.0 / expected_length; let mut resample = Array1::zeros(n);
786 let mut pos = 0;
787
788 while pos < n {
789 let start_idx = self.rng.gen_range(0..n);
790 let mut block_length = 1;
791
792 while self.rng.random::<f64>() > p && block_length < n - pos {
794 block_length += 1;
795 }
796
797 for i in 0..block_length {
799 if pos + i >= n {
800 break;
801 }
802 let idx = (start_idx + i) % n;
803 resample[pos + i] = data[idx];
804 }
805
806 pos += block_length;
807 }
808
809 Ok(resample)
810 }
811
812 fn tapered_blockbootstrap(
814 &mut self,
815 data: &ArrayView1<F>,
816 block_length: usize,
817 taper_function: &TaperFunction,
818 ) -> StatsResult<Array1<F>> {
819 let n = data.len();
820 let mut resample = Array1::zeros(n);
821 let n_blocks = n.div_ceil(block_length);
822 let mut pos = 0;
823
824 for _ in 0..n_blocks {
825 if pos >= n {
826 break;
827 }
828
829 let start_idx = self.rng.gen_range(0..(n - block_length));
830 let copy_length = std::cmp::min(block_length, n - pos);
831
832 for i in 0..copy_length {
834 let weight = self.compute_taper_weight(i, copy_length, taper_function);
835 let value = data[start_idx + i] * F::from(weight).unwrap();
836
837 if pos + i < resample.len() {
838 resample[pos + i] = resample[pos + i] + value;
839 }
840 }
841 pos += copy_length;
842 }
843
844 Ok(resample)
845 }
846
847 fn compute_taper_weight(
849 &self,
850 position: usize,
851 block_length: usize,
852 taper_function: &TaperFunction,
853 ) -> f64 {
854 let t = position as f64 / (block_length - 1) as f64;
855
856 match taper_function {
857 TaperFunction::Linear => {
858 if t <= 0.5 {
859 2.0 * t
860 } else {
861 2.0 * (1.0 - t)
862 }
863 }
864 TaperFunction::Cosine => 0.5 * (1.0 - (std::f64::consts::PI * t).cos()),
865 TaperFunction::Exponential { decay_rate } => {
866 let distance_from_center = (t - 0.5).abs();
867 (-decay_rate * distance_from_center).exp()
868 }
869 }
870 }
871
872 fn bayesian_bootstrap<T>(
874 &mut self,
875 data: &ArrayView1<F>,
876 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
877 ) -> StatsResult<Array1<F>>
878 where
879 T: Into<F> + Copy + Send + Sync,
880 {
881 let n = data.len();
882 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
883
884 for i in 0..self.config.n_bootstrap {
885 let mut weights = Array1::zeros(n);
887 let mut weight_sum = F::zero();
888
889 for j in 0..n {
890 let exp_sample = -self.rng.random::<f64>().ln(); weights[j] = F::from(exp_sample).unwrap();
892 weight_sum = weight_sum + weights[j];
893 }
894
895 for j in 0..n {
897 weights[j] = weights[j] / weight_sum;
898 }
899
900 let mut resample = Array1::zeros(n);
902 for j in 0..n {
903 resample[j] = data[j] * weights[j] * F::from(n).unwrap(); }
905
906 bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
907 }
908
909 Ok(bootstrap_samples)
910 }
911
912 fn wild_bootstrap<T>(
914 &mut self,
915 data: &ArrayView1<F>,
916 distribution: &WildDistribution,
917 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
918 ) -> StatsResult<Array1<F>>
919 where
920 T: Into<F> + Copy + Send + Sync,
921 {
922 let n = data.len();
923 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
924
925 for i in 0..self.config.n_bootstrap {
926 let mut resample = Array1::zeros(n);
927
928 for j in 0..n {
929 let multiplier = match distribution {
930 WildDistribution::Rademacher => {
931 if self.rng.random::<f64>() < 0.5 {
932 -1.0
933 } else {
934 1.0
935 }
936 }
937 WildDistribution::Mammen => {
938 let _golden_ratio = (1.0 + 5.0_f64.sqrt()) / 2.0;
939 let p = (5.0_f64.sqrt() + 1.0) / (2.0 * 5.0_f64.sqrt());
940 if self.rng.random::<f64>() < p {
941 -(5.0_f64.sqrt() - 1.0) / 2.0
942 } else {
943 (5.0_f64.sqrt() + 1.0) / 2.0
944 }
945 }
946 WildDistribution::Normal => {
947 let u1 = self.rng.random::<f64>();
949 let u2 = self.rng.random::<f64>();
950 (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos()
951 }
952 WildDistribution::TwoPoint { prob_positive } => {
953 if self.rng.random::<f64>() < *prob_positive {
954 1.0
955 } else {
956 -1.0
957 }
958 }
959 };
960
961 resample[j] = data[j] * F::from(multiplier).unwrap();
962 }
963
964 bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
965 }
966
967 Ok(bootstrap_samples)
968 }
969
970 fn parametric_bootstrap<T>(
972 &mut self,
973 data: &ArrayView1<F>,
974 distribution_params: &ParametricBootstrapParams,
975 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
976 ) -> StatsResult<Array1<F>>
977 where
978 T: Into<F> + Copy + Send + Sync,
979 {
980 let n = data.len();
981 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
982
983 for i in 0..self.config.n_bootstrap {
984 let resample = match distribution_params {
985 ParametricBootstrapParams::Normal { mean, std } => {
986 self.generate_normal_sample(n, *mean, *std)?
987 }
988 ParametricBootstrapParams::Exponential { rate } => {
989 self.generate_exponential_sample(n, *rate)?
990 }
991 ParametricBootstrapParams::Gamma { shape, scale } => {
992 self.generate_gamma_sample(n, *shape, *scale)?
993 }
994 ParametricBootstrapParams::Beta { alpha, beta } => {
995 self.generate_beta_sample(n, *alpha, *beta)?
996 }
997 ParametricBootstrapParams::Custom { name, .. } => {
998 return Err(StatsError::InvalidArgument(format!(
999 "Custom distribution '{}' not implemented",
1000 name
1001 )));
1002 }
1003 };
1004
1005 bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
1006 }
1007
1008 Ok(bootstrap_samples)
1009 }
1010
1011 fn balanced_bootstrap<T>(
1013 &mut self,
1014 data: &ArrayView1<F>,
1015 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
1016 ) -> StatsResult<Array1<F>>
1017 where
1018 T: Into<F> + Copy + Send + Sync,
1019 {
1020 let n = data.len();
1021 let mut bootstrap_samples = Array1::zeros(self.config.n_bootstrap);
1022
1023 let total_samples = self.config.n_bootstrap * n;
1025 let mut all_indices = Vec::with_capacity(total_samples);
1026
1027 for _ in 0..self.config.n_bootstrap {
1028 for i in 0..n {
1029 all_indices.push(i);
1030 }
1031 }
1032
1033 for i in (1..all_indices.len()).rev() {
1035 let j = self.rng.gen_range(0..i);
1036 all_indices.swap(i, j);
1037 }
1038
1039 for i in 0..self.config.n_bootstrap {
1041 let mut resample = Array1::zeros(n);
1042 let start_idx = i * n;
1043
1044 for j in 0..n {
1045 let data_idx = all_indices[start_idx + j];
1046 resample[j] = data[data_idx];
1047 }
1048
1049 bootstrap_samples[i] = statistic_fn(&resample.view())?.into();
1050 }
1051
1052 Ok(bootstrap_samples)
1053 }
1054
1055 fn generate_normal_sample(&mut self, n: usize, mean: f64, std: f64) -> StatsResult<Array1<F>> {
1057 let mut sample = Array1::zeros(n);
1058
1059 for i in 0..n {
1060 let u1 = self.rng.random::<f64>();
1061 let u2 = self.rng.random::<f64>();
1062 let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos();
1063 sample[i] = F::from(mean + std * z).unwrap();
1064 }
1065
1066 Ok(sample)
1067 }
1068
1069 fn generate_exponential_sample(&mut self, n: usize, rate: f64) -> StatsResult<Array1<F>> {
1071 let mut sample = Array1::zeros(n);
1072
1073 for i in 0..n {
1074 let u = self.rng.random::<f64>();
1075 let x = -u.ln() / rate;
1076 sample[i] = F::from(x).unwrap();
1077 }
1078
1079 Ok(sample)
1080 }
1081
1082 fn generate_gamma_sample(
1084 &mut self,
1085 n: usize,
1086 shape: f64,
1087 scale: f64,
1088 ) -> StatsResult<Array1<F>> {
1089 let mut sample = Array1::zeros(n);
1091
1092 for i in 0..n {
1093 let mean = shape * scale;
1095 let std = (shape * scale * scale).sqrt();
1096 let u1 = self.rng.random::<f64>();
1097 let u2 = self.rng.random::<f64>();
1098 let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos();
1099 sample[i] = F::from((mean + std * z).max(0.0)).unwrap();
1100 }
1101
1102 Ok(sample)
1103 }
1104
1105 fn generate_beta_sample(&mut self, n: usize, alpha: f64, beta: f64) -> StatsResult<Array1<F>> {
1107 let mut sample = Array1::zeros(n);
1109
1110 for i in 0..n {
1111 let x = self.rng.random::<f64>().powf(1.0 / alpha);
1112 let y = self.rng.random::<f64>().powf(1.0 / beta);
1113 let value = x / (x + y);
1114 sample[i] = F::from(value).unwrap();
1115 }
1116
1117 Ok(sample)
1118 }
1119
1120 fn optimal_block_length(&self, n: usize) -> usize {
1122 let length = (n as f64).powf(1.0 / 3.0).ceil() as usize;
1124 std::cmp::max(1, std::cmp::min(length, n / 4))
1125 }
1126
1127 fn compute_effective_samplesize(&self, n: usize) -> usize {
1129 let block_length = self
1130 .config
1131 .block_length
1132 .unwrap_or_else(|| self.optimal_block_length(n));
1133
1134 let correlation_factor = 1.0 - (block_length as f64 - 1.0) / (2.0 * n as f64);
1136 (n as f64 * correlation_factor).ceil() as usize
1137 }
1138
1139 fn compute_confidence_intervals(
1141 &self,
1142 bootstrap_samples: &Array1<F>,
1143 original_statistic: F,
1144 _standard_error: F,
1145 ) -> StatsResult<BootstrapConfidenceIntervals<F>> {
1146 let alpha = 1.0 - self.config.confidence_level;
1147 let lower_percentile = alpha / 2.0;
1148 let upper_percentile = 1.0 - alpha / 2.0;
1149
1150 let mut sorted_samples = bootstrap_samples.to_vec();
1152 sorted_samples.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1153
1154 let n = sorted_samples.len();
1155 let lower_idx = ((n as f64) * lower_percentile).floor() as usize;
1156 let upper_idx = ((n as f64) * upper_percentile).ceil() as usize - 1;
1157
1158 let percentile = (
1160 sorted_samples[lower_idx],
1161 sorted_samples[upper_idx.min(n - 1)],
1162 );
1163
1164 let basic = (
1166 F::from(2.0).unwrap() * original_statistic - sorted_samples[upper_idx.min(n - 1)],
1167 F::from(2.0).unwrap() * original_statistic - sorted_samples[lower_idx],
1168 );
1169
1170 let bias_corrected = if self.config.bias_correction {
1172 let bias_correction =
1173 self.compute_bias_correction(bootstrap_samples, original_statistic);
1174 Some((
1175 percentile.0 + bias_correction,
1176 percentile.1 + bias_correction,
1177 ))
1178 } else {
1179 None
1180 };
1181
1182 let bias_corrected_accelerated = if self.config.acceleration_correction {
1184 bias_corrected
1186 } else {
1187 None
1188 };
1189
1190 Ok(BootstrapConfidenceIntervals {
1191 percentile,
1192 basic,
1193 bias_corrected,
1194 bias_corrected_accelerated,
1195 studentized: None, })
1197 }
1198
1199 fn compute_bias_correction(&self, bootstrap_samples: &Array1<F>, originalstatistic: F) -> F {
1201 let _count_below = bootstrap_samples
1202 .iter()
1203 .filter(|&&x| x < originalstatistic)
1204 .count();
1205
1206 let _proportion = _count_below as f64 / bootstrap_samples.len() as f64;
1207
1208 let bootstrap_mean = self.compute_mean(bootstrap_samples);
1210 bootstrap_mean - originalstatistic
1211 }
1212
1213 fn compute_diagnostics(
1215 &self,
1216 bootstrap_samples: &Array1<F>,
1217 original_statistic: F,
1218 ) -> StatsResult<BootstrapDiagnostics<F>> {
1219 let distribution_stats = self.compute_distribution_stats(bootstrap_samples)?;
1220 let quality_metrics =
1221 self.compute_quality_metrics(bootstrap_samples, original_statistic)?;
1222 let convergence_info = self.compute_convergence_info(bootstrap_samples)?;
1223 let method_specific = HashMap::new(); Ok(BootstrapDiagnostics {
1226 distribution_stats,
1227 quality_metrics,
1228 convergence_info,
1229 method_specific,
1230 })
1231 }
1232
1233 fn compute_distribution_stats(
1235 &self,
1236 samples: &Array1<F>,
1237 ) -> StatsResult<BootstrapDistributionStats<F>> {
1238 let mean = self.compute_mean(samples);
1239 let std = self.compute_std(samples);
1240
1241 let skewness = if std > F::zero() {
1243 let skew_sum = samples
1244 .iter()
1245 .map(|&x| {
1246 let z = (x - mean) / std;
1247 z * z * z
1248 })
1249 .fold(F::zero(), |acc, x| acc + x);
1250 skew_sum / F::from(samples.len()).unwrap()
1251 } else {
1252 F::zero()
1253 };
1254
1255 let kurtosis = if std > F::zero() {
1257 let kurt_sum = samples
1258 .iter()
1259 .map(|&x| {
1260 let z = (x - mean) / std;
1261 z * z * z * z
1262 })
1263 .fold(F::zero(), |acc, x| acc + x);
1264 kurt_sum / F::from(samples.len()).unwrap() - F::from(3.0).unwrap()
1265 } else {
1266 F::zero()
1267 };
1268
1269 let min_value = samples.iter().copied().fold(F::infinity(), F::min);
1271 let max_value = samples.iter().copied().fold(F::neg_infinity(), F::max);
1272
1273 Ok(BootstrapDistributionStats {
1274 skewness,
1275 kurtosis,
1276 jarque_bera: F::zero(), anderson_darling: F::zero(), min_value,
1279 max_value,
1280 })
1281 }
1282
1283 fn compute_quality_metrics(
1285 &self,
1286 samples: &Array1<F>,
1287 _original_statistic: F,
1288 ) -> StatsResult<QualityMetrics<F>> {
1289 let std_error = self.compute_std(samples);
1290 let mc_std_error = std_error / F::from((samples.len() as f64).sqrt()).unwrap();
1291
1292 Ok(QualityMetrics {
1293 mc_standard_error: mc_std_error,
1294 coverage_probability: F::from(self.config.confidence_level).unwrap(),
1295 efficiency: None, stability: F::one(), })
1298 }
1299
1300 fn compute_convergence_info(&self, samples: &Array1<F>) -> StatsResult<ConvergenceInfo<F>> {
1302 let converged = samples.len() >= 100; Ok(ConvergenceInfo {
1306 converged,
1307 convergence_samplesize: if converged { Some(samples.len()) } else { None },
1308 mean_stability: F::one(), variance_stability: F::one(), })
1311 }
1312
1313 fn compute_mean(&self, data: &Array1<F>) -> F {
1315 if data.is_empty() {
1316 F::zero()
1317 } else {
1318 data.sum() / F::from(data.len()).unwrap()
1319 }
1320 }
1321
1322 fn compute_std(&self, data: &Array1<F>) -> F {
1324 if data.len() <= 1 {
1325 return F::zero();
1326 }
1327
1328 let mean = self.compute_mean(data);
1329 let variance = data
1330 .iter()
1331 .map(|&x| (x - mean) * (x - mean))
1332 .fold(F::zero(), |acc, x| acc + x)
1333 / F::from(data.len() - 1).unwrap();
1334
1335 variance.sqrt()
1336 }
1337}
1338
1339#[allow(dead_code)]
1341pub fn stratified_bootstrap<F, T>(
1342 data: &ArrayView1<F>,
1343 strata: &[usize],
1344 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
1345 config: Option<AdvancedBootstrapConfig>,
1346) -> StatsResult<AdvancedBootstrapResult<F>>
1347where
1348 F: Float
1349 + NumCast
1350 + SimdUnifiedOps
1351 + Zero
1352 + One
1353 + FromPrimitive
1354 + Copy
1355 + Send
1356 + Sync
1357 + std::fmt::Display,
1358 T: Into<F> + Copy + Send + Sync,
1359{
1360 let mut config = config.unwrap_or_default();
1361 config.bootstrap_type = BootstrapType::Stratified {
1362 strata: strata.to_vec(),
1363 };
1364
1365 let mut processor = AdvancedBootstrapProcessor::new(config);
1366 processor.bootstrap(data, statistic_fn)
1367}
1368
1369#[allow(dead_code)]
1371pub fn block_bootstrap<F, T>(
1372 data: &ArrayView1<F>,
1373 block_type: BlockType,
1374 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
1375 config: Option<AdvancedBootstrapConfig>,
1376) -> StatsResult<AdvancedBootstrapResult<F>>
1377where
1378 F: Float
1379 + NumCast
1380 + SimdUnifiedOps
1381 + Zero
1382 + One
1383 + FromPrimitive
1384 + Copy
1385 + Send
1386 + Sync
1387 + std::fmt::Display,
1388 T: Into<F> + Copy + Send + Sync,
1389{
1390 let mut config = config.unwrap_or_default();
1391 config.bootstrap_type = BootstrapType::Block { block_type };
1392
1393 let mut processor = AdvancedBootstrapProcessor::new(config);
1394 processor.bootstrap(data, statistic_fn)
1395}
1396
1397#[allow(dead_code)]
1399pub fn moving_block_bootstrap<F, T>(
1400 data: &ArrayView1<F>,
1401 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
1402 block_length: Option<usize>,
1403 n_bootstrap: Option<usize>,
1404) -> StatsResult<AdvancedBootstrapResult<F>>
1405where
1406 F: Float
1407 + NumCast
1408 + SimdUnifiedOps
1409 + Zero
1410 + One
1411 + FromPrimitive
1412 + Copy
1413 + Send
1414 + Sync
1415 + std::fmt::Display,
1416 T: Into<F> + Copy + Send + Sync,
1417{
1418 let mut config = AdvancedBootstrapConfig::default();
1419 config.bootstrap_type = BootstrapType::Block {
1420 block_type: BlockType::Moving,
1421 };
1422 config.block_length = block_length;
1423 config.n_bootstrap = n_bootstrap.unwrap_or(1000);
1424
1425 let mut processor = AdvancedBootstrapProcessor::new(config);
1426 processor.bootstrap(data, statistic_fn)
1427}
1428
1429#[allow(dead_code)]
1431pub fn circular_block_bootstrap<F, T>(
1432 data: &ArrayView1<F>,
1433 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
1434 block_length: Option<usize>,
1435 n_bootstrap: Option<usize>,
1436) -> StatsResult<AdvancedBootstrapResult<F>>
1437where
1438 F: Float
1439 + NumCast
1440 + SimdUnifiedOps
1441 + Zero
1442 + One
1443 + FromPrimitive
1444 + Copy
1445 + Send
1446 + Sync
1447 + std::fmt::Display,
1448 T: Into<F> + Copy + Send + Sync,
1449{
1450 let mut config = AdvancedBootstrapConfig::default();
1451 config.bootstrap_type = BootstrapType::Block {
1452 block_type: BlockType::Circular,
1453 };
1454 config.block_length = block_length;
1455 config.n_bootstrap = n_bootstrap.unwrap_or(1000);
1456
1457 let mut processor = AdvancedBootstrapProcessor::new(config);
1458 processor.bootstrap(data, statistic_fn)
1459}
1460
1461#[allow(dead_code)]
1463pub fn stationary_bootstrap<F, T>(
1464 data: &ArrayView1<F>,
1465 statistic_fn: impl Fn(&ArrayView1<F>) -> StatsResult<T> + Send + Sync + Copy,
1466 expected_block_length: f64,
1467 n_bootstrap: Option<usize>,
1468) -> StatsResult<AdvancedBootstrapResult<F>>
1469where
1470 F: Float
1471 + NumCast
1472 + SimdUnifiedOps
1473 + Zero
1474 + One
1475 + FromPrimitive
1476 + Copy
1477 + Send
1478 + Sync
1479 + std::fmt::Display,
1480 T: Into<F> + Copy + Send + Sync,
1481{
1482 let mut config = AdvancedBootstrapConfig::default();
1483 config.bootstrap_type = BootstrapType::Block {
1484 block_type: BlockType::Stationary {
1485 expected_length: expected_block_length,
1486 },
1487 };
1488 config.n_bootstrap = n_bootstrap.unwrap_or(1000);
1489
1490 let mut processor = AdvancedBootstrapProcessor::new(config);
1491 processor.bootstrap(data, statistic_fn)
1492}
1493
1494#[cfg(test)]
1495mod tests {
1496 use super::*;
1497 use scirs2_core::ndarray::array;
1498
1499 #[test]
1500 fn test_basicbootstrap() {
1501 let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
1502 let mean_fn = |x: &ArrayView1<f64>| -> StatsResult<f64> { Ok(x.sum() / x.len() as f64) };
1503
1504 let config = AdvancedBootstrapConfig {
1505 n_bootstrap: 100,
1506 seed: Some(42),
1507 ..Default::default()
1508 };
1509
1510 let mut processor = AdvancedBootstrapProcessor::new(config);
1511 let result = processor.bootstrap(&data.view(), mean_fn).unwrap();
1512
1513 assert_eq!(result.n_successful, 100);
1514 assert!(result.bootstrap_samples.len() == 100);
1515 assert!((result.original_statistic - 3.0).abs() < 1e-10);
1516 }
1517
1518 #[test]
1519 #[ignore = "timeout"]
1520 fn test_stratifiedbootstrap() {
1521 let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
1522 let strata = vec![0, 0, 1, 1, 2, 2]; let mean_fn = |x: &ArrayView1<f64>| -> StatsResult<f64> { Ok(x.sum() / x.len() as f64) };
1524
1525 let result = stratified_bootstrap(
1526 &data.view(),
1527 &strata,
1528 mean_fn,
1529 Some(AdvancedBootstrapConfig {
1530 n_bootstrap: 50,
1531 seed: Some(123),
1532 ..Default::default()
1533 }),
1534 )
1535 .unwrap();
1536
1537 assert_eq!(result.n_successful, 50);
1538 assert!(matches!(result.method, BootstrapType::Stratified { .. }));
1539 }
1540
1541 #[test]
1542 #[ignore = "timeout"]
1543 fn test_moving_blockbootstrap() {
1544 let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
1545 let mean_fn = |x: &ArrayView1<f64>| -> StatsResult<f64> { Ok(x.sum() / x.len() as f64) };
1546
1547 let result = moving_block_bootstrap(
1548 &data.view(),
1549 mean_fn,
1550 Some(3), Some(50), )
1553 .unwrap();
1554
1555 assert_eq!(result.n_successful, 50);
1556 assert!(result.effective_samplesize.is_some());
1557 }
1558
1559 #[test]
1560 #[ignore = "timeout"]
1561 fn test_circular_blockbootstrap() {
1562 let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
1563 let mean_fn = |x: &ArrayView1<f64>| -> StatsResult<f64> { Ok(x.sum() / x.len() as f64) };
1564
1565 let result = circular_block_bootstrap(&data.view(), mean_fn, Some(2), Some(30)).unwrap();
1566
1567 assert_eq!(result.n_successful, 30);
1568 }
1569
1570 #[test]
1571 #[ignore = "timeout"]
1572 fn test_confidence_intervals() {
1573 let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
1574 let mean_fn = |x: &ArrayView1<f64>| -> StatsResult<f64> { Ok(x.sum() / x.len() as f64) };
1575
1576 let config = AdvancedBootstrapConfig {
1577 n_bootstrap: 200,
1578 confidence_level: 0.95,
1579 seed: Some(42),
1580 ..Default::default()
1581 };
1582
1583 let mut processor = AdvancedBootstrapProcessor::new(config);
1584 let result = processor.bootstrap(&data.view(), mean_fn).unwrap();
1585
1586 let ci = &result.confidence_intervals;
1587 assert!(ci.percentile.0 <= ci.percentile.1);
1588 assert!(ci.basic.0 <= ci.basic.1);
1589 }
1590
1591 #[test]
1592 #[ignore = "timeout"]
1593 fn test_bootstrap_diagnostics() {
1594 let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
1595 let var_fn = |x: &ArrayView1<f64>| -> StatsResult<f64> {
1596 let mean = x.sum() / x.len() as f64;
1597 let var = x.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / (x.len() - 1) as f64;
1598 Ok(var)
1599 };
1600
1601 let config = AdvancedBootstrapConfig {
1602 n_bootstrap: 100,
1603 seed: Some(456),
1604 ..Default::default()
1605 };
1606
1607 let mut processor = AdvancedBootstrapProcessor::new(config);
1608 let result = processor.bootstrap(&data.view(), var_fn).unwrap();
1609
1610 assert!(result.diagnostics.convergence_info.converged);
1611 assert!(
1612 result.diagnostics.distribution_stats.min_value
1613 <= result.diagnostics.distribution_stats.max_value
1614 );
1615 }
1616}