use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array1, ArrayView1, ArrayView2};
use scirs2_core::numeric::{Float, NumCast, One, Zero};
use scirs2_core::random::Rng;
use scirs2_core::{simd_ops::SimdUnifiedOps, validation::*};
use statrs::statistics::Statistics;

/// Computes rolling (sliding-window) statistics over a 1-D array, using SIMD
/// reductions for windows large enough to benefit from them.
#[allow(dead_code)]
pub fn rolling_statistics_simd<F>(
    data: &ArrayView1<F>,
    windowsize: usize,
    statistics: &[RollingStatistic],
) -> StatsResult<RollingStatsResult<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + PartialOrd
        + Copy
        + Send
        + Sync
        + std::fmt::Display
        + std::iter::Sum<F>,
{
    checkarray_finite(data, "data")?;
    check_positive(windowsize, "windowsize")?;

    if windowsize > data.len() {
        return Err(StatsError::InvalidArgument(
            "Window size cannot be larger than data length".to_string(),
        ));
    }

    let n_windows = data.len() - windowsize + 1;
    let mut results = RollingStatsResult::new(n_windows, statistics);

    for i in 0..n_windows {
        let window = data.slice(scirs2_core::ndarray::s![i..i + windowsize]);
        compute_window_statistics(&window, statistics, &mut results, i);
    }

    Ok(results)
}
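
// Usage sketch (added for illustration, not part of the original API surface):
// computes rolling mean and standard deviation over a 5-sample window. It
// assumes the `scirs2_core` re-exports above behave like `ndarray` and that
// `f64` implements `SimdUnifiedOps`, so details may need adjusting.
#[cfg(test)]
mod rolling_statistics_simd_example {
    use super::*;

    #[test]
    fn rolling_mean_over_five_sample_window() {
        let data = Array1::from(vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
        let stats = rolling_statistics_simd(
            &data.view(),
            5,
            &[RollingStatistic::Mean, RollingStatistic::StandardDeviation],
        )
        .expect("rolling statistics should succeed on finite data");

        // Seven samples with a window of five yield three windows.
        let means = stats.means.as_ref().expect("means were requested");
        assert_eq!(means.len(), 3);
        // The first window is [1, 2, 3, 4, 5], whose mean is 3.
        assert!((means[0] - 3.0).abs() < 1e-12);
    }
}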

#[derive(Debug, Clone, PartialEq)]
pub enum RollingStatistic {
    Mean,
    Variance,
    Min,
    Max,
    Median,
    Skewness,
    Kurtosis,
    Range,
    StandardDeviation,
    MeanAbsoluteDeviation,
}

#[derive(Debug, Clone)]
pub struct RollingStatsResult<F> {
    pub means: Option<Array1<F>>,
    pub variances: Option<Array1<F>>,
    pub mins: Option<Array1<F>>,
    pub maxs: Option<Array1<F>>,
    pub medians: Option<Array1<F>>,
    pub skewness: Option<Array1<F>>,
    pub kurtosis: Option<Array1<F>>,
    pub ranges: Option<Array1<F>>,
    pub std_devs: Option<Array1<F>>,
    pub mad: Option<Array1<F>>,
}

impl<F: Zero + Clone> RollingStatsResult<F> {
    fn new(nwindows: usize, statistics: &[RollingStatistic]) -> Self {
        let mut result = RollingStatsResult {
            means: None,
            variances: None,
            mins: None,
            maxs: None,
            medians: None,
            skewness: None,
            kurtosis: None,
            ranges: None,
            std_devs: None,
            mad: None,
        };

        for stat in statistics {
            match stat {
                RollingStatistic::Mean => result.means = Some(Array1::zeros(nwindows)),
                RollingStatistic::Variance => result.variances = Some(Array1::zeros(nwindows)),
                RollingStatistic::Min => result.mins = Some(Array1::zeros(nwindows)),
                RollingStatistic::Max => result.maxs = Some(Array1::zeros(nwindows)),
                RollingStatistic::Median => result.medians = Some(Array1::zeros(nwindows)),
                RollingStatistic::Skewness => result.skewness = Some(Array1::zeros(nwindows)),
                RollingStatistic::Kurtosis => result.kurtosis = Some(Array1::zeros(nwindows)),
                RollingStatistic::Range => result.ranges = Some(Array1::zeros(nwindows)),
                RollingStatistic::StandardDeviation => {
                    result.std_devs = Some(Array1::zeros(nwindows))
                }
                RollingStatistic::MeanAbsoluteDeviation => {
                    result.mad = Some(Array1::zeros(nwindows))
                }
            }
        }

        result
    }
}

#[allow(dead_code)]
fn compute_window_statistics<F>(
    window: &ArrayView1<F>,
    statistics: &[RollingStatistic],
    results: &mut RollingStatsResult<F>,
    window_idx: usize,
) where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + PartialOrd
        + Copy
        + std::fmt::Display
        + std::iter::Sum<F>,
{
    let windowsize = window.len();
    let windowsize_f = F::from(windowsize).unwrap();

    // Basic reductions: SIMD for larger windows, scalar folds otherwise.
    let (sum, sum_sq, min_val, max_val) = if windowsize > 16 {
        let sum = F::simd_sum(window);
        let sqdata = F::simd_mul(window, window);
        let sum_sq = F::simd_sum(&sqdata.view());
        let min_val = F::simd_min_element(window);
        let max_val = F::simd_max_element(window);
        (sum, sum_sq, min_val, max_val)
    } else {
        let sum = window.iter().fold(F::zero(), |acc, &x| acc + x);
        let sum_sq = window.iter().fold(F::zero(), |acc, &x| acc + x * x);
        let min_val = window
            .iter()
            .fold(window[0], |acc, &x| if x < acc { x } else { acc });
        let max_val = window
            .iter()
            .fold(window[0], |acc, &x| if x > acc { x } else { acc });
        (sum, sum_sq, min_val, max_val)
    };

    let mean = sum / windowsize_f;
    // Sample variance via the one-pass identity (sum_sq - sum^2 / n) / (n - 1).
    let variance = if windowsize > 1 {
        let n_minus_1 = F::from(windowsize - 1).unwrap();
        (sum_sq - sum * sum / windowsize_f) / n_minus_1
    } else {
        F::zero()
    };

    for stat in statistics {
        match stat {
            RollingStatistic::Mean => {
                if let Some(ref mut means) = results.means {
                    means[window_idx] = mean;
                }
            }
            RollingStatistic::Variance => {
                if let Some(ref mut variances) = results.variances {
                    variances[window_idx] = variance;
                }
            }
            RollingStatistic::Min => {
                if let Some(ref mut mins) = results.mins {
                    mins[window_idx] = min_val;
                }
            }
            RollingStatistic::Max => {
                if let Some(ref mut maxs) = results.maxs {
                    maxs[window_idx] = max_val;
                }
            }
            RollingStatistic::Range => {
                if let Some(ref mut ranges) = results.ranges {
                    ranges[window_idx] = max_val - min_val;
                }
            }
            RollingStatistic::StandardDeviation => {
                if let Some(ref mut std_devs) = results.std_devs {
                    std_devs[window_idx] = variance.sqrt();
                }
            }
            RollingStatistic::Median => {
                if let Some(ref mut medians) = results.medians {
                    let mut sorted_window = window.to_owned();
                    sorted_window
                        .as_slice_mut()
                        .unwrap()
                        .sort_by(|a, b| a.partial_cmp(b).unwrap());
                    medians[window_idx] = if windowsize % 2 == 1 {
                        sorted_window[windowsize / 2]
                    } else {
                        let mid1 = sorted_window[windowsize / 2 - 1];
                        let mid2 = sorted_window[windowsize / 2];
                        (mid1 + mid2) / F::from(2.0).unwrap()
                    };
                }
            }
            RollingStatistic::Skewness => {
                if let Some(ref mut skewness) = results.skewness {
                    if variance > F::zero() {
                        let std_dev = variance.sqrt();
                        let skew_sum = if windowsize > 16 {
                            let mean_vec = Array1::from_elem(windowsize, mean);
                            let centered = F::simd_sub(window, &mean_vec.view());
                            let cubed = F::simd_mul(
                                &F::simd_mul(&centered.view(), &centered.view()).view(),
                                &centered.view(),
                            );
                            F::simd_sum(&cubed.view())
                        } else {
                            window
                                .iter()
                                .map(|&x| {
                                    let dev = x - mean;
                                    dev * dev * dev
                                })
                                .fold(F::zero(), |acc, x| acc + x)
                        };
                        skewness[window_idx] = skew_sum / (windowsize_f * std_dev.powi(3));
                    } else {
                        skewness[window_idx] = F::zero();
                    }
                }
            }
            RollingStatistic::Kurtosis => {
                if let Some(ref mut kurtosis) = results.kurtosis {
                    if variance > F::zero() {
                        let kurt_sum = if windowsize > 16 {
                            let mean_vec = Array1::from_elem(windowsize, mean);
                            let centered = F::simd_sub(window, &mean_vec.view());
                            let squared = F::simd_mul(&centered.view(), &centered.view());
                            let fourth = F::simd_mul(&squared.view(), &squared.view());
                            F::simd_sum(&fourth.view())
                        } else {
                            window
                                .iter()
                                .map(|&x| {
                                    let dev = x - mean;
                                    let dev_sq = dev * dev;
                                    dev_sq * dev_sq
                                })
                                .fold(F::zero(), |acc, x| acc + x)
                        };
                        // Excess kurtosis: fourth-moment ratio minus 3.
                        kurtosis[window_idx] = (kurt_sum / (windowsize_f * variance * variance))
                            - F::from(3.0).unwrap();
                    } else {
                        kurtosis[window_idx] = F::zero();
                    }
                }
            }
            RollingStatistic::MeanAbsoluteDeviation => {
                if let Some(ref mut mad) = results.mad {
                    let mad_sum = if windowsize > 16 {
                        let mean_vec = Array1::from_elem(windowsize, mean);
                        let centered = F::simd_sub(window, &mean_vec.view());
                        let abs_centered = F::simd_abs(&centered.view());
                        F::simd_sum(&abs_centered.view())
                    } else {
                        window
                            .iter()
                            .map(|&x| (x - mean).abs())
                            .fold(F::zero(), |acc, x| acc + x)
                    };
                    mad[window_idx] = mad_sum / windowsize_f;
                }
            }
        }
    }
}
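
// Sanity-check sketch for the one-pass variance identity used above:
// (sum of squares - sum^2 / n) / (n - 1) equals the usual two-pass sample
// variance. Pure f64 arithmetic; it does not exercise the SIMD code paths.
#[cfg(test)]
mod window_variance_identity_example {
    #[test]
    fn one_pass_variance_matches_two_pass() {
        let window = [2.0_f64, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let n = window.len() as f64;

        let sum: f64 = window.iter().sum();
        let sum_sq: f64 = window.iter().map(|x| x * x).sum();
        let one_pass = (sum_sq - sum * sum / n) / (n - 1.0);

        let mean = sum / n;
        let two_pass: f64 =
            window.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / (n - 1.0);

        assert!((one_pass - two_pass).abs() < 1e-12);
    }
}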

/// Computes statistics over a 2-D array, either globally (`axis = None`),
/// column-wise (`axis = Some(0)`), or row-wise (`axis = Some(1)`), using SIMD
/// reductions where the vectors are large enough.
#[allow(dead_code)]
pub fn matrix_statistics_simd<F>(
    data: &ArrayView2<F>,
    axis: Option<usize>,
    operations: &[MatrixOperation],
) -> StatsResult<MatrixStatsResult<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + PartialOrd
        + Copy
        + Send
        + Sync
        + std::fmt::Display
        + std::iter::Sum<F>,
{
    checkarray_finite(data, "data")?;

    let (n_rows, n_cols) = data.dim();

    if n_rows == 0 || n_cols == 0 {
        return Err(StatsError::InvalidArgument(
            "Data matrix cannot be empty".to_string(),
        ));
    }

    match axis {
        None => compute_global_matrix_stats(data, operations),
        Some(0) => compute_column_wise_stats(data, operations),
        Some(1) => compute_row_wise_stats(data, operations),
        Some(_) => Err(StatsError::InvalidArgument(
            "Axis must be None, 0, or 1 for 2D arrays".to_string(),
        )),
    }
}
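
// Usage sketch (illustrative): column-wise means and L2 norms of a small 2x3
// matrix. `axis = Some(0)` reduces over rows, producing one value per column.
// Assumes `scirs2_core::ndarray` re-exports `Array2` like upstream ndarray.
#[cfg(test)]
mod matrix_statistics_simd_example {
    use super::*;
    use scirs2_core::ndarray::Array2;

    #[test]
    fn column_wise_means_and_l2_norms() {
        let data = Array2::from_shape_vec((2, 3), vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0])
            .expect("shape matches the data length");
        let stats = matrix_statistics_simd(
            &data.view(),
            Some(0),
            &[MatrixOperation::Mean, MatrixOperation::L2Norm],
        )
        .expect("matrix statistics should succeed on finite data");

        let means = stats.means.as_ref().expect("means were requested");
        assert_eq!(means.len(), 3);
        // Column 0 holds [1, 4], so its mean is 2.5.
        assert!((means[0] - 2.5).abs() < 1e-12);
    }
}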

#[derive(Debug, Clone, PartialEq)]
pub enum MatrixOperation {
    Mean,
    Variance,
    StandardDeviation,
    Min,
    Max,
    Sum,
    Product,
    Median,
    Quantile(f64),
    L1Norm,
    L2Norm,
    FrobeniusNorm,
}

#[derive(Debug, Clone)]
pub struct MatrixStatsResult<F> {
    pub means: Option<Array1<F>>,
    pub variances: Option<Array1<F>>,
    pub std_devs: Option<Array1<F>>,
    pub mins: Option<Array1<F>>,
    pub maxs: Option<Array1<F>>,
    pub sums: Option<Array1<F>>,
    pub products: Option<Array1<F>>,
    pub medians: Option<Array1<F>>,
    pub quantiles: Option<Array1<F>>,
    pub l1_norms: Option<Array1<F>>,
    pub l2_norms: Option<Array1<F>>,
    pub frobenius_norm: Option<F>,
}

#[allow(dead_code)]
fn compute_column_wise_stats<F>(
    data: &ArrayView2<F>,
    operations: &[MatrixOperation],
) -> StatsResult<MatrixStatsResult<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + PartialOrd
        + Copy
        + Send
        + Sync
        + std::fmt::Display
        + std::iter::Sum<F>,
{
    let (_n_rows, n_cols) = data.dim();
    let mut results = MatrixStatsResult::new_column_wise(n_cols, operations);

    for j in 0..n_cols {
        let column = data.column(j);
        compute_column_statistics(&column, operations, &mut results, j);
    }

    Ok(results)
}

#[allow(dead_code)]
fn compute_row_wise_stats<F>(
    data: &ArrayView2<F>,
    operations: &[MatrixOperation],
) -> StatsResult<MatrixStatsResult<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + PartialOrd
        + Copy
        + Send
        + Sync
        + std::fmt::Display
        + std::iter::Sum<F>,
{
    let (n_rows, _n_cols) = data.dim();
    let mut results = MatrixStatsResult::new_row_wise(n_rows, operations);

    for i in 0..n_rows {
        let row = data.row(i);
        compute_row_statistics(&row, operations, &mut results, i);
    }

    Ok(results)
}

#[allow(dead_code)]
fn compute_global_matrix_stats<F>(
    data: &ArrayView2<F>,
    operations: &[MatrixOperation],
) -> StatsResult<MatrixStatsResult<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + PartialOrd
        + Copy
        + Send
        + Sync
        + std::fmt::Display
        + std::iter::Sum<F>,
{
    let mut results = MatrixStatsResult::new_global(operations);

    for operation in operations {
        match operation {
            MatrixOperation::FrobeniusNorm => {
                let flattened = Array1::from_iter(data.iter().cloned());
                // Accumulate the squared sum in chunks so large matrices do not
                // allocate one oversized temporary for the element-wise product.
                let squared_sum = if flattened.len() > 1000 {
                    let mut sum = F::zero();
                    let chunksize = 1000;
                    for i in (0..flattened.len()).step_by(chunksize) {
                        let end = (i + chunksize).min(flattened.len());
                        let chunk = flattened.slice(scirs2_core::ndarray::s![i..end]);
                        let squared = F::simd_mul(&chunk, &chunk);
                        sum = sum + F::simd_sum(&squared.view());
                    }
                    sum
                } else {
                    let squared = F::simd_mul(&flattened.view(), &flattened.view());
                    F::simd_sum(&squared.view())
                };
                results.frobenius_norm = Some(squared_sum.sqrt());
            }
            _ => {
                let flattened = Array1::from_iter(data.iter().cloned());
                compute_vector_operation(&flattened.view(), operation, &mut results, 0);
            }
        }
    }

    Ok(results)
}

#[allow(dead_code)]
fn compute_column_statistics<F>(
    column: &ArrayView1<F>,
    operations: &[MatrixOperation],
    results: &mut MatrixStatsResult<F>,
    col_idx: usize,
) where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + PartialOrd
        + Copy
        + std::fmt::Display
        + std::iter::Sum<F>,
{
    for operation in operations {
        compute_vector_operation(column, operation, results, col_idx);
    }
}

#[allow(dead_code)]
fn compute_row_statistics<F>(
    row: &ArrayView1<F>,
    operations: &[MatrixOperation],
    results: &mut MatrixStatsResult<F>,
    row_idx: usize,
) where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + PartialOrd
        + Copy
        + std::fmt::Display
        + std::iter::Sum<F>,
{
    for operation in operations {
        compute_vector_operation(row, operation, results, row_idx);
    }
}

#[allow(dead_code)]
fn compute_vector_operation<F>(
    data: &ArrayView1<F>,
    operation: &MatrixOperation,
    results: &mut MatrixStatsResult<F>,
    idx: usize,
) where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + PartialOrd
        + Copy
        + std::fmt::Display
        + std::iter::Sum<F>,
{
    let n = data.len();
    let n_f = F::from(n).unwrap();
    let use_simd = n > 16;

    match operation {
        MatrixOperation::Mean => {
            if let Some(ref mut means) = results.means {
                means[idx] = if use_simd {
                    F::simd_sum(data) / n_f
                } else {
                    data.iter().copied().sum::<F>() / n_f
                };
            }
        }
        MatrixOperation::Sum => {
            if let Some(ref mut sums) = results.sums {
                sums[idx] = if use_simd {
                    F::simd_sum(data)
                } else {
                    data.iter().copied().sum::<F>()
                };
            }
        }
        MatrixOperation::Min => {
            if let Some(ref mut mins) = results.mins {
                mins[idx] = if use_simd {
                    F::simd_min_element(&data.view())
                } else {
                    data.iter().copied().fold(data[0], F::min)
                };
            }
        }
        MatrixOperation::Max => {
            if let Some(ref mut maxs) = results.maxs {
                maxs[idx] = if use_simd {
                    F::simd_max_element(&data.view())
                } else {
                    data.iter().copied().fold(data[0], F::max)
                };
            }
        }
        MatrixOperation::Variance => {
            if let Some(ref mut variances) = results.variances {
                if use_simd {
                    let sum = F::simd_sum(data);
                    let mean = sum / n_f;
                    let mean_vec = Array1::from_elem(n, mean);
                    let centered = F::simd_sub(data, &mean_vec.view());
                    let squared = F::simd_mul(&centered.view(), &centered.view());
                    let sum_sq = F::simd_sum(&squared.view());
                    variances[idx] = if n > 1 {
                        sum_sq / F::from(n - 1).unwrap()
                    } else {
                        F::zero()
                    };
                } else {
                    let mean = data.iter().copied().sum::<F>() / n_f;
                    let sum_sq = data.iter().map(|&x| (x - mean) * (x - mean)).sum::<F>();
                    variances[idx] = if n > 1 {
                        sum_sq / F::from(n - 1).unwrap()
                    } else {
                        F::zero()
                    };
                }
            }
        }
        MatrixOperation::StandardDeviation => {
            if let Some(ref mut std_devs) = results.std_devs {
                let variance = if use_simd {
                    let sum = F::simd_sum(data);
                    let mean = sum / n_f;
                    let mean_vec = Array1::from_elem(n, mean);
                    let centered = F::simd_sub(data, &mean_vec.view());
                    let squared = F::simd_mul(&centered.view(), &centered.view());
                    let sum_sq = F::simd_sum(&squared.view());
                    if n > 1 {
                        sum_sq / F::from(n - 1).unwrap()
                    } else {
                        F::zero()
                    }
                } else {
                    let mean = data.iter().copied().sum::<F>() / n_f;
                    let sum_sq = data.iter().map(|&x| (x - mean) * (x - mean)).sum::<F>();
                    if n > 1 {
                        sum_sq / F::from(n - 1).unwrap()
                    } else {
                        F::zero()
                    }
                };
                std_devs[idx] = variance.sqrt();
            }
        }
        MatrixOperation::L1Norm => {
            if let Some(ref mut l1_norms) = results.l1_norms {
                l1_norms[idx] = if use_simd {
                    let absdata = F::simd_abs(data);
                    F::simd_sum(&absdata.view())
                } else {
                    data.iter().map(|&x| x.abs()).sum::<F>()
                };
            }
        }
        MatrixOperation::L2Norm => {
            if let Some(ref mut l2_norms) = results.l2_norms {
                l2_norms[idx] = if use_simd {
                    let squared = F::simd_mul(data, data);
                    F::simd_sum(&squared.view()).sqrt()
                } else {
                    data.iter().map(|&x| x * x).sum::<F>().sqrt()
                };
            }
        }
        MatrixOperation::Product => {
            if let Some(ref mut products) = results.products {
                products[idx] = data.iter().copied().fold(F::one(), |acc, x| acc * x);
            }
        }
        MatrixOperation::Median => {
            if let Some(ref mut medians) = results.medians {
                let mut sorteddata = data.to_owned();
                sorteddata
                    .as_slice_mut()
                    .unwrap()
                    .sort_by(|a, b| a.partial_cmp(b).unwrap());
                medians[idx] = if n % 2 == 1 {
                    sorteddata[n / 2]
                } else {
                    let mid1 = sorteddata[n / 2 - 1];
                    let mid2 = sorteddata[n / 2];
                    (mid1 + mid2) / F::from(2.0).unwrap()
                };
            }
        }
        MatrixOperation::Quantile(q) => {
            if let Some(ref mut quantiles) = results.quantiles {
                let mut sorteddata = data.to_owned();
                sorteddata
                    .as_slice_mut()
                    .unwrap()
                    .sort_by(|a, b| a.partial_cmp(b).unwrap());
                // Linear interpolation between the two order statistics that
                // bracket the requested quantile.
                let pos = q * (n - 1) as f64;
                let lower_idx = pos.floor() as usize;
                let upper_idx = (lower_idx + 1).min(n - 1);
                let weight = F::from(pos - lower_idx as f64).unwrap();
                let lower_val = sorteddata[lower_idx];
                let upper_val = sorteddata[upper_idx];
                quantiles[idx] = lower_val + weight * (upper_val - lower_val);
            }
        }
        MatrixOperation::FrobeniusNorm => {
            // The Frobenius norm is a whole-matrix quantity; it is handled in
            // compute_global_matrix_stats rather than per row or column.
        }
    }
}

impl<F: Zero + Clone> MatrixStatsResult<F> {
    fn new_column_wise(ncols: usize, operations: &[MatrixOperation]) -> Self {
        let mut result = MatrixStatsResult {
            means: None,
            variances: None,
            std_devs: None,
            mins: None,
            maxs: None,
            sums: None,
            products: None,
            medians: None,
            quantiles: None,
            l1_norms: None,
            l2_norms: None,
            frobenius_norm: None,
        };

        for operation in operations {
            Self::allocate_for_operation(&mut result, operation, ncols);
        }

        result
    }

    fn new_row_wise(nrows: usize, operations: &[MatrixOperation]) -> Self {
        let mut result = MatrixStatsResult {
            means: None,
            variances: None,
            std_devs: None,
            mins: None,
            maxs: None,
            sums: None,
            products: None,
            medians: None,
            quantiles: None,
            l1_norms: None,
            l2_norms: None,
            frobenius_norm: None,
        };

        for operation in operations {
            Self::allocate_for_operation(&mut result, operation, nrows);
        }

        result
    }

    fn new_global(operations: &[MatrixOperation]) -> Self {
        let mut result = MatrixStatsResult {
            means: None,
            variances: None,
            std_devs: None,
            mins: None,
            maxs: None,
            sums: None,
            products: None,
            medians: None,
            quantiles: None,
            l1_norms: None,
            l2_norms: None,
            frobenius_norm: None,
        };

        for operation in operations {
            Self::allocate_for_operation(&mut result, operation, 1);
        }

        result
    }

    fn allocate_for_operation(result: &mut Self, operation: &MatrixOperation, size: usize) {
        match operation {
            MatrixOperation::Mean => result.means = Some(Array1::zeros(size)),
            MatrixOperation::Variance => result.variances = Some(Array1::zeros(size)),
            MatrixOperation::StandardDeviation => result.std_devs = Some(Array1::zeros(size)),
            MatrixOperation::Min => result.mins = Some(Array1::zeros(size)),
            MatrixOperation::Max => result.maxs = Some(Array1::zeros(size)),
            MatrixOperation::Sum => result.sums = Some(Array1::zeros(size)),
            MatrixOperation::Product => result.products = Some(Array1::zeros(size)),
            MatrixOperation::Median => result.medians = Some(Array1::zeros(size)),
            MatrixOperation::Quantile(_) => result.quantiles = Some(Array1::zeros(size)),
            MatrixOperation::L1Norm => result.l1_norms = Some(Array1::zeros(size)),
            MatrixOperation::L2Norm => result.l2_norms = Some(Array1::zeros(size)),
            // The Frobenius norm is a single scalar computed globally, so no
            // per-axis buffer is allocated here.
            MatrixOperation::FrobeniusNorm => {}
        }
    }
}

/// Computes a percentile bootstrap confidence interval for the requested
/// statistic, resampling the data `n_bootstrap` times with replacement.
#[allow(dead_code)]
pub fn bootstrap_confidence_interval_simd<F>(
    data: &ArrayView1<F>,
    statistic_fn: BootstrapStatistic,
    n_bootstrap: usize,
    confidence_level: f64,
    random_seed: Option<u64>,
) -> StatsResult<BootstrapResult<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + PartialOrd
        + Copy
        + Send
        + Sync
        + std::fmt::Display
        + std::iter::Sum<F>
        + scirs2_core::numeric::FromPrimitive,
{
    checkarray_finite(data, "data")?;
    check_positive(n_bootstrap, "n_bootstrap")?;
    check_probability(confidence_level, "confidence_level")?;

    if data.is_empty() {
        return Err(StatsError::InvalidArgument(
            "Data array cannot be empty".to_string(),
        ));
    }

    use scirs2_core::random::Random;

    let mut rng = match random_seed {
        Some(seed) => Random::seed(seed),
        None => Random::seed(42), // fixed default seed for reproducibility
    };

    let _ndata = data.len();
    let mut bootstrap_stats = Array1::zeros(n_bootstrap);

    for i in 0..n_bootstrap {
        let bootstrap_sample = generate_bootstrap_sample(data, &mut rng);
        bootstrap_stats[i] = compute_bootstrap_statistic(&bootstrap_sample.view(), &statistic_fn);
    }

    bootstrap_stats
        .as_slice_mut()
        .unwrap()
        .sort_by(|a, b| a.partial_cmp(b).unwrap());

    // Percentile method: the interval endpoints are the alpha/2 and
    // 1 - alpha/2 quantiles of the sorted bootstrap statistics.
    let alpha = 1.0 - confidence_level;
    let lower_idx = ((alpha / 2.0) * n_bootstrap as f64) as usize;
    let upper_idx = ((1.0 - alpha / 2.0) * n_bootstrap as f64) as usize;

    let lower_bound = bootstrap_stats[lower_idx.min(n_bootstrap - 1)];
    let upper_bound = bootstrap_stats[upper_idx.min(n_bootstrap - 1)];

    let original_stat = compute_bootstrap_statistic(data, &statistic_fn);

    let bootstrap_mean = bootstrap_stats.mean().unwrap();
    let bootstrap_std = bootstrap_stats.var(F::from(1.0).unwrap()).sqrt();

    Ok(BootstrapResult {
        original_statistic: original_stat,
        bootstrap_mean,
        bootstrap_std,
        confidence_interval: (lower_bound, upper_bound),
        confidence_level,
        n_bootstrap,
        bootstrap_statistics: bootstrap_stats,
    })
}
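
// Usage sketch (illustrative): a 95% bootstrap confidence interval for the
// mean with a fixed seed. The exact interval depends on the RNG behind
// `scirs2_core::random::Random`, so only structural properties are checked.
#[cfg(test)]
mod bootstrap_confidence_interval_simd_example {
    use super::*;

    #[test]
    fn bootstrap_interval_brackets_the_sample_mean() {
        let data = Array1::from(vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let result = bootstrap_confidence_interval_simd(
            &data.view(),
            BootstrapStatistic::Mean,
            500,
            0.95,
            Some(7),
        )
        .expect("bootstrap should succeed on finite data");

        let (lower, upper) = result.confidence_interval;
        assert!(lower <= upper);
        // The original statistic is the plain sample mean, 4.5.
        assert!((result.original_statistic - 4.5).abs() < 1e-12);
        assert_eq!(result.bootstrap_statistics.len(), 500);
    }
}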

#[derive(Debug, Clone, PartialEq)]
pub enum BootstrapStatistic {
    Mean,
    Median,
    StandardDeviation,
    Variance,
    Skewness,
    Kurtosis,
    Min,
    Max,
    Range,
    InterquartileRange,
    Quantile(f64),
}

#[derive(Debug, Clone)]
pub struct BootstrapResult<F> {
    pub original_statistic: F,
    pub bootstrap_mean: F,
    pub bootstrap_std: F,
    pub confidence_interval: (F, F),
    pub confidence_level: f64,
    pub n_bootstrap: usize,
    pub bootstrap_statistics: Array1<F>,
}

#[allow(dead_code)]
fn generate_bootstrap_sample<F, R>(data: &ArrayView1<F>, rng: &mut R) -> Array1<F>
where
    F: Copy + Zero,
    R: Rng,
{
    let n = data.len();
    let mut sample = Array1::zeros(n);

    for i in 0..n {
        let idx = rng.gen_range(0..n);
        sample[i] = data[idx];
    }

    sample
}

#[allow(dead_code)]
fn compute_bootstrap_statistic<F>(data: &ArrayView1<F>, statistic: &BootstrapStatistic) -> F
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + PartialOrd
        + Copy
        + std::fmt::Display
        + std::iter::Sum<F>,
{
    let n = data.len();
    let n_f = F::from(n).unwrap();
    let use_simd = n > 16;

    match statistic {
        BootstrapStatistic::Mean => {
            if use_simd {
                F::simd_sum(data) / n_f
            } else {
                data.iter().copied().sum::<F>() / n_f
            }
        }
        BootstrapStatistic::StandardDeviation => {
            let variance = if use_simd {
                let sum = F::simd_sum(data);
                let mean = sum / n_f;
                let mean_vec = Array1::from_elem(n, mean);
                let centered = F::simd_sub(data, &mean_vec.view());
                let squared = F::simd_mul(&centered.view(), &centered.view());
                let sum_sq = F::simd_sum(&squared.view());
                if n > 1 {
                    sum_sq / F::from(n - 1).unwrap()
                } else {
                    F::zero()
                }
            } else {
                let mean = data.iter().copied().sum::<F>() / n_f;
                let sum_sq = data.iter().map(|&x| (x - mean) * (x - mean)).sum::<F>();
                if n > 1 {
                    sum_sq / F::from(n - 1).unwrap()
                } else {
                    F::zero()
                }
            };
            variance.sqrt()
        }
        BootstrapStatistic::Variance => {
            if use_simd {
                let sum = F::simd_sum(data);
                let mean = sum / n_f;
                let mean_vec = Array1::from_elem(n, mean);
                let centered = F::simd_sub(data, &mean_vec.view());
                let squared = F::simd_mul(&centered.view(), &centered.view());
                let sum_sq = F::simd_sum(&squared.view());
                if n > 1 {
                    sum_sq / F::from(n - 1).unwrap()
                } else {
                    F::zero()
                }
            } else {
                let mean = data.iter().copied().sum::<F>() / n_f;
                let sum_sq = data.iter().map(|&x| (x - mean) * (x - mean)).sum::<F>();
                if n > 1 {
                    sum_sq / F::from(n - 1).unwrap()
                } else {
                    F::zero()
                }
            }
        }
        BootstrapStatistic::Min => {
            if use_simd {
                F::simd_min_element(&data.view())
            } else {
                data.iter().copied().fold(data[0], F::min)
            }
        }
        BootstrapStatistic::Max => {
            if use_simd {
                F::simd_max_element(&data.view())
            } else {
                data.iter().copied().fold(data[0], F::max)
            }
        }
        BootstrapStatistic::Range => {
            let (min_val, max_val) = if use_simd {
                (
                    F::simd_min_element(&data.view()),
                    F::simd_max_element(&data.view()),
                )
            } else {
                let min_val = data.iter().copied().fold(data[0], F::min);
                let max_val = data.iter().copied().fold(data[0], F::max);
                (min_val, max_val)
            };
            max_val - min_val
        }
        BootstrapStatistic::Median => {
            let mut sorteddata = data.to_owned();
            sorteddata
                .as_slice_mut()
                .unwrap()
                .sort_by(|a, b| a.partial_cmp(b).unwrap());
            if n % 2 == 1 {
                sorteddata[n / 2]
            } else {
                let mid1 = sorteddata[n / 2 - 1];
                let mid2 = sorteddata[n / 2];
                (mid1 + mid2) / F::from(2.0).unwrap()
            }
        }
        BootstrapStatistic::Quantile(q) => {
            let mut sorteddata = data.to_owned();
            sorteddata
                .as_slice_mut()
                .unwrap()
                .sort_by(|a, b| a.partial_cmp(b).unwrap());
            let pos = q * (n - 1) as f64;
            let lower_idx = pos.floor() as usize;
            let upper_idx = (lower_idx + 1).min(n - 1);
            let weight = F::from(pos - lower_idx as f64).unwrap();
            let lower_val = sorteddata[lower_idx];
            let upper_val = sorteddata[upper_idx];
            lower_val + weight * (upper_val - lower_val)
        }
        BootstrapStatistic::InterquartileRange => {
            let mut sorteddata = data.to_owned();
            sorteddata
                .as_slice_mut()
                .unwrap()
                .sort_by(|a, b| a.partial_cmp(b).unwrap());
            let q1_pos = 0.25 * (n - 1) as f64;
            let q3_pos = 0.75 * (n - 1) as f64;

            let q1_lower = q1_pos.floor() as usize;
            let q1_upper = (q1_lower + 1).min(n - 1);
            let q1_weight = F::from(q1_pos - q1_lower as f64).unwrap();
            let q1 =
                sorteddata[q1_lower] + q1_weight * (sorteddata[q1_upper] - sorteddata[q1_lower]);

            let q3_lower = q3_pos.floor() as usize;
            let q3_upper = (q3_lower + 1).min(n - 1);
            let q3_weight = F::from(q3_pos - q3_lower as f64).unwrap();
            let q3 =
                sorteddata[q3_lower] + q3_weight * (sorteddata[q3_upper] - sorteddata[q3_lower]);

            q3 - q1
        }
        BootstrapStatistic::Skewness => {
            let sum = if use_simd {
                F::simd_sum(data)
            } else {
                data.iter().copied().sum::<F>()
            };
            let mean = sum / n_f;

            let (variance, skew_sum) = if use_simd {
                let mean_vec = Array1::from_elem(n, mean);
                let centered = F::simd_sub(data, &mean_vec.view());
                let squared = F::simd_mul(&centered.view(), &centered.view());
                let cubed = F::simd_mul(&squared.view(), &centered.view());
                let variance = F::simd_sum(&squared.view()) / F::from(n - 1).unwrap();
                let skew_sum = F::simd_sum(&cubed.view());
                (variance, skew_sum)
            } else {
                let mut variance_sum = F::zero();
                let mut skew_sum = F::zero();
                for &x in data.iter() {
                    let dev = x - mean;
                    let dev_sq = dev * dev;
                    variance_sum = variance_sum + dev_sq;
                    skew_sum = skew_sum + dev_sq * dev;
                }
                let variance = variance_sum / F::from(n - 1).unwrap();
                (variance, skew_sum)
            };

            if variance > F::zero() {
                let std_dev = variance.sqrt();
                skew_sum / (n_f * std_dev.powi(3))
            } else {
                F::zero()
            }
        }
        BootstrapStatistic::Kurtosis => {
            let sum = if use_simd {
                F::simd_sum(data)
            } else {
                data.iter().copied().sum::<F>()
            };
            let mean = sum / n_f;

            let (variance, kurt_sum) = if use_simd {
                let mean_vec = Array1::from_elem(n, mean);
                let centered = F::simd_sub(data, &mean_vec.view());
                let squared = F::simd_mul(&centered.view(), &centered.view());
                let fourth = F::simd_mul(&squared.view(), &squared.view());
                let variance = F::simd_sum(&squared.view()) / F::from(n - 1).unwrap();
                let kurt_sum = F::simd_sum(&fourth.view());
                (variance, kurt_sum)
            } else {
                let mut variance_sum = F::zero();
                let mut kurt_sum = F::zero();
                for &x in data.iter() {
                    let dev = x - mean;
                    let dev_sq = dev * dev;
                    variance_sum = variance_sum + dev_sq;
                    kurt_sum = kurt_sum + dev_sq * dev_sq;
                }
                let variance = variance_sum / F::from(n - 1).unwrap();
                (variance, kurt_sum)
            };

            if variance > F::zero() {
                (kurt_sum / (n_f * variance * variance)) - F::from(3.0).unwrap()
            } else {
                F::zero()
            }
        }
    }
}

/// Kernel density estimation at the given evaluation points. If no bandwidth
/// is supplied, Scott's rule of thumb (`sigma * n^(-1/5)`) is used.
#[allow(dead_code)]
pub fn kernel_density_estimation_simd<F>(
    data: &ArrayView1<F>,
    eval_points: &ArrayView1<F>,
    bandwidth: Option<F>,
    kernel: KernelType,
) -> StatsResult<Array1<F>>
where
    F: Float
        + NumCast
        + SimdUnifiedOps
        + Zero
        + One
        + PartialOrd
        + Copy
        + Send
        + Sync
        + std::fmt::Display
        + std::iter::Sum<F>,
{
    checkarray_finite(data, "data")?;
    checkarray_finite(eval_points, "eval_points")?;

    if data.is_empty() {
        return Err(StatsError::InvalidArgument(
            "Data array cannot be empty".to_string(),
        ));
    }

    let ndata = data.len();
    let n_eval = eval_points.len();

    let h = match bandwidth {
        Some(bw) => {
            check_positive(bw.to_f64().unwrap(), "bandwidth")?;
            bw
        }
        None => {
            let std_dev = if ndata > 16 {
                let sum = F::simd_sum(data);
                let mean = sum / F::from(ndata).unwrap();
                let mean_vec = Array1::from_elem(ndata, mean);
                let centered = F::simd_sub(data, &mean_vec.view());
                let squared = F::simd_mul(&centered.view(), &centered.view());
                let variance = F::simd_sum(&squared.view()) / F::from(ndata - 1).unwrap();
                variance.sqrt()
            } else {
                let mean = data.iter().copied().sum::<F>() / F::from(ndata).unwrap();
                let variance = data.iter().map(|&x| (x - mean) * (x - mean)).sum::<F>()
                    / F::from(ndata - 1).unwrap();
                variance.sqrt()
            };

            // Scott's rule of thumb: h = sigma * n^(-1/5).
            let scott_factor = F::from(ndata as f64).unwrap().powf(F::from(-0.2).unwrap());
            std_dev * scott_factor
        }
    };

    let mut density = Array1::zeros(n_eval);
    let ndata_f = F::from(ndata).unwrap();
    let normalization = F::one() / (ndata_f * h);

    for (eval_idx, &eval_point) in eval_points.iter().enumerate() {
        let mut density_sum = F::zero();

        if ndata > 16 {
            let eval_vec = Array1::from_elem(ndata, eval_point);
            let differences = F::simd_sub(data, &eval_vec.view());
            let h_vec = Array1::from_elem(ndata, h);
            let standardized = F::simd_div(&differences.view(), &h_vec.view());

            for &z in standardized.iter() {
                density_sum = density_sum + kernel_function(z, &kernel);
            }
        } else {
            for &data_point in data.iter() {
                let z = (data_point - eval_point) / h;
                density_sum = density_sum + kernel_function(z, &kernel);
            }
        }

        density[eval_idx] = density_sum * normalization;
    }

    Ok(density)
}
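
// Usage sketch (illustrative): Gaussian KDE evaluated at a few points with an
// explicit bandwidth. Densities are non-negative and largest near the bulk of
// the data; exact values depend on the kernel and bandwidth.
#[cfg(test)]
mod kernel_density_estimation_simd_example {
    use super::*;

    #[test]
    fn gaussian_kde_is_nonnegative_and_peaks_near_the_data() {
        let data = Array1::from(vec![-1.0_f64, -0.5, 0.0, 0.5, 1.0]);
        let eval_points = Array1::from(vec![-3.0_f64, 0.0, 3.0]);
        let density = kernel_density_estimation_simd(
            &data.view(),
            &eval_points.view(),
            Some(0.5),
            KernelType::Gaussian,
        )
        .expect("KDE should succeed on finite data");

        assert_eq!(density.len(), 3);
        assert!(density.iter().all(|&d| d >= 0.0));
        // The density at 0 (centre of the data) exceeds the density at +/-3.
        assert!(density[1] > density[0]);
        assert!(density[1] > density[2]);
    }
}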

#[derive(Debug, Clone, PartialEq)]
pub enum KernelType {
    Gaussian,
    Epanechnikov,
    Uniform,
    Triangle,
    Biweight,
    Triweight,
    Cosine,
}

#[allow(dead_code)]
fn kernel_function<F>(z: F, kernel: &KernelType) -> F
where
    F: Float + NumCast + std::fmt::Display,
{
    match kernel {
        KernelType::Gaussian => {
            let sqrt_2pi = F::from(2.5066282746310005).unwrap(); // sqrt(2 * pi)
            let z_squared = z * z;
            let exp_term = (-z_squared / F::from(2.0).unwrap()).exp();
            exp_term / sqrt_2pi
        }
        KernelType::Epanechnikov => {
            let abs_z = z.abs();
            if abs_z <= F::one() {
                let z_squared = z * z;
                F::from(0.75).unwrap() * (F::one() - z_squared)
            } else {
                F::zero()
            }
        }
        KernelType::Uniform => {
            let abs_z = z.abs();
            if abs_z <= F::one() {
                F::from(0.5).unwrap()
            } else {
                F::zero()
            }
        }
        KernelType::Triangle => {
            let abs_z = z.abs();
            if abs_z <= F::one() {
                F::one() - abs_z
            } else {
                F::zero()
            }
        }
        KernelType::Biweight => {
            let abs_z = z.abs();
            if abs_z <= F::one() {
                let z_squared = z * z;
                let term = F::one() - z_squared;
                F::from(15.0 / 16.0).unwrap() * term * term
            } else {
                F::zero()
            }
        }
        KernelType::Triweight => {
            let abs_z = z.abs();
            if abs_z <= F::one() {
                let z_squared = z * z;
                let term = F::one() - z_squared;
                F::from(35.0 / 32.0).unwrap() * term * term * term
            } else {
                F::zero()
            }
        }
        KernelType::Cosine => {
            let abs_z = z.abs();
            if abs_z <= F::one() {
                let pi = F::from(std::f64::consts::PI).unwrap();
                let pi_4 = pi / F::from(4.0).unwrap();
                let pi_z_2 = pi * z / F::from(2.0).unwrap();
                pi_4 * pi_z_2.cos()
            } else {
                F::zero()
            }
        }
    }
}
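
// Sanity-check sketch for `kernel_function`: the Gaussian kernel evaluated at
// zero equals 1 / sqrt(2 * pi) ~= 0.3989, and the compactly supported kernels
// vanish outside [-1, 1]. Pure f64 arithmetic, no SIMD involved.
#[cfg(test)]
mod kernel_function_example {
    use super::*;

    #[test]
    fn kernel_values_at_reference_points() {
        let gaussian_at_zero: f64 = kernel_function(0.0, &KernelType::Gaussian);
        assert!((gaussian_at_zero - 0.3989422804014327).abs() < 1e-12);

        // Epanechnikov at 0 is 0.75 and it has no mass outside the unit interval.
        let epanechnikov_at_zero: f64 = kernel_function(0.0, &KernelType::Epanechnikov);
        assert!((epanechnikov_at_zero - 0.75).abs() < 1e-12);
        assert_eq!(kernel_function(1.5_f64, &KernelType::Epanechnikov), 0.0);
    }
}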