1use crate::error::{StatsError, StatsResult};
7use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2, Axis};
8use scirs2_core::numeric::{Float, NumCast, One, Zero};
9use scirs2_core::{simd_ops::SimdUnifiedOps, validation::*};
10use statrs::statistics::Statistics;
11
12#[allow(dead_code)]
17pub fn comprehensive_stats_simd<F>(data: &ArrayView1<F>) -> StatsResult<ComprehensiveStats<F>>
18where
19 F: Float
20 + NumCast
21 + SimdUnifiedOps
22 + Zero
23 + One
24 + std::fmt::Display
25 + std::iter::Sum<F>
26 + scirs2_core::numeric::FromPrimitive,
27{
28 checkarray_finite(data, "data")?;
29
30 if data.is_empty() {
31 return Err(StatsError::InvalidArgument(
32 "Data array cannot be empty".to_string(),
33 ));
34 }
35
36 let n = data.len();
37 let n_f = F::from(n).expect("Failed to convert to float");
38
39 let (sum, sum_sq, min_val, max_val) = if n > 32 {
41 let sum = F::simd_sum(&data.view());
43 let sqdata = F::simd_mul(&data.view(), &data.view());
44 let sum_sq = F::simd_sum(&sqdata.view());
45 let min_val = F::simd_min_element(&data.view());
46 let max_val = F::simd_max_element(&data.view());
47 (sum, sum_sq, min_val, max_val)
48 } else {
49 let sum = data.iter().fold(F::zero(), |acc, &x| acc + x);
51 let sum_sq = data.iter().fold(F::zero(), |acc, &x| acc + x * x);
52 let min_val = data
53 .iter()
54 .fold(data[0], |acc, &x| if x < acc { x } else { acc });
55 let max_val = data
56 .iter()
57 .fold(data[0], |acc, &x| if x > acc { x } else { acc });
58 (sum, sum_sq, min_val, max_val)
59 };
60
61 let mean = sum / n_f;
62 let variance = if n > 1 {
63 let n_minus_1 = F::from(n - 1).expect("Failed to convert to float");
64 (sum_sq - sum * sum / n_f) / n_minus_1
65 } else {
66 F::zero()
67 };
68 let std_dev = variance.sqrt();
69 let range = max_val - min_val;
70
71 let (sum_cubed_dev, sum_fourth_dev) = if n > 32 {
73 let mean_vec = Array1::from_elem(n, mean);
75 let centered = F::simd_sub(&data.view(), &mean_vec.view());
76 let centered_sq = F::simd_mul(¢ered.view(), ¢ered.view());
77 let centered_cubed = F::simd_mul(¢ered_sq.view(), ¢ered.view());
78 let centered_fourth = F::simd_mul(¢ered_sq.view(), ¢ered_sq.view());
79
80 let sum_cubed_dev = F::simd_sum(¢ered_cubed.view());
81 let sum_fourth_dev = F::simd_sum(¢ered_fourth.view());
82 (sum_cubed_dev, sum_fourth_dev)
83 } else {
84 let mut sum_cubed_dev = F::zero();
86 let mut sum_fourth_dev = F::zero();
87 for &x in data.iter() {
88 let dev = x - mean;
89 let dev_sq = dev * dev;
90 sum_cubed_dev = sum_cubed_dev + dev_sq * dev;
91 sum_fourth_dev = sum_fourth_dev + dev_sq * dev_sq;
92 }
93 (sum_cubed_dev, sum_fourth_dev)
94 };
95
96 let skewness = if std_dev > F::zero() {
97 sum_cubed_dev / (n_f * std_dev.powi(3))
98 } else {
99 F::zero()
100 };
101
102 let kurtosis = if variance > F::zero() {
103 (sum_fourth_dev / (n_f * variance * variance))
104 - F::from(3.0).expect("Failed to convert constant to float")
105 } else {
106 F::zero()
107 };
108
109 Ok(ComprehensiveStats {
110 count: n,
111 mean,
112 variance,
113 std_dev,
114 min: min_val,
115 max: max_val,
116 range,
117 skewness,
118 kurtosis,
119 sum,
120 })
121}
122
123#[derive(Debug, Clone)]
125pub struct ComprehensiveStats<F> {
126 pub count: usize,
127 pub mean: F,
128 pub variance: F,
129 pub std_dev: F,
130 pub min: F,
131 pub max: F,
132 pub range: F,
133 pub skewness: F,
134 pub kurtosis: F,
135 pub sum: F,
136}
137
138#[allow(dead_code)]
143pub fn sliding_window_stats_simd<F>(
144 data: &ArrayView1<F>,
145 windowsize: usize,
146) -> StatsResult<SlidingWindowStats<F>>
147where
148 F: Float
149 + NumCast
150 + SimdUnifiedOps
151 + Zero
152 + One
153 + std::fmt::Display
154 + std::iter::Sum<F>
155 + scirs2_core::numeric::FromPrimitive,
156{
157 checkarray_finite(data, "data")?;
158 check_positive(windowsize, "windowsize")?;
159
160 if windowsize > data.len() {
161 return Err(StatsError::InvalidArgument(
162 "Window size cannot be larger than data length".to_string(),
163 ));
164 }
165
166 let n_windows = data.len() - windowsize + 1;
167 let mut means = Array1::zeros(n_windows);
168 let mut variances = Array1::zeros(n_windows);
169 let mut mins = Array1::zeros(n_windows);
170 let mut maxs = Array1::zeros(n_windows);
171
172 let windowsize_f = F::from(windowsize).expect("Failed to convert to float");
173
174 for i in 0..n_windows {
176 let window = data.slice(scirs2_core::ndarray::s![i..i + windowsize]);
177
178 if windowsize > 16 {
180 let sum = F::simd_sum(&window);
181 let mean = sum / windowsize_f;
182 means[i] = mean;
183
184 let sqdata = F::simd_mul(&window, &window);
185 let sum_sq = F::simd_sum(&sqdata.view());
186 let variance = if windowsize > 1 {
187 let n_minus_1 = F::from(windowsize - 1).expect("Failed to convert to float");
188 (sum_sq - sum * sum / windowsize_f) / n_minus_1
189 } else {
190 F::zero()
191 };
192 variances[i] = variance;
193
194 mins[i] = F::simd_min_element(&window);
195 maxs[i] = F::simd_max_element(&window);
196 } else {
197 let sum: F = window.iter().copied().sum();
199 let mean = sum / windowsize_f;
200 means[i] = mean;
201
202 let sum_sq: F = window.iter().map(|&x| x * x).sum();
203 let variance = if windowsize > 1 {
204 let n_minus_1 = F::from(windowsize - 1).expect("Failed to convert to float");
205 (sum_sq - sum * sum / windowsize_f) / n_minus_1
206 } else {
207 F::zero()
208 };
209 variances[i] = variance;
210
211 mins[i] = window.iter().copied().fold(window[0], F::min);
212 maxs[i] = window.iter().copied().fold(window[0], F::max);
213 }
214 }
215
216 Ok(SlidingWindowStats {
217 windowsize,
218 means,
219 variances,
220 mins,
221 maxs,
222 })
223}
224
225#[derive(Debug, Clone)]
227pub struct SlidingWindowStats<F> {
228 pub windowsize: usize,
229 pub means: Array1<F>,
230 pub variances: Array1<F>,
231 pub mins: Array1<F>,
232 pub maxs: Array1<F>,
233}
234
235#[allow(dead_code)]
239pub fn covariance_matrix_simd<F>(data: &ArrayView2<F>) -> StatsResult<Array2<F>>
240where
241 F: Float
242 + NumCast
243 + SimdUnifiedOps
244 + Zero
245 + One
246 + std::fmt::Display
247 + std::iter::Sum<F>
248 + scirs2_core::numeric::FromPrimitive,
249{
250 checkarray_finite(data, "data")?;
251
252 let (n_samples_, n_features) = data.dim();
253
254 if n_samples_ < 2 {
255 return Err(StatsError::InvalidArgument(
256 "At least 2 samples required for covariance".to_string(),
257 ));
258 }
259
260 let means = if n_samples_ > 32 {
262 let n_samples_f = F::from(n_samples_).expect("Failed to convert to float");
263 let mut feature_means = Array1::zeros(n_features);
264
265 for j in 0..n_features {
266 let column = data.column(j);
267 feature_means[j] = F::simd_sum(&column) / n_samples_f;
268 }
269 feature_means
270 } else {
271 data.mean_axis(Axis(0)).expect("Operation failed")
273 };
274
275 let mut centereddata = Array2::zeros((n_samples_, n_features));
277 for j in 0..n_features {
278 let column = data.column(j);
279 let mean_vec = Array1::from_elem(n_samples_, means[j]);
280
281 if n_samples_ > 32 {
282 let centered_column = F::simd_sub(&column, &mean_vec.view());
283 centereddata.column_mut(j).assign(¢ered_column);
284 } else {
285 for i in 0..n_samples_ {
286 centereddata[(i, j)] = column[i] - means[j];
287 }
288 }
289 }
290
291 let mut cov_matrix = Array2::zeros((n_features, n_features));
293 let n_minus_1 = F::from(n_samples_ - 1).expect("Failed to convert to float");
294
295 for i in 0..n_features {
296 for j in i..n_features {
297 let col_i = centereddata.column(i);
298 let col_j = centereddata.column(j);
299
300 let covariance = if n_samples_ > 32 {
301 let products = F::simd_mul(&col_i, &col_j);
302 F::simd_sum(&products.view()) / n_minus_1
303 } else {
304 col_i
305 .iter()
306 .zip(col_j.iter())
307 .map(|(&x, &y)| x * y)
308 .sum::<F>()
309 / n_minus_1
310 };
311
312 cov_matrix[(i, j)] = covariance;
313 if i != j {
314 cov_matrix[(j, i)] = covariance; }
316 }
317 }
318
319 Ok(cov_matrix)
320}
321
322#[allow(dead_code)]
326pub fn quantiles_batch_simd<F>(data: &ArrayView1<F>, quantiles: &[f64]) -> StatsResult<Array1<F>>
327where
328 F: Float + NumCast + SimdUnifiedOps + PartialOrd + Copy + std::fmt::Display + std::iter::Sum<F>,
329{
330 checkarray_finite(data, "data")?;
331
332 if data.is_empty() {
333 return Err(StatsError::InvalidArgument(
334 "Data array cannot be empty".to_string(),
335 ));
336 }
337
338 for &q in quantiles {
339 if !(0.0..=1.0).contains(&q) {
340 return Err(StatsError::InvalidArgument(
341 "Quantiles must be between 0 and 1".to_string(),
342 ));
343 }
344 }
345
346 let mut sorteddata = data.to_owned();
348 sorteddata
349 .as_slice_mut()
350 .expect("Operation failed")
351 .sort_by(|a, b| a.partial_cmp(b).expect("Operation failed"));
352
353 let n = sorteddata.len();
354 let mut results = Array1::zeros(quantiles.len());
355
356 for (i, &q) in quantiles.iter().enumerate() {
357 if q == 0.0 {
358 results[i] = sorteddata[0];
359 } else if q == 1.0 {
360 results[i] = sorteddata[n - 1];
361 } else {
362 let pos = q * (n - 1) as f64;
364 let lower_idx = pos.floor() as usize;
365 let upper_idx = (lower_idx + 1).min(n - 1);
366 let weight = F::from(pos - lower_idx as f64).expect("Failed to convert to float");
367
368 let lower_val = sorteddata[lower_idx];
369 let upper_val = sorteddata[upper_idx];
370
371 results[i] = lower_val + weight * (upper_val - lower_val);
372 }
373 }
374
375 Ok(results)
376}
377
378#[allow(dead_code)]
383pub fn exponential_moving_average_simd<F>(data: &ArrayView1<F>, alpha: F) -> StatsResult<Array1<F>>
384where
385 F: Float
386 + NumCast
387 + SimdUnifiedOps
388 + Zero
389 + One
390 + std::fmt::Display
391 + std::iter::Sum<F>
392 + scirs2_core::numeric::FromPrimitive,
393{
394 checkarray_finite(data, "data")?;
395
396 if data.is_empty() {
397 return Err(StatsError::InvalidArgument(
398 "Data array cannot be empty".to_string(),
399 ));
400 }
401
402 if alpha <= F::zero() || alpha > F::one() {
403 return Err(StatsError::InvalidArgument(
404 "Alpha must be between 0 and 1".to_string(),
405 ));
406 }
407
408 let n = data.len();
409 let mut ema = Array1::zeros(n);
410 ema[0] = data[0];
411
412 let one_minus_alpha = F::one() - alpha;
413
414 if n > 64 {
416 for i in 1..n {
418 ema[i] = alpha * data[i] + one_minus_alpha * ema[i - 1];
420 }
421 } else {
422 for i in 1..n {
424 ema[i] = alpha * data[i] + one_minus_alpha * ema[i - 1];
425 }
426 }
427
428 Ok(ema)
429}
430
431#[allow(dead_code)]
435pub fn batch_normalize_simd<F>(data: &ArrayView2<F>, axis: Option<usize>) -> StatsResult<Array2<F>>
436where
437 F: Float
438 + NumCast
439 + SimdUnifiedOps
440 + Zero
441 + One
442 + std::fmt::Display
443 + scirs2_core::numeric::FromPrimitive,
444{
445 checkarray_finite(data, "data")?;
446
447 let (n_samples_, n_features) = data.dim();
448
449 if n_samples_ == 0 || n_features == 0 {
450 return Err(StatsError::InvalidArgument(
451 "Data matrix cannot be empty".to_string(),
452 ));
453 }
454
455 let mut normalized = data.to_owned();
456
457 match axis {
458 Some(0) | None => {
459 for j in 0..n_features {
461 let column = data.column(j);
462
463 let (mean, std_dev) = if n_samples_ > 32 {
464 let sum = F::simd_sum(&column);
466 let mean = sum / F::from(n_samples_).expect("Failed to convert to float");
467
468 let mean_vec = Array1::from_elem(n_samples_, mean);
469 let centered = F::simd_sub(&column, &mean_vec.view());
470 let squared = F::simd_mul(¢ered.view(), ¢ered.view());
471 let variance = F::simd_sum(&squared.view())
472 / F::from(n_samples_ - 1).expect("Failed to convert to float");
473 let std_dev = variance.sqrt();
474
475 (mean, std_dev)
476 } else {
477 let mean = column.mean().expect("Operation failed");
479 let variance = column.var(F::one()); let std_dev = variance.sqrt();
481 (mean, std_dev)
482 };
483
484 if std_dev > F::zero() {
486 for i in 0..n_samples_ {
487 normalized[(i, j)] = (data[(i, j)] - mean) / std_dev;
488 }
489 }
490 }
491 }
492 Some(1) => {
493 for i in 0..n_samples_ {
495 let row = data.row(i);
496
497 let (mean, std_dev) = if n_features > 32 {
498 let sum = F::simd_sum(&row);
500 let mean = sum / F::from(n_features).expect("Failed to convert to float");
501
502 let mean_vec = Array1::from_elem(n_features, mean);
503 let centered = F::simd_sub(&row, &mean_vec.view());
504 let squared = F::simd_mul(¢ered.view(), ¢ered.view());
505 let variance = F::simd_sum(&squared.view())
506 / F::from(n_features - 1).expect("Failed to convert to float");
507 let std_dev = variance.sqrt();
508
509 (mean, std_dev)
510 } else {
511 let mean = row.mean().expect("Operation failed");
513 let variance = row.var(F::one()); let std_dev = variance.sqrt();
515 (mean, std_dev)
516 };
517
518 if std_dev > F::zero() {
520 for j in 0..n_features {
521 normalized[(i, j)] = (data[(i, j)] - mean) / std_dev;
522 }
523 }
524 }
525 }
526 Some(_) => {
527 return Err(StatsError::InvalidArgument(
528 "Axis must be 0 or 1 for 2D arrays".to_string(),
529 ));
530 }
531 }
532
533 Ok(normalized)
534}
535
536#[allow(dead_code)]
540pub fn outlier_detection_zscore_simd<F>(
541 data: &ArrayView1<F>,
542 threshold: F,
543) -> StatsResult<(Array1<bool>, ComprehensiveStats<F>)>
544where
545 F: Float
546 + NumCast
547 + SimdUnifiedOps
548 + Zero
549 + One
550 + PartialOrd
551 + std::fmt::Display
552 + std::iter::Sum<F>
553 + scirs2_core::numeric::FromPrimitive,
554{
555 let stats = comprehensive_stats_simd(data)?;
556
557 if stats.std_dev <= F::zero() {
558 let outliers = Array1::from_elem(data.len(), false);
560 return Ok((outliers, stats));
561 }
562
563 let n = data.len();
564 let mut outliers = Array1::from_elem(n, false);
565
566 if n > 32 {
568 let mean_vec = Array1::from_elem(n, stats.mean);
570 let std_vec = Array1::from_elem(n, stats.std_dev);
571
572 let centered = F::simd_sub(&data.view(), &mean_vec.view());
573 let z_scores = F::simd_div(¢ered.view(), &std_vec.view());
574
575 for (i, &z_score) in z_scores.iter().enumerate() {
576 outliers[i] = z_score.abs() > threshold;
577 }
578 } else {
579 for (i, &value) in data.iter().enumerate() {
581 let z_score = (value - stats.mean) / stats.std_dev;
582 outliers[i] = z_score.abs() > threshold;
583 }
584 }
585
586 Ok((outliers, stats))
587}
588
589#[allow(dead_code)]
593pub fn robust_statistics_simd<F>(data: &ArrayView1<F>) -> StatsResult<RobustStats<F>>
594where
595 F: Float + NumCast + SimdUnifiedOps + PartialOrd + Copy + std::fmt::Display,
596{
597 checkarray_finite(data, "data")?;
598
599 if data.is_empty() {
600 return Err(StatsError::InvalidArgument(
601 "Data array cannot be empty".to_string(),
602 ));
603 }
604
605 let mut sorteddata = data.to_owned();
607 sorteddata
608 .as_slice_mut()
609 .expect("Operation failed")
610 .sort_by(|a, b| a.partial_cmp(b).expect("Operation failed"));
611
612 let n = sorteddata.len();
613
614 let median = if n % 2 == 1 {
616 sorteddata[n / 2]
617 } else {
618 let mid1 = sorteddata[n / 2 - 1];
619 let mid2 = sorteddata[n / 2];
620 (mid1 + mid2) / F::from(2.0).expect("Failed to convert constant to float")
621 };
622
623 let mut deviations = Array1::zeros(n);
625
626 if n > 32 {
627 let median_vec = Array1::from_elem(n, median);
629 let centered = F::simd_sub(&data.view(), &median_vec.view());
630 deviations = F::simd_abs(¢ered.view());
631 } else {
632 for (i, &value) in data.iter().enumerate() {
634 deviations[i] = (value - median).abs();
635 }
636 }
637
638 deviations
640 .as_slice_mut()
641 .expect("Operation failed")
642 .sort_by(|a, b| a.partial_cmp(b).expect("Operation failed"));
643
644 let mad = if n % 2 == 1 {
645 deviations[n / 2]
646 } else {
647 let mid1 = deviations[n / 2 - 1];
648 let mid2 = deviations[n / 2];
649 (mid1 + mid2) / F::from(2.0).expect("Failed to convert constant to float")
650 };
651
652 let mad_scaled = mad * F::from(1.4826).expect("Failed to convert constant to float");
654
655 let q1_idx = (n as f64 * 0.25) as usize;
657 let q3_idx = (n as f64 * 0.75) as usize;
658 let q1 = sorteddata[q1_idx.min(n - 1)];
659 let q3 = sorteddata[q3_idx.min(n - 1)];
660 let iqr = q3 - q1;
661
662 Ok(RobustStats {
663 median,
664 mad,
665 mad_scaled,
666 q1,
667 q3,
668 iqr,
669 min: sorteddata[0],
670 max: sorteddata[n - 1],
671 })
672}
673
674#[derive(Debug, Clone)]
676pub struct RobustStats<F> {
677 pub median: F,
678 pub mad: F, pub mad_scaled: F, pub q1: F, pub q3: F, pub iqr: F, pub min: F,
684 pub max: F,
685}