1use crate::error::{StatsError, StatsResult};
7use scirs2_core::ndarray::ArrayView1;
8use scirs2_core::numeric::Float;
9use std::collections::HashMap;
10use std::fmt::Debug;
11use std::hash::Hash;
12use std::iter::Sum;
13
/// Strategy for resolving ties when computing the mode of a dataset.
pub enum ModeMethod {
    /// Return exactly one mode; ties are broken by taking the smallest value
    /// (see `mode`, which uses `min()` over the tied candidates).
    Unimodal,
    /// Return every value that attains the maximum count, sorted ascending.
    MultiModal,
}
21
/// Result of a mode computation.
///
/// `values` and `counts` are parallel vectors: `counts[i]` is the number of
/// occurrences of `values[i]` in the input.
#[derive(Debug, Clone)]
pub struct Mode<T>
where
    T: Copy + Debug,
{
    /// The modal value(s); a single element for `ModeMethod::Unimodal`,
    /// possibly several (sorted ascending) for `ModeMethod::MultiModal`.
    pub values: Vec<T>,
    /// Occurrence count for each corresponding entry in `values`.
    pub counts: Vec<usize>,
}
33
34#[allow(dead_code)]
64pub fn mode<T>(x: &ArrayView1<T>, method: ModeMethod) -> StatsResult<Mode<T>>
65where
66 T: Copy + Eq + Hash + Debug + Ord,
67{
68 if x.is_empty() {
69 return Err(StatsError::InvalidArgument(
70 "Empty array provided".to_string(),
71 ));
72 }
73
74 let mut counts: HashMap<T, usize> = HashMap::new();
76 for &value in x.iter() {
77 *counts.entry(value).or_insert(0) += 1;
78 }
79
80 let max_count = counts.values().cloned().max().unwrap_or(0);
82
83 match method {
84 ModeMethod::Unimodal => {
85 let mode_value = counts
87 .iter()
88 .filter(|(_, &count)| count == max_count)
89 .map(|(&value_, _)| value_)
90 .min()
91 .ok_or_else(|| StatsError::InvalidArgument("Failed to compute mode".to_string()))?;
92
93 Ok(Mode {
94 values: vec![mode_value],
95 counts: vec![max_count],
96 })
97 }
98 ModeMethod::MultiModal => {
99 let mut mode_values: Vec<T> = counts
101 .iter()
102 .filter(|(_, &count)| count == max_count)
103 .map(|(&value_, _)| value_)
104 .collect();
105
106 mode_values.sort();
108
109 let mode_counts = vec![max_count; mode_values.len()];
110
111 Ok(Mode {
112 values: mode_values,
113 counts: mode_counts,
114 })
115 }
116 }
117}
118
119#[allow(dead_code)]
147pub fn entropy<T>(x: &ArrayView1<T>, base: Option<f64>) -> StatsResult<f64>
148where
149 T: Eq + Hash + Copy,
150{
151 if x.is_empty() {
152 return Err(StatsError::InvalidArgument(
153 "Empty array provided".to_string(),
154 ));
155 }
156
157 let base_val = base.unwrap_or(std::f64::consts::E);
158 if base_val <= 0.0 {
159 return Err(StatsError::InvalidArgument(
160 "Base must be positive".to_string(),
161 ));
162 }
163
164 let mut counts: HashMap<T, usize> = HashMap::new();
166 for &value in x.iter() {
167 *counts.entry(value).or_insert(0) += 1;
168 }
169
170 let n = x.len() as f64;
171
172 let mut entropy = 0.0;
174 for (_, &count) in counts.iter() {
175 let p = count as f64 / n;
176 if p > 0.0 {
177 entropy -= p * p.log(base_val);
178 }
179 }
180
181 Ok(entropy)
182}
183
184#[allow(dead_code)]
213pub fn kl_divergence<F>(p: &ArrayView1<F>, q: &ArrayView1<F>) -> StatsResult<F>
214where
215 F: Float + std::fmt::Debug + Sum + std::fmt::Display,
216{
217 if p.is_empty() || q.is_empty() {
218 return Err(StatsError::InvalidArgument(
219 "Empty array provided".to_string(),
220 ));
221 }
222
223 if p.len() != q.len() {
224 return Err(StatsError::DimensionMismatch(format!(
225 "Distributions must have same length, got p({}) and q({})",
226 p.len(),
227 q.len()
228 )));
229 }
230
231 let p_sum: F = p.iter().cloned().sum();
233 let q_sum: F = q.iter().cloned().sum();
234
235 let one = F::one();
236 let tol = F::from(1e-10).expect("Failed to convert constant to float");
237
238 if (p_sum - one).abs() > tol || (q_sum - one).abs() > tol {
239 return Err(StatsError::InvalidArgument(
240 "Inputs must be valid probability distributions that sum to 1".to_string(),
241 ));
242 }
243
244 let mut divergence = F::zero();
246
247 for (p_i, q_i) in p.iter().zip(q.iter()) {
248 if *p_i < F::zero() || *q_i < F::zero() {
249 return Err(StatsError::InvalidArgument(
250 "Probability values must be non-negative".to_string(),
251 ));
252 }
253
254 if *p_i > F::zero() {
255 if *q_i <= F::zero() {
256 return Err(StatsError::DomainError(
257 "KL divergence undefined when q_i = 0 and p_i > 0".to_string(),
258 ));
259 }
260
261 let ratio = *p_i / *q_i;
262 divergence = divergence + *p_i * ratio.ln();
263 }
264 }
265
266 Ok(divergence)
267}
268
269#[allow(dead_code)]
293pub fn cross_entropy<F>(p: &ArrayView1<F>, q: &ArrayView1<F>) -> StatsResult<F>
294where
295 F: Float + std::fmt::Debug + Sum + std::fmt::Display,
296{
297 if p.is_empty() || q.is_empty() {
298 return Err(StatsError::InvalidArgument(
299 "Empty array provided".to_string(),
300 ));
301 }
302
303 if p.len() != q.len() {
304 return Err(StatsError::DimensionMismatch(format!(
305 "Distributions must have same length, got p({}) and q({})",
306 p.len(),
307 q.len()
308 )));
309 }
310
311 let p_sum: F = p.iter().cloned().sum();
313 let q_sum: F = q.iter().cloned().sum();
314
315 let one = F::one();
316 let tol = F::from(1e-10).expect("Failed to convert constant to float");
317
318 if (p_sum - one).abs() > tol || (q_sum - one).abs() > tol {
319 return Err(StatsError::InvalidArgument(
320 "Inputs must be valid probability distributions that sum to 1".to_string(),
321 ));
322 }
323
324 let mut cross_ent = F::zero();
326
327 for (p_i, q_i) in p.iter().zip(q.iter()) {
328 if *p_i < F::zero() || *q_i < F::zero() {
329 return Err(StatsError::InvalidArgument(
330 "Probability values must be non-negative".to_string(),
331 ));
332 }
333
334 if *p_i > F::zero() {
335 if *q_i <= F::zero() {
336 return Err(StatsError::DomainError(
337 "Cross-entropy undefined when q_i = 0 and p_i > 0".to_string(),
338 ));
339 }
340
341 cross_ent = cross_ent - *p_i * q_i.ln();
342 }
343 }
344
345 Ok(cross_ent)
346}
347
/// A point estimate together with a bootstrap confidence interval.
#[derive(Debug, Clone, Copy)]
pub struct ConfidenceInterval<F>
where
    F: Float + std::fmt::Display,
{
    /// Point estimate computed on the full sample.
    pub estimate: F,
    /// Lower bound of the interval (bootstrap percentile).
    pub lower: F,
    /// Upper bound of the interval (bootstrap percentile).
    pub upper: F,
    /// Confidence level in (0, 1), e.g. 0.95.
    pub confidence: F,
}
363
364#[allow(dead_code)]
390pub fn skewness_ci<F>(
391 x: &ArrayView1<F>,
392 bias: bool,
393 confidence: Option<F>,
394 n_bootstrap: Option<usize>,
395 seed: Option<u64>,
396) -> StatsResult<ConfidenceInterval<F>>
397where
398 F: Float
399 + std::iter::Sum<F>
400 + std::ops::Div<Output = F>
401 + std::fmt::Debug
402 + std::fmt::Display
403 + Send
404 + Sync
405 + scirs2_core::simd_ops::SimdUnifiedOps,
406{
407 use crate::sampling::bootstrap;
408 use crate::skew;
409
410 if x.is_empty() {
411 return Err(StatsError::InvalidArgument(
412 "Empty array provided".to_string(),
413 ));
414 }
415
416 if x.len() < 3 {
417 return Err(StatsError::DomainError(
418 "At least 3 data points required to calculate skewness".to_string(),
419 ));
420 }
421
422 let conf = confidence.unwrap_or(F::from(0.95).expect("Failed to convert constant to float"));
423 let n_boot = n_bootstrap.unwrap_or(1000);
424
425 if conf <= F::zero() || conf >= F::one() {
426 return Err(StatsError::InvalidArgument(
427 "Confidence level must be between 0 and 1 exclusive".to_string(),
428 ));
429 }
430
431 let estimate = skew(x, bias, None)?;
433
434 let samples = bootstrap(x, n_boot, seed)?;
436
437 let mut bootstrap_skew = Vec::with_capacity(n_boot);
439
440 for i in 0..n_boot {
441 let sample_view = samples.slice(scirs2_core::ndarray::s![i, ..]).to_owned();
442 if let Ok(sk) = skew(&sample_view.view(), bias, None) {
443 bootstrap_skew.push(sk);
444 }
445 }
446
447 bootstrap_skew.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
449
450 let alpha = (F::one() - conf) / (F::one() + F::one());
452 let lower_idx = (alpha * F::from(bootstrap_skew.len()).expect("Operation failed"))
453 .to_usize()
454 .expect("Operation failed");
455 let upper_idx = ((F::one() - alpha) * F::from(bootstrap_skew.len()).expect("Operation failed"))
456 .to_usize()
457 .expect("Operation failed");
458
459 let lower = bootstrap_skew.get(lower_idx).cloned().unwrap_or(F::zero());
461 let upper = bootstrap_skew.get(upper_idx).cloned().unwrap_or(F::zero());
462
463 Ok(ConfidenceInterval {
464 estimate,
465 lower,
466 upper,
467 confidence: conf,
468 })
469}
470
471#[allow(dead_code)]
498pub fn kurtosis_ci<F>(
499 x: &ArrayView1<F>,
500 fisher: bool,
501 bias: bool,
502 confidence: Option<F>,
503 n_bootstrap: Option<usize>,
504 seed: Option<u64>,
505) -> StatsResult<ConfidenceInterval<F>>
506where
507 F: Float
508 + std::iter::Sum<F>
509 + std::ops::Div<Output = F>
510 + std::fmt::Debug
511 + std::fmt::Display
512 + Send
513 + Sync
514 + scirs2_core::simd_ops::SimdUnifiedOps,
515{
516 use crate::kurtosis;
517 use crate::sampling::bootstrap;
518
519 if x.is_empty() {
520 return Err(StatsError::InvalidArgument(
521 "Empty array provided".to_string(),
522 ));
523 }
524
525 if x.len() < 4 {
526 return Err(StatsError::DomainError(
527 "At least 4 data points required to calculate kurtosis".to_string(),
528 ));
529 }
530
531 let conf = confidence.unwrap_or(F::from(0.95).expect("Failed to convert constant to float"));
532 let n_boot = n_bootstrap.unwrap_or(1000);
533
534 if conf <= F::zero() || conf >= F::one() {
535 return Err(StatsError::InvalidArgument(
536 "Confidence level must be between 0 and 1 exclusive".to_string(),
537 ));
538 }
539
540 let estimate = kurtosis(x, fisher, bias, None)?;
542
543 let samples = bootstrap(x, n_boot, seed)?;
545
546 let mut bootstrap_kurt = Vec::with_capacity(n_boot);
548
549 for i in 0..n_boot {
550 let sample_view = samples.slice(scirs2_core::ndarray::s![i, ..]).to_owned();
551 if let Ok(k) = kurtosis(&sample_view.view(), fisher, bias, None) {
552 bootstrap_kurt.push(k);
553 }
554 }
555
556 bootstrap_kurt.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
558
559 let alpha = (F::one() - conf) / (F::one() + F::one());
561 let lower_idx = (alpha * F::from(bootstrap_kurt.len()).expect("Operation failed"))
562 .to_usize()
563 .expect("Operation failed");
564 let upper_idx = ((F::one() - alpha) * F::from(bootstrap_kurt.len()).expect("Operation failed"))
565 .to_usize()
566 .expect("Operation failed");
567
568 let lower = bootstrap_kurt.get(lower_idx).cloned().unwrap_or(F::zero());
570 let upper = bootstrap_kurt.get(upper_idx).cloned().unwrap_or(F::zero());
571
572 Ok(ConfidenceInterval {
573 estimate,
574 lower,
575 upper,
576 confidence: conf,
577 })
578}
579
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{kurtosis, skew};
    use approx::assert_relative_eq;
    use scirs2_core::ndarray::array;

    // Unimodal mode: single most frequent value with its count.
    #[test]
    fn test_mode_unimodal() {
        let data = array![1, 2, 2, 3, 2, 4, 5];
        let result = mode(&data.view(), ModeMethod::Unimodal).expect("Operation failed");
        assert_eq!(result.values.len(), 1);
        assert_eq!(result.values[0], 2);
        assert_eq!(result.counts[0], 3);
    }

    // Multimodal mode: all tied values returned, sorted ascending.
    #[test]
    fn test_mode_multimodal() {
        let data = array![1, 2, 2, 3, 3, 4];
        let result = mode(&data.view(), ModeMethod::MultiModal).expect("Operation failed");
        assert_eq!(result.values.len(), 2);
        assert_eq!(result.values, vec![2, 3]);
        assert_eq!(result.counts, vec![2, 2]);
    }

    // Entropy in bits: uniform over 6 values gives log2(6) ≈ 2.58496;
    // less uniform data has lower entropy; a constant sample has zero entropy.
    #[test]
    fn test_entropy() {
        let uniform = array![1, 2, 3, 4, 5, 6];
        let entropy_uniform = entropy(&uniform.view(), Some(2.0)).expect("Operation failed");
        assert_relative_eq!(entropy_uniform, 2.58496, epsilon = 1e-5);

        let less_uniform = array![1, 1, 1, 2, 3, 4];
        let entropy_less = entropy(&less_uniform.view(), Some(2.0)).expect("Operation failed");
        assert!(entropy_less < entropy_uniform);

        let single = array![1, 1, 1, 1, 1];
        let entropy_single = entropy(&single.view(), Some(2.0)).expect("Operation failed");
        assert_relative_eq!(entropy_single, 0.0, epsilon = 1e-10);
    }

    // KL divergence: known value for (0.5,0.5) vs (0.9,0.1); asymmetric
    // in its arguments; zero when both distributions are identical.
    #[test]
    fn test_kl_divergence() {
        let p = array![0.5f64, 0.5];
        let q = array![0.9f64, 0.1];

        let div = kl_divergence(&p.view(), &q.view()).expect("Operation failed");
        assert_relative_eq!(div, 0.5108256238, epsilon = 1e-10);

        let div_reverse = kl_divergence(&q.view(), &p.view()).expect("Operation failed");
        assert!(div != div_reverse);

        let same = kl_divergence(&p.view(), &p.view()).expect("Operation failed");
        assert_relative_eq!(same, 0.0, epsilon = 1e-10);
    }

    // Cross-entropy identity: H(p, q) = H(p) + D(p ‖ q).
    #[test]
    fn test_cross_entropy() {
        let p = array![0.5f64, 0.5];
        let q = array![0.9f64, 0.1];

        let cross_ent = cross_entropy(&p.view(), &q.view()).expect("Operation failed");

        let entropy_p = -0.5f64 * (0.5f64.ln()) - 0.5 * (0.5f64.ln());
        let kl = kl_divergence(&p.view(), &q.view()).expect("Operation failed");

        assert_relative_eq!(cross_ent, entropy_p + kl, epsilon = 1e-10);
    }

    // Skewness CI: estimate matches the direct computation, the interval
    // brackets it, and the default confidence level is 0.95. Fixed seed
    // keeps the bootstrap deterministic.
    #[test]
    fn test_skewness_ci() {
        let data = array![1.0f64, 2.0, 3.0, 4.0, 5.0, 10.0];
        let result =
            skewness_ci(&data.view(), false, None, Some(100), Some(42)).expect("Operation failed");

        let direct_skew = skew(&data.view(), false, None).expect("Operation failed");
        assert_relative_eq!(result.estimate, direct_skew, epsilon = 1e-10);

        assert!(result.lower <= result.estimate);
        assert!(result.upper >= result.estimate);

        assert_relative_eq!(result.confidence, 0.95, epsilon = 1e-10);
    }

    // Kurtosis CI: same invariants as the skewness CI test.
    #[test]
    fn test_kurtosis_ci() {
        let data = array![1.0f64, 2.0, 3.0, 4.0, 5.0, 10.0];
        let result = kurtosis_ci(&data.view(), true, false, None, Some(100), Some(42))
            .expect("Operation failed");

        let direct_kurt = kurtosis(&data.view(), true, false, None).expect("Operation failed");
        assert_relative_eq!(result.estimate, direct_kurt, epsilon = 1e-10);

        assert!(result.lower <= result.estimate);
        assert!(result.upper >= result.estimate);

        assert_relative_eq!(result.confidence, 0.95, epsilon = 1e-10);
    }
}