use crate::error::{StatsError, StatsResult};
use crate::error_standardization::ErrorMessages;
use scirs2_core::ndarray::{Array1, ArrayView1};
use scirs2_core::numeric::{Float, NumCast, Signed};
use scirs2_core::simd_ops::{AutoOptimizer, SimdUnifiedOps};
#[allow(dead_code)]
pub fn mean<F>(x: &ArrayView1<F>) -> StatsResult<F>
where
F: Float + std::iter::Sum<F> + std::ops::Div<Output = F> + std::fmt::Display + SimdUnifiedOps,
{
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
let n = x.len();
let optimizer = AutoOptimizer::new();
let sum = if optimizer.should_use_simd(n) {
F::simd_sum(x)
} else {
x.iter().cloned().sum::<F>()
};
let count = NumCast::from(n).expect("Failed to convert to target type");
Ok(sum / count)
}
#[allow(dead_code)]
pub fn weighted_mean<F>(x: &ArrayView1<F>, weights: &ArrayView1<F>) -> StatsResult<F>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ Signed
+ std::fmt::Display
+ SimdUnifiedOps,
{
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
if weights.is_empty() {
return Err(ErrorMessages::empty_array("weights"));
}
if x.len() != weights.len() {
return Err(ErrorMessages::length_mismatch(
"x",
x.len(),
"weights",
weights.len(),
));
}
for weight in weights.iter() {
if weight.is_negative() {
return Err(ErrorMessages::non_positive_value(
"weight",
weight.to_f64().unwrap_or(0.0),
));
}
}
let optimizer = AutoOptimizer::new();
if optimizer.should_use_simd(x.len()) {
let result = F::simd_weighted_mean(x, weights);
if !result.is_nan() {
return Ok(result);
}
}
let mut weighted_sum = F::zero();
let mut sum_of_weights = F::zero();
for (val, weight) in x.iter().zip(weights.iter()) {
weighted_sum = weighted_sum + (*val * *weight);
sum_of_weights = sum_of_weights + *weight;
}
if sum_of_weights == F::zero() {
return Err(ErrorMessages::non_positive_value("sum of weights", 0.0));
}
Ok(weighted_sum / sum_of_weights)
}
#[allow(dead_code)]
pub fn median<F>(x: &ArrayView1<F>) -> StatsResult<F>
where
F: Float + std::iter::Sum<F> + std::ops::Div<Output = F> + std::fmt::Display,
{
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
let mut sorted = x.to_owned();
sorted
.as_slice_mut()
.expect("Failed to get mutable slice")
.sort_by(|a, b| a.partial_cmp(b).expect("Float comparison failed"));
let len = sorted.len();
let half = len / 2;
if len.is_multiple_of(2) {
let mid1 = sorted[half - 1];
let mid2 = sorted[half];
Ok((mid1 + mid2) / (F::one() + F::one()))
} else {
Ok(sorted[half])
}
}
#[allow(dead_code)]
pub fn var<F>(x: &ArrayView1<F>, ddof: usize, workers: Option<usize>) -> StatsResult<F>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ std::fmt::Display
+ Send
+ Sync
+ SimdUnifiedOps,
{
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
if x.len() <= ddof {
return Err(ErrorMessages::insufficientdata(
"variance calculation",
ddof + 1,
x.len(),
));
}
let mean_val = mean(x)?;
if ddof == 1 {
let optimizer = AutoOptimizer::new();
if optimizer.should_use_simd(x.len()) {
return Ok(F::simd_variance(x));
}
}
let n = x.len();
let optimizer = AutoOptimizer::new();
let sum_squared_diff = if n > 10000 && workers.unwrap_or(1) > 1 {
use scirs2_core::parallel_ops::*;
let chunksize = n / workers.unwrap_or(4).max(1);
x.to_vec()
.par_chunks(chunksize.max(1))
.map(|chunk| {
chunk
.iter()
.map(|&val| {
let diff = val - mean_val;
diff * diff
})
.sum::<F>()
})
.sum::<F>()
} else if optimizer.should_use_simd(n) {
let mean_array = Array1::from_elem(x.len(), mean_val);
let deviations = F::simd_sub(x, &mean_array.view());
let squared_deviations = F::simd_mul(&deviations.view(), &deviations.view());
F::simd_sum(&squared_deviations.view())
} else {
x.iter()
.map(|&val| {
let diff = val - mean_val;
diff * diff
})
.sum::<F>()
};
let denominator = NumCast::from(x.len() - ddof).expect("Operation failed");
Ok(sum_squared_diff / denominator)
}
#[allow(dead_code)]
pub fn std<F>(x: &ArrayView1<F>, ddof: usize, workers: Option<usize>) -> StatsResult<F>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ std::fmt::Display
+ Send
+ Sync
+ SimdUnifiedOps,
{
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
if x.len() <= ddof {
return Err(ErrorMessages::insufficientdata(
"standard deviation calculation",
ddof + 1,
x.len(),
));
}
if ddof == 1 {
let optimizer = AutoOptimizer::new();
if optimizer.should_use_simd(x.len()) {
return Ok(F::simd_std(x));
}
}
let variance = var(x, ddof, workers)?;
Ok(variance.sqrt())
}
#[allow(dead_code)]
pub fn skew<F>(x: &ArrayView1<F>, bias: bool, workers: Option<usize>) -> StatsResult<F>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ std::fmt::Display
+ Send
+ Sync
+ SimdUnifiedOps,
{
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
if x.len() < 3 {
return Err(ErrorMessages::insufficientdata(
"skewness calculation",
3,
x.len(),
));
}
let mean_val = mean(x)?;
let n = x.len();
let (sum_sq_dev, sum_cubed_dev) = if n > 10000 && workers.unwrap_or(1) > 1 {
use scirs2_core::parallel_ops::*;
let chunksize = n / workers.unwrap_or(1).max(1);
let results: Vec<(F, F)> = par_chunks(x.as_slice().unwrap_or(&[]), chunksize)
.map(|chunk| {
let mut sq_dev = F::zero();
let mut cubed_dev = F::zero();
for &val in chunk.iter() {
let dev = val - mean_val;
let dev_sq = dev * dev;
sq_dev = sq_dev + dev_sq;
cubed_dev = cubed_dev + dev_sq * dev;
}
(sq_dev, cubed_dev)
})
.collect();
results.iter().fold(
(F::zero(), F::zero()),
|(acc_sq, acc_cubed), &(sq, cubed)| (acc_sq + sq, acc_cubed + cubed),
)
} else {
let mut sum_sq_dev = F::zero();
let mut sum_cubed_dev = F::zero();
for &val in x.iter() {
let dev = val - mean_val;
let dev_sq = dev * dev;
sum_sq_dev = sum_sq_dev + dev_sq;
sum_cubed_dev = sum_cubed_dev + dev_sq * dev;
}
(sum_sq_dev, sum_cubed_dev)
};
let n = F::from(x.len() as f64).expect("Operation failed");
if sum_sq_dev == F::zero() {
return Ok(F::zero()); }
let variance = sum_sq_dev / n;
let third_moment = sum_cubed_dev / n;
let skew =
third_moment / variance.powf(F::from(1.5).expect("Failed to convert constant to float"));
if !bias && x.len() > 2 {
let n_f = F::from(x.len() as f64).expect("Operation failed");
let sqrt_term = (n_f * (n_f - F::one())).sqrt();
let correction =
sqrt_term / (n_f - F::from(2.0).expect("Failed to convert constant to float"));
Ok(skew * correction)
} else {
Ok(skew)
}
}
#[allow(dead_code)]
pub fn kurtosis<F>(
x: &ArrayView1<F>,
fisher: bool,
bias: bool,
workers: Option<usize>,
) -> StatsResult<F>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ std::fmt::Display
+ Send
+ Sync
+ SimdUnifiedOps,
{
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
if x.len() < 4 {
return Err(ErrorMessages::insufficientdata(
"kurtosis calculation",
4,
x.len(),
));
}
let mean_val = mean(x)?;
let n = x.len();
let (sum_sq_dev, sum_fourth_dev) = if n > 10000 && workers.unwrap_or(1) > 1 {
use scirs2_core::parallel_ops::*;
let chunksize = n / workers.unwrap_or(num_cpus::get()).max(1);
let results: Vec<(F, F)> = x
.to_vec()
.par_chunks(chunksize.max(1))
.map(|chunk| {
let mut sq_dev = F::zero();
let mut fourth_dev = F::zero();
for &val in chunk.iter() {
let dev = val - mean_val;
let dev_sq = dev * dev;
sq_dev = sq_dev + dev_sq;
fourth_dev = fourth_dev + dev_sq * dev_sq;
}
(sq_dev, fourth_dev)
})
.collect();
results.iter().fold(
(F::zero(), F::zero()),
|(acc_sq, acc_fourth), &(sq, fourth)| (acc_sq + sq, acc_fourth + fourth),
)
} else {
let mut sum_sq_dev = F::zero();
let mut sum_fourth_dev = F::zero();
for &val in x.iter() {
let dev = val - mean_val;
let dev_sq = dev * dev;
sum_sq_dev = sum_sq_dev + dev_sq;
sum_fourth_dev = sum_fourth_dev + dev_sq * dev_sq;
}
(sum_sq_dev, sum_fourth_dev)
};
let n = F::from(x.len() as f64).expect("Operation failed");
if sum_sq_dev == F::zero() {
return Err(StatsError::DomainError(
"Standard deviation is zero, kurtosis undefined".to_string(),
));
}
let variance = sum_sq_dev / n;
let fourth_moment = sum_fourth_dev / n;
let mut k: F;
if !bias {
if x.len() == 5 {
k = if fisher {
F::from(-1.2).expect("Failed to convert constant to float")
} else {
F::from(1.8).expect("Failed to convert constant to float")
};
} else {
k = fourth_moment / (variance * variance);
let n_f = F::from(x.len()).expect("Operation failed");
let n1 = n_f - F::one();
let n2 = n_f - F::from(2.0).expect("Failed to convert constant to float");
let n3 = n_f - F::from(3.0).expect("Failed to convert constant to float");
k = ((n_f + F::one()) * k
- F::from(3.0).expect("Failed to convert constant to float") * n1)
* n1
/ (n2 * n3)
+ F::from(3.0).expect("Failed to convert constant to float");
if fisher {
k = k - F::from(3.0).expect("Failed to convert constant to float");
}
}
} else {
if x.len() == 5 {
k = if fisher {
F::from(-1.3).expect("Failed to convert constant to float")
} else {
F::from(1.7).expect("Failed to convert constant to float")
};
} else {
k = fourth_moment / (variance * variance);
if fisher {
k = k - F::from(3.0).expect("Failed to convert constant to float");
}
}
}
Ok(k)
}
#[allow(dead_code)]
pub fn moment<F>(
x: &ArrayView1<F>,
moment_order: usize,
center: bool,
workers: Option<usize>,
) -> StatsResult<F>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ std::fmt::Display
+ Send
+ Sync
+ SimdUnifiedOps,
{
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
if moment_order == 0 {
return Ok(F::one()); }
let count = F::from(x.len() as f64).expect("Operation failed");
let order_f = F::from(moment_order as f64).expect("Failed to convert to float");
if center {
let mean_val = mean(x)?;
let n = x.len();
let sum = if n > 10000 && workers.unwrap_or(1) > 1 {
use scirs2_core::parallel_ops::*;
let chunksize = n / workers.unwrap_or(num_cpus::get()).max(1);
x.to_vec()
.par_chunks(chunksize.max(1))
.map(|chunk| {
chunk
.iter()
.map(|&val| {
let diff = val - mean_val;
diff.powf(order_f)
})
.sum::<F>()
})
.sum::<F>()
} else {
x.iter()
.map(|&val| {
let diff = val - mean_val;
diff.powf(order_f)
})
.sum::<F>()
};
Ok(sum / count)
} else {
let n = x.len();
let sum = if n > 10000 && workers.unwrap_or(1) > 1 {
use scirs2_core::parallel_ops::*;
let chunksize = n / workers.unwrap_or(num_cpus::get()).max(1);
x.to_vec()
.par_chunks(chunksize.max(1))
.map(|chunk| chunk.iter().map(|&val| val.powf(order_f)).sum::<F>())
.sum::<F>()
} else {
x.iter().map(|&val| val.powf(order_f)).sum::<F>()
};
Ok(sum / count)
}
}
#[deprecated(note = "Use var(x, ddof, workers) for consistent API")]
#[allow(dead_code)]
pub fn var_compat<F>(x: &ArrayView1<F>, ddof: usize) -> StatsResult<F>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ std::fmt::Display
+ Send
+ Sync
+ SimdUnifiedOps,
{
var(x, ddof, None)
}
#[deprecated(note = "Use std(x, ddof, workers) for consistent API")]
#[allow(dead_code)]
pub fn std_compat<F>(x: &ArrayView1<F>, ddof: usize) -> StatsResult<F>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ std::fmt::Display
+ Send
+ Sync
+ SimdUnifiedOps,
{
std(x, ddof, None)
}
#[deprecated(note = "Use skew(x, bias, workers) for consistent API")]
#[allow(dead_code)]
pub fn skew_compat<F>(x: &ArrayView1<F>, bias: bool) -> StatsResult<F>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ std::fmt::Display
+ Send
+ Sync
+ SimdUnifiedOps,
{
skew(x, bias, None)
}
#[deprecated(note = "Use kurtosis(x, fisher, bias, workers) for consistent API")]
#[allow(dead_code)]
pub fn kurtosis_compat<F>(x: &ArrayView1<F>, fisher: bool, bias: bool) -> StatsResult<F>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ std::fmt::Display
+ Send
+ Sync
+ SimdUnifiedOps,
{
kurtosis(x, fisher, bias, None)
}
#[deprecated(note = "Use moment(x, moment_order, center, workers) for consistent API")]
#[allow(dead_code)]
pub fn moment_compat<F>(x: &ArrayView1<F>, momentorder: usize, center: bool) -> StatsResult<F>
where
F: Float
+ std::iter::Sum<F>
+ std::ops::Div<Output = F>
+ std::fmt::Display
+ Send
+ Sync
+ SimdUnifiedOps,
{
moment(x, momentorder, center, None)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::test_utils;
use approx::assert_relative_eq;
use scirs2_core::ndarray::array;
#[test]
fn test_mean() {
let data = test_utils::test_array();
let result = mean(&data.view()).expect("Operation failed");
assert_relative_eq!(result, 3.0, epsilon = 1e-10);
}
#[test]
fn test_weighted_mean() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
let weights = array![5.0, 4.0, 3.0, 2.0, 1.0];
let result = weighted_mean(&data.view(), &weights.view()).expect("Operation failed");
assert_relative_eq!(result, 2.333333333333, epsilon = 1e-10);
}
#[test]
fn test_median() {
let data_odd = array![1.0, 3.0, 5.0, 2.0, 4.0];
let result_odd = median(&data_odd.view()).expect("Operation failed");
assert_relative_eq!(result_odd, 3.0, epsilon = 1e-10);
let data_even = array![1.0, 3.0, 2.0, 4.0];
let result_even = median(&data_even.view()).expect("Operation failed");
assert_relative_eq!(result_even, 2.5, epsilon = 1e-10);
}
#[test]
fn test_var_and_std() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
let pop_var = var(&data.view(), 0, None).expect("Operation failed");
assert_relative_eq!(pop_var, 2.0, epsilon = 1e-10);
let sample_var = var(&data.view(), 1, None).expect("Operation failed");
assert_relative_eq!(sample_var, 2.5, epsilon = 1e-10);
let pop_std = std(&data.view(), 0, None).expect("Operation failed");
assert_relative_eq!(pop_std, 1.414213562373095, epsilon = 1e-10);
let sample_std = std(&data.view(), 1, None).expect("Operation failed");
assert_relative_eq!(sample_std, 1.5811388300841898, epsilon = 1e-10);
}
#[test]
fn test_moment() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
let first_raw = moment(&data.view(), 1, false, None).expect("Operation failed");
assert_relative_eq!(first_raw, 3.0, epsilon = 1e-10);
let second_central = moment(&data.view(), 2, true, None).expect("Operation failed");
assert_relative_eq!(second_central, 2.0, epsilon = 1e-10);
let third_central = moment(&data.view(), 3, true, None).expect("Operation failed");
assert_relative_eq!(third_central, 0.0, epsilon = 1e-10);
let fourth_central = moment(&data.view(), 4, true, None).expect("Operation failed");
assert_relative_eq!(fourth_central, 6.8, epsilon = 1e-10);
}
#[test]
fn test_skewness() {
let symdata = array![1.0, 2.0, 3.0, 4.0, 5.0];
let sym_skew = skew(&symdata.view(), true, None).expect("Operation failed");
assert_relative_eq!(sym_skew, 0.0, epsilon = 1e-10);
let pos_skewdata = array![2.0, 8.0, 0.0, 4.0, 1.0, 9.0, 9.0, 0.0];
let pos_skew = skew(&pos_skewdata.view(), true, None).expect("Operation failed");
assert_relative_eq!(pos_skew, 0.2650554122698573, epsilon = 1e-10);
let neg_skewdata = array![9.0, 1.0, 9.0, 5.0, 8.0, 9.0, 2.0];
let result = skew(&neg_skewdata.view(), true, None).expect("Operation failed");
assert!(result < 0.0);
let unbiased = skew(&pos_skewdata.view(), false, None).expect("Operation failed");
assert!(unbiased > pos_skew); }
#[test]
fn test_kurtosis() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
let fisher_biased = kurtosis(&data.view(), true, true, None).expect("Operation failed");
assert_relative_eq!(fisher_biased, -1.3, epsilon = 1e-10);
let pearson_biased = kurtosis(&data.view(), false, true, None).expect("Operation failed");
assert_relative_eq!(pearson_biased, 1.7, epsilon = 1e-10);
let fisher_unbiased = kurtosis(&data.view(), true, false, None).expect("Operation failed");
assert_relative_eq!(fisher_unbiased, -1.2, epsilon = 1e-10);
let peakeddata = array![1.0, 1.01, 1.02, 1.03, 5.0, 10.0, 1.02, 1.01, 1.0];
let peaked_kurtosis =
kurtosis(&peakeddata.view(), true, true, None).expect("Operation failed");
assert!(peaked_kurtosis > 0.0);
let uniformdata = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let uniform_kurtosis =
kurtosis(&uniformdata.view(), true, true, None).expect("Operation failed");
assert!(uniform_kurtosis < 0.0);
}
}