use crate::error::{StatsError, StatsResult};
use crate::error_standardization::ErrorMessages;
use scirs2_core::ndarray::{Array1, ArrayBase, Data, Ix1};
use scirs2_core::numeric::{Float, NumCast, One, Zero};
use scirs2_core::{
simd_ops::{AutoOptimizer, SimdUnifiedOps},
validation::*,
};
#[inline(always)]
fn const_f64<F: Float + NumCast>(value: f64) -> F {
F::from(value).expect("Failed to convert constant to target float type")
}
#[allow(dead_code)]
pub fn skewness_simd<F, D>(x: &ArrayBase<D, Ix1>, bias: bool) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy + Send + Sync + std::fmt::Display,
D: Data<Elem = F>,
{
checkarray_finite(x, "x")?;
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
if x.len() < 3 && !bias {
return Err(ErrorMessages::insufficientdata(
"unbiased skewness calculation",
3,
x.len(),
));
}
let n = x.len();
let n_f = F::from(n).expect("Failed to convert to float");
let optimizer = AutoOptimizer::new();
let mean = if optimizer.should_use_simd(n) {
F::simd_sum(&x.view()) / n_f
} else {
x.iter().fold(F::zero(), |acc, &val| acc + val) / n_f
};
let (sum_sq_dev, sum_cubed_dev) = if optimizer.should_use_simd(n) {
compute_moments_simd(x, mean, n)
} else {
compute_moments_scalar(x, mean)
};
if sum_sq_dev == F::zero() {
return Ok(F::zero()); }
let variance = sum_sq_dev / n_f;
let third_moment = sum_cubed_dev / n_f;
let skew = third_moment / variance.powf(const_f64::<F>(1.5));
if !bias && n > 2 {
let sqrt_term = (n_f * (n_f - F::one())).sqrt();
let correction = sqrt_term / (n_f - const_f64::<F>(2.0));
Ok(skew * correction)
} else {
Ok(skew)
}
}
#[allow(dead_code)]
pub fn kurtosis_simd<F, D>(x: &ArrayBase<D, Ix1>, fisher: bool, bias: bool) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy + Send + Sync + std::fmt::Display,
D: Data<Elem = F>,
{
checkarray_finite(x, "x")?;
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
if x.len() < 4 {
return Err(StatsError::DomainError(
"At least 4 data points required to calculate kurtosis".to_string(),
));
}
let n = x.len();
let n_f = F::from(n).expect("Failed to convert to float");
let optimizer = AutoOptimizer::new();
let mean = if optimizer.should_use_simd(n) {
F::simd_sum(&x.view()) / n_f
} else {
x.iter().fold(F::zero(), |acc, &val| acc + val) / n_f
};
let (sum_sq_dev, sum_fourth_dev) = if optimizer.should_use_simd(n) {
compute_fourth_moments_simd(x, mean, n)
} else {
compute_fourth_moments_scalar(x, mean)
};
let variance = sum_sq_dev / n_f;
if variance == F::zero() {
return Err(StatsError::DomainError(
"Standard deviation is zero, kurtosis undefined".to_string(),
));
}
let fourth_moment = sum_fourth_dev / n_f;
let mut k = fourth_moment / (variance * variance);
if !bias && n > 3 {
let n_f = F::from(n).expect("Failed to convert to float");
let n1 = n_f - F::one();
let n2 = n_f - const_f64::<F>(2.0);
let n3 = n_f - const_f64::<F>(3.0);
k = ((n_f + F::one()) * k - const_f64::<F>(3.0) * n1) * n1 / (n2 * n3)
+ const_f64::<F>(3.0);
}
if fisher {
k = k - const_f64::<F>(3.0);
}
Ok(k)
}
#[allow(dead_code)]
pub fn moment_simd<F, D>(x: &ArrayBase<D, Ix1>, momentorder: usize, center: bool) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy + Send + Sync + std::fmt::Display,
D: Data<Elem = F>,
{
checkarray_finite(x, "x")?;
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
if momentorder == 0 {
return Ok(F::one()); }
let n = x.len();
let n_f = F::from(n).expect("Failed to convert to float");
let _order_f = F::from(momentorder as f64).expect("Failed to convert to float");
let optimizer = AutoOptimizer::new();
if center {
let mean = if optimizer.should_use_simd(n) {
F::simd_sum(&x.view()) / n_f
} else {
x.iter().fold(F::zero(), |acc, &val| acc + val) / n_f
};
let moment_sum = if optimizer.should_use_simd(n) {
compute_central_moment_simd(x, mean, momentorder)
} else {
compute_central_moment_scalar(x, mean, momentorder)
};
Ok(moment_sum / n_f)
} else {
let moment_sum = if optimizer.should_use_simd(n) {
compute_raw_moment_simd(x, momentorder)
} else {
compute_raw_moment_scalar(x, momentorder)
};
Ok(moment_sum / n_f)
}
}
#[allow(dead_code)]
pub fn moments_batch_simd<F, D>(
x: &ArrayBase<D, Ix1>,
moments: &[usize],
center: bool,
) -> StatsResult<Vec<F>>
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy + Send + Sync + std::fmt::Display,
D: Data<Elem = F>,
{
checkarray_finite(x, "x")?;
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
let n = x.len();
let n_f = F::from(n).expect("Failed to convert to float");
let optimizer = AutoOptimizer::new();
let mut results = Vec::with_capacity(moments.len());
if center {
let mean = if optimizer.should_use_simd(n) {
F::simd_sum(&x.view()) / n_f
} else {
x.iter().fold(F::zero(), |acc, &val| acc + val) / n_f
};
for &order in moments {
if order == 0 {
results.push(F::one());
} else {
let moment_sum = if optimizer.should_use_simd(n) {
compute_central_moment_simd(x, mean, order)
} else {
compute_central_moment_scalar(x, mean, order)
};
results.push(moment_sum / n_f);
}
}
} else {
for &order in moments {
if order == 0 {
results.push(F::one());
} else {
let moment_sum = if optimizer.should_use_simd(n) {
compute_raw_moment_simd(x, order)
} else {
compute_raw_moment_scalar(x, order)
};
results.push(moment_sum / n_f);
}
}
}
Ok(results)
}
#[allow(dead_code)]
fn compute_moments_simd<F, D>(x: &ArrayBase<D, Ix1>, mean: F, n: usize) -> (F, F)
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy,
D: Data<Elem = F>,
{
let mean_array = Array1::from_elem(n, mean);
let deviations = F::simd_sub(&x.view(), &mean_array.view());
let sq_deviations = F::simd_mul(&deviations.view(), &deviations.view());
let cubed_deviations = F::simd_mul(&sq_deviations.view(), &deviations.view());
let sum_sq_dev = F::simd_sum(&sq_deviations.view());
let sum_cubed_dev = F::simd_sum(&cubed_deviations.view());
(sum_sq_dev, sum_cubed_dev)
}
#[allow(dead_code)]
fn compute_moments_scalar<F, D>(x: &ArrayBase<D, Ix1>, mean: F) -> (F, F)
where
F: Float + NumCast + Zero + One + Copy,
D: Data<Elem = F>,
{
let mut sum_sq_dev = F::zero();
let mut sum_cubed_dev = F::zero();
for &val in x.iter() {
let dev = val - mean;
let dev_sq = dev * dev;
sum_sq_dev = sum_sq_dev + dev_sq;
sum_cubed_dev = sum_cubed_dev + dev_sq * dev;
}
(sum_sq_dev, sum_cubed_dev)
}
#[allow(dead_code)]
fn compute_fourth_moments_simd<F, D>(x: &ArrayBase<D, Ix1>, mean: F, n: usize) -> (F, F)
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy,
D: Data<Elem = F>,
{
let mean_array = Array1::from_elem(n, mean);
let deviations = F::simd_sub(&x.view(), &mean_array.view());
let sq_deviations = F::simd_mul(&deviations.view(), &deviations.view());
let fourth_deviations = F::simd_mul(&sq_deviations.view(), &sq_deviations.view());
let sum_sq_dev = F::simd_sum(&sq_deviations.view());
let sum_fourth_dev = F::simd_sum(&fourth_deviations.view());
(sum_sq_dev, sum_fourth_dev)
}
#[allow(dead_code)]
fn compute_fourth_moments_scalar<F, D>(x: &ArrayBase<D, Ix1>, mean: F) -> (F, F)
where
F: Float + NumCast + Zero + One + Copy,
D: Data<Elem = F>,
{
let mut sum_sq_dev = F::zero();
let mut sum_fourth_dev = F::zero();
for &val in x.iter() {
let dev = val - mean;
let dev_sq = dev * dev;
sum_sq_dev = sum_sq_dev + dev_sq;
sum_fourth_dev = sum_fourth_dev + dev_sq * dev_sq;
}
(sum_sq_dev, sum_fourth_dev)
}
#[allow(dead_code)]
fn compute_central_moment_simd<F, D>(x: &ArrayBase<D, Ix1>, mean: F, order: usize) -> F
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy,
D: Data<Elem = F>,
{
let n = x.len();
let mean_array = Array1::from_elem(n, mean);
let deviations = F::simd_sub(&x.view(), &mean_array.view());
match order {
1 => F::simd_sum(&deviations.view()),
2 => {
let squared = F::simd_mul(&deviations.view(), &deviations.view());
F::simd_sum(&squared.view())
}
3 => {
let squared = F::simd_mul(&deviations.view(), &deviations.view());
let cubed = F::simd_mul(&squared.view(), &deviations.view());
F::simd_sum(&cubed.view())
}
4 => {
let squared = F::simd_mul(&deviations.view(), &deviations.view());
let fourth = F::simd_mul(&squared.view(), &squared.view());
F::simd_sum(&fourth.view())
}
_ => {
let order_f = F::from(order as f64).expect("Failed to convert to float");
let powered: Array1<F> = deviations.mapv(|x| x.powf(order_f));
F::simd_sum(&powered.view())
}
}
}
#[allow(dead_code)]
fn compute_central_moment_scalar<F, D>(x: &ArrayBase<D, Ix1>, mean: F, order: usize) -> F
where
F: Float + NumCast + Zero + One + Copy,
D: Data<Elem = F>,
{
let order_f = F::from(order as f64).expect("Failed to convert to float");
x.iter()
.map(|&val| (val - mean).powf(order_f))
.fold(F::zero(), |acc, val| acc + val)
}
#[allow(dead_code)]
fn compute_raw_moment_simd<F, D>(x: &ArrayBase<D, Ix1>, order: usize) -> F
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy,
D: Data<Elem = F>,
{
match order {
1 => F::simd_sum(&x.view()),
2 => {
let squared = F::simd_mul(&x.view(), &x.view());
F::simd_sum(&squared.view())
}
3 => {
let squared = F::simd_mul(&x.view(), &x.view());
let cubed = F::simd_mul(&squared.view(), &x.view());
F::simd_sum(&cubed.view())
}
4 => {
let squared = F::simd_mul(&x.view(), &x.view());
let fourth = F::simd_mul(&squared.view(), &squared.view());
F::simd_sum(&fourth.view())
}
_ => {
let order_f = F::from(order as f64).expect("Failed to convert to float");
let powered: Array1<F> = x.mapv(|val| val.powf(order_f));
F::simd_sum(&powered.view())
}
}
}
#[allow(dead_code)]
fn compute_raw_moment_scalar<F, D>(x: &ArrayBase<D, Ix1>, order: usize) -> F
where
F: Float + NumCast + Zero + One + Copy,
D: Data<Elem = F>,
{
let order_f = F::from(order as f64).expect("Failed to convert to float");
x.iter()
.map(|&val| val.powf(order_f))
.fold(F::zero(), |acc, val| acc + val)
}
#[allow(dead_code)]
pub fn skewness_ultra_simd<F, D>(x: &ArrayBase<D, Ix1>, bias: bool) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy + Send + Sync + std::fmt::Display,
D: Data<Elem = F>,
{
checkarray_finite(x, "x")?;
if x.is_empty() {
return Err(ErrorMessages::empty_array("x"));
}
if x.len() < 3 && !bias {
return Err(ErrorMessages::insufficientdata(
"unbiased skewness calculation",
3,
x.len(),
));
}
let n = x.len();
let capabilities = scirs2_core::simd_ops::PlatformCapabilities::detect();
if n >= 128 && capabilities.has_avx2() {
bandwidth_saturated_skewness_ultra(x, bias)
} else if n >= 64 {
skewness_simd(x, bias)
} else {
skewness_simd(x, bias)
}
}
#[allow(dead_code)]
pub fn kurtosis_ultra_simd<F, D>(x: &ArrayBase<D, Ix1>, fisher: bool, bias: bool) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy + Send + Sync + std::fmt::Display,
D: Data<Elem = F>,
{
checkarray_finite(x, "x")?;
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
if x.len() < 4 {
return Err(StatsError::DomainError(
"At least 4 data points required to calculate kurtosis".to_string(),
));
}
let n = x.len();
let capabilities = scirs2_core::simd_ops::PlatformCapabilities::detect();
if n >= 128 && capabilities.has_avx2() {
bandwidth_saturated_kurtosis_ultra(x, fisher, bias)
} else if n >= 64 {
kurtosis_simd(x, fisher, bias)
} else {
kurtosis_simd(x, fisher, bias)
}
}
#[allow(dead_code)]
pub fn moment_ultra_simd<F, D>(
x: &ArrayBase<D, Ix1>,
moment_order: usize,
center: bool,
) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy + Send + Sync + std::fmt::Display,
D: Data<Elem = F>,
{
checkarray_finite(x, "x")?;
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
if moment_order == 0 {
return Ok(F::one());
}
let n = x.len();
let capabilities = scirs2_core::simd_ops::PlatformCapabilities::detect();
if n >= 128 && capabilities.has_avx2() {
bandwidth_saturated_moment_ultra(x, moment_order, center)
} else if n >= 64 {
moment_simd(x, moment_order, center)
} else {
moment_simd(x, moment_order, center)
}
}
#[allow(dead_code)]
pub fn moments_batch_ultra_simd<F, D>(
x: &ArrayBase<D, Ix1>,
moments: &[usize],
center: bool,
) -> StatsResult<Vec<F>>
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy + Send + Sync + std::fmt::Display,
D: Data<Elem = F>,
{
checkarray_finite(x, "x")?;
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
let n = x.len();
let capabilities = scirs2_core::simd_ops::PlatformCapabilities::detect();
if n >= 256 && capabilities.has_avx2() {
bandwidth_saturated_moments_batch_ultra(x, moments, center)
} else if n >= 64 {
moments_batch_simd(x, moments, center)
} else {
moments_batch_simd(x, moments, center)
}
}
#[allow(dead_code)]
fn bandwidth_saturated_skewness_ultra<F, D>(x: &ArrayBase<D, Ix1>, bias: bool) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy,
D: Data<Elem = F>,
{
let n = x.len();
let chunk_size = 16;
let mut sum = F::zero();
let mut sum_sq = F::zero();
let mut sum_cube = F::zero();
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
let chunk_len = chunk_end - chunk_start;
if chunk_len == chunk_size {
let chunk_data: Array1<f32> = x
.slice(scirs2_core::ndarray::s![chunk_start..chunk_end])
.iter()
.map(|&val| val.to_f64().expect("Operation failed") as f32)
.collect();
let mut chunk_squared: Array1<f32> = Array1::zeros(chunk_size);
let mut chunk_cubed: Array1<f32> = Array1::zeros(chunk_size);
f32::simd_mul_f32_ultra(
&chunk_data.view(),
&chunk_data.view(),
&mut chunk_squared.view_mut(),
);
f32::simd_mul_f32_ultra(
&chunk_squared.view(),
&chunk_data.view(),
&mut chunk_cubed.view_mut(),
);
let chunk_sum = f32::simd_sum_f32_ultra(&chunk_data.view());
let chunk_sum_sq = f32::simd_sum_f32_ultra(&chunk_squared.view());
let chunk_sum_cube = f32::simd_sum_f32_ultra(&chunk_cubed.view());
sum = sum + F::from(chunk_sum as f64).expect("Failed to convert to float");
sum_sq = sum_sq + F::from(chunk_sum_sq as f64).expect("Failed to convert to float");
sum_cube =
sum_cube + F::from(chunk_sum_cube as f64).expect("Failed to convert to float");
} else {
for i in chunk_start..chunk_end {
let val = x[i];
let val_sq = val * val;
sum = sum + val;
sum_sq = sum_sq + val_sq;
sum_cube = sum_cube + val_sq * val;
}
}
}
let n_f = F::from(n).expect("Failed to convert to float");
let mean = sum / n_f;
let mean_sq = mean * mean;
let mean_cube = mean_sq * mean;
let m2 = (sum_sq / n_f) - mean_sq;
let m3 = (sum_cube / n_f) - const_f64::<F>(3.0) * mean * m2 - mean_cube;
if m2 == F::zero() {
return Ok(F::zero());
}
let skew = m3 / m2.powf(const_f64::<F>(1.5));
if !bias && n > 2 {
let sqrt_term = (n_f * (n_f - F::one())).sqrt();
let correction = sqrt_term / (n_f - const_f64::<F>(2.0));
Ok(skew * correction)
} else {
Ok(skew)
}
}
#[allow(dead_code)]
fn bandwidth_saturated_kurtosis_ultra<F, D>(
x: &ArrayBase<D, Ix1>,
fisher: bool,
bias: bool,
) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy,
D: Data<Elem = F>,
{
let n = x.len();
let chunk_size = 16;
let mut sum = F::zero();
let mut sum_sq = F::zero();
let mut sum_fourth = F::zero();
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
let chunk_len = chunk_end - chunk_start;
if chunk_len == chunk_size {
let chunk_data: Array1<f32> = x
.slice(scirs2_core::ndarray::s![chunk_start..chunk_end])
.iter()
.map(|&val| val.to_f64().expect("Operation failed") as f32)
.collect();
let mut chunk_squared: Array1<f32> = Array1::zeros(chunk_size);
let mut chunk_fourth: Array1<f32> = Array1::zeros(chunk_size);
f32::simd_mul_f32_ultra(
&chunk_data.view(),
&chunk_data.view(),
&mut chunk_squared.view_mut(),
);
f32::simd_mul_f32_ultra(
&chunk_squared.view(),
&chunk_squared.view(),
&mut chunk_fourth.view_mut(),
);
let chunk_sum = f32::simd_sum_f32_ultra(&chunk_data.view());
let chunk_sum_sq = f32::simd_sum_f32_ultra(&chunk_squared.view());
let chunk_sum_fourth = f32::simd_sum_f32_ultra(&chunk_fourth.view());
sum = sum + F::from(chunk_sum as f64).expect("Failed to convert to float");
sum_sq = sum_sq + F::from(chunk_sum_sq as f64).expect("Failed to convert to float");
sum_fourth =
sum_fourth + F::from(chunk_sum_fourth as f64).expect("Failed to convert to float");
} else {
for i in chunk_start..chunk_end {
let val = x[i];
let val_sq = val * val;
sum = sum + val;
sum_sq = sum_sq + val_sq;
sum_fourth = sum_fourth + val_sq * val_sq;
}
}
}
let n_f = F::from(n).expect("Failed to convert to float");
let mean = sum / n_f;
let mean_sq = mean * mean;
let variance = (sum_sq / n_f) - mean_sq;
if variance == F::zero() {
return Err(StatsError::DomainError(
"Standard deviation is zero, kurtosis undefined".to_string(),
));
}
let mean_fourth = mean_sq * mean_sq;
let m4 = (sum_fourth / n_f)
- const_f64::<F>(4.0) * mean * (sum_sq / n_f - mean_sq) * mean
- const_f64::<F>(6.0) * mean_sq * variance
- mean_fourth;
let mut k = m4 / (variance * variance);
if !bias && n > 3 {
let n_f = F::from(n).expect("Failed to convert to float");
let n1 = n_f - F::one();
let n2 = n_f - const_f64::<F>(2.0);
let n3 = n_f - const_f64::<F>(3.0);
k = ((n_f + F::one()) * k - const_f64::<F>(3.0) * n1) * n1 / (n2 * n3)
+ const_f64::<F>(3.0);
}
if fisher {
k = k - const_f64::<F>(3.0);
}
Ok(k)
}
#[allow(dead_code)]
fn bandwidth_saturated_moment_ultra<F, D>(
x: &ArrayBase<D, Ix1>,
order: usize,
center: bool,
) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy,
D: Data<Elem = F>,
{
let n = x.len();
let chunk_size = 16;
if order == 0 {
return Ok(F::one());
}
if center {
let mut sum = F::zero();
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
let chunk_len = chunk_end - chunk_start;
if chunk_len == chunk_size {
let chunk_data: Array1<f32> = x
.slice(scirs2_core::ndarray::s![chunk_start..chunk_end])
.iter()
.map(|&val| val.to_f64().expect("Operation failed") as f32)
.collect();
let chunk_sum = f32::simd_sum_f32_ultra(&chunk_data.view());
sum = sum + F::from(chunk_sum as f64).expect("Failed to convert to float");
} else {
for i in chunk_start..chunk_end {
sum = sum + x[i];
}
}
}
let mean = sum / F::from(n).expect("Failed to convert to float");
let mean_f32 = mean.to_f64().expect("Operation failed") as f32;
let mut moment_sum = F::zero();
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
let chunk_len = chunk_end - chunk_start;
if chunk_len == chunk_size {
let chunk_data: Array1<f32> = x
.slice(scirs2_core::ndarray::s![chunk_start..chunk_end])
.iter()
.map(|&val| val.to_f64().expect("Operation failed") as f32)
.collect();
let mean_array: Array1<f32> = Array1::from_elem(chunk_size, mean_f32);
let mut deviations: Array1<f32> = Array1::zeros(chunk_size);
f32::simd_sub_f32_ultra(
&chunk_data.view(),
&mean_array.view(),
&mut deviations.view_mut(),
);
let powered = match order {
1 => deviations.clone(),
2 => {
let mut squared: Array1<f32> = Array1::zeros(chunk_size);
f32::simd_mul_f32_ultra(
&deviations.view(),
&deviations.view(),
&mut squared.view_mut(),
);
squared
}
3 => {
let mut squared: Array1<f32> = Array1::zeros(chunk_size);
let mut cubed: Array1<f32> = Array1::zeros(chunk_size);
f32::simd_mul_f32_ultra(
&deviations.view(),
&deviations.view(),
&mut squared.view_mut(),
);
f32::simd_mul_f32_ultra(
&squared.view(),
&deviations.view(),
&mut cubed.view_mut(),
);
cubed
}
4 => {
let mut squared: Array1<f32> = Array1::zeros(chunk_size);
let mut fourth: Array1<f32> = Array1::zeros(chunk_size);
f32::simd_mul_f32_ultra(
&deviations.view(),
&deviations.view(),
&mut squared.view_mut(),
);
f32::simd_mul_f32_ultra(
&squared.view(),
&squared.view(),
&mut fourth.view_mut(),
);
fourth
}
_ => {
let order_f = order as f32;
deviations
.iter()
.map(|&x| x.powf(order_f))
.collect::<Array1<f32>>()
}
};
let chunk_moment_sum = f32::simd_sum_f32_ultra(&powered.view());
moment_sum = moment_sum
+ F::from(chunk_moment_sum as f64).expect("Failed to convert to float");
} else {
for i in chunk_start..chunk_end {
let dev = x[i] - mean;
moment_sum = moment_sum
+ dev.powf(F::from(order as f64).expect("Failed to convert to float"));
}
}
}
Ok(moment_sum / F::from(n).expect("Failed to convert to float"))
} else {
let mut moment_sum = F::zero();
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
let chunk_len = chunk_end - chunk_start;
if chunk_len == chunk_size {
let chunk_data: Array1<f32> = x
.slice(scirs2_core::ndarray::s![chunk_start..chunk_end])
.iter()
.map(|&val| val.to_f64().expect("Operation failed") as f32)
.collect();
let powered = match order {
1 => chunk_data.clone(),
2 => {
let mut squared: Array1<f32> = Array1::zeros(chunk_size);
f32::simd_mul_f32_ultra(
&chunk_data.view(),
&chunk_data.view(),
&mut squared.view_mut(),
);
squared
}
3 => {
let mut squared: Array1<f32> = Array1::zeros(chunk_size);
let mut cubed: Array1<f32> = Array1::zeros(chunk_size);
f32::simd_mul_f32_ultra(
&chunk_data.view(),
&chunk_data.view(),
&mut squared.view_mut(),
);
f32::simd_mul_f32_ultra(
&squared.view(),
&chunk_data.view(),
&mut cubed.view_mut(),
);
cubed
}
4 => {
let mut squared: Array1<f32> = Array1::zeros(chunk_size);
let mut fourth: Array1<f32> = Array1::zeros(chunk_size);
f32::simd_mul_f32_ultra(
&chunk_data.view(),
&chunk_data.view(),
&mut squared.view_mut(),
);
f32::simd_mul_f32_ultra(
&squared.view(),
&squared.view(),
&mut fourth.view_mut(),
);
fourth
}
_ => {
let order_f = order as f32;
chunk_data
.iter()
.map(|&x| x.powf(order_f))
.collect::<Array1<f32>>()
}
};
let chunk_moment_sum = f32::simd_sum_f32_ultra(&powered.view());
moment_sum = moment_sum
+ F::from(chunk_moment_sum as f64).expect("Failed to convert to float");
} else {
for i in chunk_start..chunk_end {
moment_sum = moment_sum
+ x[i].powf(F::from(order as f64).expect("Failed to convert to float"));
}
}
}
Ok(moment_sum / F::from(n).expect("Failed to convert to float"))
}
}
#[allow(dead_code)]
fn bandwidth_saturated_moments_batch_ultra<F, D>(
x: &ArrayBase<D, Ix1>,
moments: &[usize],
center: bool,
) -> StatsResult<Vec<F>>
where
F: Float + NumCast + SimdUnifiedOps + Zero + One + Copy,
D: Data<Elem = F>,
{
let n = x.len();
let chunk_size = 16;
let max_order = *moments.iter().max().unwrap_or(&0);
let mut results = vec![F::zero(); moments.len()];
if center {
let mut sum = F::zero();
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
let chunk_len = chunk_end - chunk_start;
if chunk_len == chunk_size {
let chunk_data: Array1<f32> = x
.slice(scirs2_core::ndarray::s![chunk_start..chunk_end])
.iter()
.map(|&val| val.to_f64().expect("Operation failed") as f32)
.collect();
let chunk_sum = f32::simd_sum_f32_ultra(&chunk_data.view());
sum = sum + F::from(chunk_sum as f64).expect("Failed to convert to float");
} else {
for i in chunk_start..chunk_end {
sum = sum + x[i];
}
}
}
let mean = sum / F::from(n).expect("Failed to convert to float");
let mean_f32 = mean.to_f64().expect("Operation failed") as f32;
let mut moment_sums = vec![F::zero(); moments.len()];
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
let chunk_len = chunk_end - chunk_start;
if chunk_len == chunk_size {
let chunk_data: Array1<f32> = x
.slice(scirs2_core::ndarray::s![chunk_start..chunk_end])
.iter()
.map(|&val| val.to_f64().expect("Operation failed") as f32)
.collect();
let mean_array: Array1<f32> = Array1::from_elem(chunk_size, mean_f32);
let mut deviations: Array1<f32> = Array1::zeros(chunk_size);
f32::simd_sub_f32_ultra(
&chunk_data.view(),
&mean_array.view(),
&mut deviations.view_mut(),
);
let mut powers: Vec<Array1<f32>> = vec![Array1::zeros(chunk_size); max_order + 1];
powers[0].fill(1.0);
if max_order >= 1 {
powers[1] = deviations.clone();
}
for order in 2..=max_order {
let (left, right) = powers.split_at_mut(order);
f32::simd_mul_f32_ultra(
&left[order - 1].view(),
&deviations.view(),
&mut right[0].view_mut(),
);
}
for (i, &order) in moments.iter().enumerate() {
if order <= max_order {
let chunk_moment_sum = f32::simd_sum_f32_ultra(&powers[order].view());
moment_sums[i] = moment_sums[i]
+ F::from(chunk_moment_sum as f64).expect("Failed to convert to float");
}
}
} else {
for idx in chunk_start..chunk_end {
let dev = x[idx] - mean;
for (i, &order) in moments.iter().enumerate() {
if order == 0 {
moment_sums[i] = moment_sums[i] + F::one();
} else {
moment_sums[i] = moment_sums[i]
+ dev.powf(
F::from(order as f64).expect("Failed to convert to float"),
);
}
}
}
}
}
let n_f = F::from(n).expect("Failed to convert to float");
for (i, &order) in moments.iter().enumerate() {
results[i] = if order == 0 {
F::one()
} else {
moment_sums[i] / n_f
};
}
} else {
let mut moment_sums = vec![F::zero(); moments.len()];
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
let chunk_len = chunk_end - chunk_start;
if chunk_len == chunk_size {
let chunk_data: Array1<f32> = x
.slice(scirs2_core::ndarray::s![chunk_start..chunk_end])
.iter()
.map(|&val| val.to_f64().expect("Operation failed") as f32)
.collect();
let mut powers: Vec<Array1<f32>> = vec![Array1::zeros(chunk_size); max_order + 1];
powers[0].fill(1.0);
if max_order >= 1 {
powers[1] = chunk_data.clone();
}
for order in 2..=max_order {
let (left, right) = powers.split_at_mut(order);
f32::simd_mul_f32_ultra(
&left[order - 1].view(),
&chunk_data.view(),
&mut right[0].view_mut(),
);
}
for (i, &order) in moments.iter().enumerate() {
if order <= max_order {
let chunk_moment_sum = f32::simd_sum_f32_ultra(&powers[order].view());
moment_sums[i] = moment_sums[i]
+ F::from(chunk_moment_sum as f64).expect("Failed to convert to float");
}
}
} else {
for idx in chunk_start..chunk_end {
for (i, &order) in moments.iter().enumerate() {
if order == 0 {
moment_sums[i] = moment_sums[i] + F::one();
} else {
moment_sums[i] = moment_sums[i]
+ x[idx].powf(
F::from(order as f64).expect("Failed to convert to float"),
);
}
}
}
}
}
let n_f = F::from(n).expect("Failed to convert to float");
for (i, &order) in moments.iter().enumerate() {
results[i] = if order == 0 {
F::one()
} else {
moment_sums[i] / n_f
};
}
}
Ok(results)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::descriptive::{kurtosis, moment, skew};
use scirs2_core::ndarray::array;
#[test]
fn test_skewness_simd_consistency() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let simd_result = skewness_simd(&data.view(), false).expect("Test/example failed");
let scalar_result = skew(&data.view(), false, None).expect("Test/example failed");
assert!((simd_result - scalar_result).abs() < 1e-10);
}
#[test]
fn test_kurtosis_simd_consistency() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let simd_result = kurtosis_simd(&data.view(), true, false).expect("Test/example failed");
let scalar_result = kurtosis(&data.view(), true, false, None).expect("Test/example failed");
assert!((simd_result - scalar_result).abs() < 1e-10);
}
#[test]
fn test_moment_simd_consistency() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
for order in 1..=4 {
for center in [true, false] {
let simd_result =
moment_simd(&data.view(), order, center).expect("Test/example failed");
let scalar_result =
moment(&data.view(), order, center, None).expect("Test/example failed");
assert!(
(simd_result - scalar_result).abs() < 1e-10,
"Mismatch for order {} center {}: SIMD {} vs Scalar {}",
order,
center,
simd_result,
scalar_result
);
}
}
}
#[test]
fn test_moments_batch_simd() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let orders = vec![1, 2, 3, 4];
let batch_results =
moments_batch_simd(&data.view(), &orders, true).expect("Test/example failed");
for (i, &order) in orders.iter().enumerate() {
let individual_result =
moment_simd(&data.view(), order, true).expect("Test/example failed");
assert!((batch_results[i] - individual_result).abs() < 1e-10);
}
}
}