use crate::array::Array;
use crate::error::{NumRs2Error, Result};
use num_traits::{Float, NumCast, Zero};
use scirs2_core::ndarray::Array1;
use scirs2_core::parallel_ops::*;
use scirs2_core::simd_ops::SimdUnifiedOps;
use super::quantile::quantile;
pub const PARALLEL_THRESHOLD: usize = 10000;
pub trait Statistics<T> {
fn mean(&self) -> T;
fn var(&self) -> T;
fn std(&self) -> T;
fn min(&self) -> T;
fn max(&self) -> T;
fn percentile(&self, q: T) -> T;
}
impl<T: Float + Clone + Zero + NumCast + std::fmt::Display + Send + Sync + 'static> Statistics<T>
for Array<T>
{
fn mean(&self) -> T {
let data = self.to_vec();
if data.is_empty() {
return T::zero();
}
let sum = if data.len() >= PARALLEL_THRESHOLD {
data.par_iter()
.map(|&x| x)
.reduce(|| T::zero(), |acc, x| acc + x)
} else {
data.iter().fold(T::zero(), |acc, &x| acc + x)
};
sum / T::from(data.len()).expect("data length should be representable")
}
fn var(&self) -> T {
if self.len() >= 64 && std::any::TypeId::of::<T>() == std::any::TypeId::of::<f64>() {
let data: Vec<f64> = self
.to_vec()
.iter()
.map(|x| {
let ptr = x as *const T as *const f64;
unsafe { *ptr }
})
.collect();
let nd_array = Array1::from_vec(data);
let result = f64::simd_variance(&nd_array.view());
return unsafe { std::mem::transmute_copy(&result) };
}
let data = self.to_vec();
if data.is_empty() {
return T::zero();
}
let mean = self.mean();
let sum_squared_diff = if data.len() >= PARALLEL_THRESHOLD {
data.par_iter()
.map(|&x| (x - mean) * (x - mean))
.reduce(|| T::zero(), |acc, x| acc + x)
} else {
data.iter()
.fold(T::zero(), |acc, &x| acc + (x - mean) * (x - mean))
};
sum_squared_diff / T::from(data.len()).expect("data length should be representable")
}
fn std(&self) -> T {
if self.len() >= 64 && std::any::TypeId::of::<T>() == std::any::TypeId::of::<f64>() {
let data: Vec<f64> = self
.to_vec()
.iter()
.map(|x| {
let ptr = x as *const T as *const f64;
unsafe { *ptr }
})
.collect();
let nd_array = Array1::from_vec(data);
let result = f64::simd_std(&nd_array.view());
return unsafe { std::mem::transmute_copy(&result) };
}
self.var().sqrt()
}
fn min(&self) -> T {
if self.len() >= 64 && std::any::TypeId::of::<T>() == std::any::TypeId::of::<f64>() {
let data: Vec<f64> = self
.to_vec()
.iter()
.map(|x| {
let ptr = x as *const T as *const f64;
unsafe { *ptr }
})
.collect();
let nd_array = Array1::from_vec(data);
let result = f64::simd_min_element(&nd_array.view());
return unsafe { std::mem::transmute_copy(&result) };
}
let data = self.to_vec();
if data.is_empty() {
return T::zero();
}
if data.len() >= PARALLEL_THRESHOLD {
data.par_iter()
.cloned()
.reduce(|| data[0], |acc, x| if x < acc { x } else { acc })
} else {
data.iter()
.fold(data[0], |acc, &x| if x < acc { x } else { acc })
}
}
fn max(&self) -> T {
if self.len() >= 64 && std::any::TypeId::of::<T>() == std::any::TypeId::of::<f64>() {
let data: Vec<f64> = self
.to_vec()
.iter()
.map(|x| {
let ptr = x as *const T as *const f64;
unsafe { *ptr }
})
.collect();
let nd_array = Array1::from_vec(data);
let result = f64::simd_max_element(&nd_array.view());
return unsafe { std::mem::transmute_copy(&result) };
}
let data = self.to_vec();
if data.is_empty() {
return T::zero();
}
if data.len() >= PARALLEL_THRESHOLD {
data.par_iter()
.cloned()
.reduce(|| data[0], |acc, x| if x > acc { x } else { acc })
} else {
data.iter()
.fold(data[0], |acc, &x| if x > acc { x } else { acc })
}
}
fn percentile(&self, q: T) -> T {
let quantile_val = q;
let q_array = Array::from_vec(vec![quantile_val]);
match quantile(self, &q_array, Some("linear")) {
Ok(result) => result.to_vec()[0],
Err(_) => T::zero(),
}
}
}
pub fn ptp<T: Float + Clone + NumCast + Default + Send + Sync>(
a: &Array<T>,
axis: Option<usize>,
) -> Result<Array<T>> {
if axis.is_none() {
let data = a.to_vec();
let min_val = data
.iter()
.fold(data[0], |min, &val| if val < min { val } else { min });
let max_val = data
.iter()
.fold(data[0], |max, &val| if val > max { val } else { max });
let result = vec![max_val - min_val];
return Ok(Array::from_vec(result));
}
let axis_val = axis.expect("axis should be Some at this point");
let min_array = min_along_axis(a, axis_val)?;
let max_array = max_along_axis(a, axis_val)?;
let min_data = min_array.to_vec();
let max_data = max_array.to_vec();
let mut result = Vec::with_capacity(min_data.len());
for i in 0..min_data.len() {
result.push(max_data[i] - min_data[i]);
}
Ok(Array::from_vec(result).reshape(&min_array.shape()))
}
pub fn min_along_axis<T: Float + Clone + NumCast + Default + Send + Sync>(
a: &Array<T>,
axis: usize,
) -> Result<Array<T>> {
if axis >= a.ndim() {
return Err(NumRs2Error::DimensionMismatch(format!(
"Axis {} out of bounds for array of dimension {}",
axis,
a.ndim()
)));
}
let shape = a.shape();
let axis_size = shape[axis];
let mut result_shape = shape.clone();
result_shape.remove(axis);
let data = a.to_vec();
let mut result = Array::<T>::empty_like(a);
result = result.reshape(&result_shape);
let result_size = result.size();
let mut min_values = vec![T::zero(); result_size];
let mut indices = vec![0; shape.len()];
let mut result_indices = vec![0; result_shape.len()];
if result_size >= PARALLEL_THRESHOLD {
min_values
.par_iter_mut()
.enumerate()
.for_each(|(i, min_val)| {
let mut remainder = i;
let mut result_indices = vec![0; result_shape.len()];
for j in (0..result_shape.len()).rev() {
result_indices[j] = remainder % result_shape[j];
remainder /= result_shape[j];
}
let mut indices = vec![0; shape.len()];
let mut result_idx = 0;
for j in 0..shape.len() {
if j == axis {
indices[j] = 0; } else {
indices[j] = result_indices[result_idx];
result_idx += 1;
}
}
let mut flat_idx = 0;
let mut stride = 1;
for j in (0..shape.len()).rev() {
flat_idx += indices[j] * stride;
stride *= shape[j];
}
*min_val = data[flat_idx];
for k in 1..axis_size {
indices[axis] = k;
let mut new_idx = 0;
let mut new_stride = 1;
for j in (0..shape.len()).rev() {
new_idx += indices[j] * new_stride;
new_stride *= shape[j];
}
if data[new_idx] < *min_val {
*min_val = data[new_idx];
}
}
});
} else {
#[allow(clippy::needless_range_loop)]
for i in 0..result_size {
let mut remainder = i;
for j in (0..result_shape.len()).rev() {
result_indices[j] = remainder % result_shape[j];
remainder /= result_shape[j];
}
let mut result_idx = 0;
#[allow(clippy::needless_range_loop)]
for j in 0..shape.len() {
if j == axis {
indices[j] = 0; } else {
indices[j] = result_indices[result_idx];
result_idx += 1;
}
}
let mut flat_idx = 0;
let mut stride = 1;
for j in (0..shape.len()).rev() {
flat_idx += indices[j] * stride;
stride *= shape[j];
}
min_values[i] = data[flat_idx];
for k in 1..axis_size {
indices[axis] = k;
let mut new_idx = 0;
let mut new_stride = 1;
for j in (0..shape.len()).rev() {
new_idx += indices[j] * new_stride;
new_stride *= shape[j];
}
if data[new_idx] < min_values[i] {
min_values[i] = data[new_idx];
}
}
}
}
Ok(Array::from_vec(min_values).reshape(&result_shape))
}
pub fn max_along_axis<T: Float + Clone + NumCast + Default + Send + Sync>(
a: &Array<T>,
axis: usize,
) -> Result<Array<T>> {
if axis >= a.ndim() {
return Err(NumRs2Error::DimensionMismatch(format!(
"Axis {} out of bounds for array of dimension {}",
axis,
a.ndim()
)));
}
let shape = a.shape();
let axis_size = shape[axis];
let mut result_shape = shape.clone();
result_shape.remove(axis);
let data = a.to_vec();
let mut result = Array::<T>::empty_like(a);
result = result.reshape(&result_shape);
let result_size = result.size();
let mut max_values = vec![T::zero(); result_size];
let mut indices = vec![0; shape.len()];
let mut result_indices = vec![0; result_shape.len()];
if result_size >= PARALLEL_THRESHOLD {
max_values
.par_iter_mut()
.enumerate()
.for_each(|(i, max_val)| {
let mut remainder = i;
let mut result_indices = vec![0; result_shape.len()];
for j in (0..result_shape.len()).rev() {
result_indices[j] = remainder % result_shape[j];
remainder /= result_shape[j];
}
let mut indices = vec![0; shape.len()];
let mut result_idx = 0;
for j in 0..shape.len() {
if j == axis {
indices[j] = 0; } else {
indices[j] = result_indices[result_idx];
result_idx += 1;
}
}
let mut flat_idx = 0;
let mut stride = 1;
for j in (0..shape.len()).rev() {
flat_idx += indices[j] * stride;
stride *= shape[j];
}
*max_val = data[flat_idx];
for k in 1..axis_size {
indices[axis] = k;
let mut new_idx = 0;
let mut new_stride = 1;
for j in (0..shape.len()).rev() {
new_idx += indices[j] * new_stride;
new_stride *= shape[j];
}
if data[new_idx] > *max_val {
*max_val = data[new_idx];
}
}
});
} else {
#[allow(clippy::needless_range_loop)]
for i in 0..result_size {
let mut remainder = i;
for j in (0..result_shape.len()).rev() {
result_indices[j] = remainder % result_shape[j];
remainder /= result_shape[j];
}
let mut result_idx = 0;
#[allow(clippy::needless_range_loop)]
for j in 0..shape.len() {
if j == axis {
indices[j] = 0; } else {
indices[j] = result_indices[result_idx];
result_idx += 1;
}
}
let mut flat_idx = 0;
let mut stride = 1;
for j in (0..shape.len()).rev() {
flat_idx += indices[j] * stride;
stride *= shape[j];
}
max_values[i] = data[flat_idx];
for k in 1..axis_size {
indices[axis] = k;
let mut new_idx = 0;
let mut new_stride = 1;
for j in (0..shape.len()).rev() {
new_idx += indices[j] * new_stride;
new_stride *= shape[j];
}
if data[new_idx] > max_values[i] {
max_values[i] = data[new_idx];
}
}
}
}
Ok(Array::from_vec(max_values).reshape(&result_shape))
}
pub fn average<T: Float + Clone + Zero + NumCast + Send + Sync>(
a: &Array<T>,
weights: Option<&Array<T>>,
axis: Option<usize>,
returned: Option<bool>,
) -> Result<Array<T>> {
if weights.is_none() {
if let Some(ax) = axis {
return a.sum_axis(ax).map(|sum| {
sum.scalar_div(T::from(a.shape()[ax]).expect("axis size should be representable"))
});
} else {
let data = a.to_vec();
if data.is_empty() {
return Err(NumRs2Error::InvalidOperation(
"Cannot average empty array".to_string(),
));
}
let sum = data.iter().fold(T::zero(), |acc, &val| acc + val);
let mean = sum / T::from(data.len()).expect("data length should be representable");
return Ok(Array::from_vec(vec![mean]));
}
}
let w = weights.expect("weights should be Some at this point");
if a.shape() != w.shape() {
return Err(NumRs2Error::ShapeMismatch {
expected: a.shape(),
actual: w.shape(),
});
}
let a_data = a.to_vec();
let w_data = w.to_vec();
if let Some(ax) = axis {
let weighted_sum = weighted_sum_along_axis(a, w, ax)?;
let weight_sum = w.sum_axis(ax)?;
let w_sum_data = weight_sum.to_vec();
let weighted_sum_data = weighted_sum.to_vec();
let mut result = Vec::with_capacity(w_sum_data.len());
for i in 0..w_sum_data.len() {
if w_sum_data[i] == T::zero() {
result.push(T::zero());
} else {
result.push(weighted_sum_data[i] / w_sum_data[i]);
}
}
let avg = Array::from_vec(result).reshape(&weight_sum.shape());
if returned.unwrap_or(false) {
Ok(avg)
} else {
Ok(avg)
}
} else {
let mut weighted_sum = T::zero();
let mut weight_sum = T::zero();
for i in 0..a_data.len() {
weighted_sum = weighted_sum + a_data[i] * w_data[i];
weight_sum = weight_sum + w_data[i];
}
let avg = if weight_sum == T::zero() {
T::zero()
} else {
weighted_sum / weight_sum
};
if returned.unwrap_or(false) {
Ok(Array::from_vec(vec![avg]))
} else {
Ok(Array::from_vec(vec![avg]))
}
}
}
fn weighted_sum_along_axis<T: Float + Clone + Zero + NumCast + Send + Sync>(
a: &Array<T>,
weights: &Array<T>,
axis: usize,
) -> Result<Array<T>> {
if axis >= a.ndim() {
return Err(NumRs2Error::DimensionMismatch(format!(
"Axis {} out of bounds for array of dimension {}",
axis,
a.ndim()
)));
}
let shape = a.shape();
let axis_size = shape[axis];
let mut result_shape = shape.clone();
result_shape.remove(axis);
let mut result = Array::zeros(&result_shape);
let a_data = a.to_vec();
let w_data = weights.to_vec();
let mut indices = vec![0; shape.len()];
let mut result_indices = vec![0; result_shape.len()];
let result_size = result.size();
for i in 0..result_size {
let mut remainder = i;
for j in (0..result_shape.len()).rev() {
result_indices[j] = remainder % result_shape[j];
remainder /= result_shape[j];
}
let mut result_idx = 0;
#[allow(clippy::needless_range_loop)]
for j in 0..shape.len() {
if j == axis {
indices[j] = 0; } else {
indices[j] = result_indices[result_idx];
result_idx += 1;
}
}
let mut sum = T::zero();
for k in 0..axis_size {
indices[axis] = k;
let mut flat_idx = 0;
let mut stride = 1;
for j in (0..shape.len()).rev() {
flat_idx += indices[j] * stride;
stride *= shape[j];
}
sum = sum + a_data[flat_idx] * w_data[flat_idx];
}
result.set(&result_indices, sum)?;
}
Ok(result)
}