use crate::error::{Result, SklearsError};
use crate::types::{Array1, Array2, FloatBounds};
use scirs2_core::ndarray::Axis;
pub struct ArrayStats;
impl ArrayStats {
pub fn weighted_mean<T>(array: &Array1<T>, weights: &Array1<T>) -> Result<T>
where
T: FloatBounds,
{
if array.len() != weights.len() {
return Err(SklearsError::ShapeMismatch {
expected: format!("{}", array.len()),
actual: format!("{}", weights.len()),
});
}
let weight_sum = weights.sum();
if weight_sum == T::zero() {
return Err(SklearsError::InvalidInput(
"Weight sum cannot be zero".to_string(),
));
}
let weighted_sum = array
.iter()
.zip(weights.iter())
.map(|(&x, &w)| x * w)
.fold(T::zero(), |acc, x| acc + x);
Ok(weighted_sum / weight_sum)
}
pub fn robust_covariance<T>(data: &Array2<T>, shrinkage: Option<T>) -> Result<Array2<T>>
where
T: FloatBounds + scirs2_core::ndarray::ScalarOperand,
{
let (n_samples, n_features) = data.dim();
if n_samples < 2 {
return Err(SklearsError::InvalidInput(
"Need at least 2 samples for covariance".to_string(),
));
}
let means = data.mean_axis(Axis(0)).ok_or_else(|| {
SklearsError::NumericalError("mean_axis computation failed on empty axis".to_string())
})?;
let centered = data - &means.insert_axis(Axis(0));
let cov_empirical =
centered.t().dot(¢ered) / T::from_usize(n_samples - 1).unwrap_or_else(|| T::zero());
if let Some(shrink) = shrinkage {
let identity = Array2::<T>::eye(n_features);
let trace = (0..n_features)
.map(|i| cov_empirical[[i, i]])
.fold(T::zero(), |acc, x| acc + x);
let target =
identity * (trace / T::from_usize(n_features).unwrap_or_else(|| T::zero()));
Ok(&cov_empirical * (T::one() - shrink) + &target * shrink)
} else {
Ok(cov_empirical)
}
}
pub fn percentile<T>(array: &Array1<T>, q: T) -> Result<T>
where
T: FloatBounds + PartialOrd,
{
if array.is_empty() {
return Err(SklearsError::InvalidInput(
"Array cannot be empty".to_string(),
));
}
if q < T::zero() || q > T::from_f64(100.0).unwrap_or_else(|| T::zero()) {
return Err(SklearsError::InvalidInput(
"Percentile must be between 0 and 100".to_string(),
));
}
let mut sorted = array.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted.len();
let index = q / T::from_f64(100.0).unwrap_or_else(|| T::zero())
* T::from_usize(n - 1).unwrap_or_else(|| T::zero());
let lower_idx = index.floor().to_usize().unwrap_or(0);
let upper_idx = index.ceil().to_usize().unwrap_or(0).min(n - 1);
if lower_idx == upper_idx {
Ok(sorted[lower_idx])
} else {
let lower_val = sorted[lower_idx];
let upper_val = sorted[upper_idx];
let weight = index - T::from_usize(lower_idx).unwrap_or_else(|| T::zero());
Ok(lower_val * (T::one() - weight) + upper_val * weight)
}
}
}
pub struct MatrixOps;
impl MatrixOps {
pub fn condition_number<T>(matrix: &Array2<T>) -> Result<T>
where
T: FloatBounds,
{
let (rows, cols) = matrix.dim();
if rows != cols {
return Err(SklearsError::InvalidInput(
"Matrix must be square for condition number".to_string(),
));
}
let mut min_diag = T::infinity();
let mut max_diag = T::neg_infinity();
for i in 0..rows {
let diag_val = matrix[[i, i]].abs();
if diag_val < min_diag {
min_diag = diag_val;
}
if diag_val > max_diag {
max_diag = diag_val;
}
}
if min_diag == T::zero() {
Ok(T::infinity())
} else {
Ok(max_diag / min_diag)
}
}
pub fn rank<T>(matrix: &Array2<T>, tolerance: Option<T>) -> usize
where
T: FloatBounds,
{
let (rows, cols) = matrix.dim();
let tol = tolerance.unwrap_or_else(|| {
T::from_f64(1e-12).unwrap_or_else(|| T::zero())
* T::from_usize(rows.max(cols)).unwrap_or_else(|| T::zero())
});
let min_dim = rows.min(cols);
let mut rank = 0;
for i in 0..min_dim {
if matrix[[i, i]].abs() > tol {
rank += 1;
}
}
rank
}
pub fn pinv<T>(matrix: &Array2<T>, _tolerance: Option<T>) -> Result<Array2<T>>
where
T: FloatBounds,
{
let (rows, cols) = matrix.dim();
if rows == cols {
if let Ok(inv) = Self::try_inverse(matrix) {
return Ok(inv);
}
}
let gram = if rows >= cols {
let at = matrix.t().to_owned();
let ata = at.dot(matrix);
let ata_inv = Self::try_inverse(&ata)?;
ata_inv.dot(&at)
} else {
let at = matrix.t().to_owned();
let aat = matrix.dot(&at);
let aat_inv = Self::try_inverse(&aat)?;
at.dot(&aat_inv)
};
Ok(gram)
}
fn try_inverse<T>(matrix: &Array2<T>) -> Result<Array2<T>>
where
T: FloatBounds,
{
let (rows, cols) = matrix.dim();
if rows != cols {
return Err(SklearsError::InvalidInput(
"Matrix must be square".to_string(),
));
}
let mut inv = Array2::<T>::zeros((rows, cols));
for i in 0..rows {
let diag_val = matrix[[i, i]];
if diag_val.abs() < T::from_f64(1e-15).unwrap_or_else(|| T::zero()) {
return Err(SklearsError::InvalidInput("Matrix is singular".to_string()));
}
inv[[i, i]] = T::one() / diag_val;
}
Ok(inv)
}
}
pub struct MemoryOps;
impl MemoryOps {
pub fn chunked_dot<T>(a: &Array1<T>, b: &Array1<T>, chunk_size: Option<usize>) -> Result<T>
where
T: FloatBounds,
{
if a.len() != b.len() {
return Err(SklearsError::ShapeMismatch {
expected: format!("{}", a.len()),
actual: format!("{}", b.len()),
});
}
let chunk_size = chunk_size.unwrap_or(1024);
let mut result = T::zero();
let a_chunks: Vec<_> = a.exact_chunks(chunk_size).into_iter().collect();
let b_chunks: Vec<_> = b.exact_chunks(chunk_size).into_iter().collect();
for (a_chunk, b_chunk) in a_chunks.iter().zip(b_chunks.iter()) {
result += a_chunk
.iter()
.zip(b_chunk.iter())
.map(|(&x, &y)| x * y)
.fold(T::zero(), |acc, x| acc + x);
}
let remainder_len = a.len() % chunk_size;
if remainder_len > 0 {
let start_idx = a.len() - remainder_len;
for i in 0..remainder_len {
result += a[start_idx + i] * b[start_idx + i];
}
}
Ok(result)
}
pub fn streaming_stats<T>(values: impl Iterator<Item = T>) -> (T, T, usize)
where
T: FloatBounds,
{
let mut count = 0;
let mut mean = T::zero();
let mut m2 = T::zero();
for value in values {
count += 1;
let delta = value - mean;
mean += delta / T::from_usize(count).unwrap_or_else(|| T::zero());
let delta2 = value - mean;
m2 += delta * delta2;
}
let variance = if count > 1 {
m2 / T::from_usize(count - 1).unwrap_or_else(|| T::zero())
} else {
T::zero()
};
(mean, variance, count)
}
}
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
use scirs2_core::ndarray::array;
#[test]
fn test_weighted_mean() {
let data = array![1.0, 2.0, 3.0, 4.0];
let weights = array![1.0, 2.0, 3.0, 4.0];
let result = ArrayStats::weighted_mean(&data, &weights).expect("expected valid value");
let expected = (1.0 * 1.0 + 2.0 * 2.0 + 3.0 * 3.0 + 4.0 * 4.0) / (1.0 + 2.0 + 3.0 + 4.0);
assert_abs_diff_eq!(result, expected, epsilon = 1e-10);
}
#[test]
fn test_percentile() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
let median = ArrayStats::percentile(&data, 50.0).expect("expected valid value");
assert_abs_diff_eq!(median, 3.0, epsilon = 1e-10);
let q25 = ArrayStats::percentile(&data, 25.0).expect("expected valid value");
assert_abs_diff_eq!(q25, 2.0, epsilon = 1e-10);
}
#[test]
fn test_chunked_dot() {
let a = array![1.0, 2.0, 3.0, 4.0, 5.0];
let b = array![2.0, 3.0, 4.0, 5.0, 6.0];
let result = MemoryOps::chunked_dot(&a, &b, Some(2)).expect("expected valid value");
let expected: f64 = a.iter().zip(b.iter()).map(|(&x, &y)| x * y).sum();
assert_abs_diff_eq!(result, expected, epsilon = 1e-10);
}
#[test]
fn test_streaming_stats() {
let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let (mean, variance, count) = MemoryOps::streaming_stats(values.into_iter());
assert_eq!(count, 5);
assert_abs_diff_eq!(mean, 3.0, epsilon = 1e-10);
assert_abs_diff_eq!(variance, 2.5, epsilon = 1e-10);
}
#[test]
fn test_robust_covariance() {
use scirs2_core::ndarray::array;
let data = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let cov = ArrayStats::robust_covariance(&data, None).expect("expected valid value");
assert_eq!(cov.dim(), (2, 2));
assert!(cov[[0, 0]] > 0.0);
assert!(cov[[1, 1]] > 0.0);
assert_abs_diff_eq!(cov[[0, 1]], cov[[1, 0]], epsilon = 1e-10);
}
}