use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{ArrayBase, ArrayView1, Data, Ix1, Ix2};
use scirs2_core::numeric::{Float, NumCast};
use scirs2_core::simd_ops::SimdUnifiedOps;
#[allow(dead_code)]
pub fn distance_matrix_simd<F, D>(
data: &ArrayBase<D, Ix2>,
metric: &str,
) -> StatsResult<scirs2_core::ndarray::Array2<F>>
where
F: Float + NumCast + SimdUnifiedOps + std::fmt::Display + std::iter::Sum + Send + Sync,
D: Data<Elem = F>,
{
let (n_samples_, n_features) = data.dim();
if n_samples_ == 0 || n_features == 0 {
return Err(StatsError::InvalidArgument(
"Data matrix cannot be empty".to_string(),
));
}
let mut distances = scirs2_core::ndarray::Array2::zeros((n_samples_, n_samples_));
for i in 0..n_samples_ {
for j in (i + 1)..n_samples_ {
let row_i = data.row(i);
let row_j = data.row(j);
let distance = match metric {
"euclidean" => euclidean_distance_simd(&row_i, &row_j)?,
"manhattan" => manhattan_distance_simd(&row_i, &row_j)?,
"cosine" => cosine_distance_simd(&row_i, &row_j)?,
_ => {
return Err(StatsError::InvalidArgument(format!(
"Unknown metric: {}",
metric
)))
}
};
distances[(i, j)] = distance;
distances[(j, i)] = distance; }
}
Ok(distances)
}
#[allow(dead_code)]
pub fn euclidean_distance_simd<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps + std::fmt::Display,
{
if x.len() != y.len() {
return Err(StatsError::DimensionMismatch(format!(
"Vectors must have the same length: {} vs {}",
x.len(),
y.len()
)));
}
let n = x.len();
let sum_sq_diff = if n > 16 {
let diff = F::simd_sub(&x.view(), &y.view());
let sq_diff = F::simd_mul(&diff.view(), &diff.view());
F::simd_sum(&sq_diff.view())
} else {
x.iter()
.zip(y.iter())
.map(|(&xi, &yi)| {
let diff = xi - yi;
diff * diff
})
.fold(F::zero(), |acc, x| acc + x)
};
Ok(sum_sq_diff.sqrt())
}
#[allow(dead_code)]
pub fn manhattan_distance_simd<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps + std::fmt::Display,
{
if x.len() != y.len() {
return Err(StatsError::DimensionMismatch(format!(
"Vectors must have the same length: {} vs {}",
x.len(),
y.len()
)));
}
let n = x.len();
let sum_abs_diff = if n > 16 {
let diff = F::simd_sub(&x.view(), &y.view());
let abs_diff = F::simd_abs(&diff.view());
F::simd_sum(&abs_diff.view())
} else {
x.iter()
.zip(y.iter())
.map(|(&xi, &yi)| (xi - yi).abs())
.fold(F::zero(), |acc, x| acc + x)
};
Ok(sum_abs_diff)
}
#[allow(dead_code)]
pub fn cosine_distance_simd<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<F>
where
F: Float + NumCast + SimdUnifiedOps + std::fmt::Display,
{
if x.len() != y.len() {
return Err(StatsError::DimensionMismatch(format!(
"Vectors must have the same length: {} vs {}",
x.len(),
y.len()
)));
}
let n = x.len();
let (dot_product, norm_x_sq, norm_y_sq) = if n > 16 {
let dot = F::simd_mul(&x.view(), &y.view());
let dot_product = F::simd_sum(&dot.view());
let x_sq = F::simd_mul(&x.view(), &x.view());
let norm_x_sq = F::simd_sum(&x_sq.view());
let y_sq = F::simd_mul(&y.view(), &y.view());
let norm_y_sq = F::simd_sum(&y_sq.view());
(dot_product, norm_x_sq, norm_y_sq)
} else {
let mut dot_product = F::zero();
let mut norm_x_sq = F::zero();
let mut norm_y_sq = F::zero();
for (&xi, &yi) in x.iter().zip(y.iter()) {
dot_product = dot_product + xi * yi;
norm_x_sq = norm_x_sq + xi * xi;
norm_y_sq = norm_y_sq + yi * yi;
}
(dot_product, norm_x_sq, norm_y_sq)
};
let norm_product = (norm_x_sq * norm_y_sq).sqrt();
if norm_product <= F::epsilon() {
return Err(StatsError::InvalidArgument(
"Cannot compute cosine distance for zero vectors".to_string(),
));
}
let cosine_similarity = dot_product / norm_product;
Ok(F::one() - cosine_similarity)
}
pub struct MovingWindowSIMD<F> {
windowsize: usize,
_phantom: std::marker::PhantomData<F>,
}
impl<F> MovingWindowSIMD<F>
where
F: Float + NumCast + SimdUnifiedOps + std::fmt::Display,
{
pub fn new(_windowsize: usize) -> Self {
Self {
windowsize: _windowsize,
_phantom: std::marker::PhantomData,
}
}
pub fn moving_mean<D>(
&self,
data: &ArrayBase<D, Ix1>,
) -> StatsResult<scirs2_core::ndarray::Array1<F>>
where
D: Data<Elem = F>,
{
if data.len() < self.windowsize {
return Err(StatsError::InvalidArgument(
"Data length must be >= window size".to_string(),
));
}
let n_windows = data.len() - self.windowsize + 1;
let mut result = scirs2_core::ndarray::Array1::zeros(n_windows);
if self.windowsize > 16 {
for i in 0..n_windows {
let window = data.slice(scirs2_core::ndarray::s![i..i + self.windowsize]);
let sum = F::simd_sum(&window);
result[i] = sum / F::from(self.windowsize).expect("Failed to convert to float");
}
} else {
let mut window_sum = data
.slice(scirs2_core::ndarray::s![0..self.windowsize])
.iter()
.fold(F::zero(), |acc, &x| acc + x);
result[0] = window_sum / F::from(self.windowsize).expect("Failed to convert to float");
for i in 1..n_windows {
window_sum = window_sum - data[i - 1] + data[i + self.windowsize - 1];
result[i] =
window_sum / F::from(self.windowsize).expect("Failed to convert to float");
}
}
Ok(result)
}
pub fn moving_variance<D>(
&self,
data: &ArrayBase<D, Ix1>,
ddof: usize,
) -> StatsResult<scirs2_core::ndarray::Array1<F>>
where
D: Data<Elem = F>,
{
if data.len() < self.windowsize {
return Err(StatsError::InvalidArgument(
"Data length must be >= window size".to_string(),
));
}
if self.windowsize <= ddof {
return Err(StatsError::InvalidArgument(
"Window size must be > ddof".to_string(),
));
}
let n_windows = data.len() - self.windowsize + 1;
let mut result = scirs2_core::ndarray::Array1::zeros(n_windows);
for i in 0..n_windows {
let window = data.slice(scirs2_core::ndarray::s![i..i + self.windowsize]);
if self.windowsize > 16 {
let mean = F::simd_sum(&window.view())
/ F::from(self.windowsize).expect("Failed to convert to float");
let mean_array = scirs2_core::ndarray::Array1::from_elem(self.windowsize, mean);
let diff = F::simd_sub(&window, &mean_array.view());
let sq_diff = F::simd_mul(&diff.view(), &diff.view());
let sum_sq_diff = F::simd_sum(&sq_diff.view());
result[i] = sum_sq_diff
/ F::from(self.windowsize - ddof).expect("Failed to convert to float");
} else {
let mut mean = F::zero();
let mut m2 = F::zero();
let mut count = 0;
for &val in window.iter() {
count += 1;
let delta = val - mean;
mean = mean + delta / F::from(count).expect("Failed to convert to float");
let delta2 = val - mean;
m2 = m2 + delta * delta2;
}
result[i] = m2 / F::from(count - ddof).expect("Failed to convert to float");
}
}
Ok(result)
}
pub fn moving_min<D>(
&self,
data: &ArrayBase<D, Ix1>,
) -> StatsResult<scirs2_core::ndarray::Array1<F>>
where
D: Data<Elem = F>,
{
if data.len() < self.windowsize {
return Err(StatsError::InvalidArgument(
"Data length must be >= window size".to_string(),
));
}
let n_windows = data.len() - self.windowsize + 1;
let mut result = scirs2_core::ndarray::Array1::zeros(n_windows);
for i in 0..n_windows {
let window = data.slice(scirs2_core::ndarray::s![i..i + self.windowsize]);
result[i] = window.iter().fold(
window[0],
|min_val, &x| if x < min_val { x } else { min_val },
);
}
Ok(result)
}
pub fn moving_max<D>(
&self,
data: &ArrayBase<D, Ix1>,
) -> StatsResult<scirs2_core::ndarray::Array1<F>>
where
D: Data<Elem = F>,
{
if data.len() < self.windowsize {
return Err(StatsError::InvalidArgument(
"Data length must be >= window size".to_string(),
));
}
let n_windows = data.len() - self.windowsize + 1;
let mut result = scirs2_core::ndarray::Array1::zeros(n_windows);
for i in 0..n_windows {
let window = data.slice(scirs2_core::ndarray::s![i..i + self.windowsize]);
result[i] = window.iter().fold(
window[0],
|max_val, &x| if x > max_val { x } else { max_val },
);
}
Ok(result)
}
}
#[allow(dead_code)]
pub fn histogram_simd<F, D>(
data: &ArrayBase<D, Ix1>,
bins: usize,
range: Option<(F, F)>,
) -> StatsResult<(
scirs2_core::ndarray::Array1<usize>,
scirs2_core::ndarray::Array1<F>,
)>
where
F: Float + NumCast + SimdUnifiedOps + std::fmt::Display + std::iter::Sum + Send + Sync,
D: Data<Elem = F>,
{
if data.is_empty() {
return Err(StatsError::InvalidArgument(
"Data cannot be empty".to_string(),
));
}
if bins == 0 {
return Err(StatsError::InvalidArgument(
"Number of bins must be positive".to_string(),
));
}
let (min_val, max_val) = if let Some((min, max)) = range {
(min, max)
} else {
let min_val = data
.iter()
.fold(data[0], |min, &x| if x < min { x } else { min });
let max_val = data
.iter()
.fold(data[0], |max, &x| if x > max { x } else { max });
(min_val, max_val)
};
if max_val <= min_val {
return Err(StatsError::InvalidArgument(
"Invalid range: max must be > min".to_string(),
));
}
let mut bin_edges = scirs2_core::ndarray::Array1::zeros(bins + 1);
let range_width = max_val - min_val;
let bin_width = range_width / F::from(bins).expect("Failed to convert to float");
for i in 0..=bins {
bin_edges[i] = min_val + F::from(i).expect("Failed to convert to float") * bin_width;
}
let mut counts = scirs2_core::ndarray::Array1::zeros(bins);
for &value in data.iter() {
if value >= min_val && value <= max_val {
let bin_idx = if value == max_val {
bins - 1 } else {
let normalized = (value - min_val) / bin_width;
normalized
.floor()
.to_usize()
.expect("Operation failed")
.min(bins - 1)
};
counts[bin_idx] += 1;
}
}
Ok((counts, bin_edges))
}
#[allow(dead_code)]
pub fn detect_outliers_zscore_simd<F, D>(
data: &ArrayBase<D, Ix1>,
threshold: F,
) -> StatsResult<scirs2_core::ndarray::Array1<bool>>
where
F: Float + NumCast + SimdUnifiedOps + std::fmt::Display + std::iter::Sum + Send + Sync,
D: Data<Elem = F>,
{
if data.is_empty() {
return Err(StatsError::InvalidArgument(
"Data cannot be empty".to_string(),
));
}
let n = data.len();
let mean = data.iter().fold(F::zero(), |acc, &x| acc + x)
/ F::from(n).expect("Failed to convert to float");
let variance = {
let sum_sq_diff = data
.iter()
.map(|&x| {
let diff = x - mean;
diff * diff
})
.fold(F::zero(), |acc, x| acc + x);
sum_sq_diff / F::from(n - 1).expect("Failed to convert to float")
};
let std_dev = variance.sqrt();
if std_dev <= F::epsilon() {
return Ok(scirs2_core::ndarray::Array1::from_elem(n, false));
}
let mut outliers = scirs2_core::ndarray::Array1::from_elem(n, false);
for (i, &value) in data.iter().enumerate() {
let z_score = (value - mean) / std_dev;
outliers[i] = z_score.abs() > threshold;
}
Ok(outliers)
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_relative_eq;
use scirs2_core::ndarray::array;
#[test]
fn test_euclidean_distance_simd() {
let x = array![1.0, 2.0, 3.0];
let y = array![4.0, 5.0, 6.0];
let distance = euclidean_distance_simd(&x.view(), &y.view()).expect("Operation failed");
let expected = ((4.0 - 1.0).powi(2) + (5.0 - 2.0).powi(2) + (6.0 - 3.0).powi(2)).sqrt();
assert_relative_eq!(distance, expected, epsilon = 1e-10);
}
#[test]
fn test_moving_window_simd() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let window = MovingWindowSIMD::new(3);
let moving_mean = window.moving_mean(&data.view()).expect("Operation failed");
assert_relative_eq!(moving_mean[0], 2.0, epsilon = 1e-10); assert_relative_eq!(moving_mean[1], 3.0, epsilon = 1e-10); assert_relative_eq!(moving_mean[7], 9.0, epsilon = 1e-10); }
#[test]
fn test_histogram_simd() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
let (counts, edges) = histogram_simd(&data.view(), 5, None).expect("Operation failed");
assert_eq!(counts.len(), 5);
assert_eq!(edges.len(), 6);
assert_eq!(counts.sum(), 5); }
#[test]
fn test_outlier_detection_simd() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 100.0]; let outliers = detect_outliers_zscore_simd(&data.view(), 2.0).expect("Operation failed");
assert!(!outliers[0]); assert!(!outliers[4]); assert!(outliers[5]); }
}