use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Dimension, Ix1, Ix2};
use scirs2_core::numeric::{Float, NumCast};
use scirs2_core::simd_ops::SimdUnifiedOps;
use crate::error::{MetricsError, Result};
/// Computes the Euclidean (L2) distance between two arrays of identical shape.
///
/// Elements are paired in iteration order and the result is
/// `sqrt(sum((x_i - y_i)^2))`. Arrays of any dimensionality are accepted as
/// long as both shapes match.
///
/// When both inputs are in standard layout they are reshaped to one
/// dimension and reduced with the `SimdUnifiedOps` primitives (`simd_sub`,
/// `simd_mul`, `simd_sum`); otherwise the sum is accumulated element by
/// element.
///
/// # Errors
///
/// Returns [`MetricsError::InvalidInput`] when the shapes differ, when the
/// inputs are empty, or when reshaping a standard-layout input fails.
pub fn euclidean_distance<F, S1, S2, D1, D2>(
    x: &ArrayBase<S1, D1>,
    y: &ArrayBase<S2, D2>,
) -> Result<F>
where
    F: Float + NumCast + std::fmt::Debug + SimdUnifiedOps,
    S1: Data<Elem = F>,
    S2: Data<Elem = F>,
    D1: Dimension,
    D2: Dimension,
{
    if x.shape() != y.shape() {
        return Err(MetricsError::InvalidInput(format!(
            "Vectors must have the same shape: {:?} vs {:?}",
            x.shape(),
            y.shape()
        )));
    }
    if x.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty vectors provided".to_string(),
        ));
    }

    // Inputs that are not both in standard layout take the scalar path.
    if !(x.is_standard_layout() && y.is_standard_layout()) {
        let total = x.iter().zip(y.iter()).fold(F::zero(), |acc, (&a, &b)| {
            let delta = a - b;
            acc + delta * delta
        });
        return Ok(total.sqrt());
    }

    // Both inputs are in standard layout: flatten to 1-D and use the
    // element-wise primitives.
    let x_full = x.view();
    let y_full = y.view();
    let x_flat = x_full
        .to_shape(x.len())
        .map_err(|e| MetricsError::InvalidInput(format!("Failed to reshape x: {e}")))?;
    let y_flat = y_full
        .to_shape(y.len())
        .map_err(|e| MetricsError::InvalidInput(format!("Failed to reshape y: {e}")))?;

    let delta = F::simd_sub(&x_flat.view(), &y_flat.view());
    let delta_sq = F::simd_mul(&delta.view(), &delta.view());
    Ok(F::simd_sum(&delta_sq.view()).sqrt())
}
/// Computes the squared Euclidean distance, `sum((x_i - y_i)^2)`, between two
/// arrays of identical shape.
///
/// This is the same reduction as the Euclidean distance without the final
/// square root. Standard-layout inputs are reshaped to one dimension and
/// reduced with the `SimdUnifiedOps` primitives; any other layout is
/// accumulated element by element.
///
/// # Errors
///
/// Returns [`MetricsError::InvalidInput`] when the shapes differ, when the
/// inputs are empty, or when reshaping a standard-layout input fails.
pub fn squared_euclidean_distance<F, S1, S2, D1, D2>(
    x: &ArrayBase<S1, D1>,
    y: &ArrayBase<S2, D2>,
) -> Result<F>
where
    F: Float + NumCast + std::fmt::Debug + SimdUnifiedOps,
    S1: Data<Elem = F>,
    S2: Data<Elem = F>,
    D1: Dimension,
    D2: Dimension,
{
    if x.shape() != y.shape() {
        return Err(MetricsError::InvalidInput(format!(
            "Vectors must have the same shape: {:?} vs {:?}",
            x.shape(),
            y.shape()
        )));
    }
    if x.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty vectors provided".to_string(),
        ));
    }

    let both_standard = x.is_standard_layout() && y.is_standard_layout();
    if both_standard {
        // Flatten both operands to 1-D before handing them to the
        // element-wise primitives.
        let lhs_view = x.view();
        let rhs_view = y.view();
        let lhs_flat = lhs_view
            .to_shape(x.len())
            .map_err(|e| MetricsError::InvalidInput(format!("Failed to reshape x: {e}")))?;
        let rhs_flat = rhs_view
            .to_shape(y.len())
            .map_err(|e| MetricsError::InvalidInput(format!("Failed to reshape y: {e}")))?;

        let residual = F::simd_sub(&lhs_flat.view(), &rhs_flat.view());
        let residual_sq = F::simd_mul(&residual.view(), &residual.view());
        return Ok(F::simd_sum(&residual_sq.view()));
    }

    // Scalar accumulation for inputs that are not both in standard layout.
    let mut acc = F::zero();
    for (&a, &b) in x.iter().zip(y.iter()) {
        let residual = a - b;
        acc = acc + residual * residual;
    }
    Ok(acc)
}
/// Computes the Manhattan (L1) distance, `sum(|x_i - y_i|)`, between two
/// arrays of identical shape.
///
/// Standard-layout inputs are reshaped to one dimension and reduced with the
/// `SimdUnifiedOps` primitives (`simd_sub`, `simd_abs`, `simd_sum`); any other
/// layout is accumulated element by element.
///
/// # Errors
///
/// Returns [`MetricsError::InvalidInput`] when the shapes differ, when the
/// inputs are empty, or when reshaping a standard-layout input fails.
pub fn manhattan_distance<F, S1, S2, D1, D2>(
    x: &ArrayBase<S1, D1>,
    y: &ArrayBase<S2, D2>,
) -> Result<F>
where
    F: Float + NumCast + std::fmt::Debug + SimdUnifiedOps,
    S1: Data<Elem = F>,
    S2: Data<Elem = F>,
    D1: Dimension,
    D2: Dimension,
{
    if x.shape() != y.shape() {
        return Err(MetricsError::InvalidInput(format!(
            "Vectors must have the same shape: {:?} vs {:?}",
            x.shape(),
            y.shape()
        )));
    }
    if x.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty vectors provided".to_string(),
        ));
    }

    // Inputs that are not both in standard layout take the scalar path.
    if !(x.is_standard_layout() && y.is_standard_layout()) {
        let total = x
            .iter()
            .zip(y.iter())
            .fold(F::zero(), |acc, (&a, &b)| acc + (a - b).abs());
        return Ok(total);
    }

    // Flatten both operands to 1-D for the element-wise primitives.
    let x_full = x.view();
    let y_full = y.view();
    let x_flat = x_full
        .to_shape(x.len())
        .map_err(|e| MetricsError::InvalidInput(format!("Failed to reshape x: {e}")))?;
    let y_flat = y_full
        .to_shape(y.len())
        .map_err(|e| MetricsError::InvalidInput(format!("Failed to reshape y: {e}")))?;

    let delta = F::simd_sub(&x_flat.view(), &y_flat.view());
    let magnitude = F::simd_abs(&delta.view());
    Ok(F::simd_sum(&magnitude.view()))
}
/// Computes the Minkowski distance of order `p`,
/// `(sum(|x_i - y_i|^p))^(1/p)`, between two arrays of identical shape.
///
/// Special cases:
/// * `p == 1` delegates to [`manhattan_distance`].
/// * `p == 2` delegates to [`euclidean_distance`].
/// * `p == +inf` returns the largest absolute element-wise difference (the
///   limit of the Minkowski distance as `p` grows), instead of evaluating
///   `sum.powf(1 / inf)`, which does not produce a meaningful distance.
///
/// # Errors
///
/// Returns [`MetricsError::InvalidInput`] when `p` is NaN or smaller than
/// one, when the shapes differ, or when the inputs are empty. Errors from the
/// delegated Manhattan/Euclidean computations are propagated unchanged.
pub fn minkowski_distance<F, S1, S2, D1, D2>(
    x: &ArrayBase<S1, D1>,
    y: &ArrayBase<S2, D2>,
    p: F,
) -> Result<F>
where
    F: Float + NumCast + std::fmt::Debug + SimdUnifiedOps,
    S1: Data<Elem = F>,
    S2: Data<Elem = F>,
    D1: Dimension,
    D2: Dimension,
{
    let one = F::one();
    // `NaN < one` is false, so NaN has to be rejected explicitly.
    if p.is_nan() || p < one {
        return Err(MetricsError::InvalidInput(format!(
            "p must be >= 1.0, got {p:?}"
        )));
    }
    if x.shape() != y.shape() {
        return Err(MetricsError::InvalidInput(format!(
            "Vectors must have the same shape: {:?} vs {:?}",
            x.shape(),
            y.shape()
        )));
    }
    if x.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty vectors provided".to_string(),
        ));
    }

    if p == one {
        return manhattan_distance(x, y);
    }
    // `one + one` is exactly 2 for any float type, so no fallible cast is needed.
    if p == one + one {
        return euclidean_distance(x, y);
    }
    if p.is_infinite() {
        // p = +inf (p >= 1 was verified above): the distance degenerates to
        // the maximum absolute difference.
        let largest = x
            .iter()
            .zip(y.iter())
            .fold(F::zero(), |acc, (&a, &b)| acc.max((a - b).abs()));
        return Ok(largest);
    }

    let powered_sum = x
        .iter()
        .zip(y.iter())
        .fold(F::zero(), |acc, (&a, &b)| acc + (a - b).abs().powf(p));
    Ok(powered_sum.powf(one / p))
}
/// Computes the Chebyshev (L-infinity) distance, `max(|x_i - y_i|)`, between
/// two arrays of identical shape.
///
/// The running maximum starts at zero and is replaced only when an absolute
/// difference compares strictly greater than it.
///
/// # Errors
///
/// Returns [`MetricsError::InvalidInput`] when the shapes differ or when the
/// inputs are empty.
pub fn chebyshev_distance<F, S1, S2, D1, D2>(
    x: &ArrayBase<S1, D1>,
    y: &ArrayBase<S2, D2>,
) -> Result<F>
where
    F: Float + NumCast + std::fmt::Debug,
    S1: Data<Elem = F>,
    S2: Data<Elem = F>,
    D1: Dimension,
    D2: Dimension,
{
    if x.shape() != y.shape() {
        return Err(MetricsError::InvalidInput(format!(
            "Vectors must have the same shape: {:?} vs {:?}",
            x.shape(),
            y.shape()
        )));
    }
    if x.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty vectors provided".to_string(),
        ));
    }

    let largest = x.iter().zip(y.iter()).fold(F::zero(), |best, (&a, &b)| {
        let gap = (a - b).abs();
        if gap > best {
            gap
        } else {
            best
        }
    });
    Ok(largest)
}
/// Computes the cosine similarity, `dot(x, y) / (||x|| * ||y||)`, between two
/// arrays of identical shape.
///
/// Standard-layout inputs are reshaped to one dimension and the dot product
/// and squared norms are computed with the `SimdUnifiedOps` primitives
/// (`simd_mul`, `simd_sum`); any other layout is accumulated element by
/// element in a single pass.
///
/// # Errors
///
/// Returns [`MetricsError::InvalidInput`] when the shapes differ, when the
/// inputs are empty, when reshaping a standard-layout input fails, when the
/// threshold `1e-10` cannot be converted to `F`, or when either norm is below
/// that threshold (treated as a zero vector).
pub fn cosine_similarity<F, S1, S2, D1, D2>(
    x: &ArrayBase<S1, D1>,
    y: &ArrayBase<S2, D2>,
) -> Result<F>
where
    F: Float + NumCast + std::fmt::Debug + SimdUnifiedOps,
    S1: Data<Elem = F>,
    S2: Data<Elem = F>,
    D1: Dimension,
    D2: Dimension,
{
    if x.shape() != y.shape() {
        return Err(MetricsError::InvalidInput(format!(
            "Vectors must have the same shape: {:?} vs {:?}",
            x.shape(),
            y.shape()
        )));
    }
    if x.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty vectors provided".to_string(),
        ));
    }

    let use_flat_path = x.is_standard_layout() && y.is_standard_layout();
    let (inner, x_sq, y_sq) = if use_flat_path {
        let x_full = x.view();
        let y_full = y.view();
        let x_flat = x_full
            .to_shape(x.len())
            .map_err(|e| MetricsError::InvalidInput(format!("Failed to reshape x: {e}")))?;
        let y_flat = y_full
            .to_shape(y.len())
            .map_err(|e| MetricsError::InvalidInput(format!("Failed to reshape y: {e}")))?;
        let xs = x_flat.view();
        let ys = y_flat.view();

        let xy = F::simd_sum(&F::simd_mul(&xs, &ys).view());
        let xx = F::simd_sum(&F::simd_mul(&xs, &xs).view());
        let yy = F::simd_sum(&F::simd_mul(&ys, &ys).view());
        (xy, xx, yy)
    } else {
        // Single scalar pass accumulating the dot product and both squared norms.
        x.iter().zip(y.iter()).fold(
            (F::zero(), F::zero(), F::zero()),
            |(xy, xx, yy), (&a, &b)| (xy + a * b, xx + a * a, yy + b * b),
        )
    };
    let norm_x = x_sq.sqrt();
    let norm_y = y_sq.sqrt();

    // Norms below this threshold are treated as zero vectors.
    let tiny: F = <F as NumCast>::from(1e-10)
        .ok_or_else(|| MetricsError::InvalidInput("Failed to convert epsilon".to_string()))?;
    if norm_x < tiny || norm_y < tiny {
        return Err(MetricsError::InvalidInput(
            "Cannot compute cosine similarity for zero vectors".to_string(),
        ));
    }
    Ok(inner / (norm_x * norm_y))
}
/// Computes the cosine distance, defined here as `1 - cosine_similarity(x, y)`.
///
/// # Errors
///
/// Propagates any error returned by [`cosine_similarity`].
pub fn cosine_distance<F, S1, S2, D1, D2>(x: &ArrayBase<S1, D1>, y: &ArrayBase<S2, D2>) -> Result<F>
where
    F: Float + NumCast + std::fmt::Debug + SimdUnifiedOps,
    S1: Data<Elem = F>,
    S2: Data<Elem = F>,
    D1: Dimension,
    D2: Dimension,
{
    cosine_similarity(x, y).map(|sim: F| F::one() - sim)
}
/// Computes the Mahalanobis distance `sqrt(d^T * cov_inv * d)` where
/// `d = x - y`.
///
/// `cov_inv` is used as given; it is expected to be the *inverse* of the
/// covariance matrix and must be `n x n` for vectors of length `n`.
///
/// # Errors
///
/// Returns [`MetricsError::InvalidInput`] when
/// * `x` and `y` have different lengths,
/// * the vectors are empty (consistent with the other metrics in this file),
/// * `cov_inv` is not `n x n`, or
/// * the quadratic form evaluates to a negative number, which indicates that
///   `cov_inv` is not positive semi-definite.
pub fn mahalanobis_distance<F, S1, S2, S3>(
    x: &ArrayBase<S1, Ix1>,
    y: &ArrayBase<S2, Ix1>,
    cov_inv: &ArrayBase<S3, Ix2>,
) -> Result<F>
where
    F: Float + NumCast + std::fmt::Debug,
    S1: Data<Elem = F>,
    S2: Data<Elem = F>,
    S3: Data<Elem = F>,
{
    let n = x.len();
    if y.len() != n {
        return Err(MetricsError::InvalidInput(format!(
            "Vectors must have the same length: {} vs {}",
            n,
            y.len()
        )));
    }
    if n == 0 {
        return Err(MetricsError::InvalidInput(
            "Empty vectors provided".to_string(),
        ));
    }
    let (n_rows, n_cols) = cov_inv.dim();
    if n_rows != n || n_cols != n {
        return Err(MetricsError::InvalidInput(format!(
            "Covariance matrix must be {n}x{n}, got {n_rows}x{n_cols}"
        )));
    }

    let diff: Array1<F> = x.iter().zip(y.iter()).map(|(&xi, &yi)| xi - yi).collect();

    // Quadratic form d^T * (cov_inv * d): each row of `cov_inv` is dotted
    // with `d` and weighted by the matching component of `d`, so no
    // intermediate vector is allocated.
    let quad_form = cov_inv
        .outer_iter()
        .zip(diff.iter())
        .fold(F::zero(), |acc, (row, &d_i)| {
            let row_dot = row
                .iter()
                .zip(diff.iter())
                .fold(F::zero(), |s, (&c, &d_j)| s + c * d_j);
            acc + d_i * row_dot
        });

    if quad_form < F::zero() {
        return Err(MetricsError::InvalidInput(
            "Mahalanobis distance squared is negative (invalid covariance matrix?)".to_string(),
        ));
    }
    Ok(quad_form.sqrt())
}
/// Computes the normalized Hamming distance: the fraction of positions at
/// which `x` and `y` hold different values (`mismatches / len`).
///
/// The result lies in `[0.0, 1.0]`.
///
/// # Errors
///
/// Returns [`MetricsError::InvalidInput`] when the shapes differ or when the
/// inputs are empty.
pub fn hamming_distance<T, S1, S2, D1, D2>(
    x: &ArrayBase<S1, D1>,
    y: &ArrayBase<S2, D2>,
) -> Result<f64>
where
    T: PartialEq + Copy,
    S1: Data<Elem = T>,
    S2: Data<Elem = T>,
    D1: Dimension,
    D2: Dimension,
{
    if x.shape() != y.shape() {
        return Err(MetricsError::InvalidInput(format!(
            "Vectors must have the same shape: {:?} vs {:?}",
            x.shape(),
            y.shape()
        )));
    }
    let n = x.len();
    if n == 0 {
        return Err(MetricsError::InvalidInput(
            "Empty vectors provided".to_string(),
        ));
    }

    // `count()` yields a `usize`, so the tally cannot overflow the way an
    // untyped integer counter (inferred as `i32`) could on very large inputs.
    let mismatches = x.iter().zip(y.iter()).filter(|(a, b)| a != b).count();
    Ok(mismatches as f64 / n as f64)
}
/// Computes the Jaccard similarity between two arrays interpreted as sets.
///
/// An element counts as "present" when it differs from `T::default()`. The
/// result is `|intersection| / |union|`, where the intersection counts
/// positions present in both arrays and the union counts positions present in
/// at least one. If no position is present in either array the similarity is
/// defined as `1.0`.
///
/// # Errors
///
/// Returns [`MetricsError::InvalidInput`] when the shapes differ or when the
/// inputs are empty.
pub fn jaccard_similarity<T, S1, S2, D1, D2>(
    x: &ArrayBase<S1, D1>,
    y: &ArrayBase<S2, D2>,
) -> Result<f64>
where
    T: PartialEq + Copy + Default,
    S1: Data<Elem = T>,
    S2: Data<Elem = T>,
    D1: Dimension,
    D2: Dimension,
{
    if x.shape() != y.shape() {
        return Err(MetricsError::InvalidInput(format!(
            "Vectors must have the same shape: {:?} vs {:?}",
            x.shape(),
            y.shape()
        )));
    }
    if x.is_empty() {
        return Err(MetricsError::InvalidInput(
            "Empty vectors provided".to_string(),
        ));
    }

    let absent = T::default();
    // Counters are `usize` so they can hold any possible element count; an
    // untyped integer literal would be inferred as `i32` and could overflow.
    let (intersection, union) = x.iter().zip(y.iter()).fold(
        (0usize, 0usize),
        |(both, either), (&a, &b)| {
            let a_present = a != absent;
            let b_present = b != absent;
            (
                both + usize::from(a_present && b_present),
                either + usize::from(a_present || b_present),
            )
        },
    );

    // Two arrays with nothing present are treated as identical sets.
    if union == 0 {
        return Ok(1.0);
    }
    Ok(intersection as f64 / union as f64)
}
/// Computes the Jaccard distance, defined here as
/// `1.0 - jaccard_similarity(x, y)`.
///
/// # Errors
///
/// Propagates any error returned by [`jaccard_similarity`].
pub fn jaccard_distance<T, S1, S2, D1, D2>(
    x: &ArrayBase<S1, D1>,
    y: &ArrayBase<S2, D2>,
) -> Result<f64>
where
    T: PartialEq + Copy + Default,
    S1: Data<Elem = T>,
    S2: Data<Elem = T>,
    D1: Dimension,
    D2: Dimension,
{
    jaccard_similarity(x, y).map(|sim| 1.0 - sim)
}
/// Computes the Pearson correlation coefficient between two arrays of
/// identical shape.
///
/// The computation uses two passes: the first obtains both means, the second
/// accumulates the co-deviation sum and the two squared-deviation sums. The
/// result is `sum(dx * dy) / (sqrt(sum(dx^2)) * sqrt(sum(dy^2)))`.
///
/// # Errors
///
/// Returns [`MetricsError::InvalidInput`] when the shapes differ, when the
/// inputs are empty, when the element count or the threshold `1e-10` cannot
/// be converted to `F`, or when either squared-deviation sum is below that
/// threshold (treated as a constant vector).
pub fn pearson_correlation<F, S1, S2, D1, D2>(
    x: &ArrayBase<S1, D1>,
    y: &ArrayBase<S2, D2>,
) -> Result<F>
where
    F: Float + NumCast + std::fmt::Debug,
    S1: Data<Elem = F>,
    S2: Data<Elem = F>,
    D1: Dimension,
    D2: Dimension,
{
    if x.shape() != y.shape() {
        return Err(MetricsError::InvalidInput(format!(
            "Vectors must have the same shape: {:?} vs {:?}",
            x.shape(),
            y.shape()
        )));
    }
    let n = x.len();
    if n == 0 {
        return Err(MetricsError::InvalidInput(
            "Empty vectors provided".to_string(),
        ));
    }
    let count: F = <F as NumCast>::from(n)
        .ok_or_else(|| MetricsError::InvalidInput("Failed to convert n".to_string()))?;

    // Pass 1: plain sums, turned into means.
    let (total_x, total_y) = x
        .iter()
        .zip(y.iter())
        .fold((F::zero(), F::zero()), |(sx, sy), (&a, &b)| (sx + a, sy + b));
    let mean_x = total_x / count;
    let mean_y = total_y / count;

    // Pass 2: co-deviation and squared deviations around the means.
    let (co_dev, dev_x_sq, dev_y_sq) = x.iter().zip(y.iter()).fold(
        (F::zero(), F::zero(), F::zero()),
        |(cov, vx, vy), (&a, &b)| {
            let dx = a - mean_x;
            let dy = b - mean_y;
            (cov + dx * dy, vx + dx * dx, vy + dy * dy)
        },
    );

    // Squared-deviation sums below this threshold are treated as constant input.
    let tiny: F = <F as NumCast>::from(1e-10)
        .ok_or_else(|| MetricsError::InvalidInput("Failed to convert epsilon".to_string()))?;
    if dev_x_sq < tiny || dev_y_sq < tiny {
        return Err(MetricsError::InvalidInput(
            "Cannot compute correlation for constant vectors".to_string(),
        ));
    }

    let scale = dev_x_sq.sqrt() * dev_y_sq.sqrt();
    Ok(co_dev / scale)
}
/// Computes a correlation-based distance, defined here as
/// `1 - |pearson_correlation(x, y)|`.
///
/// Because the absolute value is taken, perfectly correlated and perfectly
/// anti-correlated inputs both yield a distance of zero.
///
/// # Errors
///
/// Propagates any error returned by [`pearson_correlation`].
pub fn pearson_distance<F, S1, S2, D1, D2>(
    x: &ArrayBase<S1, D1>,
    y: &ArrayBase<S2, D2>,
) -> Result<F>
where
    F: Float + NumCast + std::fmt::Debug,
    S1: Data<Elem = F>,
    S2: Data<Elem = F>,
    D1: Dimension,
    D2: Dimension,
{
    pearson_correlation(x, y).map(|r: F| F::one() - r.abs())
}
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use scirs2_core::ndarray::array;

    // The (0, 0) -> (3, 4) pair is a 3-4-5 right triangle, reused by several tests.

    #[test]
    fn test_euclidean_distance() {
        let origin = array![0.0, 0.0];
        let point = array![3.0, 4.0];
        let got: f64 = euclidean_distance(&origin, &point).expect("Failed to compute distance");
        assert_relative_eq!(got, 5.0, epsilon = 1e-10);
    }

    #[test]
    fn test_manhattan_distance() {
        let origin = array![0.0, 0.0];
        let point = array![3.0, 4.0];
        let got: f64 = manhattan_distance(&origin, &point).expect("Failed to compute distance");
        assert_relative_eq!(got, 7.0, epsilon = 1e-10);
    }

    #[test]
    fn test_cosine_similarity_identical() {
        let a = array![1.0, 2.0, 3.0];
        let b = array![1.0, 2.0, 3.0];
        let got: f64 = cosine_similarity(&a, &b).expect("Failed to compute similarity");
        assert_relative_eq!(got, 1.0, epsilon = 1e-10);
    }

    #[test]
    fn test_cosine_similarity_orthogonal() {
        let a = array![1.0, 0.0];
        let b = array![0.0, 1.0];
        let got: f64 = cosine_similarity(&a, &b).expect("Failed to compute similarity");
        assert_relative_eq!(got, 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_hamming_distance() {
        let a = array![1, 0, 1, 0];
        let b = array![1, 1, 0, 0];
        let got = hamming_distance(&a, &b).expect("Failed to compute distance");
        // Positions 1 and 2 differ: 2 mismatches out of 4 elements.
        assert_relative_eq!(got, 0.5, epsilon = 1e-10);
    }

    #[test]
    fn test_jaccard_similarity() {
        let a = array![1, 1, 0, 0];
        let b = array![1, 0, 1, 0];
        let got = jaccard_similarity(&a, &b).expect("Failed to compute similarity");
        // One position is non-zero in both, three are non-zero in at least one.
        assert_relative_eq!(got, 1.0 / 3.0, epsilon = 1e-10);
    }

    #[test]
    fn test_minkowski_distance_p1() {
        let origin = array![0.0, 0.0];
        let point = array![3.0, 4.0];
        let got: f64 =
            minkowski_distance(&origin, &point, 1.0).expect("Failed to compute distance");
        let reference: f64 =
            manhattan_distance(&origin, &point).expect("Failed to compute distance");
        assert_relative_eq!(got, reference, epsilon = 1e-10);
    }

    #[test]
    fn test_minkowski_distance_p2() {
        let origin = array![0.0, 0.0];
        let point = array![3.0, 4.0];
        let got: f64 =
            minkowski_distance(&origin, &point, 2.0).expect("Failed to compute distance");
        let reference: f64 =
            euclidean_distance(&origin, &point).expect("Failed to compute distance");
        assert_relative_eq!(got, reference, epsilon = 1e-10);
    }

    #[test]
    fn test_chebyshev_distance() {
        let origin = array![0.0, 0.0];
        let point = array![3.0, 4.0];
        let got: f64 = chebyshev_distance(&origin, &point).expect("Failed to compute distance");
        // Largest absolute difference is max(3, 4).
        assert_relative_eq!(got, 4.0, epsilon = 1e-10);
    }

    #[test]
    fn test_mahalanobis_identity() {
        use scirs2_core::ndarray::Array2;

        let origin = array![0.0, 0.0];
        let point = array![3.0, 4.0];
        // With the identity as inverse covariance the result must equal the
        // Euclidean distance.
        let identity = Array2::eye(2);
        let got: f64 =
            mahalanobis_distance(&origin, &point, &identity).expect("Failed to compute distance");
        let reference: f64 =
            euclidean_distance(&origin, &point).expect("Failed to compute distance");
        assert_relative_eq!(got, reference, epsilon = 1e-10);
    }

    #[test]
    fn test_pearson_correlation_perfect() {
        let a = array![1.0, 2.0, 3.0, 4.0, 5.0];
        // Exactly 2 * a, so the correlation is 1.
        let b = array![2.0, 4.0, 6.0, 8.0, 10.0];
        let got: f64 = pearson_correlation(&a, &b).expect("Failed to compute correlation");
        assert_relative_eq!(got, 1.0, epsilon = 1e-10);
    }

    #[test]
    fn test_cosine_zero_vector_error() {
        let zeros = array![0.0, 0.0];
        let other = array![1.0, 2.0];
        let outcome: std::result::Result<f64, _> = cosine_similarity(&zeros, &other);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_different_shapes_error() {
        let short = array![1.0, 2.0];
        let long = array![1.0, 2.0, 3.0];
        let outcome: std::result::Result<f64, _> = euclidean_distance(&short, &long);
        assert!(outcome.is_err());
    }
}