use crate::array::Array;
use crate::error::{NumRs2Error, Result};
use num_traits::{Float, One, Zero};
use std::fmt::Debug;
#[inline]
fn cast_f64<T: Float>(val: f64) -> T {
T::from(val).unwrap_or_else(T::zero)
}
#[derive(Debug, Clone, Copy)]
pub enum DistanceMetric {
Euclidean,
Manhattan,
Chebyshev,
Minkowski(f64),
Cosine,
Correlation,
Hamming,
}
pub fn euclidean<T>(x: &Array<T>, y: &Array<T>) -> Result<T>
where
T: Float + Debug,
{
validate_vectors(x, y)?;
let x_vec = x.to_vec();
let y_vec = y.to_vec();
let sum_sq: T = x_vec
.iter()
.zip(y_vec.iter())
.map(|(&xi, &yi)| {
let diff = xi - yi;
diff * diff
})
.fold(T::zero(), |acc, x| acc + x);
Ok(sum_sq.sqrt())
}
pub fn manhattan<T>(x: &Array<T>, y: &Array<T>) -> Result<T>
where
T: Float + Debug,
{
validate_vectors(x, y)?;
let x_vec = x.to_vec();
let y_vec = y.to_vec();
let sum: T = x_vec
.iter()
.zip(y_vec.iter())
.map(|(&xi, &yi)| (xi - yi).abs())
.fold(T::zero(), |acc, x| acc + x);
Ok(sum)
}
pub fn chebyshev<T>(x: &Array<T>, y: &Array<T>) -> Result<T>
where
T: Float + Debug,
{
validate_vectors(x, y)?;
let x_vec = x.to_vec();
let y_vec = y.to_vec();
let max_diff = x_vec
.iter()
.zip(y_vec.iter())
.map(|(&xi, &yi)| (xi - yi).abs())
.fold(T::zero(), |acc, x| if x > acc { x } else { acc });
Ok(max_diff)
}
pub fn minkowski<T>(x: &Array<T>, y: &Array<T>, p: f64) -> Result<T>
where
T: Float + Debug,
{
if p < 1.0 {
return Err(NumRs2Error::ValueError(
"Minkowski p must be >= 1".to_string(),
));
}
validate_vectors(x, y)?;
let x_vec = x.to_vec();
let y_vec = y.to_vec();
let p_t = cast_f64(p);
let inv_p = cast_f64(1.0 / p);
let sum: T = x_vec
.iter()
.zip(y_vec.iter())
.map(|(&xi, &yi)| (xi - yi).abs().powf(p_t))
.fold(T::zero(), |acc, x| acc + x);
Ok(sum.powf(inv_p))
}
pub fn cosine<T>(x: &Array<T>, y: &Array<T>) -> Result<T>
where
T: Float + Debug,
{
validate_vectors(x, y)?;
let x_vec = x.to_vec();
let y_vec = y.to_vec();
let dot: T = x_vec
.iter()
.zip(y_vec.iter())
.map(|(&xi, &yi)| xi * yi)
.fold(T::zero(), |acc, x| acc + x);
let norm_x: T = x_vec
.iter()
.map(|&xi| xi * xi)
.fold(T::zero(), |acc, x| acc + x)
.sqrt();
let norm_y: T = y_vec
.iter()
.map(|&yi| yi * yi)
.fold(T::zero(), |acc, x| acc + x)
.sqrt();
if norm_x == T::zero() || norm_y == T::zero() {
return Err(NumRs2Error::ValueError(
"Cosine distance undefined for zero vectors".to_string(),
));
}
let similarity = dot / (norm_x * norm_y);
Ok(T::one() - similarity)
}
pub fn correlation<T>(x: &Array<T>, y: &Array<T>) -> Result<T>
where
T: Float + Debug,
{
validate_vectors(x, y)?;
let x_vec = x.to_vec();
let y_vec = y.to_vec();
let n = T::from(x_vec.len()).unwrap_or_else(T::zero);
let mean_x: T = x_vec.iter().copied().fold(T::zero(), |acc, x| acc + x) / n;
let mean_y: T = y_vec.iter().copied().fold(T::zero(), |acc, x| acc + x) / n;
let x_centered: Vec<T> = x_vec.iter().map(|&xi| xi - mean_x).collect();
let y_centered: Vec<T> = y_vec.iter().map(|&yi| yi - mean_y).collect();
let cov: T = x_centered
.iter()
.zip(y_centered.iter())
.map(|(&xi, &yi)| xi * yi)
.fold(T::zero(), |acc, x| acc + x);
let std_x: T = x_centered
.iter()
.map(|&xi| xi * xi)
.fold(T::zero(), |acc, x| acc + x)
.sqrt();
let std_y: T = y_centered
.iter()
.map(|&yi| yi * yi)
.fold(T::zero(), |acc, x| acc + x)
.sqrt();
if std_x == T::zero() || std_y == T::zero() {
return Err(NumRs2Error::ValueError(
"Correlation undefined for constant vectors".to_string(),
));
}
let corr = cov / (std_x * std_y);
Ok(T::one() - corr)
}
pub fn hamming<T>(x: &Array<T>, y: &Array<T>) -> Result<T>
where
T: Float + Debug,
{
hamming_threshold(x, y, cast_f64(1e-10))
}
pub fn hamming_threshold<T>(x: &Array<T>, y: &Array<T>, threshold: T) -> Result<T>
where
T: Float + Debug,
{
validate_vectors(x, y)?;
let x_vec = x.to_vec();
let y_vec = y.to_vec();
let n_different = x_vec
.iter()
.zip(y_vec.iter())
.filter(|(&xi, &yi)| (xi - yi).abs() > threshold)
.count();
let n = T::from(x_vec.len()).unwrap_or_else(T::zero);
Ok(T::from(n_different).unwrap_or_else(T::zero) / n)
}
pub fn pdist<T>(x: &Array<T>, metric: DistanceMetric) -> Result<Array<T>>
where
T: Float + Debug,
{
if x.shape().len() != 2 {
return Err(NumRs2Error::DimensionMismatch(
"Input must be 2D array".to_string(),
));
}
let n_samples = x.shape()[0];
let n_features = x.shape()[1];
let n_dists = n_samples * (n_samples - 1) / 2;
let mut distances = Vec::with_capacity(n_dists);
for i in 0..n_samples {
for j in (i + 1)..n_samples {
let mut xi = Vec::with_capacity(n_features);
let mut xj = Vec::with_capacity(n_features);
for k in 0..n_features {
xi.push(x.get(&[i, k])?);
xj.push(x.get(&[j, k])?);
}
let xi_arr = Array::from_vec(xi);
let xj_arr = Array::from_vec(xj);
let dist = compute_distance(&xi_arr, &xj_arr, metric)?;
distances.push(dist);
}
}
Ok(Array::from_vec(distances))
}
pub fn cdist<T>(xa: &Array<T>, xb: &Array<T>, metric: DistanceMetric) -> Result<Array<T>>
where
T: Float + Debug,
{
if xa.shape().len() != 2 || xb.shape().len() != 2 {
return Err(NumRs2Error::DimensionMismatch(
"Inputs must be 2D arrays".to_string(),
));
}
let n_features_a = xa.shape()[1];
let n_features_b = xb.shape()[1];
if n_features_a != n_features_b {
return Err(NumRs2Error::ShapeMismatch {
expected: vec![xa.shape()[0], n_features_a],
actual: xb.shape(),
});
}
let n_samples_a = xa.shape()[0];
let n_samples_b = xb.shape()[0];
let n_features = n_features_a;
let mut distances = Vec::with_capacity(n_samples_a * n_samples_b);
for i in 0..n_samples_a {
for j in 0..n_samples_b {
let mut xi = Vec::with_capacity(n_features);
let mut xj = Vec::with_capacity(n_features);
for k in 0..n_features {
xi.push(xa.get(&[i, k])?);
xj.push(xb.get(&[j, k])?);
}
let xi_arr = Array::from_vec(xi);
let xj_arr = Array::from_vec(xj);
let dist = compute_distance(&xi_arr, &xj_arr, metric)?;
distances.push(dist);
}
}
Ok(Array::from_vec(distances).reshape(&[n_samples_a, n_samples_b]))
}
fn validate_vectors<T: Float + Debug>(x: &Array<T>, y: &Array<T>) -> Result<()> {
if x.shape().len() != 1 || y.shape().len() != 1 {
return Err(NumRs2Error::DimensionMismatch(
"Inputs must be 1D arrays".to_string(),
));
}
if x.size() != y.size() {
return Err(NumRs2Error::ShapeMismatch {
expected: x.shape(),
actual: y.shape(),
});
}
Ok(())
}
fn compute_distance<T>(x: &Array<T>, y: &Array<T>, metric: DistanceMetric) -> Result<T>
where
T: Float + Debug,
{
match metric {
DistanceMetric::Euclidean => euclidean(x, y),
DistanceMetric::Manhattan => manhattan(x, y),
DistanceMetric::Chebyshev => chebyshev(x, y),
DistanceMetric::Minkowski(p) => minkowski(x, y, p),
DistanceMetric::Cosine => cosine(x, y),
DistanceMetric::Correlation => correlation(x, y),
DistanceMetric::Hamming => hamming(x, y),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_euclidean_distance() {
let x = Array::from_vec(vec![0.0, 0.0]);
let y = Array::from_vec(vec![3.0, 4.0]);
let dist = euclidean(&x, &y).expect("euclidean distance should succeed");
assert!((dist - 5.0).abs() < 1e-10, "Expected 5.0, got {}", dist);
}
#[test]
fn test_manhattan_distance() {
let x = Array::from_vec(vec![1.0, 2.0, 3.0]);
let y = Array::from_vec(vec![4.0, 5.0, 6.0]);
let dist = manhattan(&x, &y).expect("manhattan distance should succeed");
assert!((dist - 9.0).abs() < 1e-10);
}
#[test]
fn test_chebyshev_distance() {
let x = Array::from_vec(vec![1.0, 2.0, 3.0]);
let y = Array::from_vec(vec![4.0, 6.0, 5.0]);
let dist = chebyshev(&x, &y).expect("chebyshev distance should succeed");
assert!((dist - 4.0).abs() < 1e-10); }
#[test]
fn test_minkowski_distance() {
let x = Array::from_vec(vec![1.0, 2.0, 3.0]);
let y = Array::from_vec(vec![4.0, 5.0, 6.0]);
let l1 = minkowski(&x, &y, 1.0).expect("minkowski p=1 should succeed");
let manhattan_dist = manhattan(&x, &y).expect("manhattan should succeed");
assert!((l1 - manhattan_dist).abs() < 1e-10);
let l2 = minkowski(&x, &y, 2.0).expect("minkowski p=2 should succeed");
let euclidean_dist = euclidean(&x, &y).expect("euclidean should succeed");
assert!((l2 - euclidean_dist).abs() < 1e-10);
}
#[test]
fn test_cosine_distance() {
let x = Array::from_vec(vec![1.0, 0.0, 0.0]);
let y = Array::from_vec(vec![0.0, 1.0, 0.0]);
let dist = cosine(&x, &y).expect("cosine distance should succeed");
assert!((dist - 1.0).abs() < 1e-10);
let x2 = Array::from_vec(vec![1.0, 2.0, 3.0]);
let y2 = Array::from_vec(vec![2.0, 4.0, 6.0]);
let dist2 = cosine(&x2, &y2).expect("cosine distance for parallel vectors should succeed");
assert!(dist2.abs() < 1e-10); }
#[test]
fn test_correlation_distance() {
let x = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
let y = Array::from_vec(vec![2.0, 4.0, 6.0, 8.0, 10.0]);
let dist = correlation(&x, &y).expect("correlation distance should succeed");
assert!(dist.abs() < 1e-10); }
#[test]
fn test_hamming_distance() {
let x = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0]);
let y = Array::from_vec(vec![1.0, 2.0, 5.0, 6.0]);
let dist = hamming(&x, &y).expect("hamming distance should succeed");
assert!((dist - 0.5).abs() < 1e-10); }
#[test]
fn test_pdist() {
let points = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).reshape(&[3, 2]);
let dists = pdist(&points, DistanceMetric::Euclidean).expect("pdist should succeed");
assert_eq!(dists.size(), 3);
for i in 0..dists.size() {
assert!(dists.get(&[i]).expect("get element should succeed") > 0.0);
}
}
#[test]
fn test_cdist() {
let xa = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0]).reshape(&[2, 2]);
let xb = Array::from_vec(vec![5.0, 6.0, 7.0, 8.0]).reshape(&[2, 2]);
let dists = cdist(&xa, &xb, DistanceMetric::Euclidean).expect("cdist should succeed");
assert_eq!(dists.shape(), vec![2, 2]);
for i in 0..2 {
for j in 0..2 {
assert!(dists.get(&[i, j]).expect("get element should succeed") > 0.0);
}
}
}
#[test]
fn test_distance_validation() {
let x = Array::from_vec(vec![1.0, 2.0]);
let y = Array::from_vec(vec![1.0, 2.0, 3.0]);
assert!(euclidean(&x, &y).is_err());
}
#[test]
fn test_zero_vector_cosine() {
let x = Array::from_vec(vec![0.0, 0.0, 0.0]);
let y = Array::from_vec(vec![1.0, 2.0, 3.0]);
assert!(cosine(&x, &y).is_err());
}
}