use crate::error::{CoreError, CoreResult};
use crate::ndarray_ext::Array2;
pub fn hamming_distance(s1: &[u8], s2: &[u8]) -> CoreResult<usize> {
if s1.len() != s2.len() {
return Err(CoreError::DimensionError(crate::error_context!(format!(
"hamming_distance: sequences must have equal length ({} vs {})",
s1.len(),
s2.len()
))));
}
let dist = s1
.iter()
.zip(s2.iter())
.filter(|(&a, &b)| !a.eq_ignore_ascii_case(&b))
.count();
Ok(dist)
}
pub fn jukes_cantor_distance(p: f64) -> CoreResult<f64> {
if p.is_nan() || p < 0.0 || p >= 0.75 {
return Err(CoreError::DomainError(crate::error_context!(format!(
"jukes_cantor_distance: p must be in [0, 0.75), got {p}"
))));
}
if p == 0.0 {
return Ok(0.0);
}
let d = -0.75 * (1.0 - (4.0 / 3.0) * p).ln();
Ok(d)
}
pub fn distance_matrix(sequences: &[&[u8]]) -> CoreResult<Array2<f64>> {
let n = sequences.len();
if n == 0 {
return Err(CoreError::DimensionError(crate::error_context!(
"distance_matrix: sequences slice must not be empty"
)));
}
let seq_len = sequences[0].len();
for (idx, seq) in sequences.iter().enumerate() {
if seq.len() != seq_len {
return Err(CoreError::DimensionError(crate::error_context!(format!(
"distance_matrix: sequence {idx} has length {} but expected {seq_len}",
seq.len()
))));
}
}
let mut mat = Array2::<f64>::zeros((n, n));
for i in 0..n {
for j in (i + 1)..n {
let h = hamming_distance(sequences[i], sequences[j])?;
let p = if seq_len == 0 {
0.0
} else {
h as f64 / seq_len as f64
};
let d = jukes_cantor_distance(p).unwrap_or(f64::INFINITY);
mat[[i, j]] = d;
mat[[j, i]] = d;
}
}
Ok(mat)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_hamming_identical() {
assert_eq!(
hamming_distance(b"ATGC", b"ATGC").expect("hamming failed"),
0
);
}
#[test]
fn test_hamming_all_different() {
assert_eq!(
hamming_distance(b"ATGC", b"TACG").expect("hamming failed"),
4
);
}
#[test]
fn test_hamming_two_differences() {
assert_eq!(
hamming_distance(b"ATGC", b"TTGT").expect("hamming failed"),
2
);
}
#[test]
fn test_hamming_length_mismatch() {
let result = hamming_distance(b"ATG", b"AT");
assert!(result.is_err());
}
#[test]
fn test_hamming_empty() {
assert_eq!(hamming_distance(b"", b"").expect("hamming failed"), 0);
}
#[test]
fn test_hamming_case_insensitive() {
assert_eq!(
hamming_distance(b"atgc", b"ATGC").expect("hamming failed"),
0
);
}
#[test]
fn test_jc_zero_p() {
assert!((jukes_cantor_distance(0.0).expect("jc failed") - 0.0).abs() < 1e-10);
}
#[test]
fn test_jc_small_p() {
let d = jukes_cantor_distance(0.1).expect("jc failed");
assert!((d - 0.107326).abs() < 1e-4, "got {d}");
}
#[test]
fn test_jc_p_above_threshold_errors() {
let result = jukes_cantor_distance(0.75);
assert!(result.is_err());
}
#[test]
fn test_jc_negative_p_errors() {
let result = jukes_cantor_distance(-0.1);
assert!(result.is_err());
}
#[test]
fn test_jc_nan_p_errors() {
let result = jukes_cantor_distance(f64::NAN);
assert!(result.is_err());
}
#[test]
fn test_jc_monotone_increasing() {
let d1 = jukes_cantor_distance(0.1).expect("jc failed");
let d2 = jukes_cantor_distance(0.2).expect("jc failed");
let d3 = jukes_cantor_distance(0.5).expect("jc failed");
assert!(d1 < d2, "JC distance should be monotonically increasing");
assert!(d2 < d3, "JC distance should be monotonically increasing");
}
#[test]
fn test_distance_matrix_shape() {
let seqs: &[&[u8]] = &[b"ATGC", b"ATGC", b"TTGT"];
let mat = distance_matrix(seqs).expect("distance_matrix failed");
assert_eq!(mat.shape(), &[3, 3]);
}
#[test]
fn test_distance_matrix_zero_diagonal() {
let seqs: &[&[u8]] = &[b"ATGC", b"TTGT", b"GGCC"];
let mat = distance_matrix(seqs).expect("distance_matrix failed");
for i in 0..3 {
assert_eq!(mat[[i, i]], 0.0, "diagonal must be zero at ({i},{i})");
}
}
#[test]
fn test_distance_matrix_symmetric() {
let seqs: &[&[u8]] = &[b"ATGC", b"TTGT", b"GGCC"];
let mat = distance_matrix(seqs).expect("distance_matrix failed");
for i in 0..3 {
for j in 0..3 {
let a = mat[[i, j]];
let b = mat[[j, i]];
let symmetric = if a.is_infinite() && b.is_infinite() {
a.signum() == b.signum()
} else {
(a - b).abs() < 1e-15
};
assert!(symmetric, "matrix must be symmetric at ({i},{j})");
}
}
}
#[test]
fn test_distance_matrix_identical_sequences() {
let seqs: &[&[u8]] = &[b"ATGC", b"ATGC"];
let mat = distance_matrix(seqs).expect("distance_matrix failed");
assert_eq!(mat[[0, 1]], 0.0);
assert_eq!(mat[[1, 0]], 0.0);
}
#[test]
fn test_distance_matrix_length_mismatch_errors() {
let seqs: &[&[u8]] = &[b"ATGC", b"ATG"];
let result = distance_matrix(seqs);
assert!(result.is_err());
}
#[test]
fn test_distance_matrix_empty_errors() {
let seqs: &[&[u8]] = &[];
let result = distance_matrix(seqs);
assert!(result.is_err());
}
}