use crate::utils::TextSlice;
/// Computes the Hamming distance between `alpha` and `beta`: the number of
/// positions at which the two texts hold different elements.
///
/// # Panics
///
/// Panics if `alpha` and `beta` do not have the same length.
pub fn hamming(alpha: TextSlice<'_>, beta: TextSlice<'_>) -> u64 {
    let (len_alpha, len_beta) = (alpha.len(), beta.len());
    assert_eq!(
        len_alpha, len_beta,
        "hamming distance cannot be calculated for texts of different length ({}!={})",
        len_alpha, len_beta
    );
    // Walk both texts in lockstep and count the positions that disagree.
    alpha
        .iter()
        .zip(beta)
        .filter(|(lhs, rhs)| lhs != rhs)
        .count() as u64
}
/// Computes the Levenshtein (edit) distance between `alpha` and `beta` by
/// delegating to `editdistancek::edit_distance`.
///
/// The value produced by the backend is converted to `u32` with an `as` cast.
pub fn levenshtein(alpha: TextSlice<'_>, beta: TextSlice<'_>) -> u32 {
    // NOTE(review): `as` never panics; if the backend could ever report a value
    // above `u32::MAX` the cast would silently wrap — presumably unreachable for
    // realistic text sizes, confirm against the backend's return type.
    editdistancek::edit_distance(alpha, beta) as u32
}
/// Distance functions that delegate to the `triple_accel` and `editdistancek`
/// crates (the SIMD variants, going by the module name).
pub mod simd {
    use crate::utils::TextSlice;
    use std::cmp::{max, min};

    /// Computes the Hamming distance between `alpha` and `beta` via
    /// `triple_accel::hamming`.
    ///
    /// # Panics
    ///
    /// Panics if `alpha` and `beta` do not have the same length.
    pub fn hamming(alpha: TextSlice<'_>, beta: TextSlice<'_>) -> u64 {
        let (len_alpha, len_beta) = (alpha.len(), beta.len());
        assert_eq!(
            len_alpha, len_beta,
            "simd hamming distance cannot be calculated for texts of different length ({}!={})",
            len_alpha, len_beta
        );
        triple_accel::hamming(alpha, beta) as u64
    }

    /// Computes the Levenshtein distance between `alpha` and `beta` via
    /// `triple_accel::levenshtein_exp`.
    pub fn levenshtein(alpha: TextSlice<'_>, beta: TextSlice<'_>) -> u32 {
        triple_accel::levenshtein_exp(alpha, beta)
    }

    /// Computes the Levenshtein distance between `alpha` and `beta` with an
    /// upper bound `k`, via `editdistancek::edit_distance_bounded`.
    ///
    /// Returns the backend's result converted to `u32`, or `None` when the
    /// backend yields `None` (presumably when the distance exceeds the bound —
    /// confirm against the `editdistancek` documentation).
    pub fn bounded_levenshtein(alpha: TextSlice<'_>, beta: TextSlice<'_>, k: u32) -> Option<u32> {
        // The bound handed to the backend is capped at the length of the longer
        // text, so a very large `k` is clamped rather than passed through as-is.
        let longest = max(alpha.len(), beta.len());
        let bound = min(k as usize, longest);
        let distance = editdistancek::edit_distance_bounded(alpha, beta, bound)?;
        Some(distance as u32)
    }
}
/// Unit tests for the scalar distance functions and their `simd` counterparts.
#[cfg(test)]
mod tests {
    use super::*;

    // `u32::MAX` below is the associated constant of the primitive type, so the
    // legacy `std::u32` module does not need to be imported.

    /// Two equally long texts that differ in five positions.
    #[test]
    fn test_hamming_dist_good() {
        let x = b"GTCTGCATGCG";
        let y = b"TTTAGCTAGCG";
        assert_eq!(hamming(x, y), 5);
    }

    /// Same inputs as the scalar test, routed through `simd::hamming`.
    #[test]
    fn test_simd_hamming_dist_good() {
        let x = b"GTCTGCATGCG";
        let y = b"TTTAGCTAGCG";
        assert_eq!(simd::hamming(x, y), 5);
    }

    /// Texts of different length must trigger the length assertion.
    #[test]
    #[should_panic(
        expected = "hamming distance cannot be calculated for texts of different length (11!=8)"
    )]
    fn test_hamming_dist_bad() {
        let x = b"GACTATATCGA";
        let y = b"TTTAGCTC";
        hamming(x, y);
    }

    /// Texts of different length must trigger the `simd` length assertion.
    #[test]
    #[should_panic(
        expected = "simd hamming distance cannot be calculated for texts of different length (11!=8)"
    )]
    fn test_simd_hamming_dist_bad() {
        let x = b"GACTATATCGA";
        let y = b"TTTAGCTC";
        simd::hamming(x, y);
    }

    /// Checks a known distance, symmetry, and fully mismatching texts of
    /// different length.
    #[test]
    fn test_levenshtein_dist() {
        let x = b"ACCGTGGAT";
        let y = b"AAAAACCGTTGAT";
        assert_eq!(levenshtein(x, y), 5);
        assert_eq!(levenshtein(x, y), levenshtein(y, x));
        assert_eq!(levenshtein(b"AAA", b"TTTT"), 4);
        assert_eq!(levenshtein(b"TTTT", b"AAA"), 4);
    }

    /// Same expectations as `test_levenshtein_dist`, for `simd::levenshtein`.
    #[test]
    fn test_simd_levenshtein_dist() {
        let x = b"ACCGTGGAT";
        let y = b"AAAAACCGTTGAT";
        assert_eq!(simd::levenshtein(x, y), 5);
        assert_eq!(simd::levenshtein(x, y), simd::levenshtein(y, x));
        assert_eq!(simd::levenshtein(b"AAA", b"TTTT"), 4);
        assert_eq!(simd::levenshtein(b"TTTT", b"AAA"), 4);
    }

    /// With `k = u32::MAX` the bound is clamped to the longer text's length, so
    /// the bounded variant is expected to report the same distances as above.
    #[test]
    fn test_simd_bounded_levenshtein_dist() {
        let x = b"ACCGTGGAT";
        let y = b"AAAAACCGTTGAT";
        assert_eq!(simd::bounded_levenshtein(x, y, u32::MAX), Some(5));
        assert_eq!(
            simd::bounded_levenshtein(x, y, u32::MAX),
            simd::bounded_levenshtein(y, x, u32::MAX)
        );
        assert_eq!(
            simd::bounded_levenshtein(b"AAA", b"TTTT", u32::MAX),
            Some(4)
        );
        assert_eq!(
            simd::bounded_levenshtein(b"TTTT", b"AAA", u32::MAX),
            Some(4)
        );
    }
}