use serde::Serialize;
#[inline]
fn aa_index(code: char) -> Option<usize> {
match code.to_ascii_uppercase() {
'A' => Some(0),
'C' => Some(1),
'D' => Some(2),
'E' => Some(3),
'F' => Some(4),
'G' => Some(5),
'H' => Some(6),
'I' => Some(7),
'K' => Some(8),
'L' => Some(9),
'M' => Some(10),
'N' => Some(11),
'P' => Some(12),
'Q' => Some(13),
'R' => Some(14),
'S' => Some(15),
'T' => Some(16),
'V' => Some(17),
'W' => Some(18),
'Y' => Some(19),
_ => None,
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[non_exhaustive]
pub enum Matrix {
Blosum62,
Pam250,
}
#[rustfmt::skip]
const BLOSUM62: [[i8; 20]; 20] = [
[ 4, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, -2], [ 0, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2], [-2, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -3], [-1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -2], [-2, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, 3], [ 0, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -3], [-2, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, 2], [-1, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1], [-1, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -2], [-1, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1], [-1, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1], [-2, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -2], [-1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -3], [-1, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1], [-1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -2], [ 1, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, -2], [ 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, -2], [ 0, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1], [-3, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, 2], [-2, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, 7], ];
#[rustfmt::skip]
const PAM250: [[i8; 20]; 20] = [
[ 2, -2, 0, 0, -4, 1, -1, -1, -1, -2, -1, 0, 1, 0, -2, 1, 1, 0, -6, -3], [-2, 12, -5, -5, -4, -3, -3, -2, -5, -6, -5, -4, -3, -5, -4, 0, -2, -2, -8, 0], [ 0, -5, 4, 3, -6, 1, 1, -2, 0, -4, -3, 2, -1, 2, -1, 0, 0, -2, -7, -4], [ 0, -5, 3, 4, -5, 0, 1, -2, 0, -3, -2, 1, -1, 2, -1, 0, 0, -2, -7, -4], [-4, -4, -6, -5, 9, -5, -2, 1, -5, 2, 0, -4, -5, -5, -4, -3, -3, -1, 0, 7], [ 1, -3, 1, 0, -5, 5, -2, -3, -2, -4, -3, 0, -1, -1, -3, 1, 0, -1, -7, -5], [-1, -3, 1, 1, -2, -2, 6, -2, 0, -2, -2, 2, 0, 3, 2, -1, -1, -2, -3, 0], [-1, -2, -2, -2, 1, -3, -2, 5, -2, 2, 2, -2, -2, -2, -2, -1, 0, 4, -5, -1], [-1, -5, 0, 0, -5, -2, 0, -2, 5, -3, 0, 1, -1, 1, 3, 0, 0, -2, -3, -4], [-2, -6, -4, -3, 2, -4, -2, 2, -3, 6, 4, -3, -3, -2, -3, -3, -2, 2, -2, -1], [-1, -5, -3, -2, 0, -3, -2, 2, 0, 4, 6, -2, -2, -1, 0, -2, -1, 2, -4, -2], [ 0, -4, 2, 1, -4, 0, 2, -2, 1, -3, -2, 2, -1, 1, 0, 1, 0, -2, -4, -2], [ 1, -3, -1, -1, -5, -1, 0, -2, -1, -3, -2, -1, 6, 0, 0, 1, 0, -1, -6, -5], [ 0, -5, 2, 2, -5, -1, 3, -2, 1, -2, -1, 1, 0, 4, 1, -1, -1, -2, -5, -4], [-2, -4, -1, -1, -4, -3, 2, -2, 3, -3, 0, 0, 0, 1, 6, 0, -1, -2, 2, -4], [ 1, 0, 0, 0, -3, 1, -1, -1, 0, -3, -2, 1, 1, -1, 0, 2, 1, -1, -2, -3], [ 1, -2, 0, 0, -3, 0, -1, 0, 0, -2, -1, 0, 0, -1, -1, 1, 3, 0, -5, -3], [ 0, -2, -2, -2, -1, -1, -2, 4, -2, 2, 2, -2, -1, -2, -2, -1, 0, 4, -6, -2], [-6, -8, -7, -7, 0, -7, -3, -5, -3, -2, -4, -4, -6, -5, 2, -2, -5, -6, 17, 0], [-3, 0, -4, -4, 7, -5, 0, -1, -4, -1, -2, -2, -5, -4, -4, -3, -3, -2, 0, 10], ];
#[must_use]
#[inline]
pub fn substitution_score(a: char, b: char, matrix: Matrix) -> Option<i8> {
let i = aa_index(a)?;
let j = aa_index(b)?;
let m = match matrix {
Matrix::Blosum62 => &BLOSUM62,
Matrix::Pam250 => &PAM250,
};
Some(m[i][j])
}
#[derive(Debug, Clone, Copy, PartialEq, Serialize)]
pub struct AlignmentScore {
pub score: i32,
pub identities: usize,
pub length: usize,
pub identity: f64,
}
#[must_use]
pub fn score_alignment(seq_a: &str, seq_b: &str, matrix: Matrix) -> Option<AlignmentScore> {
if seq_a.is_empty() || seq_a.len() != seq_b.len() {
return None;
}
let mut score: i32 = 0;
let mut identities: usize = 0;
let len = seq_a.len();
for (a, b) in seq_a.chars().zip(seq_b.chars()) {
let s = substitution_score(a, b, matrix)?;
score += i32::from(s);
if a.eq_ignore_ascii_case(&b) {
identities += 1;
}
}
Some(AlignmentScore {
score,
identities,
length: len,
identity: identities as f64 / len as f64,
})
}
#[must_use]
pub fn needleman_wunsch(seq_a: &str, seq_b: &str, matrix: Matrix, gap_penalty: i32) -> Option<i32> {
let a: Vec<char> = seq_a.chars().map(|c| c.to_ascii_uppercase()).collect();
let b: Vec<char> = seq_b.chars().map(|c| c.to_ascii_uppercase()).collect();
let m = a.len();
let n = b.len();
if m == 0 || n == 0 {
return None;
}
for &c in &a {
aa_index(c)?;
}
for &c in &b {
aa_index(c)?;
}
let mut dp = vec![vec![0i32; n + 1]; m + 1];
for (i, row) in dp.iter_mut().enumerate().take(m + 1) {
row[0] = i as i32 * gap_penalty;
}
for (j, val) in dp[0].iter_mut().enumerate().take(n + 1) {
*val = j as i32 * gap_penalty;
}
for i in 1..=m {
for j in 1..=n {
let sub = substitution_score(a[i - 1], b[j - 1], matrix)?;
let diag = dp[i - 1][j - 1] + i32::from(sub);
let up = dp[i - 1][j] + gap_penalty;
let left = dp[i][j - 1] + gap_penalty;
dp[i][j] = diag.max(up).max(left);
}
}
Some(dp[m][n])
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_blosum62_diagonal_positive() {
for aa in "ACDEFGHIKLMNPQRSTVWY".chars() {
let s = substitution_score(aa, aa, Matrix::Blosum62).unwrap();
assert!(s > 0, "{aa}-{aa} should be positive, got {s}");
}
}
#[test]
fn test_blosum62_symmetric() {
for a in "ACDEFGHIKLMNPQRSTVWY".chars() {
for b in "ACDEFGHIKLMNPQRSTVWY".chars() {
let ab = substitution_score(a, b, Matrix::Blosum62).unwrap();
let ba = substitution_score(b, a, Matrix::Blosum62).unwrap();
assert_eq!(ab, ba, "BLOSUM62 not symmetric for {a}-{b}");
}
}
}
#[test]
fn test_blosum62_known_values() {
assert_eq!(substitution_score('W', 'W', Matrix::Blosum62).unwrap(), 11);
assert_eq!(substitution_score('A', 'A', Matrix::Blosum62).unwrap(), 4);
assert!(substitution_score('D', 'E', Matrix::Blosum62).unwrap() > 0);
}
#[test]
fn test_pam250_symmetric() {
for a in "ACDEFGHIKLMNPQRSTVWY".chars() {
for b in "ACDEFGHIKLMNPQRSTVWY".chars() {
let ab = substitution_score(a, b, Matrix::Pam250).unwrap();
let ba = substitution_score(b, a, Matrix::Pam250).unwrap();
assert_eq!(ab, ba, "PAM250 not symmetric for {a}-{b}");
}
}
}
#[test]
fn test_pam250_known_values() {
assert_eq!(substitution_score('W', 'W', Matrix::Pam250).unwrap(), 17);
assert_eq!(substitution_score('C', 'C', Matrix::Pam250).unwrap(), 12);
}
#[test]
fn test_unknown_residue() {
assert!(substitution_score('X', 'A', Matrix::Blosum62).is_none());
assert!(substitution_score('A', 'X', Matrix::Blosum62).is_none());
}
#[test]
fn test_lowercase() {
let upper = substitution_score('A', 'D', Matrix::Blosum62).unwrap();
let lower = substitution_score('a', 'd', Matrix::Blosum62).unwrap();
assert_eq!(upper, lower);
}
#[test]
fn test_score_alignment_identical() {
let result = score_alignment("ACDEF", "ACDEF", Matrix::Blosum62).unwrap();
assert_eq!(result.identities, 5);
assert!((result.identity - 1.0).abs() < f64::EPSILON);
assert!(result.score > 0);
}
#[test]
fn test_score_alignment_different() {
let result = score_alignment("AAAAA", "WWWWW", Matrix::Blosum62).unwrap();
assert_eq!(result.identities, 0);
assert!((result.identity - 0.0).abs() < f64::EPSILON);
}
#[test]
fn test_score_alignment_length_mismatch() {
assert!(score_alignment("ACE", "ACDE", Matrix::Blosum62).is_none());
}
#[test]
fn test_score_alignment_empty() {
assert!(score_alignment("", "", Matrix::Blosum62).is_none());
}
#[test]
fn test_score_alignment_unknown() {
assert!(score_alignment("AXC", "AXC", Matrix::Blosum62).is_none());
}
#[test]
fn test_needleman_wunsch_identical() {
let score = needleman_wunsch("ACDEF", "ACDEF", Matrix::Blosum62, -4).unwrap();
let expected: i32 = "ACDEF"
.chars()
.map(|c| i32::from(substitution_score(c, c, Matrix::Blosum62).unwrap()))
.sum();
assert_eq!(score, expected);
}
#[test]
fn test_needleman_wunsch_with_gap() {
let no_gap = needleman_wunsch("ADF", "ADF", Matrix::Blosum62, -4).unwrap();
let with_gap = needleman_wunsch("ACDF", "ADF", Matrix::Blosum62, -4).unwrap();
assert!(with_gap < no_gap, "Gap should reduce score");
}
#[test]
fn test_needleman_wunsch_empty() {
assert!(needleman_wunsch("", "ACD", Matrix::Blosum62, -4).is_none());
assert!(needleman_wunsch("ACD", "", Matrix::Blosum62, -4).is_none());
}
#[test]
fn test_needleman_wunsch_unknown() {
assert!(needleman_wunsch("AXC", "ACD", Matrix::Blosum62, -4).is_none());
}
}