#[derive(Debug, Clone)]
pub struct AlignmentParams {
pub match_score: i32,
pub mismatch_score: i32,
pub gap_open: i32,
pub gap_extend: i32,
}
impl Default for AlignmentParams {
fn default() -> Self {
Self {
match_score: 5,
mismatch_score: 0,
gap_open: -4,
gap_extend: -1,
}
}
}
#[derive(Debug, Clone)]
pub struct SequenceAlignment {
pub mapping: Vec<Option<usize>>,
pub score: i32,
pub identity: f64,
pub num_aligned: usize,
}
pub fn align_sequences(
seq1: &[String],
seq2: &[String],
params: &AlignmentParams,
) -> SequenceAlignment {
let n = seq1.len();
let m = seq2.len();
if n == 0 || m == 0 {
return SequenceAlignment {
mapping: vec![None; n],
score: 0,
identity: 0.0,
num_aligned: 0,
};
}
let rows = n + 1;
let cols = m + 1;
let mut score = vec![vec![0i32; cols]; rows];
let mut traceback = vec![vec![0u8; cols]; rows];
for i in 1..rows {
score[i][0] = params.gap_open + params.gap_extend * (i as i32 - 1);
traceback[i][0] = 1; }
for j in 1..cols {
score[0][j] = params.gap_open + params.gap_extend * (j as i32 - 1);
traceback[0][j] = 2; }
for i in 1..rows {
for j in 1..cols {
let match_mismatch = if seq1[i - 1] == seq2[j - 1] {
params.match_score
} else {
params.mismatch_score
};
let diag = score[i - 1][j - 1] + match_mismatch;
let up_gap_penalty = if traceback[i - 1][j] == 1 {
params.gap_extend
} else {
params.gap_open
};
let up = score[i - 1][j] + up_gap_penalty;
let left_gap_penalty = if traceback[i][j - 1] == 2 {
params.gap_extend
} else {
params.gap_open
};
let left = score[i][j - 1] + left_gap_penalty;
if diag >= up && diag >= left {
score[i][j] = diag;
traceback[i][j] = 0;
} else if up >= left {
score[i][j] = up;
traceback[i][j] = 1;
} else {
score[i][j] = left;
traceback[i][j] = 2;
}
}
}
let mut mapping = vec![None; n];
let mut i = n;
let mut j = m;
let mut num_identical = 0;
let mut num_aligned = 0;
while i > 0 || j > 0 {
if i > 0 && j > 0 && traceback[i][j] == 0 {
mapping[i - 1] = Some(j - 1);
num_aligned += 1;
if seq1[i - 1] == seq2[j - 1] {
num_identical += 1;
}
i -= 1;
j -= 1;
} else if i > 0 && traceback[i][j] == 1 {
i -= 1;
} else {
j -= 1;
}
}
let identity = if num_aligned > 0 {
num_identical as f64 / num_aligned as f64
} else {
0.0
};
SequenceAlignment {
mapping,
score: score[n][m],
identity,
num_aligned,
}
}
pub fn sequence_identity(seq1: &[String], seq2: &[String]) -> f64 {
let alignment = align_sequences(seq1, seq2, &AlignmentParams::default());
alignment.identity
}
#[cfg(test)]
mod tests {
use super::*;
fn to_strings(s: &[&str]) -> Vec<String> {
s.iter().map(|x| x.to_string()).collect()
}
#[test]
fn test_identical_sequences() {
let seq = to_strings(&["ALA", "GLY", "VAL", "LEU"]);
let result = align_sequences(&seq, &seq, &AlignmentParams::default());
assert_eq!(result.identity, 1.0);
assert_eq!(result.num_aligned, 4);
for (i, m) in result.mapping.iter().enumerate() {
assert_eq!(*m, Some(i));
}
}
#[test]
fn test_completely_different() {
let seq1 = to_strings(&["ALA", "ALA", "ALA"]);
let seq2 = to_strings(&["GLY", "GLY", "GLY"]);
let result = align_sequences(&seq1, &seq2, &AlignmentParams::default());
assert_eq!(result.identity, 0.0);
}
#[test]
fn test_one_mismatch() {
let seq1 = to_strings(&["ALA", "GLY", "VAL"]);
let seq2 = to_strings(&["ALA", "LEU", "VAL"]);
let result = align_sequences(&seq1, &seq2, &AlignmentParams::default());
assert!(result.identity > 0.5);
assert!(result.identity < 1.0);
}
#[test]
fn test_different_lengths() {
let seq1 = to_strings(&["ALA", "GLY", "VAL", "LEU", "ILE"]);
let seq2 = to_strings(&["ALA", "GLY", "VAL"]);
let result = align_sequences(&seq1, &seq2, &AlignmentParams::default());
assert!(result.num_aligned >= 3);
}
#[test]
fn test_empty_sequence() {
let seq1 = to_strings(&["ALA", "GLY"]);
let seq2: Vec<String> = vec![];
let result = align_sequences(&seq1, &seq2, &AlignmentParams::default());
assert_eq!(result.identity, 0.0);
assert_eq!(result.num_aligned, 0);
}
#[test]
fn test_sequence_identity() {
let seq1 = to_strings(&["ALA", "GLY", "VAL"]);
let seq2 = to_strings(&["ALA", "GLY", "VAL"]);
assert!((sequence_identity(&seq1, &seq2) - 1.0).abs() < 1e-10);
}
#[test]
fn test_insertion_alignment() {
let seq1 = to_strings(&["ALA", "GLY", "VAL"]);
let seq2 = to_strings(&["ALA", "LEU", "GLY", "VAL"]);
let result = align_sequences(&seq1, &seq2, &AlignmentParams::default());
assert_eq!(result.mapping[0], Some(0)); assert!(result.num_aligned >= 3);
}
}