use std::cmp::Ordering;
use std::collections::HashMap;
use crate::error::{CoreError, CoreResult};
#[derive(Debug, Clone)]
pub struct ScoringMatrix {
pub match_score: i32,
pub mismatch_penalty: i32,
pub gap_open: i32,
pub gap_extend: i32,
}
impl ScoringMatrix {
#[must_use]
pub fn dna_default() -> Self {
Self {
match_score: 2,
mismatch_penalty: -1,
gap_open: -2,
gap_extend: -1,
}
}
#[must_use]
pub fn blosum62() -> Self {
Self {
match_score: 4,
mismatch_penalty: -1,
gap_open: -11,
gap_extend: -1,
}
}
}
#[derive(Debug, Clone)]
pub struct AlignmentResult {
pub score: i32,
pub seq1_aligned: String,
pub seq2_aligned: String,
pub identity: f64,
pub gaps: usize,
pub aligned_length: usize,
}
impl AlignmentResult {
fn from_aligned(score: i32, s1: String, s2: String) -> Self {
let len = s1.len();
let gaps = s1.chars().filter(|&c| c == '-').count()
+ s2.chars().filter(|&c| c == '-').count();
let identical = s1
.chars()
.zip(s2.chars())
.filter(|(a, b)| *a != '-' && *b != '-' && a == b)
.count();
let non_gap_cols = s1
.chars()
.zip(s2.chars())
.filter(|(a, b)| *a != '-' && *b != '-')
.count();
let identity = if non_gap_cols > 0 {
identical as f64 / non_gap_cols as f64
} else {
0.0
};
AlignmentResult {
score,
seq1_aligned: s1,
seq2_aligned: s2,
identity,
gaps,
aligned_length: len,
}
}
}
pub fn needleman_wunsch_affine(
seq1: &str,
seq2: &str,
scoring: &ScoringMatrix,
) -> CoreResult<AlignmentResult> {
let s1: Vec<u8> = seq1.bytes().map(|b| b.to_ascii_uppercase()).collect();
let s2: Vec<u8> = seq2.bytes().map(|b| b.to_ascii_uppercase()).collect();
let m = s1.len();
let n = s2.len();
let cols = n + 1;
const NEG_INF: i32 = i32::MIN / 2;
let mut m_mat = vec![NEG_INF; (m + 1) * cols];
let mut ix_mat = vec![NEG_INF; (m + 1) * cols];
let mut iy_mat = vec![NEG_INF; (m + 1) * cols];
m_mat[0] = 0;
for j in 1..=n {
iy_mat[j] = scoring.gap_open + (j as i32 - 1) * scoring.gap_extend;
}
for i in 1..=m {
ix_mat[i * cols] = scoring.gap_open + (i as i32 - 1) * scoring.gap_extend;
}
for i in 1..=m {
for j in 1..=n {
let sub = if s1[i - 1] == s2[j - 1] {
scoring.match_score
} else {
scoring.mismatch_penalty
};
let prev_m = m_mat[(i - 1) * cols + (j - 1)];
let prev_ix = ix_mat[(i - 1) * cols + (j - 1)];
let prev_iy = iy_mat[(i - 1) * cols + (j - 1)];
m_mat[i * cols + j] = safe_max3(
safe_add(prev_m, sub),
safe_add(prev_ix, sub),
safe_add(prev_iy, sub),
);
let open_ix = safe_add(m_mat[(i - 1) * cols + j], scoring.gap_open);
let ext_ix = safe_add(ix_mat[(i - 1) * cols + j], scoring.gap_extend);
ix_mat[i * cols + j] = open_ix.max(ext_ix);
let open_iy = safe_add(m_mat[i * cols + (j - 1)], scoring.gap_open);
let ext_iy = safe_add(iy_mat[i * cols + (j - 1)], scoring.gap_extend);
iy_mat[i * cols + j] = open_iy.max(ext_iy);
}
}
let final_m = m_mat[m * cols + n];
let final_ix = ix_mat[m * cols + n];
let final_iy = iy_mat[m * cols + n];
let score = safe_max3(final_m, final_ix, final_iy);
let (aligned1, aligned2) =
traceback_affine_nw(&m_mat, &ix_mat, &iy_mat, &s1, &s2, m, n, scoring);
Ok(AlignmentResult::from_aligned(score, aligned1, aligned2))
}
fn traceback_affine_nw(
m_mat: &[i32],
ix_mat: &[i32],
iy_mat: &[i32],
s1: &[u8],
s2: &[u8],
m: usize,
n: usize,
scoring: &ScoringMatrix,
) -> (String, String) {
let cols = n + 1;
const NEG_INF: i32 = i32::MIN / 2;
#[derive(Copy, Clone, PartialEq)]
enum State {
M,
Ix,
Iy,
}
let final_m = m_mat[m * cols + n];
let final_ix = ix_mat[m * cols + n];
let final_iy = iy_mat[m * cols + n];
let best = safe_max3(final_m, final_ix, final_iy);
let mut cur_state = if final_m == best {
State::M
} else if final_ix == best {
State::Ix
} else {
State::Iy
};
let mut a1: Vec<u8> = Vec::new();
let mut a2: Vec<u8> = Vec::new();
let mut i = m;
let mut j = n;
while i > 0 || j > 0 {
match cur_state {
State::M => {
if i == 0 {
a1.push(b'-');
a2.push(s2[j - 1]);
j -= 1;
cur_state = State::Iy;
continue;
}
if j == 0 {
a1.push(s1[i - 1]);
a2.push(b'-');
i -= 1;
cur_state = State::Ix;
continue;
}
let sub = if s1[i - 1] == s2[j - 1] {
scoring.match_score
} else {
scoring.mismatch_penalty
};
let cur = m_mat[i * cols + j];
let from_m = safe_add(m_mat[(i - 1) * cols + (j - 1)], sub);
let from_ix = safe_add(ix_mat[(i - 1) * cols + (j - 1)], sub);
let from_iy = safe_add(iy_mat[(i - 1) * cols + (j - 1)], sub);
a1.push(s1[i - 1]);
a2.push(s2[j - 1]);
i -= 1;
j -= 1;
cur_state = if from_m == cur {
State::M
} else if from_ix == cur {
State::Ix
} else if from_iy == cur {
State::Iy
} else {
State::M };
}
State::Ix => {
if i == 0 {
a1.push(b'-');
a2.push(s2[j - 1]);
j -= 1;
continue;
}
a1.push(s1[i - 1]);
a2.push(b'-');
let cur = ix_mat[i * cols + j];
let from_open = safe_add(m_mat[(i - 1) * cols + j], scoring.gap_open);
i -= 1;
cur_state = if from_open == cur && m_mat[i * cols + j] != NEG_INF {
State::M
} else {
State::Ix
};
}
State::Iy => {
if j == 0 {
a1.push(s1[i - 1]);
a2.push(b'-');
i -= 1;
continue;
}
a1.push(b'-');
a2.push(s2[j - 1]);
let cur = iy_mat[i * cols + j];
let from_open = safe_add(m_mat[i * cols + (j - 1)], scoring.gap_open);
j -= 1;
cur_state = if from_open == cur && m_mat[i * cols + j] != NEG_INF {
State::M
} else {
State::Iy
};
}
}
}
a1.reverse();
a2.reverse();
let aligned1 = String::from_utf8(a1).unwrap_or_default();
let aligned2 = String::from_utf8(a2).unwrap_or_default();
(aligned1, aligned2)
}
pub fn smith_waterman_affine(
seq1: &str,
seq2: &str,
scoring: &ScoringMatrix,
) -> CoreResult<AlignmentResult> {
let s1: Vec<u8> = seq1.bytes().map(|b| b.to_ascii_uppercase()).collect();
let s2: Vec<u8> = seq2.bytes().map(|b| b.to_ascii_uppercase()).collect();
let m = s1.len();
let n = s2.len();
let cols = n + 1;
let mut m_mat = vec![0i32; (m + 1) * cols];
let mut ix_mat = vec![0i32; (m + 1) * cols];
let mut iy_mat = vec![0i32; (m + 1) * cols];
let mut best_score = 0i32;
let mut best_i = 0usize;
let mut best_j = 0usize;
for i in 1..=m {
for j in 1..=n {
let sub = if s1[i - 1] == s2[j - 1] {
scoring.match_score
} else {
scoring.mismatch_penalty
};
let m_val = safe_max3(
safe_add(m_mat[(i - 1) * cols + (j - 1)], sub),
safe_add(ix_mat[(i - 1) * cols + (j - 1)], sub),
safe_add(iy_mat[(i - 1) * cols + (j - 1)], sub),
)
.max(0);
let ix_val =
(safe_add(m_mat[(i - 1) * cols + j], scoring.gap_open))
.max(safe_add(ix_mat[(i - 1) * cols + j], scoring.gap_extend))
.max(0);
let iy_val =
(safe_add(m_mat[i * cols + (j - 1)], scoring.gap_open))
.max(safe_add(iy_mat[i * cols + (j - 1)], scoring.gap_extend))
.max(0);
m_mat[i * cols + j] = m_val;
ix_mat[i * cols + j] = ix_val;
iy_mat[i * cols + j] = iy_val;
let cell_best = safe_max3(m_val, ix_val, iy_val);
if cell_best > best_score {
best_score = cell_best;
best_i = i;
best_j = j;
}
}
}
if best_score == 0 {
return Ok(AlignmentResult::from_aligned(0, String::new(), String::new()));
}
let (aligned1, aligned2) = traceback_affine_sw(
&m_mat, &ix_mat, &iy_mat, &s1, &s2, best_i, best_j, scoring,
);
Ok(AlignmentResult::from_aligned(
best_score, aligned1, aligned2,
))
}
fn traceback_affine_sw(
m_mat: &[i32],
ix_mat: &[i32],
iy_mat: &[i32],
s1: &[u8],
s2: &[u8],
start_i: usize,
start_j: usize,
scoring: &ScoringMatrix,
) -> (String, String) {
let cols = s2.len() + 1;
let mut a1: Vec<u8> = Vec::new();
let mut a2: Vec<u8> = Vec::new();
let mut i = start_i;
let mut j = start_j;
while i > 0 && j > 0 {
let cur_m = m_mat[i * cols + j];
let cur_ix = ix_mat[i * cols + j];
let cur_iy = iy_mat[i * cols + j];
let cur_best = safe_max3(cur_m, cur_ix, cur_iy);
if cur_best == 0 {
break;
}
if cur_best == cur_m && i > 0 && j > 0 {
let sub = if s1[i - 1] == s2[j - 1] {
scoring.match_score
} else {
scoring.mismatch_penalty
};
let prev = safe_add(
safe_max3(
m_mat[(i - 1) * cols + (j - 1)],
ix_mat[(i - 1) * cols + (j - 1)],
iy_mat[(i - 1) * cols + (j - 1)],
),
sub,
);
if prev == cur_m {
a1.push(s1[i - 1]);
a2.push(s2[j - 1]);
i -= 1;
j -= 1;
continue;
}
}
if cur_best == cur_ix && i > 0 {
a1.push(s1[i - 1]);
a2.push(b'-');
i -= 1;
} else if j > 0 {
a1.push(b'-');
a2.push(s2[j - 1]);
j -= 1;
} else {
break;
}
}
a1.reverse();
a2.reverse();
(
String::from_utf8(a1).unwrap_or_default(),
String::from_utf8(a2).unwrap_or_default(),
)
}
pub fn semi_global_align(
query: &str,
target: &str,
scoring: &ScoringMatrix,
) -> CoreResult<AlignmentResult> {
if query.is_empty() || target.is_empty() {
return Err(CoreError::ValueError(crate::error_context!(
"query and target must be non-empty"
)));
}
let q: Vec<u8> = query.bytes().map(|b| b.to_ascii_uppercase()).collect();
let t: Vec<u8> = target.bytes().map(|b| b.to_ascii_uppercase()).collect();
let m = q.len();
let n = t.len();
let cols = n + 1;
const NEG_INF: i32 = i32::MIN / 2;
let mut dp = vec![NEG_INF; (m + 1) * cols];
for i in 0..=m {
dp[i * cols] = i as i32 * scoring.gap_open;
}
for j in 0..=n {
dp[j] = 0;
}
for i in 1..=m {
for j in 1..=n {
let sub = if q[i - 1] == t[j - 1] {
scoring.match_score
} else {
scoring.mismatch_penalty
};
let diag = safe_add(dp[(i - 1) * cols + (j - 1)], sub);
let up = safe_add(dp[(i - 1) * cols + j], scoring.gap_open);
let left = safe_add(dp[i * cols + (j - 1)], scoring.gap_open);
dp[i * cols + j] = diag.max(up).max(left);
}
}
let (best_j, best_score) = (0..=n)
.map(|j| (j, dp[m * cols + j]))
.max_by_key(|&(_, s)| s)
.unwrap_or((n, NEG_INF));
let (aligned1, aligned2) =
traceback_semi_global(&dp, &q, &t, m, best_j, n, scoring);
Ok(AlignmentResult::from_aligned(best_score, aligned1, aligned2))
}
fn traceback_semi_global(
dp: &[i32],
q: &[u8],
t: &[u8],
start_i: usize,
start_j: usize,
n: usize,
scoring: &ScoringMatrix,
) -> (String, String) {
let cols = n + 1;
let mut a1: Vec<u8> = Vec::new();
let mut a2: Vec<u8> = Vec::new();
for j in (start_j..n).rev() {
a1.push(b'-');
a2.push(t[j]);
}
let mut i = start_i;
let mut j = start_j;
while i > 0 || j > 0 {
if i > 0 && j > 0 {
let sub = if q[i - 1] == t[j - 1] {
scoring.match_score
} else {
scoring.mismatch_penalty
};
let expected_diag = safe_add(dp[(i - 1) * cols + (j - 1)], sub);
if dp[i * cols + j] == expected_diag {
a1.push(q[i - 1]);
a2.push(t[j - 1]);
i -= 1;
j -= 1;
continue;
}
}
if i > 0 && (j == 0 || dp[i * cols + j] == safe_add(dp[(i - 1) * cols + j], scoring.gap_open)) {
a1.push(q[i - 1]);
a2.push(b'-');
i -= 1;
} else if j > 0 {
a1.push(b'-');
a2.push(t[j - 1]);
j -= 1;
} else {
break;
}
}
a1.reverse();
a2.reverse();
(
String::from_utf8(a1).unwrap_or_default(),
String::from_utf8(a2).unwrap_or_default(),
)
}
#[derive(Debug, Clone)]
pub struct MultipleAlignment {
pub aligned_sequences: Vec<String>,
pub ids: Vec<String>,
pub consensus: String,
}
impl MultipleAlignment {
#[must_use]
pub fn conservation_scores(&self) -> Vec<f64> {
if self.aligned_sequences.is_empty() {
return Vec::new();
}
let len = self.aligned_sequences[0].len();
(0..len)
.map(|col| {
let mut counts: HashMap<char, usize> = HashMap::new();
let mut total = 0usize;
for seq in &self.aligned_sequences {
let ch = seq.chars().nth(col).unwrap_or('-');
if ch != '-' {
*counts.entry(ch).or_insert(0) += 1;
total += 1;
}
}
if total == 0 {
return 0.0;
}
let max_count = counts.values().max().copied().unwrap_or(0);
max_count as f64 / total as f64
})
.collect()
}
#[must_use]
pub fn gaps_per_column(&self) -> Vec<usize> {
if self.aligned_sequences.is_empty() {
return Vec::new();
}
let len = self.aligned_sequences[0].len();
(0..len)
.map(|col| {
self.aligned_sequences
.iter()
.filter(|seq| seq.chars().nth(col).unwrap_or('-') == '-')
.count()
})
.collect()
}
#[must_use]
pub fn to_fasta(&self) -> String {
self.ids
.iter()
.zip(self.aligned_sequences.iter())
.map(|(id, seq)| format!(">{id}\n{seq}\n"))
.collect()
}
}
pub fn multiple_sequence_alignment(
sequences: &[(String, String)],
scoring: &ScoringMatrix,
) -> CoreResult<MultipleAlignment> {
if sequences.len() < 2 {
return Err(CoreError::ValueError(crate::error_context!(
"at least 2 sequences are required for multiple alignment"
)));
}
let n = sequences.len();
let mut dist = vec![vec![0.0f64; n]; n];
for i in 0..n {
for j in (i + 1)..n {
let result = needleman_wunsch_affine(&sequences[i].1, &sequences[j].1, scoring)?;
let d = 1.0 - result.identity;
dist[i][j] = d;
dist[j][i] = d;
}
}
let merge_order = upgma_order(&dist, n);
let mut profiles: Vec<Vec<String>> = sequences
.iter()
.map(|(_, seq)| vec![seq.clone()])
.collect();
let mut profile_ids: Vec<Vec<String>> = sequences
.iter()
.map(|(id, _)| vec![id.clone()])
.collect();
let mut active = vec![true; n];
for (pi, pj) in merge_order {
if pi >= n || pj >= n || !active[pi] || !active[pj] {
continue;
}
let consensus_i = profile_consensus(&profiles[pi]);
let consensus_j = profile_consensus(&profiles[pj]);
let alignment = needleman_wunsch_affine(&consensus_i, &consensus_j, scoring)?;
let new_pi = apply_gaps_to_profile(&profiles[pi], &alignment.seq1_aligned, &consensus_i);
let new_pj = apply_gaps_to_profile(&profiles[pj], &alignment.seq2_aligned, &consensus_j);
let mut merged_seqs = new_pi;
merged_seqs.extend(new_pj);
let mut merged_ids = profile_ids[pi].clone();
merged_ids.extend(profile_ids[pj].clone());
profiles[pi] = merged_seqs;
profile_ids[pi] = merged_ids;
active[pj] = false;
}
let final_idx = active
.iter()
.rposition(|&a| a)
.unwrap_or(0);
let aligned = profiles[final_idx].clone();
let ids = profile_ids[final_idx].clone();
let consensus = build_msa_consensus(&aligned);
Ok(MultipleAlignment {
aligned_sequences: aligned,
ids,
consensus,
})
}
fn profile_consensus(profile: &[String]) -> String {
if profile.is_empty() {
return String::new();
}
let len = profile[0].len();
(0..len)
.map(|col| {
let mut counts: HashMap<char, usize> = HashMap::new();
for seq in profile {
let ch = seq.chars().nth(col).unwrap_or('-');
*counts.entry(ch).or_insert(0) += 1;
}
counts
.into_iter()
.max_by_key(|&(_, c)| c)
.map(|(ch, _)| ch)
.unwrap_or('-')
})
.collect()
}
fn apply_gaps_to_profile(profile: &[String], aligned: &str, original: &str) -> Vec<String> {
let mut orig_to_aligned: Vec<Vec<char>> = Vec::new();
let mut orig_idx = 0usize;
let orig_chars: Vec<char> = original.chars().collect();
let aligned_chars: Vec<char> = aligned.chars().collect();
let mut pending_gaps: Vec<char> = Vec::new();
for &ch in &aligned_chars {
if ch == '-' {
pending_gaps.push('-');
} else {
if orig_idx < orig_chars.len() {
let mut entry = pending_gaps.drain(..).collect::<Vec<char>>();
entry.push(ch);
orig_to_aligned.push(entry);
orig_idx += 1;
}
}
}
if !pending_gaps.is_empty() {
if orig_to_aligned.is_empty() {
orig_to_aligned.push(pending_gaps);
} else {
orig_to_aligned.last_mut().map(|v| v.extend(pending_gaps.iter()));
}
}
profile
.iter()
.map(|seq| {
let seq_chars: Vec<char> = seq.chars().collect();
let mut result = String::new();
let mut col = 0usize;
for (oi, mapping) in orig_to_aligned.iter().enumerate() {
for &mch in mapping {
if mch == '-' {
result.push('-');
} else {
let seq_col = find_non_gap_position(seq_chars.as_slice(), oi);
result.push(seq_col);
col += 1;
}
}
}
let total_non_gap = seq_chars.iter().filter(|&&c| c != '-').count();
while col < total_non_gap {
let pos = find_non_gap_position(seq_chars.as_slice(), col);
result.push(pos);
col += 1;
}
result
})
.collect()
}
fn find_non_gap_position(seq: &[char], idx: usize) -> char {
let mut count = 0;
for &ch in seq {
if ch != '-' {
if count == idx {
return ch;
}
count += 1;
}
}
'-'
}
fn build_msa_consensus(aligned: &[String]) -> String {
if aligned.is_empty() {
return String::new();
}
let len = aligned[0].len();
(0..len)
.map(|col| {
let mut counts: HashMap<char, usize> = HashMap::new();
for seq in aligned {
let ch = seq.chars().nth(col).unwrap_or('-');
if ch != '-' {
*counts.entry(ch).or_insert(0) += 1;
}
}
if counts.is_empty() {
'-'
} else {
counts
.into_iter()
.max_by_key(|&(_, c)| c)
.map(|(ch, _)| ch)
.unwrap_or('-')
}
})
.collect()
}
fn upgma_order(dist: &[Vec<f64>], n: usize) -> Vec<(usize, usize)> {
let mut clusters: Vec<Vec<usize>> = (0..n).map(|i| vec![i]).collect();
let mut active: Vec<bool> = vec![true; n];
let mut order: Vec<(usize, usize)> = Vec::new();
for _ in 0..(n - 1) {
let mut best_dist = f64::INFINITY;
let mut best_i = 0;
let mut best_j = 1;
let active_clusters: Vec<usize> = (0..clusters.len()).filter(|&i| active[i]).collect();
for ci in 0..active_clusters.len() {
for cj in (ci + 1)..active_clusters.len() {
let i = active_clusters[ci];
let j = active_clusters[cj];
let avg = average_cluster_dist(dist, &clusters[i], &clusters[j]);
if avg < best_dist {
best_dist = avg;
best_i = i;
best_j = j;
}
}
}
order.push((best_i, best_j));
let merged: Vec<usize> = clusters[best_i]
.iter()
.chain(clusters[best_j].iter())
.copied()
.collect();
clusters[best_i] = merged;
active[best_j] = false;
}
order
}
fn average_cluster_dist(dist: &[Vec<f64>], a: &[usize], b: &[usize]) -> f64 {
if a.is_empty() || b.is_empty() {
return f64::INFINITY;
}
let total: f64 = a.iter().flat_map(|&i| b.iter().map(move |&j| dist[i][j])).sum();
total / (a.len() * b.len()) as f64
}
#[inline]
fn safe_add(a: i32, b: i32) -> i32 {
a.saturating_add(b)
}
#[inline]
fn safe_max3(a: i32, b: i32, c: i32) -> i32 {
a.max(b).max(c)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_scoring_matrix_dna_default() {
let s = ScoringMatrix::dna_default();
assert_eq!(s.match_score, 2);
assert!(s.mismatch_penalty < 0);
assert!(s.gap_open < 0);
assert!(s.gap_extend < 0);
}
#[test]
fn test_scoring_matrix_blosum62() {
let s = ScoringMatrix::blosum62();
assert!(s.match_score > 0);
assert!(s.gap_open < 0);
}
#[test]
fn test_nw_affine_identical() {
let s = ScoringMatrix::dna_default();
let r = needleman_wunsch_affine("ATGC", "ATGC", &s).expect("NW failed");
assert!((r.identity - 1.0).abs() < 1e-10);
assert_eq!(r.gaps, 0);
assert_eq!(r.aligned_length, 4);
}
#[test]
fn test_nw_affine_single_gap() {
let s = ScoringMatrix::dna_default();
let r = needleman_wunsch_affine("AGCT", "AGT", &s).expect("NW failed");
assert_eq!(r.seq1_aligned.len(), r.seq2_aligned.len());
assert!(r.score > 0 || r.aligned_length >= 3);
}
#[test]
fn test_nw_affine_alignment_lengths_equal() {
let s = ScoringMatrix::dna_default();
let r = needleman_wunsch_affine("GCATGCU", "GATTACA", &s).expect("NW failed");
assert_eq!(r.seq1_aligned.len(), r.seq2_aligned.len());
}
#[test]
fn test_nw_affine_identity_in_range() {
let s = ScoringMatrix::dna_default();
let r = needleman_wunsch_affine("ATGCATGC", "ATGCTTGC", &s).expect("NW failed");
assert!(r.identity >= 0.0 && r.identity <= 1.0);
}
#[test]
fn test_sw_affine_substring() {
let s = ScoringMatrix::dna_default();
let r = smith_waterman_affine("NNNNATGCNNNN", "ATGC", &s).expect("SW failed");
assert!((r.identity - 1.0).abs() < 1e-10);
assert_eq!(r.seq1_aligned, "ATGC");
}
#[test]
fn test_sw_affine_no_match() {
let s = ScoringMatrix {
match_score: 1,
mismatch_penalty: -100,
gap_open: -100,
gap_extend: -100,
};
let r = smith_waterman_affine("AAAA", "TTTT", &s).expect("SW failed");
assert_eq!(r.score, 0);
assert!(r.seq1_aligned.is_empty());
}
#[test]
fn test_sw_affine_identical() {
let s = ScoringMatrix::dna_default();
let r = smith_waterman_affine("ATGC", "ATGC", &s).expect("SW failed");
assert!((r.identity - 1.0).abs() < 1e-10);
}
#[test]
fn test_sw_affine_alignment_lengths_equal() {
let s = ScoringMatrix::dna_default();
let r = smith_waterman_affine("TGTTACGG", "GGTTGACTA", &s).expect("SW failed");
assert_eq!(r.seq1_aligned.len(), r.seq2_aligned.len());
}
#[test]
fn test_semi_global_query_in_target() {
let s = ScoringMatrix::dna_default();
let r = semi_global_align("ATGC", "NNATGCNN", &s).expect("semi-global failed");
assert!((r.identity - 1.0).abs() < 1e-10);
}
#[test]
fn test_semi_global_aligned_lengths_equal() {
let s = ScoringMatrix::dna_default();
let r = semi_global_align("ATGCATGC", "GCATGCATGCGC", &s).expect("semi-global failed");
assert_eq!(r.seq1_aligned.len(), r.seq2_aligned.len());
}
#[test]
fn test_semi_global_empty_error() {
let s = ScoringMatrix::dna_default();
let r = semi_global_align("", "ATGC", &s);
assert!(r.is_err());
}
#[test]
fn test_msa_two_identical_sequences() {
let seqs = vec![
("s1".to_string(), "ATGC".to_string()),
("s2".to_string(), "ATGC".to_string()),
];
let s = ScoringMatrix::dna_default();
let msa = multiple_sequence_alignment(&seqs, &s).expect("MSA failed");
assert_eq!(msa.aligned_sequences.len(), 2);
assert_eq!(msa.ids.len(), 2);
}
#[test]
fn test_msa_three_sequences() {
let seqs = vec![
("s1".to_string(), "ATGCATGC".to_string()),
("s2".to_string(), "ATGCTTGC".to_string()),
("s3".to_string(), "ATGCATGC".to_string()),
];
let s = ScoringMatrix::dna_default();
let msa = multiple_sequence_alignment(&seqs, &s).expect("MSA failed");
assert_eq!(msa.aligned_sequences.len(), 3);
let len = msa.aligned_sequences[0].len();
for seq in &msa.aligned_sequences {
assert_eq!(seq.len(), len, "aligned sequences must have equal length");
}
}
#[test]
fn test_msa_single_sequence_error() {
let seqs = vec![("s1".to_string(), "ATGC".to_string())];
let s = ScoringMatrix::dna_default();
let r = multiple_sequence_alignment(&seqs, &s);
assert!(r.is_err());
}
#[test]
fn test_msa_conservation_scores_length() {
let seqs = vec![
("s1".to_string(), "ATGC".to_string()),
("s2".to_string(), "ATGC".to_string()),
];
let s = ScoringMatrix::dna_default();
let msa = multiple_sequence_alignment(&seqs, &s).expect("should succeed");
let cons = msa.conservation_scores();
assert_eq!(cons.len(), msa.aligned_sequences[0].len());
for v in &cons {
assert!(*v >= 0.0 && *v <= 1.0);
}
}
#[test]
fn test_msa_gaps_per_column_length() {
let seqs = vec![
("s1".to_string(), "ATGCAT".to_string()),
("s2".to_string(), "ATGC".to_string()),
];
let s = ScoringMatrix::dna_default();
let msa = multiple_sequence_alignment(&seqs, &s).expect("should succeed");
let gaps = msa.gaps_per_column();
assert_eq!(gaps.len(), msa.aligned_sequences[0].len());
}
#[test]
fn test_msa_to_fasta() {
let seqs = vec![
("seq1".to_string(), "ATGC".to_string()),
("seq2".to_string(), "ATGC".to_string()),
];
let s = ScoringMatrix::dna_default();
let msa = multiple_sequence_alignment(&seqs, &s).expect("should succeed");
let fasta = msa.to_fasta();
assert!(fasta.contains(">seq1"));
assert!(fasta.contains(">seq2"));
}
}