use std::collections::HashSet;
pub fn char_indices_from_score_matrix(score_matrix: &[&[u16]]) -> Vec<usize> {
if score_matrix.is_empty() {
return vec![];
}
let mut max_score_position = (0, 0);
let mut max_score = 0;
for (col, col_scores) in score_matrix.iter().enumerate() {
for (row, score) in col_scores.iter().enumerate() {
if *score > max_score {
max_score = *score;
max_score_position = (col, row);
}
}
}
let mut indices = HashSet::new();
let (mut col_idx, mut row_idx) = max_score_position;
let mut score = score_matrix[col_idx][row_idx];
while score > 0 {
let diag = if col_idx == 0 || row_idx == 0 {
0
} else {
score_matrix[col_idx - 1][row_idx - 1]
};
let left = if col_idx == 0 {
0
} else {
score_matrix[col_idx - 1][row_idx]
};
let up = if row_idx == 0 {
0
} else {
score_matrix[col_idx][row_idx - 1]
};
if diag >= left && diag >= up {
if diag < score {
indices.insert(row_idx);
}
row_idx = row_idx.saturating_sub(1);
col_idx = col_idx.saturating_sub(1);
score = diag;
}
else if up >= left {
if up > score && up > 0 {
indices.remove(&(row_idx));
indices.insert(row_idx.saturating_sub(1));
}
row_idx = row_idx.saturating_sub(1);
score = up;
}
else {
col_idx = col_idx.saturating_sub(1);
score = left;
}
}
let mut indices = indices.iter().copied().collect::<Vec<_>>();
indices.sort();
indices
}
#[cfg(test)]
mod tests {
use super::super::smith_waterman;
use super::char_indices_from_score_matrix;
fn run_single_indices(needle: &str, haystack: &str) -> Vec<usize> {
let (_, score_matrix) = smith_waterman(needle, haystack);
let score_matrix_ref = score_matrix
.iter()
.map(|v| v.as_slice())
.collect::<Vec<_>>();
char_indices_from_score_matrix(&score_matrix_ref)
}
#[test]
fn test_basic_indices() {
assert_eq!(run_single_indices("", "abc"), vec![]);
assert_eq!(run_single_indices("b", "abc"), vec![1]);
assert_eq!(run_single_indices("c", "abc"), vec![2]);
}
#[test]
fn test_prefix_indices() {
assert_eq!(run_single_indices("a", "abc"), vec![0]);
assert_eq!(run_single_indices("a", "aabc"), vec![0]);
assert_eq!(run_single_indices("a", "babc"), vec![1]);
}
#[test]
fn test_exact_match_indices() {
assert_eq!(run_single_indices("a", "a"), vec![0]);
assert_eq!(run_single_indices("abc", "abc"), vec![0, 1, 2]);
assert_eq!(run_single_indices("ab", "abc"), vec![0, 1]);
}
#[test]
fn test_delimiter_indices() {
assert_eq!(run_single_indices("b", "a-b"), vec![2]);
assert_eq!(run_single_indices("a", "a-b-c"), vec![0]);
assert_eq!(run_single_indices("b", "a--b"), vec![3]);
assert_eq!(run_single_indices("c", "a--bc"), vec![4]);
}
#[test]
fn test_affine_gap_indices() {
assert_eq!(run_single_indices("test", "Uterst"), vec![1, 2, 4, 5]);
assert_eq!(run_single_indices("test", "Uterrst"), vec![1, 2, 5, 6]);
assert_eq!(run_single_indices("test", "Uterrs t"), vec![1, 2, 5, 7]);
}
#[test]
fn test_capital_indices() {
assert_eq!(run_single_indices("a", "A"), vec![0]);
assert_eq!(run_single_indices("A", "Aa"), vec![0]);
assert_eq!(run_single_indices("D", "forDist"), vec![3]);
}
#[test]
fn test_typo_indices() {
assert_eq!(run_single_indices("b", "a"), vec![]);
assert_eq!(run_single_indices("reba", "repack"), vec![0, 1, 3]);
assert_eq!(run_single_indices("bbb", "abc"), vec![1]);
}
}