use std::collections::{HashSet, VecDeque};
use crate::op::{Backtrack, BestCost, IndexedOperation, Operation};
use crate::{Measure, SeqPair};
pub trait Align<'a, M, T>
where
M: Measure<T>,
T: Eq,
{
fn align(&'a self, source: &'a [T], target: &'a [T]) -> Alignment<'a, M, T>;
}
impl<'a, M, T> Align<'a, M, T> for M
where
M: Measure<T>,
T: Eq,
{
fn align(&'a self, source: &'a [T], target: &'a [T]) -> Alignment<'a, M, T> {
let pair = SeqPair { source, target };
let source_len = pair.source.len() + 1;
let target_len = pair.target.len() + 1;
let mut cost_matrix = vec![vec![0; target_len]; source_len];
for target_idx in 1..target_len {
cost_matrix[0][target_idx] = self
.best_cost(&pair, &cost_matrix, 0, target_idx)
.expect("No applicable operation");
}
for source_idx in 1..source_len {
for target_idx in 0..target_len {
cost_matrix[source_idx][target_idx] = self
.best_cost(&pair, &cost_matrix, source_idx, target_idx)
.expect("No applicable operation");
}
}
Alignment {
measure: self,
pair,
cost_matrix,
}
}
}
pub struct Alignment<'a, M, T>
where
M: Measure<T>,
T: Eq,
{
measure: &'a M,
pair: SeqPair<'a, T>,
cost_matrix: Vec<Vec<usize>>,
}
impl<'a, M, T> Alignment<'a, M, T>
where
M: Measure<T>,
T: Eq,
{
pub fn distance(&self) -> usize {
self.cost_matrix[self.cost_matrix.len() - 1][self.cost_matrix[0].len() - 1]
}
pub fn edit_script(&self) -> Vec<IndexedOperation<M::Operation>> {
let mut source_idx = self.pair.source.len();
let mut target_idx = self.pair.target.len();
let mut script = Vec::new();
while let Some(op) =
self.measure
.backtrack(&self.pair, &self.cost_matrix, source_idx, target_idx)
{
let (new_source_idx, new_target_idx) = op
.backtrack(&self.pair, source_idx, target_idx)
.expect("Cannot backtrack");
source_idx = new_source_idx;
target_idx = new_target_idx;
script.push(IndexedOperation::new(op, source_idx, target_idx));
if source_idx == 0 && target_idx == 0 {
break;
}
}
assert_eq!(source_idx, 0, "Cannot backtrack to cell 0, 0");
assert_eq!(target_idx, 0, "Cannot backtrack to cell 0, 0");
script.reverse();
script
}
pub fn edit_scripts(&self) -> HashSet<Vec<IndexedOperation<M::Operation>>> {
let mut q: VecDeque<BacktrackState<M, T>> = VecDeque::new();
q.push_back(BacktrackState {
source_idx: self.pair.source.len(),
target_idx: self.pair.target.len(),
script: Vec::new(),
});
let mut scripts = HashSet::new();
while let Some(BacktrackState {
source_idx,
target_idx,
script,
}) = q.pop_front()
{
for op in self
.measure
.backtracks(&self.pair, &self.cost_matrix, source_idx, target_idx)
{
let (new_source_idx, new_target_idx) = op
.backtrack(&self.pair, source_idx, target_idx)
.expect("Cannot backtrack");
let mut new_script = script.clone();
new_script.push(IndexedOperation::new(op, new_source_idx, new_target_idx));
if new_source_idx == 0 && new_target_idx == 0 {
new_script.reverse();
scripts.insert(new_script);
} else {
q.push_back(BacktrackState {
source_idx: new_source_idx,
target_idx: new_target_idx,
script: new_script,
})
}
}
}
assert!(!scripts.is_empty(), "Cannot backtrack to cell 0, 0");
scripts
}
pub fn cost_matrix(&self) -> &Vec<Vec<usize>> {
&self.cost_matrix
}
pub fn seq_pair(&self) -> &SeqPair<T> {
&self.pair
}
}
struct BacktrackState<M, T>
where
M: Measure<T>,
{
source_idx: usize,
target_idx: usize,
script: Vec<IndexedOperation<M::Operation>>,
}
#[cfg(test)]
mod tests {
use crate::measures::Levenshtein;
use crate::measures::LevenshteinOp::*;
use crate::op::IndexedOperation;
use super::Align;
#[test]
fn distance_test() {
let applet: Vec<char> = "applet".chars().collect();
let pineapple: Vec<char> = "pineapple".chars().collect();
let pen: Vec<char> = "pen".chars().collect();
let levenshtein = Levenshtein::new(1, 1, 1);
assert_eq!(levenshtein.align(&pineapple, &pen).distance(), 7);
assert_eq!(levenshtein.align(&pen, &pineapple).distance(), 7);
assert_eq!(levenshtein.align(&pineapple, &applet).distance(), 5);
assert_eq!(levenshtein.align(&applet, &pen).distance(), 4);
}
#[test]
fn edit_script_test() {
let applet: Vec<char> = "applet".chars().collect();
let pineapple: Vec<char> = "pineapple".chars().collect();
let pen: Vec<char> = "pen".chars().collect();
let levenshtein = Levenshtein::new(1, 1, 1);
assert_eq!(
vec![
IndexedOperation::new(Match, 0, 0),
IndexedOperation::new(Substitute(1), 1, 1),
IndexedOperation::new(Match, 2, 2),
IndexedOperation::new(Delete(1), 3, 3),
IndexedOperation::new(Delete(1), 4, 3),
IndexedOperation::new(Delete(1), 5, 3),
IndexedOperation::new(Delete(1), 6, 3),
IndexedOperation::new(Delete(1), 7, 3),
IndexedOperation::new(Delete(1), 8, 3),
],
levenshtein.align(&pineapple, &pen).edit_script()
);
assert_eq!(
vec![
IndexedOperation::new(Match, 0, 0),
IndexedOperation::new(Substitute(1), 1, 1),
IndexedOperation::new(Match, 2, 2),
IndexedOperation::new(Insert(1), 3, 3),
IndexedOperation::new(Insert(1), 3, 4),
IndexedOperation::new(Insert(1), 3, 5),
IndexedOperation::new(Insert(1), 3, 6),
IndexedOperation::new(Insert(1), 3, 7),
IndexedOperation::new(Insert(1), 3, 8),
],
levenshtein.align(&pen, &pineapple).edit_script()
);
assert_eq!(
vec![
IndexedOperation::new(Delete(1), 0, 0),
IndexedOperation::new(Delete(1), 1, 0),
IndexedOperation::new(Delete(1), 2, 0),
IndexedOperation::new(Delete(1), 3, 0),
IndexedOperation::new(Match, 4, 0),
IndexedOperation::new(Match, 5, 1),
IndexedOperation::new(Match, 6, 2),
IndexedOperation::new(Match, 7, 3),
IndexedOperation::new(Match, 8, 4),
IndexedOperation::new(Insert(1), 9, 5),
],
levenshtein.align(&pineapple, &applet).edit_script()
);
}
#[test]
fn edit_script_tests() {
let applet: Vec<char> = "applet".chars().collect();
let pineapple: Vec<char> = "pineapple".chars().collect();
let aplpet: Vec<char> = "aplpet".chars().collect();
let levenshtein = Levenshtein::new(1, 1, 1);
assert_eq!(
hashset![vec![
IndexedOperation::new(Delete(1), 0, 0),
IndexedOperation::new(Delete(1), 1, 0),
IndexedOperation::new(Delete(1), 2, 0),
IndexedOperation::new(Delete(1), 3, 0),
IndexedOperation::new(Match, 4, 0),
IndexedOperation::new(Match, 5, 1),
IndexedOperation::new(Match, 6, 2),
IndexedOperation::new(Match, 7, 3),
IndexedOperation::new(Match, 8, 4),
IndexedOperation::new(Insert(1), 9, 5),
],],
levenshtein.align(&pineapple, &applet).edit_scripts()
);
assert_eq!(
hashset![
vec![
IndexedOperation::new(Match, 0, 0),
IndexedOperation::new(Match, 1, 1),
IndexedOperation::new(Substitute(1), 2, 2),
IndexedOperation::new(Substitute(1), 3, 3),
IndexedOperation::new(Match, 4, 4),
IndexedOperation::new(Match, 5, 5),
],
vec![
IndexedOperation::new(Match, 0, 0),
IndexedOperation::new(Match, 1, 1),
IndexedOperation::new(Delete(1), 2, 2),
IndexedOperation::new(Match, 3, 2),
IndexedOperation::new(Insert(1), 4, 3),
IndexedOperation::new(Match, 4, 4),
IndexedOperation::new(Match, 5, 5),
],
vec![
IndexedOperation::new(Match, 0, 0),
IndexedOperation::new(Delete(1), 1, 1),
IndexedOperation::new(Match, 2, 1),
IndexedOperation::new(Match, 3, 2),
IndexedOperation::new(Insert(1), 4, 3),
IndexedOperation::new(Match, 4, 4),
IndexedOperation::new(Match, 5, 5),
],
vec![
IndexedOperation::new(Match, 0, 0),
IndexedOperation::new(Match, 1, 1),
IndexedOperation::new(Insert(1), 2, 2),
IndexedOperation::new(Match, 2, 3),
IndexedOperation::new(Delete(1), 3, 4),
IndexedOperation::new(Match, 4, 4),
IndexedOperation::new(Match, 5, 5),
],
],
levenshtein.align(&applet, &aplpet).edit_scripts()
);
assert_eq!(
hashset![vec![
IndexedOperation::new(Match, 0, 0),
IndexedOperation::new(Match, 1, 1),
IndexedOperation::new(Match, 2, 2),
IndexedOperation::new(Match, 3, 3),
IndexedOperation::new(Match, 4, 4),
IndexedOperation::new(Match, 5, 5),
]],
levenshtein.align(&applet, &applet).edit_scripts()
);
}
#[test]
fn align_empty_test() {
let empty: &[char] = &[];
let non_empty: Vec<char> = "hello".chars().collect();
let levenshtein = Levenshtein::new(1, 1, 1);
assert_eq!(levenshtein.align(empty, empty).distance(), 0);
assert_eq!(levenshtein.align(non_empty.as_slice(), empty).distance(), 5);
assert_eq!(levenshtein.align(empty, non_empty.as_slice()).distance(), 5);
}
}