use std::cmp::min;
use crate::util::DistanceMatrix;
#[derive(Clone, PartialEq)]
pub enum Edit<T: PartialEq> {
Delete(usize),
Insert(usize, T),
Substitute(usize, T),
}
pub fn apply_edits<T, U>(source: U, edits: &Vec<Edit<T>>) -> Vec<T>
where
T: Clone + PartialEq,
U: AsRef<[T]>,
{
let source = source.as_ref();
let mut target_constructor: Vec<Option<T>> =
source.iter().map(|item| Some(item.clone())).collect();
let mut inserts = Vec::<Edit<T>>::with_capacity(source.len());
for i in (0..edits.len()).rev() {
match &edits[i] {
Edit::Substitute(idx, val) => target_constructor[idx - 1] = Some(val.clone()),
Edit::Delete(idx) => target_constructor[idx - 1] = None,
Edit::Insert(idx, val) => inserts.push(Edit::Insert(*idx, val.clone())),
}
}
for i in &inserts {
if let Edit::Insert(idx, val) = i {
target_constructor.insert(*idx, Some(val.clone()));
}
}
let mut target = Vec::<T>::new();
for i in &target_constructor {
match i {
Some(val) => target.push(val.clone()),
None => (),
}
}
target
}
pub fn generate_edits<T, U>(
source: U,
target: U,
distances: &DistanceMatrix,
) -> Result<Vec<Edit<T>>, &'static str>
where
T: Clone + PartialEq,
U: AsRef<[T]>,
{
let source = source.as_ref();
let target = target.as_ref();
let mut source_idx = source.len();
let mut target_idx = target.len();
assert_eq!(source_idx + 1, distances.len());
assert_eq!(target_idx + 1, distances[0].len());
let mut edits = Vec::<Edit<T>>::new();
while source_idx != 0 || target_idx != 0 {
let current_item = distances[source_idx][target_idx];
let substitute = if source_idx > 0 && target_idx > 0 {
Some(distances[source_idx - 1][target_idx - 1])
} else {
None
};
let delete = if source_idx > 0 {
Some(distances[source_idx - 1][target_idx])
} else {
None
};
let insert = if target_idx > 0 {
Some(distances[source_idx][target_idx - 1])
} else {
None
};
let min = min(min(insert, delete), substitute);
if min == Some(current_item) {
source_idx = source_idx - 1;
target_idx = target_idx - 1;
} else if min == Some(current_item - 1) {
if min == insert {
edits.push(Edit::Insert(source_idx, target[target_idx - 1].clone()));
target_idx = target_idx - 1;
} else if min == delete {
edits.push(Edit::Delete(source_idx));
source_idx = source_idx - 1;
} else if min == substitute {
edits.push(Edit::Substitute(source_idx, target[target_idx - 1].clone()));
source_idx = source_idx - 1;
target_idx = target_idx - 1;
} else {
return Err("Invalid distance matrix");
};
} else {
return Err("Invalid distance matrix");
};
}
Ok(edits)
}
#[cfg(test)]
mod tests {
use crate::edit::*;
fn do_vecs_match<T: PartialEq>(a: &Vec<T>, b: &Vec<T>) -> bool {
let matching = a.iter().zip(b.iter()).filter(|&(a, b)| a == b).count();
matching == a.len() && matching == b.len()
}
#[test]
fn edit_list_is_correct() {
let s1 = "SATURDAY";
let s2 = "SUNDAY";
let distances = vec![
vec![0, 1, 2, 3, 4, 5, 6],
vec![1, 0, 1, 2, 3, 4, 5],
vec![2, 1, 1, 2, 3, 3, 4],
vec![3, 2, 2, 2, 3, 4, 4],
vec![4, 3, 2, 3, 3, 4, 5],
vec![5, 4, 3, 3, 4, 4, 5],
vec![6, 5, 4, 4, 3, 4, 5],
vec![7, 6, 5, 5, 4, 3, 4],
vec![8, 7, 6, 6, 5, 4, 3],
];
let expected_edits = vec![
Edit::<u8>::Substitute(5, 78),
Edit::<u8>::Delete(3),
Edit::<u8>::Delete(2),
];
let edits = generate_edits(&s1.as_bytes(), &s2.as_bytes(), &distances).unwrap();
assert_eq!(do_vecs_match(&edits, &expected_edits), true);
}
#[test]
fn edits_are_applied_correctly() {
let s1 = "SATURDAY";
let expected_s2 = "SUNDAY";
let mut edits = vec![
Edit::<u8>::Substitute(5, 78),
Edit::<u8>::Delete(3),
Edit::<u8>::Delete(2),
];
let s2_bytes_vec = apply_edits(s1, &mut edits);
let s2 = match std::str::from_utf8(&s2_bytes_vec) {
Ok(v) => v,
Err(_) => panic!("Not a valid UTF-8 sequence!"),
};
assert_eq!(s2, expected_s2);
}
}