use crate::collections::circularbuffer::CircularBuffer;
use colored::*;
use std::ops::Index;
pub fn find_close_words(dictionary: Vec<String>, word: &str) -> Vec<WordDiff> {
let size_hint = dictionary.len();
let mut comparisons = Vec::with_capacity(size_hint);
for valid_word in dictionary {
let comparison = levenshtein_distance(word, valid_word.as_str()); comparisons.push(comparison);
}
comparisons.sort_by(|a, b| a.distance.cmp(&b.distance));
comparisons
}
fn levenshtein_distance(a: &str, b: &str) -> WordDiff {
let a: Vec<char> = a.chars().collect();
let b: Vec<char> = b.chars().collect();
let m = b.len();
let mut c = CircularBuffer::new(m + 2);
let idx_modify = m + 1;
let idx_subtract = m;
let idx_add = 0;
c.push(WordDiff {
distance: 0,
ops: Vec::with_capacity(std::cmp::max(a.len(), b.len())),
});
for b_j in &b {
let mut cmp = c.clone_to_front(idx_add);
cmp.ops.push(DiffOp::Add(*b_j));
cmp.distance += 1;
}
for a_i in &a {
let mut cmp = c.clone_to_front(idx_subtract);
cmp.ops.push(DiffOp::Subtract(*a_i));
cmp.distance += 1;
for b_j in &b {
let (idx_to_clone, diff, distance_delta) = if a_i == b_j {
(idx_modify, DiffOp::Keep(*a_i), 0)
} else {
let cost_modify = c.index(idx_modify).distance;
let cost_add = c.index(idx_add).distance;
let cost_subtract = c.index(idx_subtract).distance;
if cost_modify <= std::cmp::min(cost_subtract, cost_add) {
(idx_modify, DiffOp::Modify(*a_i, *b_j), 1)
} else if cost_subtract <= std::cmp::min(cost_modify, cost_add) {
(idx_subtract, DiffOp::Subtract(*a_i), 1)
} else {
(idx_add, DiffOp::Add(*b_j), 1)
}
};
let mut cmp = c.clone_to_front(idx_to_clone);
cmp.ops.push(diff);
cmp.distance += distance_delta;
}
}
c.index(0).clone()
}
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub enum DiffOp {
Keep(char),
Add(char),
Subtract(char),
Modify(char, char),
}
impl DiffOp {
#[cfg(test)]
fn invert(&self) -> DiffOp {
match self {
DiffOp::Keep(c) => DiffOp::Keep(*c),
DiffOp::Add(c) => DiffOp::Subtract(*c),
DiffOp::Subtract(c) => DiffOp::Add(*c),
DiffOp::Modify(a, b) => DiffOp::Modify(*b, *a),
}
}
#[cfg(test)]
fn distance(&self) -> usize {
match self {
DiffOp::Keep(_) => 0,
DiffOp::Add(_) => 1,
DiffOp::Subtract(_) => 1,
DiffOp::Modify(_, _) => 1,
}
}
fn left(&self) -> Option<char> {
match self {
DiffOp::Keep(c) => Some(*c),
DiffOp::Add(_) => None,
DiffOp::Subtract(c) => Some(*c),
DiffOp::Modify(c, _) => Some(*c),
}
}
fn right(&self) -> Option<char> {
match self {
DiffOp::Keep(c) => Some(*c),
DiffOp::Add(c) => Some(*c),
DiffOp::Subtract(_) => None,
DiffOp::Modify(_, c) => Some(*c),
}
}
fn colored(&self) -> colored::ColoredString {
match self {
DiffOp::Keep(c) => c.to_string().normal(),
DiffOp::Add(c) => c.to_string().on_green(), DiffOp::Subtract(c) => c.to_string().on_red(),
DiffOp::Modify(_, c) => c.to_string().on_yellow(), }
}
}
#[derive(Debug, Eq, PartialEq)]
pub struct WordDiff {
distance: usize,
ops: Vec<DiffOp>,
}
impl WordDiff {
pub fn left(&self) -> String {
self.ops.iter().filter_map(DiffOp::left).collect()
}
pub fn right(&self) -> String {
self.ops.iter().filter_map(DiffOp::right).collect()
}
pub fn colored(&self) -> String {
let mut s = String::default();
for diff in self.ops.iter() {
s = format!["{}{}", s, diff.colored()];
}
s
}
}
impl Clone for WordDiff {
fn clone(&self) -> Self {
let mut cmp = WordDiff {
distance: self.distance,
ops: Vec::with_capacity(self.ops.capacity()),
};
cmp.ops.clone_from(&self.ops);
cmp
}
fn clone_from(&mut self, source: &Self) {
self.distance = source.distance;
self.ops.clear();
for diff in source.ops.iter() {
self.ops.push(*diff);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
macro_rules! levenshtein_tests {
($( ($name: ident, $a: expr, $b: expr, $i: expr),)+) => {
$(
#[test]
fn $name() {
let mut cmp = WordDiff {
distance: 0,
ops: $i,
};
for diff in cmp.ops.iter() {
cmp.distance += diff.distance();
}
assert_eq![levenshtein_distance($a, $b), cmp];
let mut inverse_diffs = Vec::new();
for diff in cmp.ops.iter() {
inverse_diffs.push(diff.invert());
}
let cmp = WordDiff {
distance: cmp.distance,
ops: inverse_diffs,
};
assert_eq![levenshtein_distance($b, $a), cmp];
}
)+
};
}
levenshtein_tests![
(case_1, "", "", Vec::<DiffOp>::new()),
(case_2, "a", "", vec![DiffOp::Subtract('a')]),
(case_3, "a", "a", vec![DiffOp::Keep('a')]),
(case_4, "a", "b", vec![DiffOp::Modify('a', 'b')]),
(
case_5,
"aa",
"a",
vec![DiffOp::Subtract('a'), DiffOp::Keep('a')]
),
(
case_6,
"aa",
"ab",
vec![DiffOp::Keep('a'), DiffOp::Modify('a', 'b')]
),
(
case_7,
"abb",
"acbb",
vec![
DiffOp::Keep('a'),
DiffOp::Add('c'),
DiffOp::Keep('b'),
DiffOp::Keep('b'),
]
),
(
case_8,
"aabb",
"abb",
vec![
DiffOp::Subtract('a'),
DiffOp::Keep('a'),
DiffOp::Keep('b'),
DiffOp::Keep('b'),
]
),
(
case_9,
"james",
"laura",
vec![
DiffOp::Modify('j', 'l'),
DiffOp::Keep('a'),
DiffOp::Modify('m', 'u'),
DiffOp::Modify('e', 'r'),
DiffOp::Modify('s', 'a'),
]
),
(
case_10,
"ab12345e",
"a12345de",
vec![
DiffOp::Keep('a'),
DiffOp::Subtract('b'),
DiffOp::Keep('1'),
DiffOp::Keep('2'),
DiffOp::Keep('3'),
DiffOp::Keep('4'),
DiffOp::Keep('5'),
DiffOp::Add('d'),
DiffOp::Keep('e'),
]
),
];
#[test]
fn find_close_words_test() {
let dictionary = vec!["james".to_string(), "laura".to_string(), "mint".to_string()];
let word = "janes";
let result = find_close_words(dictionary, &word);
assert_eq![result[0].right(), "james"];
assert_eq![result[1].right(), "laura"];
assert_eq![result[2].right(), "mint"];
assert_eq![result.len(), 3];
}
}