dam_lev/
lib.rs

1use std::collections::HashMap;
2use std::default::Default;
3use std::rc::Rc;
4
5/// Represents a way of mutating a sequence.
6#[derive(Clone, Copy, PartialEq, Eq, Debug)]
7pub enum Mutation {
8    /// Transpose the item at the specified index with its successor.
9    Transposition(usize),
10    /// Delete the item at the specified index.
11    Deletion(usize),
12    /// Insert at the first index the item from the other sequence at the second index.
13    Insertion(usize, usize),
14    /// Substitute for the item at the first index the item from the other sequence at the second index.
15    Substitution(usize, usize),
16}
17
18/// Determine a minimal sequence of mutations that will transform one sequence into another.
19///
20/// # Arguments
21///
22/// * `seq1` - Original sequence.
23/// * `seq2` - Mutated sequence.
24///
25/// # Returns
26///
27/// Returns the sequence of mutations.
28///
29/// # Examples
30///
31/// ```
32/// use dam_lev::Mutation::*;
33///
34/// let seq1 = ['a', 'b', 'c', 'd', 'e', 'f'];
35/// let seq2 = ['b', 'c', 'e', 'd', 'x', 'y'];
36/// let diffs = dam_lev::diff(&seq1, &seq2);
37/// assert_eq!(diffs, vec![Deletion(0), Transposition(3), Substitution(5, 4), Insertion(6, 5)]);
38/// ```
39pub fn diff<T: Clone + PartialEq>(seq1: &[T], seq2: &[T]) -> Vec<Mutation> {
40    diff_with_compare(seq1, seq2, PartialEq::eq)
41}
42
43/// Does the same as `diff` but the elements of the sequences are compared using a user-provided function.
44///
45/// # Arguments
46///
47/// * `seq1` - Original sequence.
48/// * `seq2` - Mutated sequence.
49/// * `compare` - Function used to compare the elements.
50///
51/// # Returns
52///
53/// Returns the sequence of mutations.
54///
55/// # Examples
56///
57/// ```
58/// let seq1 = [1, 2, 3];
59/// let seq2 = [91, 92, 93];
60/// let compare = |num1: &u32, num2: &u32| num1 % 3 == num2 % 3;
61/// let diffs = dam_lev::diff_with_compare(&seq1, &seq2, compare);
62/// assert_eq!(diffs.len(), 0);
63/// ```
64pub fn diff_with_compare<T: Clone, F>(seq1: &[T], seq2: &[T], compare: F) -> Vec<Mutation>
65where
66    F: FnMut(&T, &T) -> bool,
67{
68    Differ::new(seq1, seq2, compare).diff()
69}
70
71struct Differ<'a, T: Clone> {
72    seq1: &'a [T],
73    seq2: &'a [T],
74    #[allow(clippy::type_complexity)]
75    compare: Box<dyn FnMut(&T, &T) -> bool + 'a>,
76    record: HashMap<(usize, usize), Rc<Vec<Mutation>>>,
77}
78
79impl<'a, T: Clone> Differ<'a, T> {
80    fn new<F>(seq1: &'a [T], seq2: &'a [T], compare: F) -> Self
81    where
82        F: FnMut(&T, &T) -> bool + 'a,
83    {
84        Self {
85            seq1,
86            seq2,
87            compare: Box::new(compare),
88            record: Default::default(),
89        }
90    }
91
92    fn diff(mut self) -> Vec<Mutation> {
93        let result = self.lookup(self.seq1.len(), self.seq2.len());
94        drop(self);
95        Rc::try_unwrap(result).unwrap()
96    }
97
98    fn lookup(&mut self, idx1: usize, idx2: usize) -> Rc<Vec<Mutation>> {
99        if let Some(result) = self.record.get(&(idx1, idx2)) {
100            return result.clone();
101        }
102
103        let result = self.chain(idx1, idx2);
104        self.record.insert((idx1, idx2), result.clone());
105        result
106    }
107
108    fn chain(&mut self, idx1: usize, idx2: usize) -> Rc<Vec<Mutation>> {
109        if idx1 == 0 && idx2 == 0 {
110            return Default::default();
111        }
112
113        let mut scorer: Scorer = Default::default();
114
115        let spots_differ: bool;
116        if idx1 > 0 && idx2 > 0 {
117            spots_differ = !(self.compare)(&self.seq1[idx1 - 1], &self.seq2[idx2 - 1]);
118
119            if spots_differ
120                && idx1 >= 2
121                && idx2 >= 2
122                && (self.compare)(&self.seq1[idx1 - 1], &self.seq2[idx2 - 2])
123                && (self.compare)(&self.seq1[idx1 - 2], &self.seq2[idx2 - 1])
124            {
125                self.update_scorer(&mut scorer, idx1 - 2, idx2 - 2, Mutation::Transposition(idx1 - 2));
126            }
127        } else {
128            spots_differ = true;
129        }
130
131        if spots_differ {
132            if idx1 > 0 {
133                self.update_scorer(&mut scorer, idx1 - 1, idx2, Mutation::Deletion(idx1 - 1));
134            }
135
136            if idx2 > 0 {
137                self.update_scorer(&mut scorer, idx1, idx2 - 1, Mutation::Insertion(idx1, idx2 - 1));
138            }
139        }
140
141        if idx1 > 0 && idx2 > 0 {
142            let candidate = self.lookup(idx1 - 1, idx2 - 1);
143            if spots_differ {
144                if scorer.prefers(candidate.len() + 1) {
145                    let mut candidate = (*candidate).clone();
146                    candidate.push(Mutation::Substitution(idx1 - 1, idx2 - 1));
147                    scorer.update(candidate.into());
148                }
149            } else if scorer.prefers(candidate.len()) {
150                scorer.update(candidate);
151            }
152        }
153
154        scorer.champion()
155    }
156
157    fn update_scorer(&mut self, scorer: &mut Scorer, idx1: usize, idx2: usize, mutation: Mutation) {
158        let candidate = self.lookup(idx1, idx2);
159        if scorer.prefers(candidate.len() + 1) {
160            let mut candidate = (*candidate).clone();
161            candidate.push(mutation);
162            scorer.update(candidate.into());
163        }
164    }
165}
166
167#[derive(Default)]
168struct Scorer(Option<Rc<Vec<Mutation>>>);
169
170impl Scorer {
171    fn prefers(&self, length: usize) -> bool {
172        match self.0 {
173            Some(ref current) => length < current.len(),
174            None => true,
175        }
176    }
177
178    fn update(&mut self, candidate: Rc<Vec<Mutation>>) {
179        self.0 = Some(candidate);
180    }
181
182    fn champion(self) -> Rc<Vec<Mutation>> {
183        self.0.unwrap_or_default()
184    }
185}