Skip to main content

heckel_diff/
lib.rs

1//! Calculates the difference between two lists using Paul Heckel's algorithm from 1978.
2//! (<https://dl.acm.org/doi/10.1145/359460.359467>)
3
4#[cfg(test)]
5mod test;
6
7use std::collections::HashMap;
8use std::fmt::Debug;
9use std::hash::Hash;
10
11#[derive(Debug, Eq, PartialEq)]
12pub enum Change<T> {
13    /// Delete value with the old position.
14    Delete(T, usize),
15    /// Insert value with the new position.
16    Insert(T, usize),
17    /// Unchanged value with the old and new positions.
18    Unchanged(T, usize, usize),
19}
20
21#[derive(Debug)]
22struct Line<'a, T>
23where
24    T: Eq + Hash,
25{
26    symbol_key: Option<&'a T>,
27    other_position: Option<usize>,
28}
29
30#[derive(Copy, Clone, Debug)]
31struct Symbol {
32    new_copies: usize,
33    old_copies: usize,
34    old_position: usize,
35}
36
37impl<'a, T> Line<'a, T>
38where
39    T: Eq + Hash,
40{
41    fn from_key(key: &'a T) -> Self {
42        Self {
43            symbol_key: Some(key),
44            other_position: None,
45        }
46    }
47
48    fn from_position(position: usize) -> Self {
49        Self {
50            symbol_key: None,
51            other_position: Some(position),
52        }
53    }
54}
55
56impl Symbol {
57    fn new() -> Self {
58        Self {
59            new_copies: 0,
60            old_copies: 0,
61            old_position: 0,
62        }
63    }
64}
65
66fn connect_neighbours_ascending<T>(oa: &mut [Line<T>], na: &mut [Line<T>])
67where
68    T: Eq + Hash,
69{
70    for i in 0..na.len() - 1 {
71        if let Some(p) = na[i].other_position
72            && p < oa.len() - 1
73            && same_symbol(&na[i + 1], &oa[p + 1])
74        {
75            oa[p + 1] = Line::from_position(i + 1);
76            na[i + 1] = Line::from_position(p + 1);
77        }
78    }
79}
80
81fn connect_neighbours_descending<T>(oa: &mut [Line<T>], na: &mut [Line<T>])
82where
83    T: Eq + Hash,
84{
85    for i in (1..na.len()).rev() {
86        if let Some(p) = na[i].other_position
87            && p > 0
88            && same_symbol(&na[i - 1], &oa[p - 1])
89        {
90            oa[p - 1] = Line::from_position(i - 1);
91            na[i - 1] = Line::from_position(p - 1);
92        }
93    }
94}
95
96/// The difference between the two slices is calculated. The changes are in the order that
97/// constructs the new version from the old version.
98/// ```rust
99/// # use heckel_diff::{diff, new_version, old_version};
100/// # use heckel_diff::Change::{Delete, Insert, Unchanged};
101/// # fn main() {
102/// let old = vec!["MUCH", "WRITING", "IS", "LIKE", "SNOW", ",",
103///               "A", "MASS", "OF", "LONG", "WORDS", "AND",
104///               "PHRASES", "FALLS", "UPON", "THE", "RELEVANT",
105///               "FACTS", "COVERING", "UP", "THE", "DETAILS", "."];
106///let new = vec!["A", "MASS", "OF", "LATIN", "WORDS", "FALLS",
107///               "UPON", "THE", "RELEVANT", "FACTS", "LIKE", "SOFT",
108///               "SNOW", ",", "COVERING", "UP", "THE", "DETAILS", "."];
109///let answer = vec![Delete(&"MUCH", 0), Delete(&"WRITING", 1), Delete(&"IS", 2),
110///                  Unchanged(&"A",6, 0), Unchanged(&"MASS", 7, 1), Unchanged(&"OF", 8, 2),
111///                  Insert(&"LATIN", 3), Delete(&"LONG", 9), Unchanged(&"WORDS", 10, 4),
112///                  Delete(&"AND", 11), Delete(&"PHRASES", 12),Unchanged(&"FALLS", 13, 5),
113///                  Unchanged(&"UPON", 14, 6), Unchanged(&"THE", 15, 7),
114///                  Unchanged(&"RELEVANT", 16, 8), Unchanged(&"FACTS", 17, 9),
115///                  Unchanged(&"LIKE", 3, 10), Insert(&"SOFT", 11), Unchanged(&"SNOW", 4, 12),
116///                  Unchanged(&",", 5, 13), Unchanged(&"COVERING", 18, 14),
117///                  Unchanged(&"UP", 19, 15), Unchanged(&"THE", 20, 16),
118///                  Unchanged(&"DETAILS", 21, 17), Unchanged(&".", 22, 18)];
119///let calculated = diff(&old, &new);
120///let new_version: Vec<&str> =
121///    new_version(calculated.as_slice()).into_iter().copied().collect();
122///let old_version: Vec<&str> =
123///    old_version(calculated.as_slice()).into_iter().copied().collect();
124///
125///assert_eq!(answer, calculated);
126///assert_eq!(new, new_version);
127///assert_eq!(old, old_version);
128/// # }
129/// ```
130pub fn diff<'a, T>(old: &'a [T], new: &'a [T]) -> Vec<Change<&'a T>>
131where
132    T: Eq + Hash,
133{
134    let mut na: Vec<Line<T>> = new.iter().map(|v| Line::from_key(v)).collect();
135    let mut oa: Vec<Line<T>> = old.iter().map(|v| Line::from_key(v)).collect();
136    let mut symbol_table: HashMap<&T, Symbol> = HashMap::new();
137
138    new.iter()
139        .for_each(|v| symbol_table.entry(v).or_insert(Symbol::new()).new_copies += 1);
140    old.iter().enumerate().for_each(|(i, v)| {
141        let e = symbol_table.entry(v).or_insert(Symbol::new());
142        e.old_copies += 1;
143        e.old_position = i;
144    });
145
146    connect_ends(&mut oa, &mut na);
147    make_connections(&mut oa, &mut na, &symbol_table);
148    connect_neighbours_ascending(&mut oa, &mut na);
149    connect_neighbours_descending(&mut oa, &mut na);
150    encode_changes(old, new, &oa, &na)
151}
152
153fn connect_ends<T>(oa: &mut Vec<Line<T>>, na: &mut Vec<Line<T>>)
154where
155    T: Eq + Hash,
156{
157    na.insert(0, Line::from_position(0));
158    oa.insert(0, Line::from_position(0));
159    na.push(Line::from_position(oa.len()));
160    oa.push(Line::from_position(na.len() - 1));
161}
162
163fn encode_changes<'a, T>(
164    old: &'a [T],
165    new: &'a [T],
166    oa: &[Line<T>],
167    na: &[Line<T>],
168) -> Vec<Change<&'a T>>
169where
170    T: Eq + Hash,
171{
172    let mut old_position = 1;
173    let mut result = vec![];
174
175    for i in 1..na.len() - 1 {
176        if na[i].other_position.is_none() {
177            result.push(Change::Insert(&new[i - 1], i - 1))
178        } else {
179            let other_position = na[i].other_position.unwrap();
180
181            for j in old_position..other_position {
182                if oa[j].other_position.is_none() {
183                    result.push(Change::Delete(&old[j - 1], j - 1));
184                }
185
186                old_position += 1;
187            }
188
189            result.push(Change::Unchanged(&new[i - 1], other_position - 1, i - 1));
190        }
191    }
192
193    for i in old_position..old.len() {
194        if oa[i].other_position.is_none() {
195            result.push(Change::Delete(&old[i - 1], i - 1));
196        }
197    }
198
199    result
200}
201
202fn make_connections<T>(oa: &mut [Line<T>], na: &mut [Line<T>], symbol_table: &HashMap<&T, Symbol>)
203where
204    T: Eq + Hash,
205{
206    for i in 1..na.len() - 1 {
207        if let Some(s) = na[i]
208            .symbol_key
209            .and_then(|k| symbol_table.get(k))
210            .filter(|s| s.old_copies == 1 && s.new_copies == 1)
211        {
212            na[i] = Line::from_position(s.old_position + 1);
213            oa[s.old_position + 1] = Line::from_position(i);
214        }
215    }
216}
217
218fn new_position<T>(change: &Change<&T>) -> usize {
219    match change {
220        Change::Delete(_, _) => usize::MAX,
221        Change::Insert(_, p) => *p,
222        Change::Unchanged(_, _, p) => *p,
223    }
224}
225
226/// Produces the new version of the list from the calculated changes.
227pub fn new_version<'a, T>(changes: &'a [Change<&T>]) -> Vec<&'a T> {
228    let mut filtered: Vec<&Change<&T>> = changes
229        .iter()
230        .flat_map(|c| match c {
231            Change::Delete(_, _) => None,
232            Change::Insert(_, _) => Some(c),
233            Change::Unchanged(_, _, _) => Some(c),
234        })
235        .collect();
236
237    filtered.sort_by_key(|c| new_position(c));
238
239    changes
240        .iter()
241        .flat_map(|c| match c {
242            Change::Delete(_, _) => None,
243            Change::Insert(v, _) => Some(*v),
244            Change::Unchanged(v, _, _) => Some(*v),
245        })
246        .collect()
247}
248
249fn old_position<T>(change: &Change<&T>) -> usize {
250    match change {
251        Change::Delete(_, p) => *p,
252        Change::Insert(_, _) => usize::MAX,
253        Change::Unchanged(_, p, _) => *p,
254    }
255}
256
257/// Produces the old version of the list from the calculated changes.
258pub fn old_version<'a, T>(changes: &'a [Change<&T>]) -> Vec<&'a T> {
259    let mut filtered: Vec<&Change<&T>> = changes
260        .iter()
261        .flat_map(|c| match c {
262            Change::Delete(_, _) => Some(c),
263            Change::Insert(_, _) => None,
264            Change::Unchanged(_, _, _) => Some(c),
265        })
266        .collect();
267
268    filtered.sort_by_key(|c| old_position(c));
269
270    filtered
271        .iter()
272        .flat_map(|c| match c {
273            Change::Delete(v, _) => Some(*v),
274            Change::Insert(_, _) => None,
275            Change::Unchanged(v, _, _) => Some(*v),
276        })
277        .collect()
278}
279
280fn same_symbol<T>(line1: &Line<T>, line2: &Line<T>) -> bool
281where
282    T: Eq + Hash,
283{
284    line1.symbol_key.is_some()
285        && line2.symbol_key.is_some()
286        && line1.symbol_key.unwrap() == line2.symbol_key.unwrap()
287}