1#[cfg(test)]
13#[macro_use]
14extern crate proptest;
15
16use std::collections::hash_map::Entry;
17use std::collections::HashMap;
18use std::hash::{Hash, Hasher};
19
20mod lis;
21
22#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
23pub enum LineDiff {
24 New(usize),
27 Delete(usize),
29 Keep(usize, usize),
32}
33
34#[derive(Eq, PartialOrd)]
38struct WithIndex<T> {
39 idx: usize,
40 elem: T,
41}
42
43impl<T: PartialEq> PartialEq for WithIndex<T> {
44 fn eq(&self, other: &WithIndex<T>) -> bool {
45 self.elem.eq(&other.elem)
46 }
47}
48
49impl<T: Hash> Hash for WithIndex<T> {
50 fn hash<H: Hasher>(&self, state: &mut H) {
51 self.elem.hash(state);
52 }
53}
54
55fn line_counts<T: Hash + Eq>(lines: &[T]) -> HashMap<WithIndex<&T>, usize> {
58 let mut line_counts = HashMap::new();
59
60 for (line_idx, line) in lines.iter().enumerate() {
61 let elem = WithIndex {
62 elem: line,
63 idx: line_idx,
64 };
65 *line_counts.entry(elem).or_insert(0) += 1;
66 }
67 line_counts
68}
69
70fn prefix_len<T: Eq>(a: &[T], b: &[T]) -> usize {
73 a.iter().zip(b.iter()).take_while(|(x, y)| x == y).count()
74}
75
76fn suffix_len<T: Eq>(a: &[T], b: &[T]) -> usize {
79 a.iter()
80 .rev()
81 .zip(b.iter().rev())
82 .take_while(|(x, y)| x == y)
83 .count()
84}
85
86fn match_ends<'a, T: Eq>(a: &'a [T], b: &'a [T]) -> (usize, &'a [T], &'a [T], usize) {
91 let pref_len = prefix_len(a, b);
92 let suff_len = suffix_len(&a[pref_len..], &b[pref_len..]);
93 let a_mid = &a[pref_len..(a.len() - suff_len)];
94 let b_mid = &b[pref_len..(b.len() - suff_len)];
95 (pref_len, a_mid, b_mid, suff_len)
96}
97
98fn diff_ends<T: Eq>(a: &[T], a_offset: usize, b: &[T], b_offset: usize, diff: &mut Vec<LineDiff>) {
105 let (pref_len, a_mid, b_mid, suff_len) = match_ends(a, b);
106 for i in 0..pref_len {
107 diff.push(LineDiff::Keep(a_offset + i, b_offset + i));
108 }
109 for i in 0..a_mid.len() {
110 diff.push(LineDiff::Delete(a_offset + pref_len + i));
111 }
112 for i in 0..b_mid.len() {
113 diff.push(LineDiff::New(b_offset + pref_len + i));
114 }
115 for i in 0..suff_len {
116 diff.push(LineDiff::Keep(
117 a_offset + pref_len + a_mid.len() + i,
118 b_offset + pref_len + b_mid.len() + i,
119 ));
120 }
121}
122
123pub fn diff<T: Hash + Eq>(a: &[T], b: &[T]) -> Vec<LineDiff> {
124 let (pref_len, a_mid, b_mid, suff_len) = match_ends(a, b);
125 let a_line_counts = line_counts(a_mid);
126 let mut b_line_counts = line_counts(b_mid);
127 let a_unique = a_line_counts
128 .into_iter()
129 .filter(|(_, count)| *count == 1)
130 .map(|(line, _)| line);
131
132 let mut both_unique = a_unique
138 .filter_map(|a_line| {
139 let a_idx = a_line.idx;
142 if let Entry::Occupied(entry) = b_line_counts.entry(a_line) {
143 if entry.get() == &1 {
144 return Some((entry.key().idx, a_idx));
145 }
146 }
147 None
148 })
149 .collect::<Vec<(usize, usize)>>();
150 both_unique.sort_unstable_by_key(|(_b_idx, a_idx)| *a_idx);
151
152 let mut ret = Vec::with_capacity(a.len().max(b.len()));
153 for i in 0..pref_len {
154 ret.push(LineDiff::Keep(i, i));
155 }
156
157 let lis = lis::longest_increasing_subsequence(&both_unique);
158 let mut prev_b_idx = 0;
159 let mut prev_a_idx = 0;
160 for i in lis {
161 let (next_b_idx, next_a_idx) = both_unique[i];
162 let a_chunk = &a_mid[prev_a_idx..next_a_idx];
163 let b_chunk = &b_mid[prev_b_idx..next_b_idx];
164 diff_ends(
165 a_chunk,
166 pref_len + prev_a_idx,
167 b_chunk,
168 pref_len + prev_b_idx,
169 &mut ret,
170 );
171 prev_b_idx = next_b_idx;
172 prev_a_idx = next_a_idx;
173 }
174
175 let a_chunk = &a_mid[prev_a_idx..];
176 let b_chunk = &b_mid[prev_b_idx..];
177 diff_ends(
178 a_chunk,
179 pref_len + prev_a_idx,
180 b_chunk,
181 pref_len + prev_b_idx,
182 &mut ret,
183 );
184
185 for i in 0..suff_len {
186 ret.push(LineDiff::Keep(
187 a.len() - suff_len + i,
188 b.len() - suff_len + i,
189 ));
190 }
191
192 ret
193}
194
195#[cfg(test)]
196mod tests {
197 use proptest::prelude::*;
198 use std::fmt::Debug;
199
200 use super::LineDiff::*;
201 use super::*;
202
203 macro_rules! test_diff_ends {
204 ($name:ident, $a:expr, $b:expr, $expected:expr) => {
205 #[test]
206 fn $name() {
207 let a: &[_] = &$a[..];
208 let b: &[_] = &$b[..];
209 let expected: &[_] = &$expected[..];
210 let mut diff = Vec::new();
211 diff_ends(a, 0, b, 0, &mut diff);
212 assert_eq!(diff.as_slice(), expected);
213 }
214 };
215 }
216
217 test_diff_ends!(
218 diff_ends_all,
219 [1, 2, 3],
220 [1, 2, 3],
221 [Keep(0, 0), Keep(1, 1), Keep(2, 2),]
222 );
223 test_diff_ends!(
224 diff_ends_shorter_first,
225 [1, 1],
226 [1, 1, 1],
227 [Keep(0, 0), Keep(1, 1), New(2),]
228 );
229 test_diff_ends!(
230 diff_ends_longer_first,
231 [1, 1, 1],
232 [1, 1],
233 [Keep(0, 0), Keep(1, 1), Delete(2),]
234 );
235
236 fn assert_valid<T: Debug + Eq>(a: &[T], b: &[T], diff: &[LineDiff]) {
241 let input_indices = diff
242 .iter()
243 .filter_map(|line| match *line {
244 New(_) => None,
245 Keep(i, _) => Some(i),
246 Delete(i) => Some(i),
247 })
248 .collect::<Vec<_>>();
249 assert_eq!(input_indices, (0..a.len()).into_iter().collect::<Vec<_>>());
250
251 let output_indices = diff
252 .iter()
253 .filter_map(|line| match *line {
254 New(i) => Some(i),
255 Keep(_, i) => Some(i),
256 Delete(_) => None,
257 })
258 .collect::<Vec<_>>();
259 assert_eq!(output_indices, (0..b.len()).into_iter().collect::<Vec<_>>());
260
261 for line in diff {
262 if let &Keep(i, j) = line {
263 assert_eq!(a[i], b[j]);
264 }
265 }
266 }
267
268 fn file() -> BoxedStrategy<Vec<i32>> {
272 prop::collection::vec(
273 prop::strategy::Union::new_weighted(vec![(10, 0..10), (1, 0..1000)]),
274 1..100,
275 )
276 .boxed()
277 }
278
279 fn two_files() -> BoxedStrategy<(Vec<i32>, Vec<i32>)> {
282 file()
283 .prop_perturb(|f, mut rng| {
284 let mut g = f.clone();
285 for _ in 0..rng.gen_range(0, 20) {
287 let g_len = g.len();
288 match rng.choose(&[1, 2, 3]).unwrap() {
289 1 => {
290 if !g.is_empty() {
292 g.remove(rng.gen_range(0, g_len));
293 }
294 }
295 2 => {
296 g.insert(rng.gen_range(0, g_len + 1), rng.gen_range(0, 10));
298 }
299 3 => {
300 if !g.is_empty() {
302 g.swap(rng.gen_range(0, g_len), rng.gen_range(0, g_len));
303 }
304 }
305 _ => unreachable!(),
306 }
307 }
308 (f, g)
309 })
310 .boxed()
311 }
312
313 proptest! {
314 #[test]
315 fn test_valid_diff((f, g) in two_files()) {
316 let d = diff(&f, &g);
317 assert_valid(&f, &g, &d);
318 }
319 }
320}