1use std::collections::HashMap;
2use std::iter::zip;
3use std::ops::Range;
4
5#[derive(Clone, Copy, Debug, Eq, PartialEq)]
6pub enum Side {
7 Lhs,
8 Rhs,
9}
10
11#[derive(Clone, Debug, Eq, PartialEq)]
12pub enum DiffItem {
13 Match { lhs: Range<usize>, rhs: Range<usize> },
14 Mutation { lhs: Range<usize>, rhs: Range<usize> },
15}
16
17use DiffItem::*;
18
19impl DiffItem {
20 pub fn lhs(&self) -> Range<usize> {
21 match self {
22 Match { lhs, .. } => lhs.clone(),
23 Mutation { lhs, .. } => lhs.clone(),
24 }
25 }
26
27 pub fn rhs(&self) -> Range<usize> {
28 match self {
29 Match { rhs, .. } => rhs.clone(),
30 Mutation { rhs, .. } => rhs.clone(),
31 }
32 }
33
34 pub fn side(&self, side: Side) -> Range<usize> {
35 match side {
36 Side::Lhs => self.lhs(),
37 Side::Rhs => self.rhs(),
38 }
39 }
40}
41
42#[derive(Clone, Debug, Eq, PartialEq)]
43pub struct Hunk {
44 pub diffs: Vec<DiffItem>,
45}
46
47impl Hunk {
48 pub fn build(context: usize, diffs: &[DiffItem]) -> Vec<Hunk> {
49 let mut res = vec![Hunk { diffs: Vec::new() }];
50
51 for d in diffs {
52 res.last_mut().unwrap().diffs.push(d.clone());
53
54 if matches!(d, Match { lhs, .. } if lhs.len() > 2 * context) {
55 res.push(Hunk { diffs: vec![d.clone()] });
56 }
57 }
58
59 res
60 .into_iter()
61 .filter_map(|mut hunk| {
62 if hunk.diffs.len() <= 1 && matches!(hunk.diffs[0], Match { .. }) {
63 return None;
64 }
65
66 if context == 0 {
67 hunk.diffs.retain(|d| matches!(d, Mutation { .. }));
68 return Some(hunk);
69 }
70
71 if let Some(Match { lhs, rhs }) = hunk.diffs.first_mut() {
72 if lhs.len() > context {
73 lhs.start = lhs.end - context;
74 rhs.start = rhs.end - context;
75 }
76 }
77 if let Some(Match { lhs, rhs }) = hunk.diffs.last_mut() {
78 if lhs.len() > context {
79 lhs.end = lhs.start + context;
80 rhs.end = rhs.start + context;
81 }
82 }
83 Some(hunk)
84 })
85 .collect()
86 }
87
88 pub fn side(&self, side: Side) -> Range<usize> {
89 Range {
90 start: self.diffs.first().map_or(0, |d| d.side(side).start),
91 end: self.diffs.last().map_or(0, |d| d.side(side).end),
92 }
93 }
94
95 pub fn lhs(&self) -> Range<usize> {
96 self.side(Side::Lhs)
97 }
98
99 pub fn rhs(&self) -> Range<usize> {
100 self.side(Side::Rhs)
101 }
102}
103
104#[derive(Clone, Debug, Default, Eq, PartialEq)]
105struct Diffs {
106 vec: Vec<DiffItem>,
107}
108
109impl Diffs {
110 fn add_match(&mut self, len: usize) {
111 if len == 0 {
112 return;
113 }
114 if let Some(Match { lhs, rhs }) = self.vec.last_mut() {
115 lhs.end += len;
116 rhs.end += len;
117 } else {
118 self.vec.push(Match {
119 lhs: Range {
120 start: self.lhs_pos(),
121 end: self.lhs_pos() + len,
122 },
123 rhs: Range {
124 start: self.rhs_pos(),
125 end: self.rhs_pos() + len,
126 },
127 });
128 }
129 }
130
131 fn add_mutation(&mut self, lhs: usize, rhs: usize) {
132 if lhs == 0 && rhs == 0 {
133 return;
134 }
135 if let Some(Mutation { lhs: l, rhs: r }) = self.vec.last_mut() {
136 l.end += lhs;
137 r.end += rhs;
138 } else {
139 self.vec.push(Mutation {
140 lhs: Range {
141 start: self.lhs_pos(),
142 end: self.lhs_pos() + lhs,
143 },
144 rhs: Range {
145 start: self.rhs_pos(),
146 end: self.rhs_pos() + rhs,
147 },
148 });
149 }
150 }
151
152 fn lhs_pos(&self) -> usize {
153 self.vec.last().map_or(0, |d| d.lhs().end)
154 }
155
156 fn rhs_pos(&self) -> usize {
157 self.vec.last().map_or(0, |d| d.rhs().end)
158 }
159}
160
161pub fn diff(lhs: &[&str], rhs: &[&str]) -> Vec<DiffItem> {
162 let mut d = Diffs::default();
163 accumulate_partitions(&mut d, lhs, rhs);
164 d.vec
165}
166
167pub fn tokenize_lines<'a>(lines: &[&'a str]) -> Vec<&'a str> {
168 let re = regex::Regex::new(r"\w+|\s+").unwrap();
169 let mut v = Vec::new();
170 for &line in lines {
171 let mut last_pos = 0;
172 for m in re.find_iter(line) {
173 if m.start() > last_pos {
174 v.push(&line[last_pos..m.start()]);
175 }
176 last_pos = m.end();
177 v.push(m.as_str());
178 }
179 if last_pos < line.len() {
180 v.push(&line[last_pos..]);
181 }
182 v.push("\n");
183 }
184 v.pop();
185 v
186}
187
188fn accumulate_diffs(diffs: &mut Diffs, lhs: &[&str], rhs: &[&str]) {
198 let leading = leading_match_len(lhs, rhs);
199 diffs.add_match(leading);
200 if leading == lhs.len() && leading == rhs.len() {
201 return;
202 }
203
204 let trailing =
205 trailing_match_len(&lhs[leading..lhs.len()], &rhs[leading..rhs.len()]);
206 accumulate_partitions(
207 diffs,
208 &lhs[leading..lhs.len() - trailing],
209 &rhs[leading..rhs.len() - trailing],
210 );
211 diffs.add_match(trailing);
212}
213
214fn leading_match_len(lhs: &[&str], rhs: &[&str]) -> usize {
215 zip(lhs, rhs).take_while(|(&l, &r)| l == r).count()
216}
217
218fn trailing_match_len(lhs: &[&str], rhs: &[&str]) -> usize {
219 zip(lhs.iter().rev(), rhs.iter().rev())
220 .take_while(|(&l, &r)| l == r)
221 .count()
222}
223
224fn accumulate_partitions(diffs: &mut Diffs, lhs: &[&str], rhs: &[&str]) {
225 let matched = match_lines(lhs, rhs);
226 if matched.is_empty() {
227 diffs.add_mutation(lhs.len(), rhs.len());
228 return;
229 }
230 let matched = longest_common_subseq(&matched);
231
232 let mut lhs_pos: usize = 0;
233 let mut rhs_pos: usize = 0;
234 for (lhs_next, rhs_next) in matched {
235 accumulate_diffs(diffs, &lhs[lhs_pos..lhs_next], &rhs[rhs_pos..rhs_next]);
236 diffs.add_match(1);
237 lhs_pos = lhs_next + 1;
238 rhs_pos = rhs_next + 1;
239 }
240 accumulate_diffs(diffs, &lhs[lhs_pos..lhs.len()], &rhs[rhs_pos..rhs.len()]);
241}
242
243fn match_lines(lhs: &[&str], rhs: &[&str]) -> Vec<(usize, usize)> {
244 let mut m: HashMap<&str, (Vec<usize>, Vec<usize>)> = HashMap::new();
245 for (i, l) in lhs.iter().enumerate() {
246 m.entry(l).or_default().0.push(i);
247 }
248 for (i, r) in rhs.iter().enumerate() {
249 m.entry(r).or_default().1.push(i);
250 }
251
252 let mut min = usize::MAX;
253 m.retain(|_, (l, r)| {
254 if l.len() == r.len() {
255 min = min.min(l.len());
256 true
257 } else {
258 false
259 }
260 });
261
262 let mut v: Vec<(usize, usize)> = m
263 .into_values()
264 .filter(|(l, _)| l.len() == min)
265 .flat_map(|(l, r)| zip(l, r))
266 .collect();
267 v.sort();
268 v
269}
270
271fn longest_common_subseq(pairings: &[(usize, usize)]) -> Vec<(usize, usize)> {
272 type PairingStack = Vec<Vec<((usize, usize), usize)>>;
273 fn find_push_pos(stacks: &PairingStack, p: &(usize, usize)) -> usize {
274 for (pos, stack) in stacks.iter().enumerate() {
275 if p.1 < stack.last().unwrap().0 .1 {
276 return pos;
277 }
278 }
279 stacks.len()
280 }
281
282 let mut stacks = PairingStack::new();
283 for p in pairings {
284 let push_pos = find_push_pos(&stacks, p);
285 if push_pos == stacks.len() {
286 stacks.push(vec![]);
287 }
288 let prev = if push_pos == 0 { 0 } else { stacks[push_pos - 1].len() - 1 };
289 stacks[push_pos].push((*p, prev));
290 }
291
292 let mut r = vec![];
293 let mut prev = stacks.last().unwrap().len() - 1;
294 for stack in stacks.iter().rev() {
295 r.push(stack[prev].0);
296 prev = stack[prev].1;
297 }
298 r.reverse();
299 r
300}
301
302#[cfg(test)]
303mod tests {
304 use super::*;
305
306 fn diff_lines(lhs: &str, rhs: &str) -> Vec<DiffItem> {
307 let lhs_lines: Vec<_> = lhs.lines().collect();
308 let rhs_lines: Vec<_> = rhs.lines().collect();
309 diff(&lhs_lines, &rhs_lines)
310 }
311
312 #[test]
313 fn diff_empty() {
314 assert_eq!(diff(&[], &[]), &[]);
315 }
316
317 #[test]
318 fn diff_eq() {
319 assert_eq!(
320 diff(&["a", "b", "c"], &["a", "b", "c"]),
321 &[Match {
322 lhs: Range { start: 0, end: 3 },
323 rhs: Range { start: 0, end: 3 },
324 }]
325 );
326 }
327
328 #[test]
329 fn diff_ne() {
330 assert_eq!(
331 diff(&["a", "b", "c"], &["a", "c"]),
332 &[
333 Match {
334 lhs: Range { start: 0, end: 1 },
335 rhs: Range { start: 0, end: 1 },
336 },
337 Mutation {
338 lhs: Range { start: 1, end: 2 },
339 rhs: Range { start: 1, end: 1 },
340 },
341 Match {
342 lhs: Range { start: 2, end: 3 },
343 rhs: Range { start: 1, end: 2 },
344 },
345 ]
346 );
347 assert_eq!(
348 diff(&["z", "a", "b", "c"], &["a", "c"]),
349 &[
350 Mutation {
351 lhs: Range { start: 0, end: 1 },
352 rhs: Range { start: 0, end: 0 },
353 },
354 Match {
355 lhs: Range { start: 1, end: 2 },
356 rhs: Range { start: 0, end: 1 },
357 },
358 Mutation {
359 lhs: Range { start: 2, end: 3 },
360 rhs: Range { start: 1, end: 1 },
361 },
362 Match {
363 lhs: Range { start: 3, end: 4 },
364 rhs: Range { start: 1, end: 2 },
365 },
366 ]
367 );
368 assert_eq!(
369 diff(&["z", "a", "e", "b", "c"], &["a", "e", "c"]),
370 &[
371 Mutation {
372 lhs: Range { start: 0, end: 1 },
373 rhs: Range { start: 0, end: 0 },
374 },
375 Match {
376 lhs: Range { start: 1, end: 3 },
377 rhs: Range { start: 0, end: 2 },
378 },
379 Mutation {
380 lhs: Range { start: 3, end: 4 },
381 rhs: Range { start: 2, end: 2 },
382 },
383 Match {
384 lhs: Range { start: 4, end: 5 },
385 rhs: Range { start: 2, end: 3 },
386 },
387 ]
388 );
389 }
390
391 #[test]
392 fn diff_only_non_unique() {
393 assert_eq!(
394 diff(&["a", "b", "b", "c"], &["b", "b"]),
395 &[
396 Mutation {
397 lhs: Range { start: 0, end: 1 },
398 rhs: Range { start: 0, end: 0 },
399 },
400 Match {
401 lhs: Range { start: 1, end: 3 },
402 rhs: Range { start: 0, end: 2 },
403 },
404 Mutation {
405 lhs: Range { start: 3, end: 4 },
406 rhs: Range { start: 2, end: 2 },
407 },
408 ]
409 );
410 }
411
412 #[test]
413 fn match_lines_arity1() {
414 assert_eq!(
415 match_lines(&["a", "b", "c", "d", "e", "d"], &["a", "c", "d", "e"]),
416 vec![(0, 0), (2, 1), (4, 3)],
417 );
418 }
419
420 #[test]
421 fn match_lines_arity2() {
422 assert_eq!(
423 match_lines(&["a", "b", "b", "c"], &["b", "b"]),
424 vec![(1, 0), (2, 1)],
425 );
426 }
427
428 #[test]
429 fn longest_common_subseq_basic() {
430 assert_eq!(
432 longest_common_subseq(&[
433 (0, 9),
434 (1, 4),
435 (2, 6),
436 (3, 12),
437 (4, 8),
438 (5, 7),
439 (6, 1),
440 (7, 5),
441 (8, 10),
442 (9, 11),
443 (10, 3),
444 (11, 2),
445 (12, 13),
446 ]),
447 &[(1, 4), (2, 6), (5, 7), (8, 10), (9, 11), (12, 13),]
448 );
449 }
450
451 #[test]
452 fn lead_trail_overlap() {
453 assert_eq!(
454 diff(&["a", "b", "d", "b", "c"], &["a", "b", "c"]),
455 &[
456 Match {
457 lhs: Range { start: 0, end: 2 },
458 rhs: Range { start: 0, end: 2 },
459 },
460 Mutation {
461 lhs: Range { start: 2, end: 4 },
462 rhs: Range { start: 2, end: 2 },
463 },
464 Match {
465 lhs: Range { start: 4, end: 5 },
466 rhs: Range { start: 2, end: 3 },
467 },
468 ]
469 );
470 }
471
472 #[test]
473 fn lead_move_txt() {
474 assert_eq!(
475 diff_lines(
476 include_str!("testdata/old/move.txt"),
477 include_str!("testdata/new/move.txt"),
478 ),
479 &[
480 Mutation {
481 lhs: Range { start: 0, end: 8 },
482 rhs: Range { start: 0, end: 0 },
483 },
484 Match {
485 lhs: Range { start: 8, end: 16 },
486 rhs: Range { start: 0, end: 8 },
487 },
488 Mutation {
489 lhs: Range { start: 16, end: 16 },
490 rhs: Range { start: 8, end: 16 },
491 },
492 ]
493 );
494 }
495
496 fn hunk_positions(hunks: &[Hunk]) -> Vec<((usize, usize), (usize, usize))> {
497 hunks
498 .iter()
499 .map(|h| {
500 let (l, r) = (h.lhs(), h.rhs());
501 ((l.start + 1, l.len()), (r.start + 1, r.len()))
502 })
503 .collect::<Vec<_>>()
504 }
505
506 #[test]
507 fn build_hunks() {
508 let diff = diff_lines(
509 include_str!("testdata/old/move.txt"),
510 include_str!("testdata/new/move.txt"),
511 );
512 assert_eq!(
513 hunk_positions(&Hunk::build(3, &diff)),
514 &[((1, 11), (1, 3)), ((14, 3), (6, 11))]
515 );
516 assert_eq!(
517 hunk_positions(&Hunk::build(0, &diff)),
518 &[((1, 8), (1, 0)), ((17, 0), (9, 8))]
519 );
520 }
521
522 #[test]
523 fn tokenize() {
524 assert_eq!(
525 tokenize_lines(&["void func1() {", " x += 1"]),
526 &[
527 "void", " ", "func1", "()", " ", "{", "\n", " ", "x", " ", "+=", " ",
528 "1"
529 ],
530 );
531 }
532}