1use crate::input_processing::{EditType, Entry};
5use crate::neg_idx_vec::NegIdxVec;
6use anyhow::Result;
7use logging_timer::time;
8use serde::Serialize;
9use std::fmt::Debug;
10use std::iter::FromIterator;
11use std::ops::Range;
12use thiserror::Error;
13
14fn common_prefix_len<T: PartialEq>(
16 a: &[T],
17 a_range: Range<usize>,
18 b: &[T],
19 b_range: Range<usize>,
20) -> usize {
21 let mut l = 0;
22
23 unsafe {
24 while a_range.start + l < a_range.end
25 && b_range.start + l < b_range.end
26 && a.get_unchecked(a_range.start + l) == b.get_unchecked(b_range.start + l)
27 {
28 l += 1;
29 }
30 }
31 l
32}
33
34#[derive(Debug, PartialEq, Eq, Clone, Copy)]
38struct Coordinates<T>
39where
40 T: Debug + PartialEq + Eq + Clone + Copy,
41{
42 pub old: T,
44
45 pub new: T,
47}
48
49fn convert_range<FromType, IntoType>(range: Range<FromType>) -> Range<IntoType>
54where
55 FromType: TryInto<IntoType>,
56 <FromType as TryInto<IntoType>>::Error: std::fmt::Debug,
57{
58 range.start.try_into().unwrap()..range.end.try_into().unwrap()
59}
60
61fn common_suffix_len<T: PartialEq>(
64 a: &[T],
65 a_range: Range<usize>,
66 b: &[T],
67 b_range: Range<usize>,
68) -> usize {
69 let mut l: isize = 1;
70 let a_range: Range<isize> = convert_range(a_range);
71 let b_range: Range<isize> = convert_range(b_range);
72 unsafe {
73 while a_range.end - l >= a_range.start
74 && b_range.end - l >= b_range.start
75 && a.get_unchecked::<usize>((a_range.end - l).try_into().unwrap())
76 == b.get_unchecked::<usize>((b_range.end - l).try_into().unwrap())
77 {
78 l += 1;
79 }
80 }
81 (l - 1).try_into().unwrap()
82}
83
84#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
86pub struct Line<'a> {
87 pub line_index: usize,
89 pub entries: Vec<&'a Entry<'a>>,
91}
92
93impl<'a> Line<'a> {
94 #[must_use]
95 pub fn new(line_index: usize) -> Self {
96 Line {
97 line_index,
98 entries: Vec::new(),
99 }
100 }
101}
102
103#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
107pub struct Hunk<'a>(pub Vec<Line<'a>>);
108
109#[derive(Debug, Error)]
111pub enum HunkInsertionError {
112 #[error(
113 "Non-adjacent entry (line {incoming_line:?}) added to hunk (last line: {last_line:?})"
114 )]
115 NonAdjacentHunk {
116 incoming_line: usize,
117 last_line: usize,
118 },
119
120 #[error("Attempted to append an entry with a line index ({incoming_line:?}) less than the first line's index ({last_line:?})")]
121 PriorLine {
122 incoming_line: usize,
123 last_line: usize,
124 },
125
126 #[error("Attempted to append an entry with a column ({incoming_col:?}, line: {incoming_line:?}) less than the first entry's column ({last_col:?}, line: {last_line:?})")]
127 PriorColumn {
128 incoming_col: usize,
129 incoming_line: usize,
130 last_col: usize,
131 last_line: usize,
132 },
133}
134
135impl<'a> Hunk<'a> {
136 #[must_use]
138 pub fn new() -> Self {
139 Hunk(Vec::new())
140 }
141
142 #[must_use]
146 pub fn first_line(&self) -> Option<usize> {
147 self.0.first().map(|x| x.line_index)
148 }
149
150 #[must_use]
154 pub fn last_line(&self) -> Option<usize> {
155 self.0.last().map(|x| x.line_index)
156 }
157
158 pub fn can_push_back(&self, entry: &Entry<'a>) -> Result<(), HunkInsertionError> {
167 let incoming_line_idx = entry.start_position().row;
168
169 if let Some(last_line) = self.0.last() {
172 let last_line_idx = last_line.line_index;
173
174 if incoming_line_idx < last_line_idx {
175 return Err(HunkInsertionError::PriorLine {
176 incoming_line: incoming_line_idx,
177 last_line: last_line_idx,
178 });
179 } else if incoming_line_idx - last_line_idx > 1 {
180 return Err(HunkInsertionError::NonAdjacentHunk {
181 incoming_line: incoming_line_idx,
182 last_line: last_line_idx,
183 });
184 }
185 if incoming_line_idx == last_line_idx && !last_line.entries.is_empty() {
187 let last_entry = last_line.entries.last().unwrap();
188 let last_col = last_entry.end_position().column;
189 let last_line = last_entry.end_position().row;
190 let incoming_col = entry.start_position().column;
191 let incoming_line = entry.end_position().row;
192 if incoming_col < last_col {
193 return Err(HunkInsertionError::PriorColumn {
194 incoming_col,
195 last_col,
196 incoming_line,
197 last_line,
198 });
199 }
200 }
201 }
202 Ok(())
203 }
204
205 pub fn push_back(&mut self, entry: &'a Entry<'a>) -> Result<(), HunkInsertionError> {
211 self.can_push_back(entry)?;
212 let incoming_line_idx = entry.start_position().row;
213
214 if self.0.is_empty() || incoming_line_idx - self.0.last().unwrap().line_index == 1 {
217 self.0.push(Line::new(incoming_line_idx));
218 }
219 let last_line = self.0.last_mut().unwrap();
222 last_line.entries.push(entry);
223 Ok(())
224 }
225}
226
227impl<'a> Default for Hunk<'a> {
228 fn default() -> Self {
229 Self::new()
230 }
231}
232
233#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
238pub enum DocumentType<T: Debug + PartialEq + Serialize + Clone> {
239 Old(T),
240 New(T),
241}
242
243impl<T> AsRef<T> for DocumentType<T>
244where
245 T: Debug + Clone + PartialEq + Serialize,
246{
247 fn as_ref(&self) -> &T {
248 match self {
249 Self::Old(x) | Self::New(x) => x,
250 }
251 }
252}
253
254impl<T> AsMut<T> for DocumentType<T>
255where
256 T: Debug + Clone + PartialEq + Serialize,
257{
258 fn as_mut(&mut self) -> &mut T {
259 match self {
260 Self::Old(x) | Self::New(x) => x,
261 }
262 }
263}
264
265impl<T: Debug + Clone + PartialEq + Serialize> DocumentType<T> {
266 fn consume(self) -> T {
268 match self {
269 Self::Old(x) | Self::New(x) => x,
270 }
271 }
272}
273
274pub type RichHunk<'a> = DocumentType<Hunk<'a>>;
276
277#[derive(Debug, Clone, PartialEq, Eq)]
281pub struct Hunks<'a>(pub Vec<Hunk<'a>>);
282
283#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
284pub struct RichHunks<'a>(pub Vec<RichHunk<'a>>);
285
286pub struct RichHunksBuilder<'a> {
290 hunks: RichHunks<'a>,
292
293 last_old: Option<usize>,
295
296 last_new: Option<usize>,
298}
299
300impl<'a> RichHunksBuilder<'a> {
301 #[must_use]
302 pub fn new() -> Self {
303 Self {
304 hunks: RichHunks(Vec::new()),
305 last_old: None,
306 last_new: None,
307 }
308 }
309
310 #[must_use]
314 pub fn build(self) -> RichHunks<'a> {
315 self.hunks
316 }
317
318 fn get_hunk_for_insertion(
322 &mut self,
323 incoming_entry: &DocumentType<&'a Entry<'a>>,
324 ) -> Result<usize, HunkInsertionError> {
325 let (mut last_idx, new_hunk) = match incoming_entry {
326 DocumentType::Old(_) => (self.last_old, DocumentType::Old(Hunk::new())),
327 DocumentType::New(_) => (self.last_new, DocumentType::New(Hunk::new())),
328 };
329
330 match last_idx {
331 None => {
334 self.hunks.0.push(new_hunk);
335 last_idx = Some(self.hunks.0.len() - 1);
336 }
337 Some(idx) => {
340 let last_hunk = self.hunks.0[idx].as_ref();
341 let last_line = last_hunk.last_line();
342
343 if let Some(last_line) = last_line {
347 let incoming_line = incoming_entry.as_ref().end_position.row;
348
349 if incoming_line < last_line {
350 return Err(HunkInsertionError::PriorLine {
351 incoming_line,
352 last_line,
353 });
354 }
355
356 if incoming_line - last_line > 1 {
360 self.hunks.0.push(new_hunk);
361 last_idx = Some(self.hunks.0.len() - 1);
362 }
363
364 }
366 }
367 };
368
369 match incoming_entry {
370 DocumentType::Old(_) => self.last_old = last_idx,
371 DocumentType::New(_) => self.last_new = last_idx,
372 }
373 Ok(last_idx.unwrap())
374 }
375
376 pub fn push_back(&mut self, entry_wrapper: DocumentType<&'a Entry<'a>>) -> Result<()> {
378 let insertion_idx = self.get_hunk_for_insertion(&entry_wrapper)?;
379 self.hunks.0[insertion_idx]
380 .as_mut()
381 .push_back(entry_wrapper.consume())?;
382 Ok(())
383 }
384}
385
386impl<'a> Default for RichHunksBuilder<'a> {
387 fn default() -> Self {
388 Self::new()
389 }
390}
391
392impl<'a> Hunks<'a> {
393 #[must_use]
394 pub fn new() -> Self {
395 Hunks(Vec::new())
396 }
397
398 pub fn push_back(&mut self, entry: &'a Entry<'a>) -> Result<()> {
399 if let Some(hunk) = self.0.last_mut() {
400 match hunk.can_push_back(entry) {
401 Ok(()) => hunk.push_back(entry),
402 Err(HunkInsertionError::NonAdjacentHunk {
405 incoming_line: _,
406 last_line: _,
407 }) => {
408 let mut hunk = Hunk::new();
409 hunk.push_back(entry)?;
410 self.0.push(hunk);
411 Ok(())
412 }
413 Err(e) => Err(e),
414 }?;
415 } else {
416 self.0.push(Hunk::new());
417 self.0.last_mut().unwrap().push_back(entry)?;
418 }
419 Ok(())
420 }
421}
422
423impl<'a> Default for Hunks<'a> {
424 fn default() -> Self {
425 Self::new()
426 }
427}
428
429pub struct HunkAppender<'a>(pub Hunks<'a>);
430
431impl<'a> FromIterator<&'a Entry<'a>> for HunkAppender<'a> {
432 fn from_iter<T>(iter: T) -> Self
437 where
438 T: IntoIterator<Item = &'a Entry<'a>>,
439 {
440 let mut hunks = Hunks::new();
441
442 for i in iter {
443 hunks.push_back(i).expect("Invalid iterator");
444 }
445 HunkAppender(hunks)
446 }
447}
448
449pub trait Engine<'elem, T>
454where
455 T: Eq + 'elem,
456{
457 type Container;
459
460 fn diff(&self, a: &'elem [T], b: &'elem [T]) -> Self::Container;
462}
463
464#[derive(Eq, PartialEq, Copy, Clone, Debug, Default)]
465pub struct Myers {}
466
467impl<'elem, T> Engine<'elem, T> for Myers
468where
469 T: Eq + 'elem + std::fmt::Debug,
470{
471 type Container = Vec<EditType<&'elem T>>;
472
473 fn diff(&self, a: &'elem [T], b: &'elem [T]) -> Self::Container {
474 let mut res = Vec::with_capacity(a.len() + b.len());
476 let mut frontiers = MyersFrontiers::new(a.len(), b.len());
477 Myers::diff_impl(&mut res, a, 0..a.len(), b, 0..b.len(), &mut frontiers);
478 res
479 }
480}
481
482#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
484pub struct MidSnakeInfo {
485 pub a_split: i32,
487
488 pub b_split: i32,
490
491 pub optimal_len: u32,
493}
494
495fn split_range(r: &Range<usize>, idx: usize) -> (Range<usize>, Range<usize>) {
497 (r.start..idx, idx..r.end)
498}
499
500pub struct MyersFrontiers {
505 pub forward: NegIdxVec<i32>,
507
508 pub reverse: NegIdxVec<i32>,
510}
511
512impl MyersFrontiers {
513 fn new(old_len: usize, new_len: usize) -> Self {
515 let midpoint = ((old_len + new_len) as f32 / 2.00).ceil() as i32 + 1;
516
517 let vec_length = (midpoint * 2) as usize;
519
520 MyersFrontiers {
521 forward: vec![0; vec_length].into(),
522 reverse: vec![0; vec_length].into(),
523 }
524 }
525}
526
527impl Myers {
528 fn diff_impl<'elem, T: Eq + Debug + 'elem>(
531 res: &mut Vec<EditType<&'elem T>>,
532 old: &'elem [T],
533 mut old_range: Range<usize>,
534 new: &'elem [T],
535 mut new_range: Range<usize>,
536 frontiers: &mut MyersFrontiers,
537 ) {
538 let common_pref_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
540 old_range.start += common_pref_len;
541 new_range.start += common_pref_len;
542
543 let common_suf_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
544 old_range.end = old_range.start.max(old_range.end - common_suf_len);
546 new_range.end = new_range.start.max(new_range.end - common_suf_len);
547
548 if old_range.is_empty() && new_range.is_empty() {
551 return;
552 }
553
554 if old_range.is_empty() {
555 for i in new_range {
556 res.push(EditType::Addition(&new[i]));
557 }
558 return;
559 }
560
561 if new_range.is_empty() {
562 for i in old_range {
563 res.push(EditType::Deletion(&old[i]));
564 }
565 return;
566 }
567
568 let Coordinates {
569 old: x_start,
570 new: y_start,
571 } = Myers::middle_snake(old, old_range.clone(), new, new_range.clone(), frontiers);
572
573 let (old_first_half, old_second_half) = split_range(&old_range, x_start);
575 let (new_first_half, new_second_half) = split_range(&new_range, y_start);
576
577 Myers::diff_impl(res, old, old_first_half, new, new_first_half, frontiers);
578 Myers::diff_impl(res, old, old_second_half, new, new_second_half, frontiers);
579 }
580
581 fn middle_snake<T: Eq>(
587 old: &[T],
588 old_range: Range<usize>,
589 new: &[T],
590 new_range: Range<usize>,
591 frontiers: &mut MyersFrontiers,
592 ) -> Coordinates<usize> {
593 let n = old_range.len() as i32;
594 let m = new_range.len() as i32;
595 let delta = n - m;
596 let is_odd = delta % 2 == 1;
597 let midpoint = ((m + n) as f32 / 2.00).ceil() as i32 + 1;
598
599 let fwd_front = &mut frontiers.forward;
600 let rev_front = &mut frontiers.reverse;
601
602 fwd_front[1] = 0;
603 rev_front[1] = 0;
604
605 for d in 0..=midpoint {
606 for k in (-d..=d).rev().step_by(2) {
608 let mut x = if k == -d || (k != d && fwd_front[k + 1] >= fwd_front[k - 1]) {
612 fwd_front[k + 1]
615 } else {
616 fwd_front[k - 1] + 1
619 };
620 let y = x - k;
621
622 let (x0, y0) = (x, y);
624
625 if x < n && y < m {
627 debug_assert!(x >= 0);
628 debug_assert!(y >= 0);
629
630 let common_pref_len = common_prefix_len(
631 old,
632 old_range.start + (x as usize)..old_range.end,
633 new,
634 new_range.start + (y as usize)..new_range.end,
635 );
636 x += common_pref_len as i32;
637 }
638
639 fwd_front[k] = x;
640
641 if is_odd && (k - delta).abs() < d {
643 let reverse_x = rev_front[-(k - delta)];
647
648 let old = (old_range.start as i32) + x0;
651 let new = (new_range.start as i32) + y0;
652
653 debug_assert!(
654 old >= (old_range.start as i32) && old <= (old_range.end as i32),
655 "expected old={} in {}..{}",
656 old,
657 old_range.start,
658 old_range.end,
659 );
660 debug_assert!(
661 new >= (new_range.start as i32) && new <= (new_range.end as i32),
662 "expected new={} in {}..{}",
663 new,
664 new_range.start,
665 new_range.end,
666 );
667
668 if x + reverse_x >= n {
672 return Coordinates {
673 old: old as usize,
674 new: new as usize,
675 };
676 }
677 }
678 }
679
680 for k in (-d..=d).rev().step_by(2) {
682 let mut x = if k == -d || (k != d && rev_front[k + 1] >= rev_front[k - 1]) {
689 rev_front[k + 1]
691 } else {
692 rev_front[k - 1] + 1
695 };
696 let mut y = x - k;
697
698 if x < n && y < m {
700 debug_assert!(x >= 0);
701 debug_assert!(y >= 0);
702 debug_assert!(n - x >= 0);
703 debug_assert!(m - y >= 0);
704
705 let common_suf_len = common_suffix_len(
706 old,
707 old_range.start..old_range.start + (n as usize) - (x as usize),
708 new,
709 new_range.start..new_range.start + (m as usize) - (y as usize),
710 );
711 x += common_suf_len as i32;
712 y += common_suf_len as i32;
713 }
714 rev_front[k] = x;
715
716 if !is_odd && (k - delta).abs() <= d {
718 let forward_x = fwd_front[-(k - delta)];
719
720 if forward_x + x >= n {
726 let old = n - x + (old_range.start as i32);
727 let new = m - y + (new_range.start as i32);
728
729 debug_assert!(
730 old >= (old_range.start as i32) && old <= (old_range.end as i32),
731 "expected old={} in {}..{}",
732 old,
733 old_range.start,
734 old_range.end,
735 );
736 debug_assert!(
737 new >= (new_range.start as i32) && new <= (new_range.end as i32),
738 "expected new={} in {}..{}",
739 new,
740 new_range.start,
741 new_range.end,
742 );
743
744 return Coordinates {
745 old: old as usize,
746 new: new as usize,
747 };
748 }
749 }
750 }
751 }
752 unreachable!();
753 }
754}
755
756impl<'a> TryFrom<Vec<EditType<&'a Entry<'a>>>> for RichHunks<'a> {
757 type Error = anyhow::Error;
758
759 fn try_from(edits: Vec<EditType<&'a Entry<'a>>>) -> Result<Self, Self::Error> {
760 let mut builder = RichHunksBuilder::new();
761 for edit_wrapper in edits {
762 let edit = match edit_wrapper {
764 EditType::Addition(edit) => DocumentType::New(edit),
765 EditType::Deletion(edit) => DocumentType::Old(edit),
766 };
767 builder.push_back(edit)?;
768 }
769 Ok(builder.build())
770 }
771}
772
773#[time("info", "diff::{}")]
780pub fn compute_edit_script<'a>(
781 old: &'a [Entry<'a>],
782 new: &'a [Entry<'a>],
783) -> Result<RichHunks<'a>> {
784 let myers = Myers::default();
785 let edit_script = myers.diff(old, new);
786 RichHunks::try_from(edit_script)
787}
788
789#[cfg(test)]
790mod tests {
791 use super::*;
792 use pretty_assertions::assert_eq as p_assert_eq;
793 use test_case::test_case;
794
795 fn myers_diff<'a, T>(a: &'a [T], b: &'a [T]) -> Vec<EditType<&'a T>>
797 where
798 T: 'a + Eq + Debug,
799 {
800 let myers = Myers::default();
801 myers.diff(a, b)
802 }
803
804 #[test]
805 fn mid_snake_empty_input() {
806 let input_a = b"";
807 let input_b = b"";
808
809 let mut frontiers = MyersFrontiers::new(input_a.len(), input_b.len());
810 let mid_snake = Myers::middle_snake(
811 &input_a[..],
812 0..input_a.len(),
813 &input_b[..],
814 0..input_b.len(),
815 &mut frontiers,
816 );
817 let expected = Coordinates { old: 0, new: 0 };
818 p_assert_eq!(expected, mid_snake);
819 }
820
821 #[test]
822 fn mid_snake() {
823 let input_a = &b"ABCABBA"[..];
824 let input_b = &b"CBABAC"[..];
825 let mut frontiers = MyersFrontiers::new(input_a.len(), input_b.len());
826 let mid_snake = Myers::middle_snake(
827 input_a,
828 0..input_a.len(),
829 input_b,
830 0..input_b.len(),
831 &mut frontiers,
832 );
833 let expected = Coordinates { old: 4, new: 1 };
834 p_assert_eq!(expected, mid_snake);
835 }
836
837 #[test]
838 fn myers_diff_empty_inputs() {
839 let input_a: Vec<i32> = vec![];
840 let input_b: Vec<i32> = vec![];
841 let edit_script = myers_diff(&input_a, &input_b);
842 assert!(edit_script.is_empty());
843 }
844
845 #[test]
846 fn myers_diff_no_diff() {
847 let input_a: Vec<i32> = vec![0; 4];
848 let input_b: Vec<i32> = vec![0; 4];
849 let edit_script = myers_diff(&input_a, &input_b);
850 assert!(edit_script.is_empty());
851 }
852
853 #[test]
854 fn myers_diff_one_addition() {
855 let input_a: Vec<i32> = Vec::new();
856 let input_b: Vec<i32> = vec![0];
857 let expected = vec![EditType::Addition(&input_b[0])];
858 let edit_script = myers_diff(&input_a, &input_b);
859 p_assert_eq!(expected, edit_script);
860 }
861
862 #[test]
863 fn myers_diff_one_deletion() {
864 let input_a: Vec<i32> = vec![0];
865 let input_b: Vec<i32> = Vec::new();
866 let expected = vec![EditType::Deletion(&input_a[0])];
867 let edit_script = myers_diff(&input_a, &input_b);
868 p_assert_eq!(expected, edit_script);
869 }
870
871 #[test]
872 fn myers_diff_single_substitution() {
873 let myers = Myers::default();
874 let input_a = [1];
875 let input_b = [2];
876 let edit_script = myers.diff(&input_a[..], &input_b[..]);
877 let expected = vec![
878 EditType::Addition(&input_b[0]),
879 EditType::Deletion(&input_a[0]),
880 ];
881 p_assert_eq!(expected, edit_script);
882 }
883
884 #[test]
885 fn myers_diff_single_substitution_with_common_elements() {
886 let myers = Myers::default();
887 let input_a = [0, 0, 0];
888 let input_b = [0, 1, 0];
889 let edit_script = myers.diff(&input_a[..], &input_b[..]);
890 let expected = vec![
891 EditType::Addition(&input_b[1]),
892 EditType::Deletion(&input_a[1]),
893 ];
894 p_assert_eq!(expected, edit_script);
895 }
896
897 #[test_case(b"BAAA", b"CAAA" => 0 ; "no common prefix")]
898 #[test_case(b"AAABA", b"AAACA" => 3 ; "with common prefix")]
899 fn common_prefix(a: &[u8], b: &[u8]) -> usize {
900 common_prefix_len(a, 0..a.len(), b, 0..b.len())
901 }
902
903 #[test_case(b"AAAB", b"AAAC" => 0 ; "no common suffix")]
904 #[test_case(b"ABAAA", b"ACAAA" => 3 ; "with common suffix")]
905 fn common_suffix(a: &[u8], b: &[u8]) -> usize {
906 common_suffix_len(a, 0..a.len(), b, 0..b.len())
907 }
908}