diffmatchpatch/
lib.rs

1use std::{
2    collections::HashMap,
3    fmt,
4    hash::Hash,
5    time::{Duration, Instant},
6};
7
8pub use chars::Chars;
9pub use patch::Patch;
10
11pub mod chars;
12mod r#match;
13mod patch;
14pub mod prelude;
15
16pub trait ToChars {
17    fn to_chars(&self) -> Vec<char>;
18}
19
20impl ToChars for String {
21    fn to_chars(&self) -> Vec<char> {
22        self.chars().collect()
23    }
24}
25
26impl ToChars for &str {
27    fn to_chars(&self) -> Vec<char> {
28        self.chars().collect()
29    }
30}
31
32pub enum LengthUnit {
33    UnicodeScalar,
34    UTF16,
35}
36
37/// Diff Match and Patch methods
38pub struct DiffMatchPatch {
39    /// Time duration to map a diff before giving up (None for infinity).
40    pub diff_timeout: Option<Duration>,
41    /// Cost of an empty edit operation in terms of edit characters.
42    pub edit_cost: i32,
43    /// How far to search for a match (0 = exact location, 1000+ = broad match).
44    /// A match this many characters away from the expected location will add
45    /// 1.0 to the score (0.0 is a perfect match).*/
46    pub match_distance: i32,
47    /// Chunk size for context length.
48    pub patch_margin: usize,
49    /// The number of bits in an int.
50    pub match_maxbits: usize,
51    /// At what point is no match declared (0.0 = perfection, 1.0 = very loose).
52    pub match_threshold: f32,
53    // When deleting a large block of text (over ~64 characters), how close do
54    // the contents have to be to match the expected contents. (0.0 = perfection,
55    // 1.0 = very loose).  Note that Match_Threshold controls how closely the
56    // end points of a delete need to match.*/
57    pub patch_delete_threshold: f32,
58}
59
60impl Default for DiffMatchPatch {
61    fn default() -> Self {
62        Self::new()
63    }
64}
65
66/// The data structure representing a diff
67#[derive(PartialEq, Clone)]
68pub enum Diff<T = Chars> {
69    Delete(T),
70    Insert(T),
71    Equal(T),
72}
73
74impl Diff {
75    /// Placeholder for variable
76    pub(crate) const fn empty() -> Self {
77        Diff::Equal(Chars::new())
78    }
79
80    pub fn text(&self) -> &Chars {
81        match self {
82            Diff::Delete(text) => text,
83            Diff::Insert(text) => text,
84            Diff::Equal(text) => text,
85        }
86    }
87
88    pub fn text_mut(&mut self) -> &mut Chars {
89        match self {
90            Diff::Delete(text) => text,
91            Diff::Insert(text) => text,
92            Diff::Equal(text) => text,
93        }
94    }
95
96    pub fn is_delete(&self) -> bool {
97        matches!(self, Diff::Delete(_))
98    }
99
100    pub fn is_insert(&self) -> bool {
101        matches!(self, Diff::Insert(_))
102    }
103
104    pub fn is_equal(&self) -> bool {
105        matches!(self, Diff::Equal(_))
106    }
107}
108
109impl<T> Diff<T> {
110    pub fn translate<P, F>(&self, f: F) -> Diff<P>
111    where
112        F: Fn(&T) -> P,
113    {
114        match self {
115            Diff::Delete(chars) => Diff::Delete(f(chars)),
116            Diff::Insert(chars) => Diff::Insert(f(chars)),
117            Diff::Equal(chars) => Diff::Equal(f(chars)),
118        }
119    }
120}
121
122impl<T: fmt::Debug> fmt::Debug for Diff<T> {
123    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
124        match self {
125            Diff::Delete(text) => write!(f, "- {text:?}"),
126            Diff::Insert(text) => write!(f, "+ {text:?}"),
127            Diff::Equal(text) => write!(f, "= {text:?}"),
128        }
129    }
130}
131
132impl DiffMatchPatch {
133    pub fn new() -> Self {
134        DiffMatchPatch {
135            diff_timeout: None,
136            patch_delete_threshold: 0.5,
137            edit_cost: 0,
138            match_distance: 1000,
139            patch_margin: 4,
140            match_maxbits: 32,
141            match_threshold: 0.5,
142        }
143    }
144
145    /**
146    Find the first index after a specific index in text1 where patern is present.
147
148    Args:
149        text1: Parent chars.
150        text2: Patern chars.
151        ind: index after which we have to find the patern.
152
153    Returns:
154        the first index where patern is found or -1 if not found.
155    */
156    fn kmp<T: PartialEq>(&self, text1: &[T], text2: &[T], ind: usize) -> Option<usize> {
157        if text2.is_empty() {
158            return Some(ind);
159        }
160        if text1.is_empty() {
161            return None;
162        }
163        let len1 = text1.len();
164        let len2 = text2.len();
165        let mut pattern: Vec<usize> = Vec::new();
166        pattern.push(0);
167        let mut len = 0;
168        let mut i = 1;
169
170        // Preprocess the pattern
171        while i < len2 {
172            if text2[i] == text2[len] {
173                len += 1;
174                pattern.push(len);
175                i += 1;
176            } else if len == 0 {
177                pattern.push(0);
178                i += 1;
179            } else {
180                len = pattern[len - 1];
181            }
182        }
183        i = ind;
184        len = 0;
185        while i < len1 {
186            if text1[i] == text2[len] {
187                len += 1;
188                i += 1;
189                if len == len2 {
190                    return Some(i - len);
191                }
192            } else if len == 0 {
193                i += 1;
194            } else {
195                len = pattern[len - 1];
196            }
197        }
198        None
199    }
200
201    /**
202    Find the last index before a specific index in text1 where patern is present.
203
204    Args:
205        text1: Parent chars.
206        text2: Patern chars.
207        ind: index just before we have to find the patern.
208
209    Returns:
210        the last index where patern is found or -1 if not found.
211    */
212    fn rkmp<T: PartialEq>(&mut self, text1: &[T], text2: &[T], ind: usize) -> Option<usize> {
213        if text2.is_empty() {
214            return Some(ind);
215        }
216        if text1.is_empty() {
217            return None;
218        }
219        let len2 = text2.len();
220        let mut pattern: Vec<usize> = Vec::new();
221        pattern.push(0);
222        let mut len = 0;
223        let mut i = 1;
224
225        // Preprocess the pattern
226        while i < len2 {
227            if text2[i] == text2[len] {
228                len += 1;
229                pattern.push(len);
230                i += 1;
231            } else if len == 0 {
232                pattern.push(0);
233                i += 1;
234            } else {
235                len = pattern[len - 1];
236            }
237        }
238        i = 0;
239        len = 0;
240        let mut ans = usize::MAX;
241        while i <= ind {
242            if text1[i] == text2[len] {
243                len += 1;
244                i += 1;
245                if len == len2 {
246                    ans = i - len;
247                    len = pattern[len - 1];
248                }
249            } else if len == 0 {
250                i += 1;
251            } else {
252                len = pattern[len - 1];
253            }
254        }
255        if ans == usize::MAX {
256            None
257        } else {
258            Some(ans)
259        }
260    }
261
262    /**
263      Determine the common prefix of two strings.
264
265      Args:
266          text1: First strings.
267          text2: Second strings.
268
269      Returns:
270          The number of characters common to the start of each chars.
271    */
272    pub fn diff_common_prefix<T: PartialEq>(&self, text1: &[T], text2: &[T]) -> usize {
273        // Quick check for common null cases
274        if text1.is_empty() || text2.is_empty() || text1[0] != text2[0] {
275            return 0;
276        }
277        // Binary search.
278        let pointermax = usize::min(text1.len(), text2.len());
279        let mut pointerstart = 0;
280        while pointerstart < pointermax {
281            if text1[pointerstart] == text2[pointerstart] {
282                pointerstart += 1;
283            } else {
284                return pointerstart;
285            }
286        }
287        pointermax
288    }
289
290    /**
291      Determine the common suffix of two strings.
292
293      Args:
294          text1: First chars.
295          text2: Second chars.
296
297      Returns:
298          The number of characters common to the end of each chars.
299    */
300    pub fn diff_common_suffix<T: PartialEq>(&self, text1: &[T], text2: &[T]) -> usize {
301        if text1.is_empty() || text2.is_empty() {
302            return 0;
303        }
304        let mut pointer_1 = (text1.len() - 1) as isize;
305        let mut pointer_2 = (text2.len() - 1) as isize;
306        let mut len = 0;
307        while pointer_1 >= 0 && pointer_2 >= 0 {
308            if text1[pointer_1 as usize] == text2[pointer_2 as usize] {
309                len += 1;
310            } else {
311                break;
312            }
313            pointer_1 -= 1;
314            pointer_2 -= 1;
315        }
316        len
317    }
318
319    /*
320      Determine if the suffix of one chars is the prefix of another.
321
322      Args:
323          text1 First chars.
324          text2 Second chars.
325
326      Returns:
327          The number of characters common to the end of the first
328          chars and the start of the second chars.
329    */
330    pub fn diff_common_overlap<T: PartialEq>(&self, text1: &[T], text2: &[T]) -> usize {
331        // Eliminate the null case.
332        if text1.is_empty() || text2.is_empty() {
333            return 0;
334        }
335
336        let text1_trunc;
337        let text2_trunc;
338        let len = text1.len().min(text2.len());
339
340        // Truncate the longer chars.
341        if text1.len() > text2.len() {
342            text1_trunc = &text1[(text1.len() - text2.len())..];
343            text2_trunc = text2;
344        } else {
345            text1_trunc = text1;
346            text2_trunc = &text2[0..text1.len()];
347        }
348        let mut best = 0;
349        let mut length = 1;
350        // Quick check for the worst case.
351        if text1_trunc == text2_trunc {
352            return len;
353        }
354        // Start by looking for a single character match
355        // and increase length until no match is found.
356        // Performance analysis: https://neil.fraser.name/news/2010/11/04/
357        loop {
358            let patern = &text1_trunc[(len - length)..len];
359            if let Some(found) = self.kmp(text2_trunc, patern, 0) {
360                length += found;
361                if found == 0 {
362                    best = length;
363                    length += 1;
364                }
365            } else {
366                return best;
367            }
368        }
369    }
370
371    /**
372    Do the two texts share a substring which is at least half the length of
373    the longer text?
374    This speedup can produce non-minimal diffs.
375
376    Args:
377        text1: First chars.
378        text2: Second chars.
379
380    Returns:
381        Five element Vector, containing the prefix of text1, the suffix of text1,
382        the prefix of text2, the suffix of text2 and the common middle.  Or empty vector
383        if there was no match.
384    */
385    pub fn diff_half_match<'a, T: PartialEq + Copy>(
386        &self,
387        text1: &'a [T],
388        text2: &'a [T],
389    ) -> Option<Vec<&'a [T]>> {
390        self.diff_timeout?;
391
392        let (long_text, short_text) = if text1.len() > text2.len() {
393            (text1, text2)
394        } else {
395            (text2, text1)
396        };
397        if long_text.len() < 4 || short_text.len() * 2 < long_text.len() {
398            return None;
399        }
400
401        let mut hm: Vec<&[T]>;
402        // First check if the second quarter is the seed for a half-match.
403        let hm1 = self.diff_half_matchi(long_text, short_text, (long_text.len() + 3) / 4);
404        // Check again based on the third quarter.
405        let hm2 = self.diff_half_matchi(long_text, short_text, (long_text.len() + 1) / 2);
406
407        if hm1.is_empty() && hm2.is_empty() {
408            return None;
409        } else if hm1.is_empty() {
410            hm = hm2;
411        } else if hm2.is_empty() {
412            hm = hm1;
413        } else {
414            // Both matched.  Select the longest.
415            hm = if hm1[4].len() > hm2[4].len() {
416                hm1
417            } else {
418                hm2
419            };
420        }
421        if text1.len() > text2.len() {
422            return Some(hm);
423        }
424        let mut temp2 = hm[0];
425        let mut temp3 = hm[2];
426        hm[0] = temp3;
427        hm[2] = temp2;
428        temp2 = hm[1];
429        temp3 = hm[3];
430        hm[1] = temp3;
431        hm[3] = temp2;
432        Some(hm)
433    }
434
435    /**
436    Does a substring of shorttext exist within longtext such that the
437    substring is at least half the length of longtext?
438    Closure, but does not reference any external variables.
439
440    Args:
441        longtext: Longer chars.
442        shorttext: Shorter chars.
443        i: Start index of quarter length substring within longtext.
444
445    Returns:
446        Five element vector, containing the prefix of longtext, the suffix of
447        longtext, the prefix of shorttext, the suffix of shorttext and the
448        common middle.  Or empty vector if there was no match.
449    */
450    fn diff_half_matchi<'a, T: PartialEq + Copy>(
451        &self,
452        long_text: &'a [T],
453        short_text: &'a [T],
454        i: usize,
455    ) -> Vec<&'a [T]> {
456        let long_len = long_text.len();
457        let seed = Vec::from_iter(long_text[i..(i + long_len / 4)].iter().cloned());
458        let mut best_common: &[T] = &[];
459        let mut best_longtext_a: &[T] = &[];
460        let mut best_longtext_b: &[T] = &[];
461        let mut best_shorttext_a: &[T] = &[];
462        let mut best_shorttext_b: &[T] = &[];
463        let mut j = self.kmp(short_text, &seed, 0);
464        while j.is_some() {
465            let prefix_length = self.diff_common_prefix(&long_text[i..], &short_text[j.unwrap()..]);
466            let suffix_length = self.diff_common_suffix(&long_text[..i], &short_text[..j.unwrap()]);
467            if best_common.len() < suffix_length + prefix_length {
468                best_common =
469                    &short_text[(j.unwrap() - suffix_length)..(j.unwrap() + prefix_length)];
470                best_longtext_a = &long_text[..(i - suffix_length)];
471                best_longtext_b = &long_text[(i + prefix_length)..];
472                best_shorttext_a = &short_text[..(j.unwrap() - suffix_length)];
473                best_shorttext_b = &short_text[(j.unwrap() + prefix_length)..];
474            }
475            j = self.kmp(short_text, &seed, j.unwrap() + 1);
476        }
477        if best_common.len() * 2 >= long_text.len() {
478            return vec![
479                best_longtext_a,
480                best_longtext_b,
481                best_shorttext_a,
482                best_shorttext_b,
483                best_common,
484            ];
485        }
486        vec![]
487    }
488
489    /// Recover compressed chars to array of any type
490    pub fn diff_chars_to_any<T>(&self, diffs: &[Diff], item_array: &[T]) -> Vec<Diff<Vec<T>>>
491    where
492        T: Clone,
493    {
494        let mut result = Vec::with_capacity(diffs.len());
495        for diff in diffs {
496            result.push(diff.translate(|t| t.translate(item_array)))
497        }
498        result
499    }
500
501    /// Reduce the sequences to a string
502    // of hashes where each Unicode character represents one item.
503    pub fn diff_any_to_chars<T>(&self, seq1: &[T], seq2: &[T]) -> (Chars, Chars, Vec<T>)
504    where
505        T: Hash + Eq + Clone + Default,
506    {
507        let mut item_array: Vec<T> = vec![T::default()];
508        let mut itemhash: HashMap<T, u32> = HashMap::new();
509        let chars1 = self.diff_any_to_chars_munge(seq1, &mut item_array, &mut itemhash);
510        let chars2 = self.diff_any_to_chars_munge(seq2, &mut item_array, &mut itemhash);
511        (chars1, chars2, item_array)
512    }
513
514    pub fn diff_any3_to_chars<T>(
515        &self,
516        seq1: &[T],
517        seq2: &[T],
518        seq3: &[T],
519    ) -> (Chars, Chars, Chars, Vec<T>)
520    where
521        T: Hash + Eq + Clone + Default,
522    {
523        let mut item_array: Vec<T> = vec![T::default()];
524        let mut itemhash: HashMap<T, u32> = HashMap::new();
525        let chars1 = self.diff_any_to_chars_munge(seq1, &mut item_array, &mut itemhash);
526        let chars2 = self.diff_any_to_chars_munge(seq2, &mut item_array, &mut itemhash);
527        let chars3 = self.diff_any_to_chars_munge(seq3, &mut item_array, &mut itemhash);
528        (chars1, chars2, chars3, item_array)
529    }
530
531    fn diff_any_to_chars_munge<T>(
532        &self,
533        seq: &[T],
534        item_array: &mut Vec<T>,
535        itemhash: &mut HashMap<T, u32>,
536    ) -> Chars
537    where
538        T: Hash + Eq + Clone + Default,
539    {
540        let mut chars = Chars::new();
541        for item in seq {
542            if let Some(ch) = itemhash.get(item) {
543                if let Some(ch) = char::from_u32(*ch) {
544                    chars.push(ch);
545                } else {
546                    panic!("Invalid char");
547                }
548            } else {
549                let mut u32char = item_array.len() as u32;
550                // skip reserved range - U+D800 to U+DFFF
551                // unicode code points in this range can't be converted to unicode scalars
552                if u32char >= 55296 {
553                    u32char += 2048;
554                }
555
556                // 1114111 is the biggest unicode scalar, so stop here
557                if u32char == 1114111 {
558                    panic!("max unicode scalar reached");
559                }
560
561                item_array.push(item.clone());
562                itemhash.insert(item.clone(), u32char);
563
564                chars.push(char::from_u32(u32char).unwrap());
565            }
566        }
567        chars
568    }
569
570    /**
571    Rehydrate the text in a diff from a string of line hashes to real lines
572    of text.
573
574    Args:
575        diffs: Vector of diffs as changes.
576        lineArray: Vector of unique strings.
577    */
578    pub fn diff_chars_to_lines(&self, diffs: &mut [Diff], line_array: &[Chars]) {
579        for diff in diffs.iter_mut() {
580            let mut text = Chars::new();
581            let text1 = diff.text();
582            for j in 0..text1.len() {
583                text += &line_array[text1[j] as usize];
584            }
585            *diff.text_mut() = text;
586        }
587    }
588
589    /**
590    Split two texts into an array of strings.  Reduce the texts to a string
591    of hashes where each Unicode character represents one line.
592
593    Args:
594        text1: First chars.
595        text2: Second chars.
596
597    Returns:
598        Three element tuple, containing the encoded text1, the encoded text2 and
599        the array of unique strings.  The zeroth element of the array of unique
600        strings is intentionally blank.
601    */
602    pub fn diff_lines_to_chars(
603        &self,
604        text1: &[char],
605        text2: &[char],
606    ) -> (Chars, Chars, Vec<Chars>) {
607        let mut linearray: Vec<Chars> = vec![Chars::new()];
608        let mut linehash: HashMap<Chars, u32> = HashMap::new();
609        let chars1 = self.diff_lines_to_chars_munge(text1, &mut linearray, &mut linehash);
610        let chars2 = self.diff_lines_to_chars_munge(text2, &mut linearray, &mut linehash);
611        (chars1, chars2, linearray)
612    }
613
614    /**
615    Split a text into an array of strings.  Reduce the texts to a string
616    of hashes where each Unicode character represents one line.
617    Modifies linearray and linehash through being a closure.
618
619    Args:
620        text: chars to encode.
621
622    Returns:
623        Encoded string.
624    */
625    fn diff_lines_to_chars_munge<'a>(
626        &self,
627        text: &[char],
628        linearray: &'a mut Vec<Chars>,
629        linehash: &'a mut HashMap<Chars, u32>,
630    ) -> Chars {
631        // Rust char type:
632        // 0..<d800
633        // d800..<e000
634        // e000..<110000
635
636        let mut chars = Chars::new();
637        // Walk the text, pulling out a substring for each line.
638        // text.split('\n') would would temporarily double our memory footprint.
639        // Modifying text would create many large strings to garbage collect.
640        for line in text.split_inclusive(|&c| c == '\n') {
641            if linehash.contains_key(line) {
642                if let Some(ch) = char::from_u32(linehash[line]) {
643                    chars.push(ch);
644                } else {
645                    panic!("Invalid char");
646                }
647            } else {
648                let mut u32char = linearray.len() as u32;
649                // skip reserved range - U+D800 to U+DFFF
650                // unicode code points in this range can't be converted to unicode scalars
651                if u32char >= 55296 {
652                    u32char += 2048;
653                }
654
655                // 1114111 is the biggest unicode scalar, so stop here
656                if u32char == 1114111 {
657                    panic!("max unicode scalar reached");
658                }
659
660                linearray.push(line.into());
661                linehash.insert(line.into(), u32char);
662
663                chars.push(char::from_u32(u32char).unwrap());
664            }
665        }
666        chars
667    }
668
669    /**
670      Reorder and merge like edit sections.  Merge equalities.
671      Any edit section can move as long as it doesn't cross an equality.
672
673      Args:
674          diffs: vectors of diff object.
675    */
676    pub fn diff_cleanup_merge(&self, diffs: &mut Vec<Diff>) {
677        if diffs.is_empty() {
678            return;
679        }
680        diffs.push(Diff::empty());
681        let mut text_insert = Chars::new();
682        let mut text_delete = Chars::new();
683        let mut i: usize = 0;
684        let mut count_insert = 0;
685        let mut count_delete = 0;
686        while i < diffs.len() {
687            if diffs[i].is_delete() {
688                text_delete += diffs[i].text();
689                count_delete += 1;
690                i += 1;
691            } else if diffs[i].is_insert() {
692                text_insert += diffs[i].text();
693                count_insert += 1;
694                i += 1;
695            } else {
696                // equal
697                // Upon reaching an equality, check for prior redundancies.
698                if count_delete + count_insert > 1 {
699                    if count_delete != 0 && count_insert != 0 {
700                        // Factor out any common prefixies.
701                        let mut commonlength = self.diff_common_prefix(&text_insert, &text_delete);
702                        if commonlength != 0 {
703                            let temp1 = &text_insert[..commonlength];
704                            //  i - count_delete - count_insert - 1
705                            let x = i.checked_sub(count_delete + count_insert + 1);
706
707                            if x.is_some() && diffs[x.unwrap()].is_equal() {
708                                *diffs[x.unwrap()].text_mut() += temp1;
709                            } else {
710                                diffs.insert(0, Diff::Equal(temp1.into()));
711                                i += 1;
712                            }
713                            text_insert = text_insert[commonlength..].into();
714                            text_delete = text_delete[commonlength..].into();
715                        }
716
717                        // Factor out any common suffixies.
718                        commonlength = self.diff_common_suffix(&text_insert, &text_delete);
719                        if commonlength != 0 {
720                            let temp2 =
721                                Chars::from(&text_insert[text_insert.len() - commonlength..])
722                                    + diffs[i].text();
723                            *diffs[i].text_mut() = temp2;
724                            text_insert = text_insert[..text_insert.len() - commonlength].into();
725                            text_delete = text_delete[..text_delete.len() - commonlength].into();
726                        }
727                    }
728
729                    // Delete the offending records and add the merged ones.
730                    i -= count_delete + count_insert;
731                    for _j in 0..(count_delete + count_insert) {
732                        diffs.remove(i);
733                    }
734                    if !text_delete.is_empty() {
735                        diffs.insert(i, Diff::Delete(text_delete.clone()));
736                        i += 1;
737                    }
738                    if !text_insert.is_empty() {
739                        diffs.insert(i, Diff::Insert(text_insert.clone()));
740                        i += 1;
741                    }
742                    i += 1;
743                } else if i != 0 && diffs[i - 1].is_equal() {
744                    // Merge this equality with the previous one.
745                    // temp variable to avoid borrow checker
746                    let temp1 = diffs[i - 1].text_mut().take() + diffs[i].text();
747                    *diffs[i - 1].text_mut() = temp1;
748                    diffs.remove(i);
749                } else {
750                    i += 1;
751                }
752                count_delete = 0;
753                text_delete.clear();
754                text_insert.clear();
755                count_insert = 0;
756            } // equal ends
757        }
758        // Remove the dummy entry at the end.
759        if diffs[diffs.len() - 1].text().is_empty() {
760            diffs.pop();
761        }
762
763        // Second pass: look for single edits surrounded on both sides by equalities
764        // which can be shifted sideways to eliminate an equality.
765        // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
766        let mut changes = false;
767        i = 1;
768        // Intentionally ignore the first and last element (don't need checking).
769        while i < diffs.len() - 1 {
770            if diffs[i - 1].is_equal() && diffs[i + 1].is_equal() {
771                // This is a single edit surrounded by equalities.
772                if diffs[i].text().ends_with(diffs[i - 1].text()) {
773                    // Shift the edit over the previous equality.
774                    //  A<ins>BA</ins>C -> <ins>AB</ins>AC
775                    if !diffs[i - 1].text().is_empty() {
776                        let prev = diffs[i - 1].text();
777                        let next = diffs[i + 1].text();
778
779                        // temp variables to eliminate borrow checker errors
780                        let temp1 =
781                            prev.to_owned() + diffs[i].text().slice_to(-(prev.len() as isize));
782                        let temp2 = prev.to_owned() + next;
783                        *diffs[i].text_mut() = temp1;
784                        *diffs[i + 1].text_mut() = temp2;
785                    }
786
787                    diffs.remove(i - 1); // remove prev
788                    changes = true;
789                } else if diffs[i].text().starts_with(diffs[i + 1].text()) {
790                    // Shift the edit over the next equality.
791                    //  A<ins>CB</ins>C -> AC<ins>BC</ins>
792                    let prev = diffs[i - 1].text();
793                    let next = diffs[i + 1].text();
794
795                    let temp1 = prev.to_owned() + next;
796                    let temp2 = Chars::from(diffs[i].text()[next.len()..].to_owned()) + next;
797                    *diffs[i - 1].text_mut() = temp1;
798                    *diffs[i].text_mut() = temp2;
799
800                    diffs.remove(i + 1);
801                    changes = true;
802                }
803            }
804            i += 1;
805        }
806
807        // If shifts were made, the diff needs reordering and another shift sweep.
808        if changes {
809            self.diff_cleanup_merge(diffs);
810        }
811    }
812
813    /**
814      Look for single edits surrounded on both sides by equalities
815      which can be shifted sideways to align the edit to a word boundary.
816      e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
817
818      Args:
819          diffs: Vector of diff object.
820    */
821    pub fn diff_cleanup_semantic_lossless(&self, diffs: &mut Vec<Diff>) {
822        /**
823        Given two strings, compute a score representing whether the
824        internal boundary falls on logical boundaries.
825        Scores range from 6 (best) to 0 (worst).
826        Closure, but does not reference any external variables.
827
828        Args:
829            one: First chars.
830            two: Second chars.
831
832        Returns:
833            The score.
834        */
835        fn diff_cleanup_semantic_score(one: &[char], two: &[char]) -> i32 {
836            if one.is_empty() || two.is_empty() {
837                // Edges are the best.
838                return 6;
839            }
840
841            // Each port of this function behaves slightly differently due to
842            // subtle differences in each language's definition of things like
843            // 'whitespace'.  Since this function's purpose is largely cosmetic,
844            // the choice has been made to use each language's native features
845            // rather than force total conformity.
846            let char1 = one[one.len() - 1];
847            let char2 = two[0];
848            let nonalphanumeric1: bool = !char1.is_alphanumeric();
849            let nonalphanumeric2: bool = !char2.is_alphanumeric();
850            let whitespace1: bool = nonalphanumeric1 & char1.is_whitespace();
851            let whitespace2: bool = nonalphanumeric2 & char2.is_whitespace();
852            let linebreak1: bool = whitespace1 & ((char1 == '\r') | (char1 == '\n'));
853            let linebreak2: bool = whitespace2 & ((char2 == '\r') | (char2 == '\n'));
854
855            let blanklineend1: bool =
856                one.ends_with(&['\n', '\n']) || one.ends_with(&['\n', '\r', '\n']);
857            let blanklinestart2: bool =
858                two.starts_with(&['\n', '\n']) || two.starts_with(&['\r', '\n', '\r', '\n']);
859
860            let blankline1: bool = linebreak1 & blanklineend1;
861            let blankline2: bool = linebreak2 & blanklinestart2;
862
863            if blankline1 || blankline2 {
864                // Five points for blank lines.
865                return 5;
866            }
867            if linebreak1 || linebreak2 {
868                // Four points for line breaks.
869                return 4;
870            }
871            if nonalphanumeric1 && !whitespace1 && whitespace2 {
872                // Three points for end of sentences.
873                return 3;
874            }
875            if whitespace1 || whitespace2 {
876                // Two points for whitespace.
877                return 2;
878            }
879            if nonalphanumeric1 || nonalphanumeric2 {
880                // One point for non-alphanumeric.
881                return 1;
882            }
883            0
884        }
885
886        let mut i = 1;
887        if diffs.len() <= 1 {
888            return;
889        }
890
891        // Intentionally ignore the first and last element (don't need checking).
892        while i < diffs.len() - 1 {
893            if diffs[i - 1].is_equal() && diffs[i + 1].is_equal() {
894                // This is a single edit surrounded by equalities.
895                // NOTE: sting concat operations, so use owned Chars type
896                let mut equality1 = diffs[i - 1].text().clone();
897                let mut edit = diffs[i].text().clone();
898                let mut equality2 = diffs[i + 1].text().clone();
899
900                // First, shift the edit as far left as possible.
901                let common_offset = self.diff_common_suffix(diffs[i - 1].text(), diffs[i].text());
902                if common_offset != 0 {
903                    let common_string = edit.slice_from(-(common_offset as isize));
904                    let temp1 =
905                        Chars::from(common_string) + edit.slice_to(-(common_offset as isize));
906                    let temp2 = Chars::from(common_string) + &equality2;
907                    equality1 = Chars::from(equality1.slice_to(-(common_offset as isize)));
908                    edit = temp1;
909                    equality2 = temp2;
910                }
911
912                // Second, step character by character right, looking for the best fit.
913                let mut best_equality1 = equality1.clone();
914                let mut best_edit = edit.clone();
915                let mut best_equality2 = equality2.clone();
916
917                let mut best_score = diff_cleanup_semantic_score(&equality1, &edit)
918                    + diff_cleanup_semantic_score(&edit, &equality2);
919
920                while edit.len() > 0 && equality2.len() > 0 && edit[0] == equality2[0] {
921                    // shift 1 char
922                    equality1.push(edit[0]);
923                    edit.remove(0);
924                    edit.push(equality2[0]);
925                    equality2.remove(0);
926
927                    let score = diff_cleanup_semantic_score(&equality1, &edit)
928                        + diff_cleanup_semantic_score(&edit, &equality2);
929
930                    // The >= encourages trailing rather than leading whitespace on edits.
931                    if score >= best_score {
932                        best_score = score;
933                        best_equality1 = equality1.clone();
934                        best_edit = edit.clone();
935                        best_equality2 = equality2.clone();
936                    }
937                }
938
939                if diffs[i - 1].text() != &best_equality1 {
940                    // We have an improvement, save it back to the diff.
941                    if !best_equality1.is_empty() {
942                        *diffs[i - 1].text_mut() = best_equality1;
943                    } else {
944                        diffs.remove(i - 1);
945                        i -= 1;
946                    }
947                    *diffs[i].text_mut() = best_edit;
948                    if !best_equality2.is_empty() {
949                        *diffs[i + 1].text_mut() = best_equality2;
950                    } else {
951                        diffs.remove(i + 1);
952                        i -= 1;
953                    }
954                }
955            }
956            i += 1;
957        }
958    }
959
960    /**
961      Reduce the number of edits by eliminating semantically trivial
962      equalities.
963
964      Args:
965          diffs: Vectors of diff object.
966    */
967    pub fn diff_cleanup_semantic(&self, diffs: &mut Vec<Diff>) {
968        let mut changes = false;
969        let mut equalities: Vec<usize> = vec![]; // Stack of indices where equalities are found.
970        let mut last_equality = Chars::new(); // Always equal to diffs[equalities[-1]][1]
971        let mut i: usize = 0; // Index of current position.
972
973        // Number of chars that changed prior to the equality.
974        let mut length_insertions1 = 0;
975        let mut length_deletions1 = 0;
976        // Number of chars that changed after the equality.
977        let mut length_insertions2 = 0;
978        let mut length_deletions2 = 0;
979        while i < diffs.len() {
980            if diffs[i].is_equal() {
981                // Equality found.
982                equalities.push(i);
983                length_insertions1 = length_insertions2;
984                length_insertions2 = 0;
985                length_deletions1 = length_deletions2;
986                length_deletions2 = 0;
987                last_equality = diffs[i].text().clone();
988            } else {
989                // An insertion or deletion.
990                if diffs[i].is_insert() {
991                    length_insertions2 += diffs[i].text().len();
992                } else {
993                    length_deletions2 += diffs[i].text().len();
994                }
995                // Eliminate an equality that is smaller or equal to the
996                // edits on both sides of it.
997                if !last_equality.is_empty()
998                    && last_equality.len() <= usize::max(length_insertions1, length_deletions1)
999                    && last_equality.len() <= usize::max(length_insertions2, length_deletions2)
1000                {
1001                    // Duplicate record.
1002                    diffs.insert(*equalities.last().unwrap(), Diff::Delete(last_equality.clone()));
1003                    // Change second copy to insert.
1004                    diffs[*equalities.last().unwrap() + 1] =
1005                        Diff::Insert(diffs[equalities[equalities.len() - 1] + 1].text().into());
1006                    // Throw away the equality we just deleted.
1007                    equalities.pop();
1008                    // Throw away the previous equality (it needs to be reevaluated).
1009                    if !equalities.is_empty() {
1010                        equalities.pop();
1011                    }
1012                    // Reset the counters.
1013                    length_insertions1 = 0;
1014                    length_deletions1 = 0;
1015                    length_insertions2 = 0;
1016                    length_deletions2 = 0;
1017                    last_equality = Chars::new();
1018                    changes = true;
1019
1020                    // NOT: reordered control flow, to use continue
1021                    if !equalities.is_empty() {
1022                        i = *equalities.last().unwrap();
1023                    } else {
1024                        i = 0;
1025                        continue;
1026                    }
1027                }
1028            }
1029            i += 1;
1030        }
1031        // Normalize the diff.
1032        if changes {
1033            self.diff_cleanup_merge(diffs);
1034        }
1035        self.diff_cleanup_semantic_lossless(diffs);
1036
1037        // Find any overlaps between deletions and insertions.
1038        // e.g: <del>abcxxx</del><ins>xxxdef</ins>
1039        //   -> <del>abc</del>xxx<ins>def</ins>
1040        // e.g: <del>xxxabc</del><ins>defxxx</ins>
1041        //   -> <ins>def</ins>xxx<del>abc</del>
1042        // Only extract an overlap if it is as big as the edit ahead or behind it.
1043        i = 1;
1044        while i < diffs.len() {
1045            if diffs[i - 1].is_delete() && diffs[i].is_insert() {
1046                let deletion = diffs[i - 1].text().clone();
1047                let insertion = diffs[i].text().clone();
1048                let overlap_length1 = self.diff_common_overlap(&deletion, &insertion);
1049                let overlap_length2 = self.diff_common_overlap(&insertion, &deletion);
1050                if overlap_length1 >= overlap_length2 {
1051                    if (overlap_length1 as f32) >= (deletion.len() as f32 / 2.0)
1052                        || (overlap_length1 as f32) >= (insertion.len() as f32 / 2.0)
1053                    {
1054                        // Overlap found.  Insert an equality and trim the surrounding edits.
1055                        diffs.insert(i, Diff::Equal(insertion[..overlap_length1].into()));
1056                        diffs[i - 1] =
1057                            Diff::Delete(deletion[..deletion.len() - overlap_length1].into());
1058                        diffs[i + 1] = Diff::Insert(insertion[overlap_length1..].into());
1059                        i += 1;
1060                    }
1061                } else if (overlap_length2 as f32) >= (deletion.len() as f32 / 2.0)
1062                    || (overlap_length2 as f32) >= (insertion.len() as f32 / 2.0)
1063                {
1064                    // Reverse overlap found.
1065                    // Insert an equality and swap and trim the surrounding edits.
1066                    diffs.insert(i, Diff::Equal(deletion[..overlap_length2].into()));
1067                    diffs[i - 1] =
1068                        Diff::Insert(insertion[..insertion.len() - overlap_length2].into());
1069                    diffs[i + 1] = Diff::Delete(deletion[overlap_length2..].into());
1070                    i += 1;
1071                }
1072                i += 1;
1073            }
1074            i += 1;
1075        }
1076    }
1077
1078    /**
1079      Reduce the number of edits by eliminating operationally trivial
1080      equalities.
1081
1082    Args:
1083        diffs: Vector of diff object.
1084
1085    Affected By:
1086        edit_cost
1087    */
1088    pub fn diff_cleanup_efficiency(&self, diffs: &mut Vec<Diff>) {
1089        if diffs.len() <= 1 {
1090            return;
1091        }
1092        let mut changes: bool = false;
1093        let mut equalities: Vec<usize> = vec![]; // Stack of indices where equalities are found.
1094        let mut last_equality = Chars::new(); // Always equal to diffs[equalities[-1]][1]
1095        let mut i: usize = 0; // Index of current position.
1096        let mut pre_ins = false; // Is there an insertion operation before the last equality.
1097        let mut pre_del = false; // Is there a deletion operation before the last equality.
1098        let mut post_ins = false; // Is there an insertion operation after the last equality.
1099        let mut post_del = false; // Is there a deletion operation after the last equality.
1100        while i < diffs.len() {
1101            if diffs[i].is_equal() {
1102                if diffs[i].text().len() < self.edit_cost as usize && (post_del || post_ins) {
1103                    // Candidate found.
1104                    equalities.push(i);
1105                    pre_ins = post_ins;
1106                    pre_del = post_del;
1107                    last_equality = diffs[i].text().clone();
1108                } else {
1109                    // Not a candidate, and can never become one.
1110                    equalities.clear();
1111                    // last_equality.clear();
1112                }
1113                post_ins = false;
1114                post_del = false;
1115            } else {
1116                // An insertion or deletion.
1117                if diffs[i].is_delete() {
1118                    post_del = true;
1119                } else {
1120                    post_ins = true;
1121                }
1122
1123                /*
1124                Five types to be split:
1125                <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
1126                <ins>A</ins>X<ins>C</ins><del>D</del>
1127                <ins>A</ins><del>B</del>X<ins>C</ins>
1128                <ins>A</del>X<ins>C</ins><del>D</del>
1129                <ins>A</ins><del>B</del>X<del>C</del>
1130                */
1131
1132                if !last_equality.is_empty()
1133                    && ((pre_ins && pre_del && post_del && post_ins)
1134                        || ((last_equality.len() as i32) < self.edit_cost / 2
1135                            && (pre_ins as i32
1136                                + pre_del as i32
1137                                + post_del as i32
1138                                + post_ins as i32)
1139                                == 3))
1140                {
1141                    // Duplicate record.
1142                    diffs.insert(equalities[equalities.len() - 1], Diff::Delete(last_equality));
1143                    // Change second copy to insert.
1144                    diffs[equalities[equalities.len() - 1] + 1] =
1145                        Diff::Insert(diffs[equalities[equalities.len() - 1] + 1].text().clone());
1146                    equalities.pop(); // Throw away the equality we just deleted.
1147
1148                    last_equality = Chars::new();
1149                    if pre_ins && pre_del {
1150                        // No changes made which could affect previous entry, keep going.
1151                        post_del = true;
1152                        post_ins = true;
1153                        equalities.clear();
1154                    } else {
1155                        if !equalities.is_empty() {
1156                            equalities.pop(); // Throw away the previous equality.
1157                        }
1158                        if !equalities.is_empty() {
1159                            i = *equalities.last().unwrap();
1160                        } else {
1161                            i = 0;
1162                            // FIXME: reorder control flow. This requires following flag re-init
1163                            post_ins = false;
1164                            post_del = false;
1165                            changes = true;
1166                            continue;
1167                        }
1168                        post_ins = false;
1169                        post_del = false;
1170                    }
1171                    changes = true;
1172                }
1173            }
1174            i += 1;
1175        }
1176        if changes {
1177            self.diff_cleanup_merge(diffs);
1178        }
1179    }
1180
1181    /**
1182      Compute and return the source text (all equalities and deletions).
1183
1184      Args:
1185          diffs: Vectoe of diff object.
1186
1187      Returns:
1188          Source text.
1189    */
1190    pub fn diff_text1(&self, diffs: &[Diff]) -> Chars {
1191        let mut text = Chars::new();
1192        for d in diffs {
1193            if !d.is_insert() {
1194                text += d.text();
1195            }
1196        }
1197        text
1198    }
1199
1200    /**
1201      Compute and return the destination text (all equalities and insertions).
1202
1203      Args:
1204          diffs: Vector of diff object.
1205
1206      Returns:
1207          destination text.
1208    */
1209    pub fn diff_text2(&self, diffs: &[Diff]) -> Chars {
1210        let mut text = Chars::new();
1211        for d in diffs {
1212            if !d.is_delete() {
1213                text += d.text();
1214            }
1215        }
1216        text
1217    }
1218
1219    /**
1220    loc is a location in text1, compute and return the equivalent location
1221    in text2.  e.g. "The cat" vs "The big cat", 1->1, 5->8
1222
1223    Args:
1224        diffs: Vector of diff object.
1225        loc: Location within text1.
1226
1227    Returns:
1228        Location within text2.
1229    */
1230    pub fn diff_xindex(&self, diffs: &[Diff], loc: usize) -> usize {
1231        let mut chars1 = 0;
1232        let mut chars2 = 0;
1233        let mut last_chars1 = 0;
1234        let mut last_chars2 = 0;
1235        let mut lastdiff = &Diff::empty();
1236        let z = 0;
1237        for diffs_item in diffs {
1238            if !diffs_item.is_insert() {
1239                // Equality or deletion.
1240                chars1 += diffs_item.text().len();
1241            }
1242            if !diffs_item.is_delete() {
1243                // Equality or insertion.
1244                chars2 += diffs_item.text().len();
1245            }
1246            if chars1 > loc {
1247                // Overshot the location.
1248                lastdiff = diffs_item;
1249                break;
1250            }
1251            last_chars1 = chars1;
1252            last_chars2 = chars2;
1253        }
1254        if lastdiff.is_delete() && diffs.len() != z {
1255            // The location was deleted.
1256            return last_chars2;
1257        }
1258        // Add the remaining len(character).
1259        last_chars2 + (loc - last_chars1)
1260    }
1261
1262    /*
1263      Compute the Levenshtein distance; the number of inserted, deleted or
1264      substituted characters.
1265
1266      Args:
1267          diffs: Vector of diff object.
1268
1269      Returns:
1270          Number of changes.
1271    */
1272    pub fn diff_levenshtein(&self, diffs: &[Diff]) -> usize {
1273        let mut levenshtein = 0;
1274        let mut insertions = 0;
1275        let mut deletions = 0;
1276        for adiff in diffs {
1277            if adiff.is_insert() {
1278                insertions += adiff.text().len();
1279            } else if adiff.is_delete() {
1280                deletions += adiff.text().len();
1281            } else {
1282                // A deletion and an insertion is one substitution.
1283                levenshtein += usize::max(insertions, deletions);
1284                insertions = 0;
1285                deletions = 0;
1286            }
1287        }
1288        levenshtein += usize::max(insertions, deletions);
1289        levenshtein
1290    }
1291
1292    pub fn diff_to_delta(&self, diffs: &[Diff]) -> String {
1293        diffs
1294            .iter()
1295            .map(|d| match d {
1296                Diff::Insert(text) => {
1297                    format!("+{}", text.to_safe_encode())
1298                }
1299                Diff::Delete(text) => {
1300                    format!("-{}", text.len())
1301                }
1302                Diff::Equal(text) => {
1303                    format!("={}", text.len())
1304                }
1305            })
1306            .collect::<Vec<_>>()
1307            .join("\t")
1308    }
1309
1310    pub fn diff_from_delta(&self, text1: &Chars, delta: &str) -> Result<Vec<Diff>, ()> {
1311        let mut diffs: Vec<Diff> = vec![];
1312        let mut pointer: usize = 0; // Cursor in text1
1313        for token in delta.split('\t') {
1314            if token.is_empty() {
1315                continue;
1316            }
1317            let (operation, param) = token.split_at(1);
1318            if operation == "+" {
1319                match Chars::from(param).to_safe_decode() {
1320                    Err(_) => return Err(()),
1321                    Ok(p) => diffs.push(Diff::Insert(p)),
1322                };
1323            } else if operation == "-" || operation == "=" {
1324                let n: isize = param.parse().map_err(|_| ())?;
1325                if n < 0 {
1326                    return Err(());
1327                }
1328                let to = pointer + (n as usize);
1329                let text = Chars::from(&text1[pointer..to]);
1330                pointer = to;
1331                if operation == "=" {
1332                    diffs.push(Diff::Equal(text));
1333                } else {
1334                    diffs.push(Diff::Delete(text));
1335                }
1336            } else {
1337                return Err(());
1338            }
1339        }
1340        if pointer != text1.len() {
1341            return Err(());
1342        }
1343        Ok(diffs)
1344    }
1345
1346    pub fn diff_to_html(&self, diffs: &[Diff]) -> String {
1347        diffs
1348            .iter()
1349            .map(|d| match d {
1350                Diff::Insert(text) => {
1351                    format!("<ins>{text}</ins>")
1352                }
1353                Diff::Delete(text) => {
1354                    format!("<del>{text}</del>")
1355                }
1356                Diff::Equal(text) => {
1357                    format!("{text}")
1358                }
1359            })
1360            .collect::<Vec<_>>()
1361            .concat()
1362    }
1363
1364    /**
1365    Find the 'middle snake' of a diff, split the problem in two
1366    and return the recursively constructed diff.
1367    See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
1368
1369    Args:
1370        text1: Old chars to be diffed.
1371        text2: New chars to be diffed.
1372
1373    Returns:
1374            Vector of diffs as changes.
1375    */
1376    pub fn diff_bisect(&mut self, text1: &[char], text2: &[char]) -> Vec<Diff> {
1377        self.diff_bisect_internal(text1, text2, Instant::now())
1378    }
1379
1380    fn diff_bisect_internal(
1381        &self,
1382        text1: &[char],
1383        text2: &[char],
1384        start_time: Instant,
1385    ) -> Vec<Diff> {
1386        let text1_length = text1.len() as isize;
1387        let text2_length = text2.len() as isize;
1388        let max_d: isize = (text1_length + text2_length + 1) / 2;
1389        let v_offset: isize = max_d;
1390        let v_length: isize = 2 * max_d;
1391        let mut v1: Vec<isize> = vec![-1; v_length as usize];
1392        let mut v2: Vec<isize> = vec![-1; v_length as usize];
1393        v1[v_offset as usize + 1] = 0;
1394        v2[v_offset as usize + 1] = 0;
1395        let delta: isize = text1_length - text2_length;
1396        // If the total number of characters is odd, then the front path will
1397        // collide with the reverse path.
1398        let front: isize = (delta % 2 != 0) as isize;
1399        // Offsets for start and end of k loop.
1400        // Prevents mapping of space beyond the grid.
1401        let mut k1start: isize = 0;
1402        let mut k1end: isize = 0;
1403        let mut k2start: isize = 0;
1404        let mut k2end: isize = 0;
1405        for d in 0..max_d {
1406            if self.diff_timeout.is_some()
1407                && start_time.elapsed() >= *self.diff_timeout.as_ref().unwrap()
1408            {
1409                break;
1410            }
1411
1412            let d1 = d;
1413            let mut k1 = -d1 + k1start;
1414            let mut x1: isize;
1415            let mut k1_offset: isize;
1416            let mut k2_offset;
1417            let mut x2;
1418            let mut y1;
1419            // Walk the front path one step.
1420            while k1 < d1 + 1 - k1end {
1421                k1_offset = v_offset + k1;
1422                if k1 == -d1
1423                    || (k1 != d1 && v1[k1_offset as usize - 1] < v1[k1_offset as usize + 1])
1424                {
1425                    x1 = v1[k1_offset as usize + 1];
1426                } else {
1427                    x1 = v1[k1_offset as usize - 1] + 1;
1428                }
1429                y1 = x1 - k1;
1430                while x1 < text1_length && y1 < text2_length {
1431                    let i1 = if x1 < 0 {
1432                        text1_length + x1
1433                    } else {
1434                        x1
1435                    };
1436                    let i2 = if y1 < 0 {
1437                        text2_length + y1
1438                    } else {
1439                        y1
1440                    };
1441                    if text1[i1 as usize] != text2[i2 as usize] {
1442                        break;
1443                    }
1444                    x1 += 1;
1445                    y1 += 1;
1446                }
1447                v1[k1_offset as usize] = x1;
1448                if x1 > text1_length {
1449                    // Ran off the right of the graph.
1450                    k1end += 2;
1451                } else if y1 > text2_length {
1452                    // Ran off the bottom of the graph.
1453                    k1start += 2;
1454                } else if front != 0 {
1455                    k2_offset = v_offset + delta - k1;
1456                    if k2_offset >= 0 && k2_offset < v_length && v2[k2_offset as usize] != -1 {
1457                        // Mirror x2 onto top-left coordinate system.
1458                        x2 = text1_length - v2[k2_offset as usize];
1459                        if x1 >= x2 {
1460                            // Overlap detected.
1461                            return self.diff_bisect_split(
1462                                text1,
1463                                text2,
1464                                x1 as usize,
1465                                y1 as usize,
1466                                start_time,
1467                            );
1468                        }
1469                    }
1470                }
1471                k1 += 2;
1472            }
1473            let mut k2 = -d1 + k2start;
1474            let mut y2;
1475            // Walk the reverse path one step.
1476            while k2 < d1 + 1 - k2end {
1477                k2_offset = v_offset + k2;
1478                if k2 == -d1
1479                    || (k2 != d1 && v2[k2_offset as usize - 1] < v2[k2_offset as usize + 1])
1480                {
1481                    x2 = v2[k2_offset as usize + 1];
1482                } else {
1483                    x2 = v2[k2_offset as usize - 1] + 1;
1484                }
1485                y2 = x2 - k2;
1486                while x2 < text1_length && y2 < text2_length {
1487                    let i1 = if text1_length - x2 > 0 {
1488                        text1_length - x2 - 1
1489                    } else {
1490                        x2 + 1
1491                    };
1492                    let i2 = if text2_length - y2 > 0 {
1493                        text2_length - y2 - 1
1494                    } else {
1495                        y2 + 1
1496                    };
1497                    if text1[i1 as usize] != text2[i2 as usize] {
1498                        break;
1499                    }
1500                    x2 += 1;
1501                    y2 += 1;
1502                }
1503                v2[k2_offset as usize] = x2;
1504                if x2 > text1_length {
1505                    // Ran off the left of the graph.
1506                    k2end += 2;
1507                } else if y2 > text2_length {
1508                    // Ran off the top of the graph.
1509                    k2start += 2;
1510                } else if front == 0 {
1511                    k1_offset = v_offset + delta - k2;
1512                    if k1_offset >= 0 && k1_offset < v_length && v1[k1_offset as usize] != -1 {
1513                        x1 = v1[k1_offset as usize];
1514                        y1 = v_offset + x1 - k1_offset;
1515                        // Mirror x2 onto top-left coordinate system.
1516                        x2 = text1_length - x2;
1517                        if x1 >= x2 {
1518                            // Overlap detected.
1519                            return self.diff_bisect_split(
1520                                text1,
1521                                text2,
1522                                x1 as usize,
1523                                y1 as usize,
1524                                start_time,
1525                            );
1526                        }
1527                    }
1528                }
1529                k2 += 2;
1530            }
1531        }
1532        // number of diffs equals number of characters, no commonality at all.
1533        vec![Diff::Delete(text1.into()), Diff::Insert(text2.into())]
1534    }
1535
1536    /**
1537    Given the location of the 'middle snake', split the diff in two parts
1538    and recurse.
1539
1540    Args:
1541        text1: Old text1 to be diffed.
1542        text2: New text1 to be diffed.
1543        x: Index of split point in text1.
1544        y: Index of split point in text2.
1545
1546    Returns:
1547            Vector of diffs as changes.
1548    */
1549    fn diff_bisect_split(
1550        &self,
1551        text1: &[char],
1552        text2: &[char],
1553        x: usize,
1554        y: usize,
1555        start_time: Instant,
1556    ) -> Vec<Diff> {
1557        let text1a = &text1[..x];
1558        let text2a = &text2[..y];
1559        let text1b = &text1[x..];
1560        let text2b = &text2[y..];
1561
1562        // Compute both diffs serially.
1563        let mut diffs = self.diff_main_internal(text1a, text2a, false, start_time);
1564        let mut diffsb = self.diff_main_internal(text1b, text2b, false, start_time);
1565        diffs.append(&mut diffsb);
1566        diffs
1567    }
1568
1569    /**
1570    Find the differences between two texts.  Simplifies the problem by
1571      stripping any common prefix or suffix off the texts before diffing.
1572
1573    Args:
1574        text1: Old string to be diffed.
1575        text2: New string to be diffed.
1576        checklines: Optional speedup flag. If present and false, then don't run
1577            a line-level diff first to identify the changed areas.
1578            Defaults to true, which does a faster, slightly less optimal diff.
1579    Returns:
1580        Vector of diffs as changes.
1581    */
1582    pub fn diff_main(&self, text1: &[char], text2: &[char], checklines: bool) -> Vec<Diff> {
1583        self.diff_main_internal(text1, text2, checklines, Instant::now())
1584    }
1585
1586    fn diff_main_internal(
1587        &self,
1588        text1: &[char],
1589        text2: &[char],
1590        checklines: bool,
1591        start_time: Instant,
1592    ) -> Vec<Diff> {
1593        let mut text1 = text1;
1594        let mut text2 = text2;
1595        // check for empty text
1596        if text1.is_empty() && text2.is_empty() {
1597            return vec![];
1598        } else if text1.is_empty() {
1599            return vec![Diff::Insert(text2.into())];
1600        } else if text2.is_empty() {
1601            return vec![Diff::Delete(text1.into())];
1602        }
1603
1604        // check for equality
1605        if text1 == text2 {
1606            return vec![Diff::Equal(text1.into())];
1607        }
1608
1609        // Trim off common prefix (speedup).
1610        let mut commonlength = self.diff_common_prefix(text1, text2);
1611        let commonprefix = &text1[0..commonlength];
1612        text1 = &text1[commonlength..];
1613        text2 = &text2[commonlength..];
1614
1615        // Trim off common suffix (speedup).
1616        commonlength = self.diff_common_suffix(text1, text2);
1617        let commonsuffix = &text1[(text1.len() - commonlength)..];
1618        text1 = &text1[..(text1.len() - commonlength)];
1619        text2 = &text2[..(text2.len() - commonlength)];
1620
1621        let mut diffs: Vec<Diff> = Vec::new();
1622
1623        //Restore the prefix
1624        if !commonprefix.is_empty() {
1625            diffs.push(Diff::Equal(commonprefix.into()));
1626        }
1627
1628        // Compute the diff on the middle block.
1629        let temp = self.diff_compute(text1, text2, checklines, start_time);
1630        for z in temp {
1631            diffs.push(z);
1632        }
1633
1634        // Restore the suffix
1635        if !commonsuffix.is_empty() {
1636            diffs.push(Diff::Equal(commonsuffix.into()));
1637        }
1638        self.diff_cleanup_merge(&mut diffs);
1639        diffs
1640    }
1641
1642    /**
1643    Find the differences between two texts.  Assumes that the texts do not
1644    have any common prefix or suffix.
1645
1646    Args:
1647        text1: Old chars to be diffed.
1648        text2: New chars to be diffed.
1649        checklines: Speedup flag.  If false, then don't run a line-level diff
1650        first to identify the changed areas.
1651        If true, then run a faster, slightly less optimal diff.
1652
1653    Returns:
1654        Vector of diffs as changes.
1655    */
1656    fn diff_compute(
1657        &self,
1658        text1: &[char],
1659        text2: &[char],
1660        checklines: bool,
1661        start_time: Instant,
1662    ) -> Vec<Diff> {
1663        let mut diffs: Vec<Diff> = Vec::new();
1664        if text1.is_empty() {
1665            // Just add some text (speedup).
1666            diffs.push(Diff::Insert(text2.into()));
1667            return diffs;
1668        } else if text2.is_empty() {
1669            // Just delete some text (speedup).
1670            diffs.push(Diff::Delete(text1.into()));
1671            return diffs;
1672        }
1673        {
1674            let len1 = text1.len();
1675            let len2 = text2.len();
1676            let (longtext, shorttext) = if len1 >= len2 {
1677                (text1, text2)
1678            } else {
1679                (text2, text1)
1680            };
1681            if let Some(i) = self.kmp(longtext, shorttext, 0) {
1682                // Shorter text is inside the longer text (speedup).
1683                if len1 > len2 {
1684                    if i != 0 {
1685                        diffs.push(Diff::Delete(text1[..i].into()));
1686                    }
1687                    diffs.push(Diff::Equal(text2.into()));
1688                    if i + text2.len() != text1.len() {
1689                        diffs.push(Diff::Delete(text1[i + text2.len()..].into()));
1690                    }
1691                } else {
1692                    if i != 0 {
1693                        diffs.push(Diff::Insert(text2[..i].into()));
1694                    }
1695                    diffs.push(Diff::Equal(text1.into()));
1696                    if i + text1.len() != text2.len() {
1697                        diffs.push(Diff::Insert(text2[i + text1.len()..].into()));
1698                    }
1699                }
1700                return diffs;
1701            }
1702            if shorttext.len() == 1 {
1703                // Single character string.
1704                // After the previous speedup, the character can't be an equality.
1705                diffs.push(Diff::Delete(text1.into()));
1706                diffs.push(Diff::Insert(text2.into()));
1707                return diffs;
1708            }
1709        }
1710        // Check to see if the problem can be split in two.
1711        let hm = self.diff_half_match(text1, text2);
1712        if let Some(hm) = hm {
1713            // A half-match was found, sort out the return data.
1714            match hm[..] {
1715                [text1_a, text1_b, text2_a, text2_b, mid_common] => {
1716                    // Send both pairs off for separate processing.
1717                    let mut diffs_a =
1718                        self.diff_main_internal(text1_a, text2_a, checklines, start_time);
1719                    let diffs_b = self.diff_main_internal(text1_b, text2_b, checklines, start_time);
1720                    // Merge the result.
1721                    diffs_a.push(Diff::Equal(mid_common.into()));
1722                    diffs_a.extend(diffs_b);
1723                    return diffs_a;
1724                }
1725                _ => unreachable!("vec used as 5-tuple"),
1726            }
1727        }
1728
1729        if checklines && text1.len() > 100 && text2.len() > 100 {
1730            self.diff_linemode_internal(text1, text2, start_time)
1731        } else {
1732            self.diff_bisect_internal(text1, text2, start_time)
1733        }
1734    }
1735
1736    /**
1737    Do a quick line-level diff on both chars, then rediff the parts for
1738    greater accuracy.
1739    This speedup can produce non-minimal diffs.
1740
1741    Args:
1742        text1: Old chars to be diffed.
1743        text2: New chars to be diffed.
1744
1745    Returns:
1746        Vector of diffs as changes.
1747    */
1748    pub fn diff_linemode(&mut self, text1: &[char], text2: &[char]) -> Vec<Diff> {
1749        self.diff_linemode_internal(text1, text2, Instant::now())
1750    }
1751
1752    fn diff_linemode_internal(
1753        &self,
1754        text1: &[char],
1755        text2: &[char],
1756        start_time: Instant,
1757    ) -> Vec<Diff> {
1758        // Scan the text on a line-by-line basis first.
1759        let (text3, text4, linearray) = self.diff_lines_to_chars(text1, text2);
1760
1761        let dmp = DiffMatchPatch::new();
1762        let mut diffs: Vec<Diff> = dmp.diff_main_internal(&text3, &text4, false, start_time);
1763
1764        // Convert the diff back to original text.
1765        self.diff_chars_to_lines(&mut diffs, &linearray);
1766        // Eliminate freak matches (e.g. blank lines)
1767        self.diff_cleanup_semantic(&mut diffs);
1768
1769        // Rediff any replacement blocks, this time character-by-character.
1770        // Add a dummy entry at the end.
1771        diffs.push(Diff::empty());
1772        let mut count_delete = 0;
1773        let mut count_insert = 0;
1774        let mut text_delete = Chars::new();
1775        let mut text_insert = Chars::new();
1776        let mut i = 0;
1777        let mut temp: Vec<Diff> = vec![];
1778        while i < diffs.len() {
1779            if diffs[i].is_insert() {
1780                count_insert += 1;
1781                text_insert += diffs[i].text();
1782            } else if diffs[i].is_delete() {
1783                count_delete += 1;
1784                text_delete += diffs[i].text();
1785            } else {
1786                // Upon reaching an equality, check for prior redundancies.
1787                if count_delete >= 1 && count_insert >= 1 {
1788                    // Delete the offending records and add the merged ones.
1789                    let sub_diff =
1790                        self.diff_main_internal(&text_delete, &text_insert, false, start_time);
1791                    for z in sub_diff {
1792                        temp.push(z);
1793                    }
1794                    temp.push(diffs[i].clone());
1795                } else {
1796                    if !text_delete.is_empty() {
1797                        temp.push(Diff::Delete(text_delete));
1798                    }
1799                    if !text_insert.is_empty() {
1800                        temp.push(Diff::Insert(text_insert));
1801                    }
1802                    temp.push(diffs[i].clone());
1803                }
1804                count_delete = 0;
1805                count_insert = 0;
1806                text_delete = Chars::new();
1807                text_insert = Chars::new();
1808            }
1809            i += 1;
1810        }
1811        temp.pop(); //Remove the dummy entry at the end.
1812        temp
1813    }
1814
1815    // unimplemented:
1816    // DiffPrettyHtml
1817    // DiffFromDelta
1818
1819    /// Patch make method 1
1820    /// a = text1, b = text2
1821    pub fn patch_make1(&mut self, text1: &[char], text2: &[char]) -> Vec<Patch> {
1822        let checklines = false;
1823        let mut diffs = self.diff_main(text1, text2, checklines);
1824        if diffs.len() > 2 {
1825            if checklines {
1826                self.diff_cleanup_semantic(&mut diffs);
1827            }
1828            self.diff_cleanup_efficiency(&mut diffs);
1829        }
1830
1831        if diffs.is_empty() {
1832            return vec![];
1833        }
1834
1835        let mut patches = vec![];
1836        let mut patch = Patch::default();
1837
1838        let mut char_count1 = 0;
1839        let mut char_count2 = 0;
1840
1841        let mut prepatch_text: Vec<_> = text1.to_vec();
1842        let mut postpatch_text: Vec<_> = text1.to_vec();
1843
1844        for (x, diff) in diffs.iter().enumerate() {
1845            if patch.diffs.is_empty() && !diff.is_equal() {
1846                // A new patch starts here.
1847                patch.start1 = char_count1;
1848                patch.start2 = char_count2;
1849            }
1850
1851            if diff.is_insert() {
1852                patch.diffs.push(diff.clone());
1853                patch.length2 += diff.text().len();
1854
1855                let tmp = postpatch_text[char_count2..].to_vec();
1856                postpatch_text = postpatch_text[..char_count2].to_vec();
1857                postpatch_text.extend_from_slice(diff.text());
1858                postpatch_text.extend(tmp);
1859            } else if diff.is_delete() {
1860                patch.length1 += diff.text().len();
1861                patch.diffs.push(diff.clone());
1862
1863                let tmp = postpatch_text[char_count2 + diff.text().len()..].to_vec();
1864                postpatch_text = postpatch_text[..char_count2].to_vec();
1865                postpatch_text.extend(tmp);
1866            } else if diff.is_equal()
1867                && diff.text().len() <= 2 * self.patch_margin
1868                && !patch.diffs.is_empty()
1869                && diffs.len() != x + 1
1870            {
1871                // Small equality inside a patch.
1872                patch.diffs.push(diff.clone());
1873                patch.length1 += diff.text().len();
1874                patch.length2 += diff.text().len();
1875            }
1876
1877            if diff.is_equal() && diff.text().len() >= 2 * self.patch_margin {
1878                // Time for a new patch.
1879                if !patch.diffs.is_empty() {
1880                    self.patch_add_context(&mut patch, &prepatch_text);
1881                    patches.push(patch);
1882                    patch = Patch::default();
1883                    // Unlike Unidiff, our patch lists have a rolling context.
1884                    // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
1885                    // Update prepatch text & pos to reflect the application of the just completed
1886                    // patch.
1887                    prepatch_text = postpatch_text.to_vec();
1888                    char_count1 = char_count2;
1889                }
1890            }
1891
1892            // Update the current character count.
1893            if !diff.is_insert() {
1894                char_count2 += diff.text().len();
1895            }
1896            if !diff.is_delete() {
1897                char_count1 += diff.text().len();
1898            }
1899        }
1900
1901        // Pick up the leftover patch if not empty.
1902        if !patch.diffs.is_empty() {
1903            self.patch_add_context(&mut patch, &prepatch_text);
1904            patches.push(patch);
1905        }
1906
1907        patches
1908    }
1909
1910    /*
1911    Increase the context until it is unique,
1912             but don't let the pattern expand beyond Match_MaxBits.
1913
1914             Args:
1915                 patch: The patch to grow.
1916                 text: Source text.
1917    */
1918    fn patch_add_context(&mut self, patch: &mut Patch, text: &[char]) {
1919        if text.is_empty() {
1920            return;
1921        }
1922
1923        let mut pattern = &text[patch.start1..patch.start1 + patch.length1];
1924        let mut padding = 0;
1925
1926        // Look for the first and last matches of pattern in text.  If two different
1927        // matches are found, increase the pattern length.
1928        while text.windows(pattern.len()).filter(|w| *w == pattern).nth(1).is_some()
1929            && (self.match_maxbits == 0
1930                || (pattern.len() < self.match_maxbits - self.patch_margin * 2))
1931        {
1932            padding += self.patch_margin;
1933            pattern = &text[patch.start2.checked_sub(padding).unwrap_or(0)
1934                ..patch.start2 + patch.length1 + padding];
1935        }
1936
1937        // Add one chunk for good luck.
1938        padding += self.patch_margin;
1939
1940        // Add the prefix.
1941        // TODO: max 0
1942        let prefix = &text[patch.start2.checked_sub(padding).unwrap_or(0)..patch.start2];
1943        if !prefix.is_empty() {
1944            patch.diffs.insert(0, Diff::Equal(prefix.into()));
1945        }
1946        // Add the suffix.
1947        let suffix = &text[patch.start2 + patch.length1
1948            ..usize::min(patch.start2 + patch.length1 + padding, text.len())];
1949        if !suffix.is_empty() {
1950            patch.diffs.push(Diff::Equal(suffix.into()));
1951        }
1952
1953        // Roll back the start points.
1954        patch.start1 -= prefix.len();
1955        patch.start2 -= prefix.len();
1956
1957        // Extend the lengths.
1958        patch.length1 += prefix.len() + suffix.len();
1959        patch.length2 += prefix.len() + suffix.len();
1960    }
1961
1962    pub fn patch_make2(&mut self, diffs: &mut Vec<Diff>) -> Vec<Patch> {
1963        todo!()
1964    }
1965}
diffmatchpatch/lib.rs

diffmatchpatch/
lib.rs