diff_match_patch_rs/
dmp.rs

1use core::str;
2use std::{char, collections::HashMap, fmt::Display};
3
4#[cfg(target_arch = "wasm32")]
5use chrono::{NaiveTime, TimeDelta, Utc};
6#[cfg(not(target_arch = "wasm32"))]
7use std::time::{Duration, Instant};
8
9#[cfg(target_arch = "wasm32")]
10pub(crate) type Time = NaiveTime;
11#[cfg(not(target_arch = "wasm32"))]
12pub(crate) type Time = Instant;
13
14use crate::{errors::Error, html::HtmlConfig, DType, PatchInput};
15
16/// Enum representing the different ops of diff
17#[derive(Debug, PartialEq, Eq, Clone, Copy)]
18pub enum Ops {
19    Delete = -1,
20    Equal,
21    Insert,
22}
23
24/// A structure representing a diff
25/// (Ops::Delete, String::new("Hello")) means delete `Hello`
26/// (Ops::Insert, String::new("Goodbye")) means add `Goodbye`
27/// (Ops::Equal, String::new("World")) means keep world
28#[derive(Debug, Clone, PartialEq, Eq)]
29pub struct Diff<T: DType>(pub(crate) Ops, pub(crate) Vec<T>);
30
31impl Display for Diff<u8> {
32    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33        write!(
34            f,
35            "({:?}, {})",
36            self.op(),
37            std::str::from_utf8(self.data()).unwrap()
38        )
39    }
40}
41
42impl Display for Diff<char> {
43    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44        write!(
45            f,
46            "({:?}, {})",
47            self.op(),
48            self.data().iter().collect::<String>()
49        )
50    }
51}
52
53impl<T: DType> Diff<T> {
54    /// Create a new diff object
55    pub fn new(op: Ops, data: &[T]) -> Self {
56        Self(op, data.to_vec())
57    }
58
59    /// helper functions to create op::Delete
60    pub fn delete(data: &[T]) -> Self {
61        Self::new(Ops::Delete, data)
62    }
63
64    /// helper functions to create op::Insert
65    pub fn insert(data: &[T]) -> Self {
66        Self::new(Ops::Insert, data)
67    }
68
69    /// helper functions to create op::Equal
70    pub fn equal(data: &[T]) -> Self {
71        Self::new(Ops::Equal, data)
72    }
73
74    /// returns the operation of the current diff
75    pub fn op(&self) -> Ops {
76        self.0
77    }
78
79    /// returns slice of the data
80    pub fn data(&self) -> &[T] {
81        &self.1[..]
82    }
83
84    // returns length of data
85    pub fn size(&self) -> usize {
86        self.1.len()
87    }
88}
89
90pub struct DiffMatchPatch {
91    /// a speedup flag, If present and false, then don't run
92    /// a line-level diff first to identify the changed areas.
93    /// Defaults to true, which does a faster, slightly less optimal diff.
94    checklines: bool,
95    /// A default timeout in num milliseconda, defaults to 1000 (1 second)
96    timeout: Option<u32>,
97    // Cost of an empty edit operation in terms of edit characters. Defaults to 4
98    edit_cost: usize,
99    /// At what point is no match declared (0.0 = perfection, 1.0 = very loose).
100    match_threshold: f32,
101    /// How far to search for a match (0 = exact location, 1000+ = broad match).
102    /// A match this many characters away from the expected location will add
103    /// 1.0 to the score (0.0 is a perfect match).
104    /// int Match_Distance;
105    match_distance: usize,
106    /// The number of bits in an int.
107    match_max_bits: usize,
108    /// When deleting a large block of text (over ~64 characters), how close does
109    /// the contents have to match the expected contents. (0.0 = perfection,
110    /// 1.0 = very loose).  Note that `match_threshold` controls how closely the
111    /// end points of a delete need to match.
112    delete_threshold: f32,
113    /// Chunk size for context length.
114    patch_margin: u8,
115}
116
117impl Default for DiffMatchPatch {
118    fn default() -> Self {
119        Self {
120            checklines: true,
121            timeout: Some(1000),
122            edit_cost: 4,
123            match_threshold: 0.5,
124            match_distance: 1000,
125            match_max_bits: 32,
126            patch_margin: 4,
127            delete_threshold: 0.5,
128        }
129    }
130}
131
132#[derive(Debug, PartialEq, Eq)]
133struct HalfMatch<'a, T: DType> {
134    prefix_long: &'a [T],
135    suffix_long: &'a [T],
136    prefix_short: &'a [T],
137    suffix_short: &'a [T],
138    common: &'a [T],
139}
140
141impl DiffMatchPatch {
142    fn checklines(&self) -> bool {
143        self.checklines
144    }
145
146    /// Enables or disables `line mode` optimization.
147    /// When enabled, the diff algorithm tries to find the `lines` that have changes and apply diff on the same
148    ///
149    /// This optimization makes sense for text with many lines (~100s), defaults to `true`
150    pub fn set_checklines(&mut self, checklines: bool) {
151        self.checklines = checklines;
152    }
153
154    // returns the configured timeout, defaults to `1`, None or `0` would mean infinite timeout
155    fn timeout(&self) -> Option<i64> {
156        self.timeout.map(|t| t as i64)
157    }
158
159    // returns the current edit cost saved
160    fn edit_cost(&self) -> usize {
161        self.edit_cost
162    }
163
164    /// Update edit cost
165    pub fn set_edit_cost(&mut self, edit_cost: usize) {
166        self.edit_cost = edit_cost;
167    }
168
169    /// Set a timeout in number of `milliseconds`. This creates a cutoff for internal `recursive` function calls
170    ///
171    /// Defaults to `1000ms` (1 second)
172    ///
173    /// None means `infinite time`
174    pub fn set_timeout(&mut self, tout: Option<u32>) {
175        self.timeout = tout;
176    }
177
178    /// creates a deadline from the given timeout
179    #[cfg(target_arch = "wasm32")]
180    pub fn deadline(&self) -> Option<Time> {
181        self.timeout()
182            .and_then(|t| Utc::now().checked_add_signed(TimeDelta::milliseconds(t)))
183            .map(|t| t.time())
184    }
185
186    #[cfg(not(target_arch = "wasm32"))]
187    pub fn deadline(&self) -> Option<Time> {
188        self.timeout()
189            .and_then(|t| Instant::now().checked_add(Duration::from_millis(t as u64)))
190    }
191
192    // returns configured match_threshold
193    fn match_threshold(&self) -> f32 {
194        self.match_threshold
195    }
196
197    /// The `match_threshold` property determines the cut-off value for a valid match.
198    /// If `match_threshold` is closer to 0, the requirements for accuracy increase.
199    /// If `match_threshold` is closer to 1 then it is more likely that a match will be found.
200    /// The `match_threshold` is, the slower `match_main()` may take to compute.
201    ///
202    /// defaults to 0.5
203    pub fn set_match_threshold(&mut self, threshold: f32) {
204        self.match_threshold = threshold
205    }
206
207    // returns the current patch margin
208    fn patch_margin(&self) -> u8 {
209        self.patch_margin
210    }
211
212    // returns the configured patch delete threshold
213    fn delete_threshold(&self) -> f32 {
214        self.delete_threshold
215    }
216
217    /// When deleting a large block of text (over ~64 characters), how close does
218    /// the contents have to match the expected contents. (0.0 = perfection,
219    /// 1.0 = very loose).  Note that `match_threshold` controls how closely the
220    /// end points of a delete need to match.
221    ///
222    /// Defaults to `0.5`
223    pub fn set_delete_threshold(&mut self, threshold: f32) {
224        self.delete_threshold = threshold;
225    }
226
227    // returns the configured max_bits
228    fn match_max_bits(&self) -> usize {
229        self.match_max_bits
230    }
231
232    fn match_distance(&self) -> usize {
233        self.match_distance
234    }
235
236    /// How far to search for a match (0 = exact location, 1000+ = broad match).
237    /// A match this many characters away from the expected location will add
238    /// 1.0 to the score (0.0 is a perfect match).
239    pub fn set_match_distance(&mut self, distance: usize) {
240        self.match_distance = distance
241    }
242
243    pub(crate) fn diff_internal<'a, T: DType>(
244        &self,
245        old_t: &'a [T],
246        new_t: &'a [T],
247        linemode: bool,
248        deadline: Option<Time>,
249    ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
250        // First, check if lhs and rhs are equal
251        if old_t == new_t {
252            if old_t.is_empty() {
253                return Ok(Vec::new());
254            }
255
256            return Ok(vec![Diff::equal(old_t)]);
257        }
258
259        if old_t.is_empty() {
260            return Ok(vec![Diff::insert(new_t)]);
261        }
262
263        if new_t.is_empty() {
264            return Ok(vec![Diff::delete(old_t)]);
265        }
266
267        // Trim common prefix
268        let (common_prefix, common_suffix) = Self::common_prefix_suffix(old_t, new_t);
269
270        let (old_uniq_end, new_uniq_end) =
271            (old_t.len() - common_suffix, new_t.len() - common_suffix);
272
273        // Some elaborate checks to avoid bound checks
274        // slice_start_index_len_fail
275        // slice_end_index_len_fail
276        // slice_index_order_fail
277        let mut diffs = if common_prefix <= old_t.len()
278            && common_prefix <= new_t.len()
279            && old_uniq_end <= old_t.len()
280            && new_uniq_end <= new_t.len()
281            && common_prefix <= old_uniq_end
282            && common_prefix <= new_uniq_end
283        {
284            self.compute(
285                &old_t[common_prefix..old_uniq_end],
286                &new_t[common_prefix..new_uniq_end],
287                linemode,
288                deadline,
289            )?
290        } else {
291            // This is part of the bound check optimization! Should never fail!
292            return Err(crate::errors::Error::InvalidInput);
293        };
294
295        // Restore the prefix and suffix.
296        if common_prefix > 0 && common_prefix <= old_t.len() {
297            let mut d = vec![Diff::equal(&old_t[..common_prefix])];
298            d.append(&mut diffs);
299            diffs = d;
300        }
301
302        if common_suffix > 0 && new_t.len() >= common_suffix && new_uniq_end < new_t.len() {
303            diffs.push(Diff::new(Ops::Equal, &new_t[new_uniq_end..]));
304        }
305
306        Self::cleanup_merge(&mut diffs);
307
308        Ok(diffs)
309    }
310
311    fn compute<'a, T: DType>(
312        &self,
313        old: &'a [T],
314        new: &'a [T],
315        linemode: bool,
316        deadline: Option<Time>,
317    ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
318        // returning all of the new part
319        if old.is_empty() {
320            return Ok(vec![Diff::insert(new)]);
321        }
322
323        // return everything deleted
324        if new.is_empty() {
325            return Ok(vec![Diff::delete(old)]);
326        }
327
328        let (long, short, old_gt_new) = if old.len() > new.len() {
329            (old, new, true)
330        } else {
331            (new, old, false)
332        };
333
334        // found a subsequence which contains the short text
335        if let Some(idx) = long.windows(short.len()).position(|k| k == short) {
336            // Shorter text is inside the longer text (speedup).
337            let op = if old_gt_new { Ops::Delete } else { Ops::Insert };
338            let diffs = vec![
339                Diff::new(op, if idx <= long.len() { &long[..idx] } else { &[] }),
340                Diff::equal(short),
341                Diff::new(
342                    op,
343                    if idx + short.len() < long.len() {
344                        &long[idx + short.len()..]
345                    } else {
346                        &[]
347                    },
348                ),
349            ];
350
351            return Ok(diffs);
352        }
353
354        if short.len() == 1 {
355            // After previous case, this can't be an equality
356            return Ok(vec![Diff::delete(old), Diff::insert(new)]);
357        }
358
359        // Check if the problem can be split in two
360        if let Some(half_match) = self.half_match(old, new) {
361            let old_a = half_match.prefix_long;
362            let old_b = half_match.suffix_long;
363
364            let new_a = half_match.prefix_short;
365            let new_b = half_match.suffix_short;
366
367            let mid_common = half_match.common;
368
369            // Send both pairs off for separate processing.
370            let mut diffs_a = match self.diff_internal(old_a, new_a, linemode, deadline) {
371                Ok(d) => d,
372                Err(_) => return Err(crate::errors::Error::Utf8Error),
373            };
374            let mut diffs_b = match self.diff_internal(old_b, new_b, linemode, deadline) {
375                Ok(d) => d,
376                Err(_) => return Err(crate::errors::Error::Utf8Error),
377            };
378
379            // Merge the results
380            diffs_a.push(Diff::equal(mid_common));
381            diffs_a.append(&mut diffs_b);
382
383            return Ok(diffs_a);
384        }
385
386        if linemode && old.len() > 100 && new.len() > 100 {
387            return self.line_mode(old, new, deadline);
388        }
389
390        match self.bisect(old, new, deadline) {
391            Ok(b) => Ok(b),
392            Err(_) => Err(crate::errors::Error::Utf8Error),
393        }
394    }
395
396    fn half_match<'a, T: DType>(&self, old: &'a [T], new: &'a [T]) -> Option<HalfMatch<'a, T>> {
397        // Don't risk returning a suboptimal diff when we have unlimited time
398        self.timeout()?;
399
400        let (long, short) = if old.len() > new.len() {
401            (old, new)
402        } else {
403            (new, old)
404        };
405
406        // pointless - two small for this algo
407        if long.len() < 4 || (short.len() << 1) < long.len() {
408            return None;
409        }
410
411        // First check if the second quarter is the seed for a half-match.
412        let hm1 = Self::half_match_i(long, short, (long.len() + 3) >> 2);
413        // Check again based on the third quarter.
414        let hm2 = Self::half_match_i(long, short, (long.len() + 1) >> 1);
415
416        if hm1.is_none() && hm2.is_none() {
417            return None;
418        }
419
420        let hm = if let (Some(hm1), None) = (&hm1, &hm2) {
421            hm1
422        } else if let (None, Some(hm2)) = (&hm1, &hm2) {
423            hm2
424        } else if let (Some(hm1), Some(hm2)) = (&hm1, &hm2) {
425            // both match, select the longest
426            if hm1.common.len() > hm2.common.len() {
427                hm1
428            } else {
429                hm2
430            }
431        } else {
432            return None;
433        };
434
435        // A half-match was found, sort out the return data.
436        let half_match = if old.len() > new.len() {
437            HalfMatch {
438                prefix_long: hm.prefix_long,
439                suffix_long: hm.suffix_long,
440                prefix_short: hm.prefix_short,
441                suffix_short: hm.suffix_short,
442                common: hm.common,
443            }
444        } else {
445            HalfMatch {
446                prefix_long: hm.prefix_short,
447                suffix_long: hm.suffix_short,
448                prefix_short: hm.prefix_long,
449                suffix_short: hm.suffix_long,
450                common: hm.common,
451            }
452        };
453
454        Some(half_match)
455    }
456
457    // Quick line-level diff on both strings, then rediff the parts for greater accuracy
458    // This speedup can produce non-minimal diffs
459    fn line_mode<T: DType>(
460        &self,
461        old: &[T],
462        new: &[T],
463        deadline: Option<Time>,
464    ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
465        let mut diffs = {
466            let to_chars = Self::lines_to_chars(old, new);
467            let diffs =
468                self.diff_lines(&to_chars.chars_old[..], &to_chars.chars_new[..], deadline)?;
469            // Convert diffs back to text
470            Self::chars_to_lines(&diffs[..], &to_chars.lines[..])
471        };
472
473        // Eliminate freak matches
474        Self::cleanup_semantic(&mut diffs);
475
476        // Rediff any replacement blocks, this time character-by-character.
477        let mut pointer = 0_usize;
478
479        // count of bytes inserted
480        let mut insert_n = 0_usize;
481        // count of bytes to delete
482        let mut delete_n = 0_usize;
483        // a temp holder for consequetive data being inserted
484        let mut insert_data = Vec::new();
485        // same for delete
486        let mut delete_data = Vec::new();
487
488        while pointer < diffs.len() {
489            match diffs[pointer].op() {
490                Ops::Insert => {
491                    insert_n += 1;
492                    insert_data = [&insert_data[..], diffs[pointer].data()].concat();
493                }
494                Ops::Delete => {
495                    delete_n += 1;
496                    delete_data = [&delete_data[..], diffs[pointer].data()].concat();
497                }
498                Ops::Equal => {
499                    // Upon reaching an equality, check for prior redundancies.
500                    self.line_mode_equality(
501                        &mut diffs,
502                        (insert_n, delete_n),
503                        &mut pointer,
504                        insert_data,
505                        delete_data,
506                        deadline,
507                    )?;
508                    // resetting counters
509                    insert_n = 0;
510                    delete_n = 0;
511                    delete_data = Vec::new();
512                    insert_data = Vec::new();
513                }
514            }
515
516            pointer += 1;
517        }
518
519        self.line_mode_equality(
520            &mut diffs,
521            (insert_n, delete_n),
522            &mut pointer,
523            insert_data,
524            delete_data,
525            deadline,
526        )?;
527
528        Ok(diffs)
529    }
530
531    fn line_mode_equality<T: DType>(
532        &self,
533        diffs: &mut Vec<Diff<T>>,
534        n: (usize, usize),
535        pointer: &mut usize,
536        insert_data: Vec<T>,
537        delete_data: Vec<T>,
538        deadline: Option<Time>,
539    ) -> Result<(), crate::errors::Error> {
540        let insert_n = n.0;
541        let delete_n = n.1;
542
543        if delete_n >= 1 && insert_n >= 1 {
544            // Delete the offending records and add the merged ones.
545            let idxstart = *pointer - delete_n - insert_n;
546            let idxend = idxstart + delete_n + insert_n;
547
548            if idxstart <= idxend && idxstart < diffs.len() && idxend <= diffs.len() {
549                diffs.drain(idxstart..idxend);
550            }
551
552            *pointer = idxstart;
553
554            let mut subdiffs = self.diff_internal(&delete_data, &insert_data, false, deadline)?;
555            let subdifflen = subdiffs.len();
556            subdiffs.drain(..).rev().for_each(|d| {
557                diffs.insert(*pointer, d);
558            });
559
560            *pointer += subdifflen;
561        }
562
563        Ok(())
564    }
565
566    pub(crate) fn diff_lines(
567        &self,
568        old: &[usize],
569        new: &[usize],
570        deadline: Option<Time>,
571    ) -> Result<Vec<Diff<usize>>, crate::errors::Error> {
572        if old == new {
573            if old.is_empty() {
574                return Ok(Vec::new());
575            }
576
577            return Ok(vec![Diff::equal(old)]);
578        }
579
580        if old.is_empty() {
581            return Ok(vec![Diff::insert(new)]);
582        }
583
584        if new.is_empty() {
585            return Ok(vec![Diff::delete(old)]);
586        }
587
588        // Trim common prefix
589        let (common_prefix, common_suffix) = Self::common_prefix_suffix(old, new);
590
591        let (old_uniq_end, new_uniq_end) = (old.len() - common_suffix, new.len() - common_suffix);
592        let mut diffs = if common_prefix <= old.len()
593            && common_prefix <= new.len()
594            && old_uniq_end <= old.len()
595            && new_uniq_end <= new.len()
596            && common_prefix <= old_uniq_end
597            && common_prefix <= new_uniq_end
598        {
599            self.compute_lines(
600                &old[common_prefix..old_uniq_end],
601                &new[common_prefix..new_uniq_end],
602                deadline,
603            )?
604        } else {
605            // This is part of the bound check optimization! Should never fail!
606            return Err(crate::errors::Error::InvalidInput);
607        };
608
609        if common_prefix > 0 && common_prefix <= old.len() {
610            diffs.insert(0, Diff::equal(&old[..common_prefix]));
611        }
612
613        if common_suffix > 0 && new_uniq_end < new.len() {
614            diffs.push(Diff::new(Ops::Equal, &new[new_uniq_end..]));
615        }
616
617        Self::cleanup_merge(&mut diffs);
618
619        Ok(diffs)
620    }
621
622    fn compute_lines(
623        &self,
624        old: &[usize],
625        new: &[usize],
626        deadline: Option<Time>,
627    ) -> Result<Vec<Diff<usize>>, crate::errors::Error> {
628        // returning all of the new part
629        if old.is_empty() {
630            return Ok(vec![Diff::insert(new)]);
631        }
632
633        // return everything deleted
634        if new.is_empty() {
635            return Ok(vec![Diff::delete(old)]);
636        }
637
638        let (long, short, old_gt_new) = if old.len() > new.len() {
639            (old, new, true)
640        } else {
641            (new, old, false)
642        };
643
644        // found a subsequence which contains the short text
645        if let Some(idx) = long.windows(short.len()).position(|k| k == short) {
646            if idx <= long.len() && idx + short.len() < long.len() {
647                // Shorter text is inside the longer text (speedup).
648                let op = if old_gt_new { Ops::Delete } else { Ops::Insert };
649                let diffs = vec![
650                    Diff::new(op, &long[..idx]),
651                    Diff::equal(short),
652                    Diff::new(op, &long[idx + short.len()..]),
653                ];
654
655                return Ok(diffs);
656            }
657        }
658
659        if short.len() == 1 {
660            // After previous case, this can't be an equality
661            return Ok(vec![Diff::delete(old), Diff::insert(new)]);
662        }
663
664        // Check if the problem can be split in two
665        if let Some(half_match) = self.half_match(old, new) {
666            let old_a = half_match.prefix_long;
667            let old_b = half_match.suffix_long;
668
669            let new_a = half_match.prefix_short;
670            let new_b = half_match.suffix_short;
671
672            let mid_common = half_match.common;
673
674            // Send both pairs off for separate processing.
675            let mut diffs_a = match self.diff_lines(old_a, new_a, deadline) {
676                Ok(d) => d,
677                Err(_) => return Err(crate::errors::Error::Utf8Error),
678            };
679            let mut diffs_b = match self.diff_lines(old_b, new_b, deadline) {
680                Ok(d) => d,
681                Err(_) => return Err(crate::errors::Error::Utf8Error),
682            };
683
684            // Merge the results
685            diffs_a.push(Diff::equal(mid_common));
686            diffs_a.append(&mut diffs_b);
687
688            return Ok(diffs_a);
689        }
690
691        match self.bisect(old, new, deadline) {
692            Ok(b) => Ok(b),
693            Err(_) => Err(crate::errors::Error::Utf8Error),
694        }
695    }
696
697    // Find the 'middle snake' of a diff, split the problem in two
698    // and return the recursively constructed diff.
699    // See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
700    pub fn bisect<T: DType>(
701        &self,
702        old: &[T],
703        new: &[T],
704        deadline: Option<Time>,
705    ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
706        type Int = isize;
707
708        // Micro optimization:
709        // Do all setup before casting to isize
710        let mut v;
711        let (v_offset, v_len, v1, v2, old_len, new_len) = {
712            let v_offset = (old.len() + new.len()).div_ceil(2);
713            let v_len = v_offset * 2;
714
715            v = vec![-1 as Int; v_len * 2];
716            let (v1, v2) = v.split_at_mut(v_len);
717            let v_trg = v_offset + 1;
718            if v_trg < v1.len() {
719                v1[v_trg] = 0;
720            }
721            if v_trg < v2.len() {
722                v2[v_trg] = 0;
723            }
724
725            (
726                v_offset as Int,
727                v_len as Int,
728                v1,
729                v2,
730                old.len() as Int,
731                new.len() as Int,
732            )
733        };
734
735        let delta = old_len - new_len;
736        // If the total number of characters is odd, then the front path will collide
737        // with the reverse path.
738        let front = (delta & 1) != 0;
739
740        // Offsets for start and end of k loop.
741        // Prevents mapping of space beyond the grid.
742        let mut k1start = 0;
743        let mut k1end = 0;
744        let mut k2start = 0;
745        let mut k2end = 0;
746
747        for edit_dist in 0..v_offset {
748            // Bail out if deadline is reached.
749            if let Some(tout) = deadline {
750                #[cfg(target_arch = "wasm32")]
751                if Utc::now().time() > tout {
752                    break;
753                }
754                #[cfg(not(target_arch = "wasm32"))]
755                if Instant::now() > tout {
756                    break;
757                }
758            }
759
760            (k1start, k1end) = {
761                let (k1s, k1e, overlap) = Self::bisect_fwd(
762                    old,
763                    new,
764                    (old_len, new_len, delta, front),
765                    (v_offset, v_len),
766                    v1,
767                    v2,
768                    (k1start, k1end),
769                    edit_dist,
770                );
771                if let Some((x1, x2)) = overlap {
772                    return T::bisect_split(self, old, new, x1, x2, deadline);
773                }
774
775                (k1s, k1e)
776            };
777
778            (k2start, k2end) = {
779                let (k1s, k1e, overlap) = Self::bisect_rev(
780                    old,
781                    new,
782                    (old_len, new_len, delta, front),
783                    (v_offset, v_len),
784                    v1,
785                    v2,
786                    (k2start, k2end),
787                    edit_dist,
788                );
789                if let Some((x1, x2)) = overlap {
790                    return T::bisect_split(self, old, new, x1, x2, deadline);
791                }
792
793                (k1s, k1e)
794            };
795        }
796
797        Ok(vec![Diff::delete(old), Diff::insert(new)])
798    }
799
800    // Walk the front path one step.
801    #[allow(clippy::too_many_arguments)]
802    #[inline]
803    fn bisect_fwd<T: DType>(
804        old: &[T],
805        new: &[T],
806        (old_len, new_len, delta, front): (isize, isize, isize, bool), // length related: old_len, new_len, delta, front
807        (v_offset, v_len): (isize, isize),                             // v_offset, v_len
808        v1: &mut [isize],
809        v2: &mut [isize],
810        (mut k1start, mut k1end): (isize, isize), // k related: k1start, k1end
811        edit_dist: isize,
812    ) -> (isize, isize, Option<(usize, usize)>) {
813        let mut k1 = k1start - edit_dist;
814
815        while k1 < edit_dist + 1 - k1end {
816            let (x1, y1) = {
817                let k1_offset = v_offset + k1;
818
819                let v1_prev = {
820                    let prev_idx = (k1_offset - 1) as usize;
821                    if prev_idx < v1.len() {
822                        v1[prev_idx]
823                    } else {
824                        -1
825                    }
826                };
827                let v1_next = {
828                    let next_idx = (k1_offset + 1) as usize;
829                    if next_idx < v1.len() {
830                        v1[next_idx]
831                    } else {
832                        -1
833                    }
834                };
835
836                let mut x1 = if k1 == -edit_dist || (k1 != edit_dist && v1_prev < v1_next) {
837                    v1_next
838                } else {
839                    v1_prev + 1
840                };
841
842                let mut y1 = x1 - k1;
843                let (xi, yi) = Self::bisect_fwd_path_i(old, new, x1 as usize, y1 as usize);
844
845                y1 = yi as isize;
846                x1 = xi as isize;
847
848                if (0..v1.len() as isize).contains(&k1_offset) {
849                    v1[k1_offset as usize] = x1;
850                }
851
852                (x1, y1)
853            };
854
855            if x1 > old_len {
856                // Ran off the right of the graph.
857                k1end += 2;
858            } else if y1 > new_len {
859                // Ran off the bottom of the graph.
860                k1start += 2;
861            } else if front {
862                let k2_offset = (v_offset + delta - k1) as usize;
863                if k2_offset < v_len as usize && k2_offset < v2.len() && v2[k2_offset] != -1 {
864                    // Mirror x2 onto top-left coordinate system.
865                    let x2 = old_len - v2[k2_offset];
866                    if x1 >= x2 {
867                        // Overlap detected.
868                        return (k1start, k1end, Some((x1 as usize, y1 as usize)));
869                    }
870                }
871            }
872            k1 += 2;
873        }
874
875        (k1start, k1end, None)
876    }
877
878    fn bisect_fwd_path_i<T: DType>(old: &[T], new: &[T], x1: usize, y1: usize) -> (usize, usize) {
879        let mut x1 = x1;
880        let mut y1 = y1;
881
882        while x1 < old.len() && y1 < new.len() {
883            if old[x1] != new[y1] {
884                break;
885            }
886
887            x1 += 1;
888            y1 += 1;
889        }
890
891        (x1, y1)
892    }
893
894    // Walk the reverse path one step.
895    #[allow(clippy::too_many_arguments)]
896    #[inline]
897    fn bisect_rev<T: DType>(
898        old: &[T],
899        new: &[T],
900        (old_len, new_len, delta, front): (isize, isize, isize, bool), // length related: old_len, new_len, delta, front
901        (v_offset, v_len): (isize, isize),                             // v_offset, v_len
902        v1: &mut [isize],
903        v2: &mut [isize],
904        (mut k2start, mut k2end): (isize, isize), // k related: k1start, k1end
905        edit_dist: isize,
906    ) -> (isize, isize, Option<(usize, usize)>) {
907        let mut k2 = k2start - edit_dist;
908        while k2 < edit_dist + 1 - k2end {
909            let (mut x2, y2) = {
910                let k2_offset = (v_offset + k2) as usize;
911                let prev_idx = k2_offset - 1;
912                let next_idx = k2_offset + 1;
913
914                let v2_prev = if prev_idx < v2.len() {
915                    v2[prev_idx]
916                } else {
917                    -1
918                };
919                let v2_next = if next_idx < v2.len() {
920                    v2[next_idx]
921                } else {
922                    -1
923                };
924
925                let mut x2 = if k2 == -edit_dist || (k2 != edit_dist && v2_prev < v2_next) {
926                    v2_next
927                } else {
928                    v2_prev + 1
929                };
930
931                let mut y2 = x2 - k2;
932                let (xi, yi) = Self::bisect_rev_path_i(old, new, x2 as usize, y2 as usize);
933                y2 = yi as isize;
934                x2 = xi as isize;
935
936                if k2_offset < v2.len() {
937                    v2[k2_offset] = x2;
938                }
939
940                (x2, y2)
941            };
942
943            if x2 > old_len {
944                // Ran off the left of the graph.
945                k2end += 2;
946            } else if y2 > new_len {
947                // Ran off the top of the graph.
948                k2start += 2;
949            } else if !front {
950                let k1_offset = (v_offset + delta - k2) as usize;
951                if k1_offset < v_len as usize && k1_offset < v1.len() && v1[k1_offset] != -1 {
952                    // if (0..v_len).contains(&k1_offset) && k1_offset < v1.len() as isize && v1[k1_offset as usize] != -1 {
953                    let x1 = v1[k1_offset];
954                    let y1 = v_offset + x1 - k1_offset as isize;
955                    // Mirror x2 onto top-left coordinate system.
956                    x2 = old_len - x2;
957                    if x1 >= x2 {
958                        // Overlap detected.
959                        return (k2start, k2end, Some((x1 as usize, y1 as usize)));
960                        // return T::bisect_split(
961                        //     self,
962                        //     old,
963                        //     new,
964                        //     x1 as usize,
965                        //     y1 as usize,
966                        //     deadline,
967                        // );
968                    }
969                }
970            }
971            k2 += 2;
972        }
973
974        (k2start, k2end, None)
975    }
976
977    fn bisect_rev_path_i<T: DType>(old: &[T], new: &[T], x2: usize, y2: usize) -> (usize, usize) {
978        let mut x2 = x2;
979        let mut y2 = y2;
980
981        while x2 < old.len() && y2 < new.len() {
982            let (i1, i2) = {
983                let xt = old.len() - x2;
984                let yt = new.len() - y2;
985
986                (
987                    if xt > 0 { xt - 1 } else { x2 + 1 },
988                    if yt > 0 { yt - 1 } else { y2 + 1 },
989                )
990            };
991
992            if old[i1] != new[i2] {
993                break;
994            }
995            x2 += 1;
996            y2 += 1;
997        }
998
999        (x2, y2)
1000    }
1001
1002    // Does a substring of shorttext exist within longtext such that the substring
1003    // is at least half the length of longtext?
1004    // idx Start index of quarter length substring within longtext.
1005    fn half_match_i<'a, T: DType>(
1006        long: &'a [T],
1007        short: &'a [T],
1008        idx: usize,
1009    ) -> Option<HalfMatch<'a, T>> {
1010        // Start with a 1/4 length substring at position i as a seed.
1011        let seed = {
1012            let end = idx + (long.len() / 4);
1013            if idx < long.len() && end < long.len() {
1014                &long[idx..end]
1015            } else {
1016                return None;
1017            }
1018        };
1019        let mut j = 0;
1020
1021        let mut best_common: &[T] = &[];
1022        let mut best_long_a: &[T] = &[];
1023        let mut best_long_b: &[T] = &[];
1024        let mut best_short_a: &[T] = &[];
1025        let mut best_short_b: &[T] = &[];
1026
1027        while j < short.len() {
1028            if let Some(pos) = &short[j..].windows(seed.len()).position(|p| p == seed) {
1029                j += *pos;
1030
1031                let (prefix_len, suffix_len) = if idx < long.len() && j < short.len() {
1032                    (
1033                        Self::common_prefix(&long[idx..], &short[j..], false),
1034                        Self::common_prefix(&long[..idx], &short[..j], true),
1035                    )
1036                } else {
1037                    // this bound check shouldn't fail .. since it has, no point going further
1038                    return None;
1039                };
1040
1041                if best_common.len() < suffix_len + prefix_len {
1042                    let j_min_suf = j - suffix_len;
1043                    let j_pls_prf = j + prefix_len;
1044                    let i_min_suf = idx - suffix_len;
1045                    let i_pls_prf = idx + prefix_len;
1046
1047                    // Optimizing for bound checks
1048                    if j_min_suf < j_pls_prf
1049                        && j_min_suf <= short.len()
1050                        && j_pls_prf <= short.len()
1051                        && i_min_suf <= long.len()
1052                        && i_pls_prf <= long.len()
1053                    {
1054                        best_common = &short[j_min_suf..j_pls_prf];
1055
1056                        best_long_a = &long[..i_min_suf];
1057                        best_long_b = &long[i_pls_prf..];
1058
1059                        best_short_a = &short[..j_min_suf];
1060                        best_short_b = &short[j_pls_prf..];
1061                    }
1062                }
1063
1064                j += 1;
1065            } else {
1066                break;
1067            }
1068        }
1069
1070        if (best_common.len() << 1) >= long.len() {
1071            Some(HalfMatch {
1072                prefix_long: best_long_a,
1073                suffix_long: best_long_b,
1074                prefix_short: best_short_a,
1075                suffix_short: best_short_b,
1076                common: best_common,
1077            })
1078        } else {
1079            None
1080        }
1081    }
1082
1083    // returns the number of bytes common in both the str - this is the position in bytes not chars, [0 .. n] is your prefix
1084    // We are doing a binary search here, and I've observed similar performance as noted by https://neil.fraser.name/news/2007/10/09/
1085    // Some benchmark code can be found in benches/prefix.rs
1086    // Reverse prefix is suffix
1087    fn common_prefix<T: DType>(lhs: &[T], rhs: &[T], reverse: bool) -> usize {
1088        if lhs.is_empty()
1089            || rhs.is_empty()
1090            || (!reverse && (lhs.first() != rhs.first()))
1091            || (reverse && (lhs.last() != rhs.last()))
1092        {
1093            return 0;
1094        }
1095
1096        let mut pointmin = 0;
1097        let mut pointmax = lhs.len().min(rhs.len());
1098        let mut pointmid = pointmax;
1099
1100        let mut pointstart = 0;
1101
1102        while pointmin < pointmid {
1103            let (lhsrange, rhsrange) = if !reverse {
1104                (pointstart..pointmid, pointstart..pointmid)
1105            } else {
1106                (
1107                    lhs.len() - pointmid..lhs.len() - pointstart,
1108                    rhs.len() - pointmid..rhs.len() - pointstart,
1109                )
1110            };
1111
1112            if lhsrange.end <= lhs.len()
1113                && rhsrange.end <= rhs.len()
1114                && lhsrange.start < lhsrange.end
1115                && rhsrange.start < rhsrange.end
1116                && lhs[lhsrange] == rhs[rhsrange]
1117            {
1118                pointmin = pointmid;
1119                pointstart = pointmin;
1120            } else {
1121                pointmax = pointmid;
1122            }
1123
1124            pointmid = ((pointmax - pointmin) / 2) + pointmin;
1125        }
1126
1127        pointmid
1128    }
1129
1130    fn common_prefix_suffix<T: DType>(lhs: &[T], rhs: &[T]) -> (usize, usize) {
1131        let common_prefix = Self::common_prefix(lhs, rhs, false);
1132        let common_suffix = {
1133            if common_prefix < lhs.len() && common_prefix < rhs.len() {
1134                Self::common_prefix(&lhs[common_prefix..], &rhs[common_prefix..], true)
1135            } else {
1136                0
1137            }
1138        };
1139
1140        (common_prefix, common_suffix)
1141    }
1142
1143    fn common_overlap<T: DType>(lhs: &[T], rhs: &[T]) -> usize {
1144        if lhs.is_empty() || rhs.is_empty() {
1145            return 0;
1146        }
1147
1148        // A working set with longer string truncated
1149        let (l, r) = if lhs.len() > rhs.len() {
1150            (&lhs[lhs.len() - rhs.len()..], rhs)
1151        } else {
1152            (lhs, &rhs[..lhs.len()])
1153        };
1154
1155        // Quick check for the worst case.
1156        if l == r {
1157            return l.len();
1158        }
1159
1160        // Start by looking for a single character match
1161        // and increase length until no match is found.
1162        // Performance analysis: https://neil.fraser.name/news/2010/11/04/
1163        let mut len = 1;
1164        let mut best = 0;
1165
1166        while let Some(found) = r.windows(len).position(|p| p == &l[l.len() - len..]) {
1167            len += found;
1168            let idx_st = l.len() - len;
1169            if found == 0 || (idx_st < l.len() && len <= r.len() && l[idx_st..] == r[..len]) {
1170                best = len;
1171                len += 1;
1172            }
1173        }
1174
1175        best
1176    }
1177
1178    // Reduce the number of edits by eliminating semantically trivial equalities
1179    fn cleanup_semantic<T: DType>(diffs: &mut Vec<Diff<T>>) {
1180        let mut changes = false;
1181
1182        let mut pointer = 0_usize;
1183        // reducing runtime allocation by giving this vec max capacity
1184        let mut equalities = Vec::with_capacity(diffs.len());
1185        let mut last_equality = None;
1186
1187        // Number of bytes changed pre equality
1188        let mut insert_len_pre = 0_usize;
1189        let mut delete_len_pre = 0_usize;
1190
1191        // Number of bytes changed post equality
1192        let mut insert_len_post = 0_usize;
1193        let mut delete_len_post = 0_usize;
1194
1195        while pointer < diffs.len() {
1196            let mut diff_mod = false;
1197            if diffs[pointer].op() == Ops::Equal {
1198                equalities.push(pointer);
1199                // Updating pre equality changes
1200                insert_len_pre = insert_len_post;
1201                delete_len_pre = delete_len_post;
1202
1203                // Resetting post insertion changes to 0
1204                insert_len_post = 0;
1205                delete_len_post = 0;
1206
1207                last_equality = Some(diffs[pointer].data());
1208            } else {
1209                // Ops::Insert || Ops::Delete
1210                // Increasing changes of post_equality metrics
1211                if diffs[pointer].op() == Ops::Insert {
1212                    insert_len_post += diffs[pointer].size();
1213                } else {
1214                    delete_len_post += diffs[pointer].size();
1215                }
1216
1217                // Eliminate an equality that is smaller or equal to the edits on both
1218                // sides of it.
1219                if let Some(last_eq) = &last_equality {
1220                    if last_eq.len() <= insert_len_pre.max(delete_len_pre)
1221                        && last_eq.len() <= insert_len_post.max(delete_len_post)
1222                    {
1223                        if let Some(&last) = equalities.last() {
1224                            // Duplicate record
1225                            diffs.insert(last, Diff::delete(last_eq));
1226                            // Change the other copy to insert
1227                            if last + 1 < diffs.len() {
1228                                diffs[last + 1].0 = Ops::Insert;
1229                            }
1230
1231                            // Throw away the equality we just deleted.
1232                            equalities.pop();
1233                            // Throw away the previous equality (it needs to be reevaluated).
1234                            equalities.pop();
1235
1236                            diff_mod = true;
1237                            changes = true;
1238
1239                            if let Some(&e) = equalities.last() {
1240                                pointer = e;
1241                            } else {
1242                                pointer = 0;
1243                            }
1244
1245                            // reset all counters
1246                            insert_len_pre = 0;
1247                            delete_len_pre = 0;
1248                            insert_len_post = 0;
1249                            delete_len_post = 0;
1250
1251                            last_equality = None;
1252                        }
1253                    }
1254                }
1255            }
1256
1257            pointer += if diff_mod && pointer == 0 { 0 } else { 1 };
1258        }
1259
1260        // Normalize the diff
1261        if changes {
1262            Self::cleanup_merge(diffs);
1263        }
1264
1265        Self::cleanup_semantic_lossless(diffs);
1266
1267        // Find any overlaps between deletions and insertions.
1268        // e.g: <del>abcxxx</del><ins>xxxdef</ins>
1269        //   -> <del>abc</del>xxx<ins>def</ins>
1270        // e.g: <del>xxxabc</del><ins>defxxx</ins>
1271        //   -> <ins>def</ins>xxx<del>abc</del>
1272        // Only extract an overlap if it is as big as the edit ahead or behind it.
1273        pointer = 1;
1274        while !diffs.is_empty() && pointer < diffs.len() {
1275            let p_prev = pointer - 1;
1276            let p_next = pointer + 1;
1277
1278            if p_prev < diffs.len()
1279                && diffs[p_prev].op() == Ops::Delete
1280                && diffs[pointer].op() == Ops::Insert
1281            {
1282                let delete = diffs[p_prev].data().to_vec();
1283                let insert = diffs[pointer].data().to_vec();
1284
1285                let delete_thres = (delete.len() / 2) + delete.len() % 2;
1286                let insert_thres = (insert.len() / 2) + insert.len() % 2;
1287
1288                let overlap_1 = Self::common_overlap(&delete[..], &insert[..]);
1289                let overlap_2 = Self::common_overlap(&insert[..], &delete[..]);
1290
1291                if overlap_1 >= overlap_2
1292                    && (overlap_1 >= delete_thres || overlap_1 >= insert_thres)
1293                    && overlap_1 <= insert.len()
1294                {
1295                    // Overlap found.  Insert an equality and trim the surrounding edits.
1296                    diffs.insert(pointer, Diff::equal(&insert[..overlap_1]));
1297
1298                    let del_idx_end = delete.len() - overlap_1;
1299
1300                    if p_prev < diffs.len() && del_idx_end <= delete.len() {
1301                        diffs[p_prev].1 = delete[..del_idx_end].to_vec();
1302                    }
1303                    if p_next < diffs.len() && overlap_1 <= insert.len() {
1304                        diffs[p_next].1 = insert[overlap_1..].to_vec();
1305                    }
1306                    pointer += 1;
1307                } else if (overlap_2 >= delete_thres || overlap_2 >= insert_thres)
1308                    && overlap_2 <= delete.len()
1309                {
1310                    // Reverse overlap
1311                    // Insert equality and swap and trim surrounding edits
1312                    diffs.insert(pointer, Diff::equal(&delete[..overlap_2]));
1313
1314                    let ins_idx_end = insert.len() - overlap_2;
1315
1316                    if p_prev < diffs.len() && ins_idx_end <= insert.len() {
1317                        diffs[p_prev] = Diff::insert(&insert[..ins_idx_end]);
1318                    }
1319
1320                    if p_next < diffs.len() && overlap_2 <= delete.len() {
1321                        diffs[p_next] = Diff::delete(&delete[overlap_2..]);
1322                    }
1323
1324                    pointer += 1;
1325                }
1326                pointer += 1;
1327            }
1328            pointer += 1;
1329        }
1330    }
1331
1332    // Look for single edits surrounded on both sides by equalities
1333    // e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
1334    fn cleanup_semantic_lossless<T: DType>(diffs: &mut Vec<Diff<T>>) {
1335        let mut pointer = 1_usize;
1336
1337        // Intentionally ignore the first and last element (don't need checking).
1338        while !diffs.is_empty() && (1..diffs.len() - 1).contains(&pointer) {
1339            let p_prev = pointer - 1;
1340            let p_next = pointer + 1;
1341
1342            // an edit surrounded by equalities
1343            if p_prev < diffs.len()
1344                && p_next < diffs.len()
1345                && diffs[p_prev].op() == Ops::Equal
1346                && diffs[p_next].op() == Ops::Equal
1347            {
1348                // Shift the edit as far left as possible
1349                let (equality_prev, edit, equality_next) = {
1350                    let commonlen =
1351                        Self::common_prefix(diffs[p_prev].data(), diffs[pointer].data(), true);
1352                    if commonlen > 0 {
1353                        let ptr_strt = diffs[pointer].size() - commonlen;
1354                        let ptr_prv_end = diffs[p_prev].size() - commonlen;
1355
1356                        if ptr_strt <= diffs[pointer].size() && ptr_prv_end <= diffs[p_prev].size()
1357                        {
1358                            let common = &diffs[pointer].data()[ptr_strt..];
1359
1360                            (
1361                                diffs[p_prev].data()[..ptr_prv_end].to_vec(),
1362                                [common, &diffs[pointer].data()[..ptr_strt]].concat(),
1363                                [common, diffs[p_next].data()].concat(),
1364                            )
1365                        } else {
1366                            continue;
1367                        }
1368                    } else {
1369                        (
1370                            diffs[p_prev].data().to_vec(),
1371                            diffs[pointer].data().to_vec(),
1372                            diffs[p_next].data().to_vec(),
1373                        )
1374                    }
1375                };
1376
1377                let (best_equality_prev, best_edit, best_equality_next) =
1378                    Self::cleanup_semantic_lossless_best_fit(equality_prev, edit, equality_next);
1379
1380                // We have an improvement, save it back to the diff.
1381                if diffs[p_prev].data() != best_equality_prev {
1382                    if !best_equality_prev.is_empty() {
1383                        diffs[p_prev].1.clone_from(&best_equality_prev);
1384                    } else {
1385                        diffs.remove(p_prev);
1386                        pointer -= 1;
1387                    }
1388
1389                    let p_next = pointer + 1;
1390
1391                    if pointer < diffs.len() && p_next < diffs.len() {
1392                        diffs[pointer].1.clone_from(&best_edit);
1393
1394                        if !best_equality_next.is_empty() {
1395                            diffs[p_next].1.clone_from(&best_equality_next);
1396                        } else {
1397                            diffs.remove(p_next);
1398                            pointer -= 1;
1399                        }
1400                    }
1401                }
1402            }
1403
1404            pointer += 1;
1405        }
1406    }
1407
1408    fn cleanup_semantic_lossless_best_fit<T: DType>(
1409        equality_prev: Vec<T>,
1410        edit: Vec<T>,
1411        equality_next: Vec<T>,
1412    ) -> (Vec<T>, Vec<T>, Vec<T>) {
1413        let mut equality_prev = equality_prev;
1414        let mut edit = edit;
1415        let mut equality_next = equality_next;
1416
1417        // Step byte by byte right looking for the best fit
1418        let mut best_equality_prev = equality_prev.clone();
1419        let mut best_edit = edit.clone();
1420        let mut best_equality_next = equality_next.clone();
1421
1422        let mut best_score = Self::cleanup_semantic_score(&equality_prev[..], &edit[..])
1423            + Self::cleanup_semantic_score(&edit[..], &equality_next[..]);
1424
1425        while !edit.is_empty() && edit.first() == equality_next.first() {
1426            equality_prev.push(edit[0]);
1427            edit.remove(0);
1428            edit.push(equality_next[0]);
1429
1430            equality_next.remove(0);
1431
1432            let score = Self::cleanup_semantic_score(&equality_prev[..], &edit[..])
1433                + Self::cleanup_semantic_score(&edit[..], &equality_next[..]);
1434
1435            // The >= encourages trailing rather than leading whitespace on edits.
1436            if score >= best_score {
1437                best_score = score;
1438                best_equality_prev.clone_from(&equality_prev);
1439                best_edit.clone_from(&edit);
1440                best_equality_next.clone_from(&equality_next);
1441            }
1442        }
1443
1444        (best_equality_prev, best_edit, best_equality_next)
1445    }
1446
1447    // Given two strings, compute a score representing whether the internal
1448    // boundary falls on logical boundaries
1449    // Scores range from 6 (best) to 0 (worst)
1450    fn cleanup_semantic_score<T: DType>(one: &[T], two: &[T]) -> u8 {
1451        let (char1, char2) = if let (Some(&char1), Some(&char2)) = (one.last(), two.first()) {
1452            (char1, char2)
1453        } else {
1454            return 6;
1455        };
1456
1457        // Each port of this function behaves slightly differently due to
1458        // subtle differences in each language's definition of things like
1459        // 'whitespace'.  Since this function's purpose is largely cosmetic,
1460        // the choice has been made to use each language's native features
1461        // rather than force total conformity.
1462
1463        let whitespace_1 = char1.is_whitespace();
1464        let whitespace_2 = char2.is_whitespace();
1465
1466        let linebreak_1 = whitespace_1 && (char1.is_newline() || char1.is_carriage());
1467        let linebreak_2 = whitespace_2 && (char2.is_newline() || char2.is_carriage());
1468
1469        let blankline_1 = linebreak_1 && T::is_linebreak_end(one);
1470        let blankline_2 = linebreak_2 && T::is_linebreak_start(two);
1471
1472        if blankline_1 || blankline_2 {
1473            // 5 for blank lines
1474            5
1475        } else if linebreak_1 || linebreak_2 {
1476            // Four points for line breaks.
1477            4
1478        } else if !char1.is_alphanum() && !whitespace_1 && whitespace_2 {
1479            // Three points for end of sentences.
1480            3
1481        } else if whitespace_1 || whitespace_2 {
1482            // 2 for whitespace
1483            2
1484        } else if !char1.is_alphanum() || !char2.is_alphanum() {
1485            // 1 for not alphanumeric
1486            1
1487        } else {
1488            0
1489        }
1490    }
1491
1492    // Reorder and merge like edit sections.  Merge equalities.
1493    // Any edit section can move as long as it doesn't cross an equality.
1494    fn cleanup_merge<T: DType>(diffs: &mut Vec<Diff<T>>) {
1495        Self::cleanup_merge_first(diffs);
1496
1497        if Self::cleanup_merge_second(diffs) {
1498            Self::cleanup_merge(diffs);
1499        }
1500    }
1501
1502    fn cleanup_merge_first<T: DType>(diffs: &mut Vec<Diff<T>>) {
1503        // Push a dummy diff ... this triggers the equality as a last step
1504        let mut pointer = 0_usize;
1505
1506        let mut insert_n = 0;
1507        let mut delete_n = 0;
1508
1509        let mut insert_data = vec![];
1510        let mut delete_data = vec![];
1511
1512        while pointer < diffs.len() {
1513            match diffs[pointer].op() {
1514                Ops::Insert => {
1515                    insert_n += 1;
1516                    insert_data = [&insert_data[..], diffs[pointer].data()].concat();
1517                    pointer += 1;
1518                }
1519                Ops::Delete => {
1520                    delete_n += 1;
1521                    delete_data = [&delete_data[..], diffs[pointer].data()].concat();
1522                    pointer += 1;
1523                }
1524                Ops::Equal => {
1525                    if Self::cleanup_merge_proc_equal(
1526                        diffs,
1527                        insert_n,
1528                        delete_n,
1529                        insert_data,
1530                        delete_data,
1531                        &mut pointer,
1532                    ) {
1533                        pointer += 1;
1534                    };
1535
1536                    insert_n = 0;
1537                    delete_n = 0;
1538                    insert_data = Vec::new();
1539                    delete_data = Vec::new();
1540                }
1541            }
1542        }
1543
1544        Self::cleanup_merge_proc_equal(
1545            diffs,
1546            insert_n,
1547            delete_n,
1548            insert_data,
1549            delete_data,
1550            &mut pointer,
1551        );
1552    }
1553
1554    // Second pass: look for single edits surrounded on both sides by equalities
1555    // which can be shifted sideways to eliminate an equality.
1556    // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
1557    fn cleanup_merge_second<T: DType>(diffs: &mut Vec<Diff<T>>) -> bool {
1558        let mut changes = false;
1559        let mut pointer = 1;
1560
1561        while !diffs.is_empty() {
1562            let p_prev = pointer - 1;
1563            let p_next = pointer + 1;
1564
1565            if p_next >= diffs.len() {
1566                break;
1567            }
1568
1569            let (diff_prev, diff, diff_next) = (&diffs[p_prev], &diffs[pointer], &diffs[p_next]);
1570
1571            // This is a single edit surrounded by equalities.
1572            if diff_prev.op() == Ops::Equal && diff_next.op() == Ops::Equal {
1573                let substr_idx = if diff.size() >= diff_prev.size() {
1574                    diff.size() - diff_prev.size()
1575                } else {
1576                    0
1577                };
1578
1579                let diff_d = if substr_idx < diff.size() {
1580                    &diff.data()[substr_idx..]
1581                } else {
1582                    &[]
1583                };
1584
1585                if diff_d == diff_prev.data() {
1586                    let diff_d = {
1587                        let end = diff.size() - diff_prev.size();
1588                        if end <= diff.data().len() {
1589                            &diff.data()[..end]
1590                        } else {
1591                            &[]
1592                        }
1593                    };
1594
1595                    // Shift the edit over the previous equality
1596                    let new_current_data = [diff_prev.data(), diff_d].concat();
1597
1598                    let new_next_data = [diff_prev.data(), diff_next.data()].concat();
1599
1600                    diffs[pointer].1 = new_current_data;
1601                    diffs[p_next].1 = new_next_data;
1602                    diffs.remove(p_prev);
1603
1604                    changes = true;
1605                } else if &diff.data()[..diff_next.size().min(diff.size())] == diff_next.data() {
1606                    // Shift the edit over the next equality.
1607                    diffs[p_prev].1 = [diffs[p_prev].data(), diffs[p_next].data()].concat();
1608
1609                    let cur_data = if diffs[p_next].size() < diffs[pointer].size() {
1610                        &diffs[pointer].data()[diffs[p_next].size()..]
1611                    } else {
1612                        &[]
1613                    };
1614
1615                    diffs[pointer].1 = [cur_data, diffs[p_next].data()].concat();
1616                    diffs.remove(p_next);
1617
1618                    changes = true;
1619                }
1620            }
1621
1622            pointer += 1;
1623        }
1624
1625        changes
1626    }
1627
1628    // Handling `Equality` in the merge process
1629    fn cleanup_merge_proc_equal<T: DType>(
1630        diffs: &mut Vec<Diff<T>>,
1631        insert_n: usize,
1632        delete_n: usize,
1633        insert_data: Vec<T>,
1634        delete_data: Vec<T>,
1635        pointer: &mut usize,
1636    ) -> bool {
1637        let mut insert_data = insert_data;
1638        let mut delete_data = delete_data;
1639
1640        // Upon reaching an equality, check for prior redundancies.
1641        if delete_n + insert_n > 1 {
1642            if delete_n != 0 && insert_n != 0 && !insert_data.is_empty() && !delete_data.is_empty()
1643            {
1644                // Factor out any common prefixies.
1645                let commonlen = Self::common_prefix(&insert_data[..], &delete_data[..], false);
1646                if commonlen != 0 && commonlen < insert_data.len() && commonlen < delete_data.len()
1647                {
1648                    let tmpidx = *pointer - delete_n - insert_n - 1;
1649                    if (0..diffs.len()).contains(&tmpidx) && diffs[tmpidx].op() == Ops::Equal {
1650                        diffs[tmpidx].1 =
1651                            [&diffs[tmpidx].1[..], &insert_data[..commonlen]].concat();
1652                    } else {
1653                        diffs.insert(0, Diff::equal(&insert_data[..commonlen]));
1654                        *pointer += 1;
1655                    }
1656                    insert_data = insert_data[commonlen..].to_vec();
1657                    delete_data = delete_data[commonlen..].to_vec();
1658                }
1659
1660                // Factor out any common suffixies.
1661                let commonlen = Self::common_prefix(&insert_data[..], &delete_data[..], true);
1662
1663                if commonlen > 0 {
1664                    let ins_strt = insert_data.len() - commonlen;
1665
1666                    let data = [
1667                        if ins_strt < insert_data.len() {
1668                            &insert_data[insert_data.len() - commonlen..]
1669                        } else {
1670                            &[]
1671                        },
1672                        if *pointer < diffs.len() {
1673                            diffs[*pointer].data()
1674                        } else {
1675                            &[]
1676                        },
1677                    ]
1678                    .concat();
1679
1680                    if *pointer < diffs.len() {
1681                        diffs[*pointer].1 = data;
1682                    } else {
1683                        diffs.push(Diff::equal(&data));
1684                    };
1685
1686                    // bound check
1687                    let ins_end = insert_data.len() - commonlen;
1688                    let del_end = delete_data.len() - commonlen;
1689
1690                    if ins_end <= insert_data.len() {
1691                        insert_data = insert_data[..ins_end].to_vec();
1692                    }
1693                    if del_end <= delete_data.len() {
1694                        delete_data = delete_data[..del_end].to_vec();
1695                    }
1696                }
1697            }
1698
1699            // Delete the offending records and add the merged ones.
1700            *pointer -= delete_n + insert_n;
1701
1702            // Reversing because index will not change
1703            (*pointer..*pointer + delete_n + insert_n)
1704                .rev()
1705                .for_each(|i| {
1706                    diffs.remove(i);
1707                });
1708
1709            if !delete_data.is_empty() {
1710                diffs.insert(*pointer, Diff::delete(&delete_data));
1711                *pointer += 1;
1712            }
1713
1714            if !insert_data.is_empty() {
1715                diffs.insert(*pointer, Diff::insert(&insert_data));
1716                *pointer += 1;
1717            }
1718
1719            true
1720        } else if (1..diffs.len()).contains(pointer) && diffs[*pointer - 1].op() == Ops::Equal {
1721            // Merge this equality with the previous one.;
1722            let mut d = diffs.remove(*pointer);
1723            diffs[*pointer - 1].1.append(&mut d.1);
1724
1725            false
1726        } else {
1727            true
1728        }
1729    }
1730
1731    // Reduce the number of edits by eliminating operationally trivial equalities.
1732    fn cleanup_efficiency<T: DType>(&self, diffs: &mut Vec<Diff<T>>) {
1733        if diffs.is_empty() {
1734            return;
1735        }
1736        let edit_cost = self.edit_cost();
1737
1738        let mut changes = false;
1739        let mut pointer = 0;
1740
1741        let mut equalities = vec![];
1742
1743        let mut last_eq = None;
1744
1745        // Is there an insertion operation before the last equality.
1746        let mut pre_ins = false;
1747        // Is there a deletion operation before the last equality.
1748        let mut pre_del = false;
1749        // Is there an insertion operation after the last equality.
1750        let mut post_ins = false;
1751        // Is there a deletion operation after the last equality.
1752        let mut post_del = false;
1753
1754        while pointer < diffs.len() {
1755            if diffs[pointer].op() == Ops::Equal {
1756                // Equality found
1757                if diffs[pointer].size() < edit_cost && (post_ins || post_del) {
1758                    // Candidate found.
1759                    equalities.push(pointer);
1760                    pre_ins = post_ins;
1761                    pre_del = post_del;
1762
1763                    last_eq = Some(diffs[pointer].data().to_vec());
1764                } else {
1765                    // Not a candidate, and can never become one.
1766                    equalities = vec![];
1767                    last_eq = None;
1768                }
1769
1770                post_ins = false;
1771                post_del = false
1772            } else {
1773                // Insert or delete
1774                if diffs[pointer].op() == Ops::Delete {
1775                    post_del = true;
1776                } else {
1777                    post_ins = true;
1778                }
1779
1780                // Five types to be split:
1781                // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
1782                // <ins>A</ins>X<ins>C</ins><del>D</del>
1783                // <ins>A</ins><del>B</del>X<ins>C</ins>
1784                // <ins>A</del>X<ins>C</ins><del>D</del>
1785                // <ins>A</ins><del>B</del>X<del>C</del>
1786
1787                if let Some(le) = &mut last_eq {
1788                    if (pre_ins && pre_del && post_ins && post_del)
1789                        || (le.len() < (edit_cost >> 1)
1790                            && pre_ins as u8 + pre_del as u8 + post_ins as u8 + post_del as u8 == 3)
1791                    {
1792                        // Duplicate record.
1793                        if let Some(item) = equalities.pop() {
1794                            if (0..diffs.len()).contains(&item) {
1795                                // change the second copy (after the insert in next line) to Insert
1796                                diffs[item].0 = Ops::Insert;
1797                                // add an item
1798                                diffs.insert(item, Diff::delete(le));
1799                            }
1800                        }
1801
1802                        last_eq = None;
1803
1804                        if pre_ins && pre_del {
1805                            // No changes made which could affect previous entry, keep going.
1806                            post_ins = true;
1807                            post_del = true;
1808
1809                            equalities = vec![];
1810                        } else {
1811                            equalities.pop();
1812
1813                            if let Some(&l) = equalities.last() {
1814                                pointer = l;
1815                            } else {
1816                                pointer = 0;
1817                                post_ins = false;
1818                                post_del = false;
1819                                changes = true;
1820                                continue;
1821                            };
1822
1823                            post_ins = false;
1824                            post_del = false;
1825                        }
1826                        changes = true;
1827                    }
1828                }
1829            }
1830            pointer += 1;
1831        }
1832
1833        if changes {
1834            Self::cleanup_merge(diffs)
1835        }
1836    }
1837
1838    #[inline]
1839    fn x_index<T: DType>(diffs: &[Diff<T>], loc: usize) -> usize {
1840        let mut char1 = 0;
1841        let mut char2 = 0;
1842
1843        let mut last_char1 = 0;
1844        let mut last_char2 = 0;
1845
1846        let mut last_diff = None;
1847
1848        for diff in diffs.iter() {
1849            if diff.op() != Ops::Insert {
1850                // Equality or deletion
1851                char1 += diff.size();
1852            }
1853
1854            if diff.op() != Ops::Delete {
1855                // Equality or insertion
1856                char2 += diff.size();
1857            }
1858
1859            if char1 > loc {
1860                // overshot location
1861                last_diff = Some(diff);
1862                break;
1863            }
1864
1865            last_char1 = char1;
1866            last_char2 = char2;
1867        }
1868
1869        if let Some(ld) = last_diff {
1870            if ld.op() == Ops::Delete {
1871                // The location was deleted.
1872                return last_char2;
1873            }
1874        }
1875
1876        // Add the remaining character length.
1877        last_char2 + (loc - last_char1)
1878    }
1879
1880    #[inline]
1881    pub fn diff_text_old<T: DType>(diffs: &[Diff<T>]) -> Vec<T> {
1882        diffs
1883            .iter()
1884            .filter_map(|diff| {
1885                if diff.op() != Ops::Insert {
1886                    Some(diff.data())
1887                } else {
1888                    None
1889                }
1890            })
1891            .collect::<Vec<_>>()
1892            .concat()
1893    }
1894
1895    #[inline]
1896    pub fn diff_text_new<T: DType>(diffs: &[Diff<T>]) -> Vec<T> {
1897        diffs
1898            .iter()
1899            .filter_map(|diff| {
1900                if diff.op() != Ops::Delete {
1901                    Some(diff.data())
1902                } else {
1903                    None
1904                }
1905            })
1906            .collect::<Vec<_>>()
1907            .concat()
1908    }
1909
1910    // Look through the patches and break up any which are longer than the maximum
1911    // limit of the match algorithm.
1912    // Intended to be called only from within patch_apply.
1913    #[inline]
1914    fn split_max<T: DType>(&self, patches: &mut Patches<T>) {
1915        let max_bit = self.match_max_bits();
1916        let patch_margin = self.patch_margin() as usize;
1917
1918        let mut idx = 0;
1919        while idx < patches.len() {
1920            if patches[idx].length1 <= max_bit {
1921                idx += 1;
1922                continue;
1923            }
1924
1925            let mut bigpatch = patches.remove(idx);
1926            let mut start1 = bigpatch.start1;
1927            let mut start2 = bigpatch.start2;
1928
1929            let mut precontext = vec![];
1930            let mut patch_to_ins = vec![];
1931
1932            while !bigpatch.diffs.is_empty() {
1933                let mut patch = Patch::default();
1934                let mut empty = true;
1935
1936                patch.start1 = start1 - precontext.len();
1937                patch.start2 = start2 - precontext.len();
1938                if !precontext.is_empty() {
1939                    patch.length1 = precontext.len();
1940                    patch.length2 = precontext.len();
1941
1942                    patch.diffs.push(Diff::equal(&precontext));
1943                }
1944
1945                while !bigpatch.diffs.is_empty() && patch.length1 < max_bit - patch_margin {
1946                    if bigpatch.diffs[0].op() == Ops::Insert {
1947                        // Insertions are harmless.
1948                        patch.length2 += bigpatch.diffs[0].size();
1949                        start2 += bigpatch.diffs[0].size();
1950                        let d = bigpatch.diffs.remove(0);
1951                        patch.diffs.push(d);
1952                        empty = false;
1953                        // patch.diffs.push(value)
1954                    } else if bigpatch.diffs[0].op() == Ops::Delete
1955                        && patch.diffs.len() == 1
1956                        && patch.diffs[0].op() == Ops::Equal
1957                        && bigpatch.diffs[0].size() > 2 * max_bit
1958                    {
1959                        // This is a large deletion.  Let it pass in one chunk.
1960                        patch.length1 += bigpatch.diffs[0].size();
1961                        start1 += bigpatch.diffs[0].size();
1962                        empty = false;
1963                        patch
1964                            .diffs
1965                            .push(Diff::new(bigpatch.diffs[0].op(), bigpatch.diffs[0].data()));
1966                        bigpatch.diffs.remove(0);
1967                    } else {
1968                        // Deletion or equality.  Only take as much as we can stomach.
1969                        let diff_text = bigpatch.diffs[0].data()[..bigpatch.diffs[0]
1970                            .size()
1971                            .min(max_bit - patch.length1 - patch_margin)]
1972                            .to_vec();
1973                        patch.length1 += diff_text.len();
1974                        start1 += diff_text.len();
1975
1976                        if bigpatch.diffs[0].op() == Ops::Equal {
1977                            patch.length2 += diff_text.len();
1978                            start2 += diff_text.len();
1979                        } else {
1980                            empty = false;
1981                        }
1982
1983                        patch
1984                            .diffs
1985                            .push(Diff::new(bigpatch.diffs[0].op(), &diff_text));
1986
1987                        let cond = if let Some(d) = bigpatch.diffs.first() {
1988                            diff_text == d.data()
1989                        } else {
1990                            false
1991                        };
1992
1993                        if cond {
1994                            bigpatch.diffs.remove(0);
1995                        } else if let Some(bd) = bigpatch.diffs.first_mut() {
1996                            bd.1 = bd.data()[diff_text.len()..].to_vec();
1997                        }
1998                    }
1999                }
2000
2001                // Compute the head context for the next patch.
2002                precontext = Self::diff_text_new(&patch.diffs);
2003                if precontext.len() > patch_margin {
2004                    precontext = precontext[precontext.len() - patch_margin..].to_vec();
2005                }
2006
2007                // Append the end context for this patch.
2008                let mut postcontext = Self::diff_text_old(&bigpatch.diffs);
2009                // [0 .. patch_margin.min(other)].to_vec()
2010                if patch_margin < postcontext.len() {
2011                    postcontext = postcontext[..patch_margin].to_vec();
2012                }
2013
2014                if !postcontext.is_empty() {
2015                    patch.length1 += postcontext.len();
2016                    patch.length2 += postcontext.len();
2017
2018                    let other = if let Some(pd) = patch.diffs.last_mut() {
2019                        if pd.op() == Ops::Equal {
2020                            pd.1 = [&pd.1, &postcontext[..]].concat();
2021                            false
2022                        } else {
2023                            true
2024                        }
2025                    } else {
2026                        true
2027                    };
2028
2029                    if other {
2030                        patch.diffs.push(Diff::equal(&postcontext));
2031                    }
2032                }
2033
2034                if !empty {
2035                    patch_to_ins.push(patch);
2036                }
2037            }
2038
2039            if !patch_to_ins.is_empty() {
2040                patches.splice(idx..idx, patch_to_ins.into_iter());
2041            }
2042
2043            idx += 1;
2044        }
2045    }
2046}
2047
2048#[derive(Debug, Eq, PartialEq)]
2049struct LineToChars<'a, T: DType> {
2050    chars_old: Vec<usize>,
2051    chars_new: Vec<usize>,
2052    lines: Vec<&'a [T]>,
2053}
2054
2055impl DiffMatchPatch {
2056    #[inline]
2057    fn lines_to_chars<'a, T: DType>(old: &'a [T], new: &'a [T]) -> LineToChars<'a, T> {
2058        let mut lines: Vec<&'a [T]> = vec![];
2059        let mut linehash: HashMap<&'a [T], usize> = HashMap::new();
2060
2061        // Allocate 2/3rds of the UTF16::MAX (65535) value space for text1, the rest for text2.
2062        let chars_old = Self::lines_to_chars_internal(old, &mut lines, &mut linehash, 40000);
2063
2064        // This basically represents the U16::MAX value
2065        let chars_new = Self::lines_to_chars_internal(new, &mut lines, &mut linehash, 65535);
2066
2067        LineToChars {
2068            chars_old,
2069            chars_new,
2070            lines,
2071        }
2072    }
2073
2074    fn lines_to_chars_internal<'a, T: DType>(
2075        text: &'a [T],
2076        array: &mut Vec<&'a [T]>,
2077        hash: &mut HashMap<&'a [T], usize>,
2078        maxlines: usize,
2079    ) -> Vec<usize> {
2080        let take = maxlines - array.len();
2081
2082        let mut charlist = Vec::with_capacity(take);
2083
2084        let mut broke = false;
2085        let mut cursor = 0;
2086
2087        text.split_inclusive(|u| u.is_newline())
2088            .enumerate()
2089            .take(take)
2090            .for_each(|(idx, line)| {
2091                cursor += line.len();
2092
2093                let e = hash.entry(line).or_insert(array.len());
2094                // Fresh insert
2095                if *e == array.len() {
2096                    array.push(line)
2097                }
2098
2099                // upcasting, should never fail
2100                charlist.push(*e);
2101
2102                // break at max lines
2103                broke = idx == take - 1;
2104            });
2105
2106        // We broke at max lines, so we'll account for the remaining text
2107        if broke && cursor <= text.len() {
2108            let line = &text[cursor..];
2109            let e = hash.entry(line).or_insert(array.len());
2110            // Fresh insert
2111            if *e == array.len() {
2112                array.push(line)
2113            }
2114
2115            // upcasting should never fail
2116            charlist.push(*e);
2117        }
2118        charlist
2119    }
2120
2121    fn chars_to_lines<T: DType>(diffs: &[Diff<usize>], lines: &[&[T]]) -> Vec<Diff<T>> {
2122        diffs
2123            .iter()
2124            .map(|d| {
2125                let mut line = vec![];
2126                d.data().iter().for_each(|d| {
2127                    if let Some(l) = lines.get(*d) {
2128                        line.append(&mut l.to_vec())
2129                    }
2130                });
2131
2132                Diff::new(d.op(), &line)
2133            })
2134            .collect::<Vec<_>>()
2135    }
2136}
2137
2138// Match methods
2139impl DiffMatchPatch {
2140    fn match_internal<T: DType>(&self, text: &[T], pattern: &[T], loc: usize) -> Option<usize> {
2141        // Check for null inputs.
2142        // Nothing to match.
2143        if text.is_empty() {
2144            return None;
2145        }
2146
2147        // loc = Math.max(0, Math.min(loc, text.length));
2148        let loc = loc.min(text.len());
2149
2150        if text == pattern {
2151            // Shortcut (potentially not guaranteed by the algorithm)
2152            Some(0)
2153        } else if &text[loc..(loc + pattern.len()).min(text.len())] == pattern {
2154            // Perfect match at the perfect spot!  (Includes case of null pattern)
2155            Some(loc)
2156        } else {
2157            // Do a fuzzy compare.
2158            // return this.match_bitap_(text, pattern, loc);
2159            self.match_bitap(text, pattern, loc)
2160        }
2161    }
2162
2163    #[inline]
2164    fn match_bitap<T: DType>(&self, text: &[T], pattern: &[T], loc: usize) -> Option<usize> {
2165        if pattern.len() > self.match_max_bits() {
2166            return None;
2167        }
2168
2169        let alphabet = Self::match_alphabet(pattern);
2170
2171        // Highest score beyond which we give up.
2172        let mut score_thres = self.match_threshold();
2173
2174        // Is there a nearby exact match? (speedup)
2175        if let Some(best_loc) = text
2176            .windows(pattern.len())
2177            .skip(loc)
2178            .position(|p| p == pattern)
2179            .map(|pos| pos + loc)
2180        {
2181            score_thres = self
2182                .bitap_score(loc, pattern.len(), 0, best_loc)
2183                .min(score_thres);
2184
2185            // What about in the other direction? (speedup)
2186            // best_loc = text.lastIndexOf(pattern, loc + pattern.length);
2187            if let Some(best_loc_rev) = text
2188                .windows(pattern.len())
2189                .skip(loc)
2190                .rev()
2191                .position(|p| p == pattern)
2192                .map(|pos| text.len() - pos - pattern.len())
2193            {
2194                score_thres = self.bitap_score(loc, pattern.len(), 0, best_loc_rev);
2195            }
2196        }
2197
2198        // Initialise the bit arrays.
2199        let matchmask = 1 << (pattern.len() - 1);
2200
2201        // var matchmask = 1 << (pattern.length - 1);
2202        let mut best_loc = None;
2203
2204        let mut bin_min;
2205        let mut bin_mid;
2206        let mut bin_max = pattern.len() + text.len();
2207        let mut last_rd = vec![];
2208
2209        for d in 0..pattern.len() {
2210            // Scan for the best match; each iteration allows for one more error.
2211            // Run a binary search to determine how far from 'loc' we can stray at
2212            // this error level.
2213            bin_min = 0;
2214            bin_mid = bin_max;
2215
2216            while bin_min < bin_mid {
2217                let score = self.bitap_score(loc, pattern.len(), d, loc + bin_mid);
2218                if score <= score_thres {
2219                    bin_min = bin_mid;
2220                } else {
2221                    bin_max = bin_mid;
2222                }
2223
2224                bin_mid = (bin_max - bin_min) / 2 + bin_min;
2225            }
2226
2227            // Use the result from this iteration as the maximum for the next.
2228            bin_max = bin_mid;
2229            let mut start = if loc > bin_mid {
2230                1.max(loc - bin_mid + 1)
2231            } else {
2232                1
2233            };
2234            let finish = (loc + bin_mid).min(text.len()) + pattern.len();
2235
2236            let mut rd = vec![0; finish + 2];
2237            rd[finish + 1] = (1 << d) - 1;
2238
2239            let mut j = finish;
2240            while j >= start {
2241                let char_match = if text.len() < j {
2242                    0
2243                } else {
2244                    alphabet.get(&text[j - 1]).map_or(0, |&v| v)
2245                };
2246
2247                rd[j] = if d == 0 {
2248                    // first pass: exact match
2249                    ((rd[j + 1] << 1) | 1) & char_match
2250                } else {
2251                    // Subsequent passes: fuzzy match.
2252                    ((rd[j + 1] << 1) | 1) & char_match
2253                        | (((last_rd[j + 1] | last_rd[j]) << 1) | 1_usize)
2254                        | last_rd[j + 1]
2255                };
2256
2257                if (rd[j] & matchmask) != 0 {
2258                    let score = self.bitap_score(loc, pattern.len(), d, j - 1);
2259                    // This match will almost certainly be better than any existing
2260                    // match.  But check anyway.
2261                    if score <= score_thres {
2262                        score_thres = score;
2263                        let bst_loc = j - 1;
2264
2265                        best_loc = Some(bst_loc);
2266                        if bst_loc > loc {
2267                            // When passing loc, don't exceed our current distance from loc.
2268                            start = 1.max(loc.saturating_sub(bst_loc));
2269                        } else {
2270                            // Already passed loc, downhill from here on in.
2271                            break;
2272                        }
2273                    }
2274                }
2275
2276                j -= 1;
2277            }
2278            // No hope for a (better) match at greater error levels.
2279            if self.bitap_score(loc, pattern.len(), d + 1, loc) > score_thres {
2280                break;
2281            }
2282            last_rd.clone_from(&rd);
2283        }
2284
2285        best_loc
2286    }
2287
2288    #[inline]
2289    fn match_alphabet<T: DType>(pattern: &[T]) -> HashMap<T, usize> {
2290        let mut map = HashMap::with_capacity(pattern.len());
2291
2292        pattern.iter().enumerate().for_each(|(i, &p)| {
2293            let v = map.entry(p).or_insert(0_usize);
2294            *v |= 1 << (pattern.len() - i - 1)
2295        });
2296
2297        map
2298    }
2299
2300    // Compute and return the score for a match with e errors and x location.
2301    // Accesses loc and pattern through being a closure.
2302    #[inline]
2303    fn bitap_score(&self, org_loc: usize, pattern_len: usize, errs: usize, loc: usize) -> f32 {
2304        let accuracy = errs as f32 / pattern_len as f32;
2305        let proximity = (org_loc as i32 - loc as i32).abs();
2306
2307        if self.match_distance() == 0 {
2308            if proximity > 0 {
2309                return 1.;
2310            } else {
2311                return accuracy;
2312            }
2313        }
2314
2315        accuracy + proximity as f32 / self.match_distance() as f32
2316    }
2317}
2318
2319// Patch Methods
2320#[derive(Debug, Clone)]
2321pub struct Patch<T: DType> {
2322    diffs: Vec<Diff<T>>,
2323    start1: usize,
2324    start2: usize,
2325    length1: usize,
2326    length2: usize,
2327}
2328
2329impl<T: DType> Default for Patch<T> {
2330    fn default() -> Self {
2331        Self {
2332            diffs: Vec::new(),
2333            start1: 0,
2334            start2: 0,
2335            length1: 0,
2336            length2: 0,
2337        }
2338    }
2339}
2340
2341impl<T: DType> Display for Patch<T> {
2342    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2343        let coord1 = if self.length1 == 0 {
2344            format!("{},0", self.start1)
2345        } else {
2346            format!(
2347                "{}{}",
2348                self.start1 + 1,
2349                if self.length1 != 1 {
2350                    format!(",{}", self.length1)
2351                } else {
2352                    String::new()
2353                }
2354            )
2355        };
2356
2357        let coord2 = if self.length2 == 0 {
2358            format!("{},0", self.start2)
2359        } else {
2360            format!(
2361                "{}{}",
2362                self.start2 + 1,
2363                if self.length2 != 1 {
2364                    format!(",{}", self.length2)
2365                } else {
2366                    String::new()
2367                }
2368            )
2369        };
2370
2371        let mut segments = vec![
2372            "@@ -".to_string(),
2373            coord1,
2374            " +".to_string(),
2375            coord2,
2376            " @@\n".to_string(),
2377        ];
2378        for diff in self.diffs.iter() {
2379            let sign = match diff.op() {
2380                Ops::Insert => '+',
2381                Ops::Delete => '-',
2382                Ops::Equal => ' ',
2383            };
2384
2385            let enc = T::percent_encode(diff.data());
2386            let segment = format!(
2387                "{sign}{}\n",
2388                T::to_string(&enc).map_err(|_| std::fmt::Error)?
2389            );
2390
2391            segments.push(segment)
2392        }
2393
2394        write!(f, "{}", segments.join(""))
2395    }
2396}
2397
2398impl<T: DType> Patch<T> {
2399    pub fn diffs(&self) -> &[Diff<T>] {
2400        &self.diffs[..]
2401    }
2402}
2403
2404pub type Patches<T> = Vec<Patch<T>>;
2405
2406impl DiffMatchPatch {
2407    #[inline]
2408    fn parse_patch_header<T: DType>(
2409        s: &[T],
2410    ) -> Option<(usize, Option<usize>, usize, Option<usize>)> {
2411        let mut section = Vec::with_capacity(64);
2412        let mut current_sect = 0;
2413
2414        let mut old_line = 0;
2415        let mut old_cols = None;
2416        let mut new_line = 0;
2417        let mut new_cols = None;
2418
2419        for &c in s.iter() {
2420            if c == T::from_char(' ') {
2421                match current_sect {
2422                    0 => {
2423                        if section != T::from_str("@@") {
2424                            return None;
2425                        }
2426                    }
2427                    1 => {
2428                        if section.is_empty() {
2429                            return None;
2430                        }
2431
2432                        let splits = section[1..]
2433                            .split(|&p| p == T::from_char(','))
2434                            .collect::<Vec<_>>();
2435
2436                        let ol = splits.first()?;
2437                        old_line = T::to_string(ol).ok()?.parse::<usize>().ok()?;
2438                        if let Some(&oc) = splits.get(1) {
2439                            old_cols = Some(T::to_string(oc).ok()?.parse::<usize>().ok()?);
2440                        }
2441                    }
2442                    2 => {
2443                        let splits = section[if *section.first()? == T::from_char('+') {
2444                            1
2445                        } else {
2446                            0
2447                        }..]
2448                            .split(|&p| p == T::from_char(','))
2449                            .collect::<Vec<_>>();
2450
2451                        let nl = splits.first()?;
2452                        new_line = T::to_string(nl).ok()?.parse::<usize>().ok()?;
2453                        if let Some(&nc) = splits.get(1) {
2454                            new_cols = Some(T::to_string(nc).ok()?.parse::<usize>().ok()?);
2455                        }
2456                    }
2457                    _ => {
2458                        // invalid pattern
2459                        return None;
2460                    }
2461                }
2462
2463                section = Vec::with_capacity(64);
2464                current_sect += 1;
2465                continue;
2466            }
2467
2468            if current_sect == 1 && section.is_empty() && c != T::from_char('-') {
2469                return None;
2470            }
2471
2472            section.push(c);
2473        }
2474
2475        if section != T::from_str("@@") {
2476            return None;
2477        }
2478
2479        Some((old_line, old_cols, new_line, new_cols))
2480    }
2481
2482    #[inline]
2483    fn patch_make_internal<T: DType>(
2484        &self,
2485        txt: &[T],
2486        diffs: &[Diff<T>],
2487    ) -> Result<Patches<T>, crate::errors::Error> {
2488        // No diffs -> no patches
2489        if diffs.is_empty() {
2490            return Ok(Vec::new());
2491        }
2492
2493        let patch_margin = self.patch_margin() as usize;
2494
2495        let mut patches = vec![];
2496
2497        let mut patch = Patch::default();
2498        let mut char_n1 = 0;
2499        let mut char_n2 = 0;
2500
2501        let mut prepatch: Vec<T> = txt.to_vec();
2502        let mut postpatch: Vec<T> = prepatch.clone();
2503
2504        diffs.iter().enumerate().for_each(|(idx, diff)| {
2505            // a new patch starts here
2506            if patch.diffs.is_empty() && diff.op() != Ops::Equal {
2507                patch.start1 = char_n1;
2508                patch.start2 = char_n2;
2509            }
2510
2511            match diff.op() {
2512                Ops::Insert => {
2513                    patch.length2 += diff.size();
2514                    postpatch =
2515                        [&postpatch[..char_n2], diff.data(), &postpatch[char_n2..]].concat();
2516                    patch.diffs.push(diff.clone());
2517                }
2518                Ops::Delete => {
2519                    patch.length1 += diff.size();
2520                    postpatch =
2521                        [&postpatch[..char_n2], &postpatch[char_n2 + diff.size()..]].concat();
2522
2523                    patch.diffs.push(diff.clone());
2524                }
2525                Ops::Equal => {
2526                    if diff.size() <= 2 * patch_margin
2527                        && !patch.diffs.is_empty()
2528                        && diffs.len() != idx + 1
2529                    {
2530                        // Small equality inside a patch.
2531                        patch.length1 += diff.size();
2532                        patch.length2 += diff.size();
2533
2534                        patch.diffs.push(diff.clone());
2535                    } else if diff.size() >= 2 * patch_margin && !patch.diffs.is_empty() {
2536                        // Time for a new patch.
2537                        self.patch_add_context(&mut patch, &prepatch);
2538                        patches.push(patch.clone());
2539                        patch = Patch::default();
2540
2541                        // Unlike Unidiff, our patch lists have a rolling context.
2542                        // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
2543                        // Update prepatch text & pos to reflect the application of the
2544                        // just completed patch.
2545                        prepatch.clone_from(&postpatch);
2546                        char_n1 = char_n2;
2547                    }
2548                }
2549            }
2550
2551            if diff.op() != Ops::Insert {
2552                char_n1 += diff.size();
2553            }
2554            if diff.op() != Ops::Delete {
2555                char_n2 += diff.size();
2556            }
2557        });
2558
2559        // Pick up the leftover patch if not empty.
2560        if !patch.diffs.is_empty() {
2561            self.patch_add_context(&mut patch, &prepatch);
2562            patches.push(patch);
2563        }
2564
2565        Ok(patches)
2566    }
2567
2568    fn patch_add_context<T: DType>(&self, patch: &mut Patch<T>, text: &[T]) {
2569        if text.is_empty() {
2570            return;
2571        }
2572
2573        let patch_margin = self.patch_margin() as usize;
2574
2575        let mut pattern = &text[patch.start2..patch.start2 + patch.length1];
2576        let mut padding = 0;
2577
2578        // Look for the first and last matches of pattern in text.  If two different
2579        // matches are found, increase the pattern length.
2580        while pattern.is_empty()
2581            || (text.windows(pattern.len()).position(|p| p == pattern)
2582                != text
2583                    .windows(pattern.len())
2584                    .rev()
2585                    .position(|p| p == pattern)
2586                    .map(|p| text.len() - p - pattern.len())
2587                && pattern.len() < self.match_max_bits() - (patch_margin << 1))
2588        {
2589            padding += patch_margin;
2590
2591            let begin = if patch.start2 > padding {
2592                patch.start2 - padding
2593            } else {
2594                0
2595            };
2596            let end = patch.start2 + patch.length1 + padding;
2597
2598            pattern = &text[begin..if end > text.len() { text.len() } else { end }];
2599        }
2600
2601        // Add one chunk for good luck.
2602        padding += patch_margin;
2603
2604        // Add prefix
2605        let begin = if patch.start2 > padding {
2606            patch.start2 - padding
2607        } else {
2608            0
2609        };
2610        let prefix = &text[begin..patch.start2];
2611        if !prefix.is_empty() {
2612            patch.diffs.insert(0, Diff::equal(prefix));
2613        }
2614
2615        // Add the suffix
2616        let begin = patch.start2 + patch.length1;
2617        let end = patch.start2 + patch.length1 + padding;
2618        let suffix = &text[if begin < text.len() {
2619            begin
2620        } else {
2621            text.len()
2622        }..if end < text.len() { end } else { text.len() }];
2623        if !suffix.is_empty() {
2624            patch.diffs.push(Diff::equal(suffix));
2625        }
2626        // Roll back the start points.
2627        patch.start1 -= prefix.len();
2628        patch.start2 -= prefix.len();
2629
2630        // extend the lengths
2631        patch.length1 += prefix.len() + suffix.len();
2632        patch.length2 += prefix.len() + suffix.len();
2633    }
2634
2635    fn patch_apply_internal<T: DType>(
2636        &self,
2637        patches: &Patches<T>,
2638        source: &[T],
2639    ) -> Result<(Vec<T>, Vec<bool>), crate::errors::Error> {
2640        if patches.is_empty() {
2641            return Ok((source.to_vec(), vec![]));
2642        }
2643
2644        let deadline = self.deadline();
2645
2646        // To avoid changes to original patches!!
2647        let mut patches = patches.clone();
2648
2649        let null_pad = self.patch_add_padding(&mut patches);
2650        let mut source = [&null_pad, source, &null_pad].concat();
2651
2652        self.split_max(&mut patches);
2653
2654        // delta keeps track of the offset between the expected and actual location
2655        // of the previous patch.  If there are patches expected at positions 10 and
2656        // 20, but the first patch was found at 12, delta is 2 and the second patch
2657        // has an effective expected position of 22.
2658        let mut delta = 0_isize;
2659        let mut results = vec![false; patches.len()];
2660
2661        // patches.iter().enumerate().for_each(|(x, p)| {
2662        for (x, p) in patches.iter().enumerate() {
2663            let expected_loc = (p.start2 as isize + delta) as usize;
2664            let txt_old = Self::diff_text_old(&p.diffs);
2665            let (start_loc, end_loc) = if txt_old.len() > self.match_max_bits() {
2666                // patch_splitMax will only provide an oversized pattern in the case of
2667                // a monster delete.
2668                if let Some(sl) =
2669                    self.match_internal(&source, &txt_old[..self.match_max_bits()], expected_loc)
2670                {
2671                    let el = self.match_internal(
2672                        &source,
2673                        &txt_old[txt_old.len() - self.match_max_bits()..],
2674                        expected_loc + txt_old.len() - self.match_max_bits(),
2675                    );
2676
2677                    if el.is_none() || Some(sl) >= el {
2678                        // Can't find valid trailing context.  Drop this patch.
2679                        (None, el)
2680                    } else {
2681                        (Some(sl), el)
2682                    }
2683                } else {
2684                    (None, None)
2685                }
2686            } else {
2687                (self.match_internal(&source, &txt_old, expected_loc), None)
2688            };
2689
2690            if let Some(sl) = start_loc {
2691                // Found a match.  :)
2692                results[x] = true;
2693                delta = sl as isize - expected_loc as isize;
2694
2695                let trg_idx = if let Some(el) = end_loc {
2696                    el + self.match_max_bits()
2697                } else {
2698                    sl + txt_old.len()
2699                };
2700
2701                let txt_new = &source[sl..trg_idx.min(source.len())];
2702
2703                if txt_old == txt_new {
2704                    // Perfect match, just shove the replacement text in.
2705                    source = [
2706                        &source[..sl],
2707                        &Self::diff_text_new(&p.diffs),
2708                        &source[sl + txt_old.len()..],
2709                    ]
2710                    .concat();
2711                } else {
2712                    // Imperfect match.  Run a diff to get a framework of equivalent indices.
2713                    let mut diffs = self.diff_internal(&txt_old, txt_new, false, deadline)?;
2714                    if txt_old.len() > self.match_max_bits()
2715                        && (self.diff_levenshtein(&diffs) as f32 / txt_old.len() as f32)
2716                            > self.delete_threshold()
2717                    {
2718                        // The end points match, but the content is unacceptably bad.
2719                        results[x] = false;
2720                    } else {
2721                        Self::cleanup_semantic_lossless(&mut diffs);
2722                        let mut idx1 = 0;
2723                        p.diffs.iter().for_each(|diff| {
2724                            if diff.op() != Ops::Equal {
2725                                let idx2 = Self::x_index(&diffs, idx1);
2726                                if diff.op() == Ops::Insert {
2727                                    // Insertion
2728                                    source =
2729                                        [&source[..sl + idx2], diff.data(), &source[sl + idx2..]]
2730                                            .concat();
2731                                } else if diff.op() == Ops::Delete {
2732                                    // Deletion
2733                                    source = [
2734                                        &source[..sl + idx2],
2735                                        &source[sl + Self::x_index(&diffs, idx1 + diff.size())..],
2736                                    ]
2737                                    .concat();
2738                                }
2739                            }
2740
2741                            if diff.op() != Ops::Delete {
2742                                idx1 += diff.size()
2743                            }
2744                        });
2745                    }
2746                }
2747            } else {
2748                // No match found.  :(
2749                results[x] = false;
2750                // Subtract the delta for this failed patch from subsequent patches.
2751                delta -= (p.length2 - p.length1) as isize;
2752            }
2753        }
2754
2755        // Strip the padding off.
2756        source = source[null_pad.len()..source.len() - null_pad.len()].to_vec();
2757        Ok((source, results))
2758    }
2759
2760    fn patch_add_padding<T: DType>(&self, patches: &mut Patches<T>) -> Vec<T> {
2761        let null_pad = (1..self.patch_margin() + 1)
2762            .filter_map(|c| c.as_char().map(|c| T::from_char(c)))
2763            .collect::<Vec<_>>();
2764
2765        let pad_len = self.patch_margin() as usize;
2766
2767        // Bump all the patches forward.
2768        patches.iter_mut().for_each(|p| {
2769            p.start1 += pad_len;
2770            p.start2 += pad_len;
2771        });
2772
2773        // Add some padding on start of first diff.
2774        if let Some(first_patch) = patches.first_mut() {
2775            let (add_null_pad, pad_len_gt_txt_len) = if let Some(fd) = first_patch.diffs.first() {
2776                (fd.op() != Ops::Equal, pad_len > fd.size())
2777            } else {
2778                (true, false)
2779            };
2780
2781            if add_null_pad {
2782                first_patch.diffs.insert(0, Diff::equal(&null_pad));
2783                first_patch.start1 -= pad_len;
2784                first_patch.start2 -= pad_len;
2785                first_patch.length1 += pad_len;
2786                first_patch.length2 += pad_len;
2787            } else if pad_len_gt_txt_len {
2788                // Grow first equality.
2789                if let Some(fd) = first_patch.diffs.first_mut() {
2790                    let extra_len = pad_len - fd.size();
2791                    fd.1 = [&null_pad[fd.size()..], fd.data()].concat();
2792                    first_patch.start1 -= extra_len;
2793                    first_patch.start2 -= extra_len;
2794                    first_patch.length1 += extra_len;
2795                    first_patch.length2 += extra_len;
2796                }
2797            }
2798        }
2799
2800        // Add some padding on end of last diff.
2801        if let Some(last_patch) = patches.last_mut() {
2802            let (add_null_pad, pad_len_gt_txt_len) = if let Some(ld) = last_patch.diffs.last() {
2803                (ld.op() != Ops::Equal, pad_len > ld.size())
2804            } else {
2805                (true, false)
2806            };
2807
2808            // Add nullPadding equality.
2809            if add_null_pad {
2810                last_patch.diffs.push(Diff::equal(&null_pad));
2811                last_patch.length1 += pad_len;
2812                last_patch.length2 += pad_len;
2813            } else if pad_len_gt_txt_len {
2814                // Grow last equality.
2815                if let Some(ld) = last_patch.diffs.last_mut() {
2816                    let extra_len = pad_len - ld.size();
2817                    ld.1 = [&ld.1[..], &null_pad[..extra_len]].concat();
2818
2819                    last_patch.length1 += extra_len;
2820                    last_patch.length2 += extra_len;
2821                }
2822            }
2823        }
2824
2825        null_pad
2826    }
2827}
2828
2829// Public APIs
2830impl DiffMatchPatch {
2831    /// Create a new instance of the struct with default settings
2832    /// # Example
2833    /// ```
2834    /// use diff_match_patch_rs::{DiffMatchPatch, Error, Efficient};
2835    ///
2836    /// # fn main() -> Result<(), Error> {
2837    /// let mut dmp = DiffMatchPatch::new();
2838    /// // change some settings, e.g. set `line mode` optimization to `false` because you know you have a small text and not many lines
2839    /// dmp.set_checklines(false);
2840    /// // do the diffing
2841    /// let diffs = dmp.diff_main::<Efficient>("Fast enough", "Blazing fast")?;
2842    /// # Ok(())
2843    /// # }
2844    /// ```
2845    pub fn new() -> Self {
2846        Self::default()
2847    }
2848
2849    /// Find the differences between two texts (old and new).  Simplifies the problem by stripping any common prefix or suffix off the texts before diffing.
2850    ///
2851    /// Returns:
2852    /// Vec of changes (Diff).
2853    /// # Example
2854    /// ```
2855    /// # use diff_match_patch_rs::{DiffMatchPatch, Error, Efficient};
2856    ///
2857    /// # fn main() -> Result<(), Error> {
2858    /// let mut dmp = DiffMatchPatch::new();
2859    /// // change some settings, e.g. set `line mode` optimization to `false` because you know you have a small text and not many lines
2860    /// dmp.set_checklines(false);
2861    /// // do the diffing
2862    /// let diffs = dmp.diff_main::<Efficient>("Fast enough", "Blazing fast")?;
2863    /// println!("{}", diffs.iter().map(|d| format!("d")).collect::<Vec<_>>().join("\n"));
2864    /// // You should see the following output
2865    /// // (Delete, F)
2866    /// // (Insert, Blazing f)
2867    /// // (Equal, ast)
2868    /// // (Delete,  enough)
2869    /// # Ok(())
2870    /// # }
2871    /// ```
2872    pub fn diff_main<T: DType>(
2873        &self,
2874        old: &str,
2875        new: &str,
2876    ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
2877        let old = T::from_str(old);
2878        let new = T::from_str(new);
2879
2880        self.diff_internal(&old, &new, self.checklines(), self.deadline())
2881    }
2882
2883    /// A diff of two unrelated texts can be filled with coincidental matches.
2884    /// For example, the diff of "mouse" and "sofas" is [(-1, "m"), (1, "s"), (0, "o"), (-1, "u"), (1, "fa"), (0, "s"), (-1, "e")].
2885    /// While this is the optimum diff, it is difficult for humans to understand. Semantic cleanup rewrites the diff, expanding it into a more intelligible format.
2886    /// The above example would become: [(-1, "mouse"), (1, "sofas")]. If a diff is to be human-readable, it should be passed to diff_cleanup_semantic.
2887    pub fn diff_cleanup_semantic(diffs: &mut Vec<Diff<u8>>) {
2888        Self::cleanup_semantic(diffs)
2889    }
2890
2891    /// This function is similar to diff_cleanupSemantic, except that instead of optimising a diff to be human-readable, it optimises the diff to be efficient for machine processing.
2892    /// The results of both cleanup types are often the same.
2893    ///
2894    /// The efficiency cleanup is based on the observation that a diff made up of large numbers of small diffs edits may take longer to process (in downstream applications) or take more capacity to store or transmit than a smaller number of larger diffs.
2895    /// The diff_match_patch.Diff_EditCost property sets what the cost of handling a new edit is in terms of handling extra characters in an existing edit.
2896    /// The default value is 4, which means if expanding the length of a diff by three characters can eliminate one edit, then that optimisation will reduce the total costs.
2897    pub fn diff_cleanup_efficiency(&self, diffs: &mut Vec<Diff<u8>>) {
2898        self.cleanup_efficiency(diffs)
2899    }
2900
2901    /// Given a diff, measure its Levenshtein distance in terms of the number of inserted, deleted or substituted characters.
2902    /// The minimum distance is 0 which means equality, the maximum distance is the length of the longer string.
2903    pub fn diff_levenshtein<T: DType>(&self, diffs: &[Diff<T>]) -> usize {
2904        let mut levenshtein = 0;
2905        let mut insert = 0;
2906        let mut delete = 0;
2907
2908        diffs.iter().for_each(|diff| {
2909            match diff.op() {
2910                Ops::Insert => insert += diff.size(),
2911                Ops::Delete => delete += diff.size(),
2912                Ops::Equal => {
2913                    // A deletion and an insertion is one substitution.
2914                    levenshtein += insert.max(delete);
2915                    insert = 0;
2916                    delete = 0;
2917                }
2918            }
2919        });
2920
2921        levenshtein += insert.max(delete);
2922
2923        levenshtein
2924    }
2925
2926    /// Takes a diff array and returns a pretty HTML sequence. This function is mainly intended as an example from which to write ones own display functions.
2927    ///
2928    /// # Example
2929    /// ```
2930    /// # use diff_match_patch_rs::{DiffMatchPatch, Error, Efficient, HtmlConfig};
2931    /// # fn main() -> Result<(), Error> {
2932    /// let dmp = DiffMatchPatch::new();
2933    ///
2934    /// let diffs = dmp.diff_main::<Efficient>("The old man and the new house?", "The old man and the old dog!")?;
2935    /// let htmlcfg = HtmlConfig::new();
2936    ///
2937    /// let pretty = dmp.diff_pretty_html(&diffs, &htmlcfg)?;
2938    /// // Should print: "<span>The old man and the </span><del>new house?</del><ins>old dog!</ins>"
2939    /// println!("{pretty}");
2940    ///
2941    /// # Ok(())
2942    /// # }
2943    /// ```
2944    ///
2945    /// Check out [`HtmlConfig`] options for ways to control the generated html.
2946    ///
2947    /// [`HtmlConfig`]: html/struct.HtmlConfig.html
2948    pub fn diff_pretty_html<T: DType>(
2949        &self,
2950        diffs: &[Diff<T>],
2951        html_cfg: &HtmlConfig,
2952    ) -> Result<String, crate::errors::Error> {
2953        let mut diffs = diffs.to_vec();
2954        DiffMatchPatch::cleanup_semantic(&mut diffs);
2955
2956        T::humanize(&mut diffs)?;
2957
2958        let mut is_err = false;
2959        let html = diffs
2960            .iter()
2961            .filter_map(|diff| {
2962                let txt = match T::to_string(diff.data()) {
2963                    Ok(txt) => {
2964                        let mut txt = txt
2965                            .replace("&", "&amp;")
2966                            .replace("<", "&lt;")
2967                            .replace(">", "&gt;");
2968
2969                        if html_cfg.nltobr() {
2970                            txt = txt.replace('\n', "<br>")
2971                        }
2972
2973                        txt
2974                    }
2975                    Err(e) => {
2976                        eprintln!("{e:?}");
2977                        is_err = true;
2978                        "error".to_string()
2979                    }
2980                };
2981
2982                if txt.is_empty() {
2983                    return None;
2984                }
2985
2986                match diff.op() {
2987                    Ops::Insert => Some(format!(
2988                        "<{}{}{}>{txt}</{}>",
2989                        html_cfg.insert_tag(),
2990                        if let Some(cl) = html_cfg.insert_class() {
2991                            format!(" class=\"{cl}\"")
2992                        } else {
2993                            String::new()
2994                        },
2995                        if let Some(st) = html_cfg.insert_style() {
2996                            format!(" style=\"{st}\"")
2997                        } else {
2998                            String::new()
2999                        },
3000                        html_cfg.insert_tag()
3001                    )),
3002                    Ops::Delete => Some(format!(
3003                        "<{}{}{}>{txt}</{}>",
3004                        html_cfg.delete_tag(),
3005                        if let Some(cl) = html_cfg.delete_class() {
3006                            format!(" class=\"{cl}\"")
3007                        } else {
3008                            String::new()
3009                        },
3010                        if let Some(st) = html_cfg.delete_style() {
3011                            format!(" style=\"{st}\"")
3012                        } else {
3013                            String::new()
3014                        },
3015                        html_cfg.delete_tag()
3016                    )),
3017                    Ops::Equal => Some(format!(
3018                        "<{}{}{}>{txt}</{}>",
3019                        html_cfg.equality_tag(),
3020                        if let Some(cl) = html_cfg.equality_class() {
3021                            format!(" class=\"{cl}\"")
3022                        } else {
3023                            String::new()
3024                        },
3025                        if let Some(st) = html_cfg.equality_style() {
3026                            format!(" style=\"{st}\"")
3027                        } else {
3028                            String::new()
3029                        },
3030                        html_cfg.equality_tag()
3031                    )),
3032                }
3033            })
3034            .collect::<Vec<_>>()
3035            .join("");
3036
3037        if !is_err {
3038            Ok(html)
3039        } else {
3040            Err(crate::errors::Error::HtmlWithError(html))
3041        }
3042        // Ok(html)
3043    }
3044
3045    /// Given a location in text1, compute and return the equivalent location in text2.
3046    /// e.g. "The cat" -> "The big cat", 1->1, 4->8
3047    /// # Example
3048    /// ```
3049    /// # use diff_match_patch_rs::{DiffMatchPatch, Error, Efficient};
3050    /// # fn main() -> Result<(), Error> {
3051    /// let dmp = DiffMatchPatch::new();
3052    /// let diffs1 = dmp.diff_main::<Efficient>("The cat", "The big cat").unwrap();
3053    /// assert_eq!(DiffMatchPatch::diff_x_index(&diffs1, 1), 1);
3054    /// assert_eq!(DiffMatchPatch::diff_x_index(&diffs1, 4), 8);
3055    /// let diffs2 = dmp.diff_main::<Efficient>("The big cat", "The cat").unwrap();
3056    /// assert_eq!(DiffMatchPatch::diff_x_index(&diffs2, 1), 1);
3057    /// assert_eq!(DiffMatchPatch::diff_x_index(&diffs2, 9), 5);
3058    /// assert_eq!(DiffMatchPatch::diff_x_index(&diffs2, 6), 4);
3059    /// # Ok(())
3060    /// # }
3061    /// ```
3062    pub fn diff_x_index<D: DType>(diffs: &[Diff<D>], loc: usize) -> usize {
3063        let mut chars1 = 0;
3064        let mut chars2 = 0;
3065        let mut last_chars1 = 0;
3066        let mut last_chars2 = 0;
3067
3068        for diff in diffs {
3069            match diff.op() {
3070                Ops::Delete => {
3071                    chars1 += diff.data().len();
3072                }
3073                Ops::Insert => {
3074                    chars2 += diff.data().len();
3075                }
3076                Ops::Equal => {
3077                    chars1 += diff.data().len();
3078                    chars2 += diff.data().len();
3079                }
3080            }
3081
3082            if chars1 > loc {
3083                // Overshot the location
3084                match diff.op() {
3085                    Ops::Delete => {
3086                        // The location was deleted
3087                        return last_chars2;
3088                    }
3089                    _ => {
3090                        // The location was changed
3091                        return last_chars2 + (loc - last_chars1);
3092                    }
3093                }
3094            }
3095
3096            last_chars1 = chars1;
3097            last_chars2 = chars2;
3098        }
3099
3100        chars2
3101    }
3102
3103    /// Given a text to search, a pattern to search for and an expected location in the text near which to find the pattern, return the location which matches closest.
3104    /// The function will search for the best match based on both the number of character errors between the pattern and the potential match,
3105    /// as well as the distance between the expected location and the potential match.
3106    ///
3107    /// The following example is a classic dilemma. There are two potential matches, one is close to the expected location but contains a one character error,
3108    /// the other is far from the expected location but is exactly the pattern sought after: match_main("abc12345678901234567890abbc", "abc", 26)
3109    /// Which result is returned (0 or 24) is determined by the diff_match_patch.match_distance property.
3110    /// An exact letter match which is 'distance' characters away from the fuzzy location would score as a complete mismatch.
3111    /// For example, a distance of '0' requires the match be at the exact location specified, whereas a threshold of '1000' would require a perfect match to be within 800
3112    /// characters of the expected location to be found using a 0.8 threshold (see below).
3113    /// The larger match_distance is, the slower match_main() may take to compute. This variable defaults to 1000.
3114    ///
3115    /// Another property is `diff_match_patch.match_threshold` which determines the cut-off value for a valid match.
3116    /// If `match_threshold` is closer to 0, the requirements for accuracy increase.
3117    /// If `match_threshold` is closer to 1 then it is more likely that a match will be found.
3118    /// The larger `match_threshold` is, the slower match_main() may take to compute. `match_threshold` defaults to 0.5 and can be updated by `dmp.set_match_threshold()` method.
3119    ///
3120    /// If no match is found, the function returns `None`
3121    ///
3122    /// # Examle
3123    /// ```
3124    /// # use diff_match_patch_rs::{DiffMatchPatch, Error, Efficient};
3125    ///
3126    /// # fn main() {
3127    /// let dmp = DiffMatchPatch::new();
3128    /// // Works with both `Compat` and `Efficient` modes
3129    /// let matched = dmp.match_main::<Efficient>(
3130    ///     "I am the very model of a modern major general.",
3131    ///     " that berry ",
3132    ///     5
3133    /// );
3134    ///
3135    /// # assert_eq!(matched, Some(4));
3136    /// # }
3137    /// ```
3138    pub fn match_main<T: DType>(&self, text: &str, pattern: &str, loc: usize) -> Option<usize> {
3139        let text = T::from_str(text);
3140        let pattern = T::from_str(pattern);
3141        self.match_internal(&text, &pattern, loc)
3142    }
3143
3144    /// Given two texts, or an already computed list of differences (`diffs`), return an array of patch objects.
3145    ///
3146    /// # Example
3147    /// ```
3148    /// # use diff_match_patch_rs::{DiffMatchPatch, Error, Efficient, PatchInput};
3149    ///
3150    /// # fn main() -> Result<(), Error> {
3151    /// let dmp = DiffMatchPatch::new();
3152    ///
3153    /// // You can also make patches from the old and new string directly
3154    /// let patches = dmp.patch_make::<Efficient>(PatchInput::new_text_text("Apples are a fruit.", "Bananas are also fruit"))?;
3155    /// let (new_from_old, _) = dmp.patch_apply(&patches, "Apples are a fruit.")?;
3156    /// assert_eq!("Bananas are also fruit", new_from_old);
3157    ///
3158    /// // Or, create some diffs in `Efficient` or `Compact` mode
3159    /// let diffs = dmp.diff_main::<Efficient>("Apples are a fruit.", "Bananas are also fruit")?;
3160    /// // Now, lets convert the diffs to a bunch of patches - you can use an existing set of diffs to create patches
3161    /// let patches = dmp.patch_make(PatchInput::new_diffs(&diffs))?;
3162    /// let (new_from_old, _) = dmp.patch_apply(&patches, "Apples are a fruit.")?;
3163    /// assert_eq!("Bananas are also fruit", new_from_old);
3164    ///
3165    /// // Or, from the source texts and diffs
3166    /// let patches = dmp.patch_make(PatchInput::new_text_diffs("Apples are a fruit.", &diffs))?;
3167    /// let (new_from_old, _) = dmp.patch_apply(&patches, "Apples are a fruit.")?;
3168    /// assert_eq!("Bananas are also fruit", new_from_old);
3169    /// # Ok(())
3170    /// # }
3171    /// ```
3172    ///
3173    /// The [`PatchInput::new_text_diffs`] method is preferred, use it if you happen to have that data available, otherwise this function will compute the missing pieces.
3174    ///
3175    ///
3176    pub fn patch_make<T: DType>(
3177        &self,
3178        input: PatchInput<T>,
3179    ) -> Result<Patches<T>, crate::errors::Error> {
3180        let mut diff_input;
3181        let txt_old;
3182        let (txt, diffs) = match input {
3183            // No diffs provided, lets make our own
3184            PatchInput::Texts(txt1, txt2) => {
3185                diff_input = self.diff_main(txt1, txt2)?;
3186                if diff_input.len() > 2 {
3187                    Self::cleanup_semantic(&mut diff_input);
3188                    self.cleanup_efficiency(&mut diff_input);
3189                }
3190
3191                (T::from_str(txt1), &diff_input[..])
3192            }
3193            PatchInput::Diffs(diffs) => {
3194                // No origin string provided, compute our own.
3195
3196                (Self::diff_text_old(diffs), diffs)
3197            }
3198            PatchInput::TextDiffs(txt, diffs) => {
3199                txt_old = T::from_str(txt);
3200                (txt_old, diffs)
3201            }
3202        };
3203
3204        self.patch_make_internal(&txt, diffs)
3205    }
3206
3207    /// Reduces an array of patch objects to a block of text which looks extremely similar to the standard GNU diff/patch format. This text may be stored or transmitted.
3208    ///
3209    /// # Example
3210    ///
3211    /// ```
3212    /// # use diff_match_patch_rs::{DiffMatchPatch, Error, Compat, PatchInput};
3213    ///
3214    /// # fn main() -> Result<(), Error> {
3215    /// let dmp = DiffMatchPatch::new();
3216    ///
3217    /// // Making patches from source and edited text - both in `Efficient` and `Compat` mode
3218    /// let patches = dmp.patch_make::<Compat>(PatchInput::new_text_text("Apples are fruit!", "Bananas are also fruit!"))?;
3219    /// let patch_to_text = dmp.patch_to_text(&patches);
3220    ///
3221    /// // Prints patches in GNU diff/ patch format
3222    /// // You can use this format for transmission and/ or storage.
3223    /// println!("{patch_to_text}");
3224    ///
3225    /// # Ok(())
3226    /// # }
3227    /// ```
3228    ///
3229    /// Check out the [`diff_to_delta`] and [`diff_from_delta`] methods for a more compact way of representing diffs.
3230    ///
3231    /// [`diff_to_delta`]: ./method.diff_to_delta
3232    /// [`diff_from_delta`]: ./method.diff_from_delta
3233    ///
3234    pub fn patch_to_text<T: DType>(&self, patches: &Patches<T>) -> String {
3235        patches.iter().map(|p| p.to_string()).collect::<String>()
3236    }
3237
3238    /// Parses a block of text (which was presumably created by the [`patch_to_text`] method) and returns an array of patch objects.
3239    ///
3240    /// # Example
3241    ///
3242    /// ```
3243    /// # use diff_match_patch_rs::{DiffMatchPatch, Error, Compat, PatchInput};
3244    ///
3245    /// # fn main() -> Result<(), Error> {
3246    /// let dmp = DiffMatchPatch::new();
3247    ///
3248    /// // Making patches from source and edited text - both in `Efficient` and `Compat` mode
3249    /// let patches = dmp.patch_make::<Compat>(PatchInput::new_text_text("Apples are fruit!", "Bananas are also fruit!"))?;
3250    /// let patch_to_text = dmp.patch_to_text(&patches);
3251    ///
3252    /// // let's create patches back from text
3253    /// let patches_recreated = dmp.patch_from_text::<Compat>(&patch_to_text)?;
3254    ///
3255    /// // Now you can `patch_apply` the `patches_recreated` to your source text
3256    ///
3257    /// # Ok(())
3258    /// # }
3259    /// ```
3260    ///
3261    /// [`patch_to_text`]: ./method.patch_to_text
3262    pub fn patch_from_text<T: DType>(&self, text: &str) -> Result<Patches<T>, Error> {
3263        if text.is_empty() {
3264            return Ok(vec![]);
3265        }
3266
3267        let txt_t = T::from_str(text);
3268        let mut text = txt_t
3269            .split(|&p| p == T::from_char('\n'))
3270            .collect::<Vec<_>>();
3271
3272        let mut patches = vec![];
3273
3274        while let Some(&t) = text.first() {
3275            let (old_line, old_cols, new_line, new_cols) =
3276                if let Some(p) = Self::parse_patch_header(t) {
3277                    p
3278                } else {
3279                    return Err(Error::InvalidInput);
3280                };
3281
3282            let mut patch = Patch {
3283                start1: old_line,
3284                start2: new_line,
3285                ..Default::default()
3286            };
3287
3288            if let Some(old_cols) = old_cols {
3289                if old_cols != 0 {
3290                    patch.start1 -= 1;
3291                    patch.length1 = old_cols;
3292                }
3293            } else {
3294                patch.start1 -= 1;
3295                patch.length1 = 1;
3296            }
3297
3298            if let Some(new_cols) = new_cols {
3299                if new_cols != 0 {
3300                    patch.start2 -= 1;
3301                    patch.length2 = new_cols;
3302                }
3303            } else {
3304                patch.start2 -= 1;
3305                patch.length2 = 1;
3306            }
3307
3308            text.remove(0);
3309
3310            while !text.is_empty() {
3311                let txt = if let Some(&s) = text.first() {
3312                    if !s.is_empty() {
3313                        s
3314                    } else {
3315                        text.remove(0);
3316                        continue;
3317                    }
3318                } else {
3319                    text.remove(0);
3320                    continue;
3321                };
3322
3323                // Should never panic, already checked for `empty`
3324                let &sign = txt.first().unwrap();
3325
3326                let line = T::percent_decode(&txt[1..]);
3327
3328                if sign == T::from_char('-') {
3329                    patch.diffs.push(Diff::delete(&line));
3330                } else if sign == T::from_char('+') {
3331                    patch.diffs.push(Diff::insert(&line));
3332                } else if sign == T::from_char(' ') {
3333                    patch.diffs.push(Diff::equal(&line));
3334                } else if sign == T::from_char('@') {
3335                    // next patch, break
3336                    break;
3337                } else {
3338                    return Err(Error::InvalidInput);
3339                }
3340
3341                text.remove(0);
3342            }
3343
3344            patches.push(patch);
3345        }
3346
3347        Ok(patches)
3348    }
3349
3350    /// Crush the diff into an encoded string which describes the operations required to transform text_old into text_new.
3351    /// E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
3352    /// Operations are tab-separated. Inserted text is escaped using %xx notation.
3353    ///
3354    /// # Example
3355    ///
3356    /// ```
3357    /// # use diff_match_patch_rs::{DiffMatchPatch, Error, Compat, PatchInput};
3358    /// # fn main() -> Result<(), Error> {
3359    ///
3360    /// let dmp = DiffMatchPatch::new();
3361    ///
3362    /// // let's create some diffs
3363    /// let diffs = dmp.diff_main::<Compat>("The old house and the new dog!", "The old man and the new dog!")?;
3364    /// // now, you can create a `delta` string which can be used to re-create the diffs
3365    /// let delta = dmp.diff_to_delta(&diffs)?;
3366    /// println!("{delta:?}");
3367    /// // You should see something like the following
3368    /// // "=8\t-5\t+man\t=17"
3369    ///
3370    /// // now you can use the `diff_from_delta()` to recover the diffs
3371    /// let diffs_later = dmp.diff_from_delta::<Compat>("The old house and the new dog!", &delta);
3372    ///
3373    /// # Ok(())
3374    /// # }
3375    /// ```
3376    pub fn diff_to_delta<T: DType>(&self, diffs: &[Diff<T>]) -> Result<String, crate::Error> {
3377        let mut data = diffs
3378            .iter()
3379            .map(|diff| match diff.op() {
3380                Ops::Insert => {
3381                    let encoded = T::percent_encode(diff.data());
3382                    [&[T::from_char('+')], &encoded[..], &[T::from_char('\t')]].concat()
3383                }
3384                Ops::Delete => [
3385                    &[T::from_char('-')],
3386                    &T::from_str(diff.size().to_string().as_str())[..],
3387                    &[T::from_char('\t')],
3388                ]
3389                .concat(),
3390                Ops::Equal => [
3391                    &[T::from_char('=')],
3392                    &T::from_str(diff.size().to_string().as_str())[..],
3393                    &[T::from_char('\t')],
3394                ]
3395                .concat(),
3396            })
3397            .collect::<Vec<_>>()
3398            .concat();
3399
3400        data.pop();
3401
3402        T::to_string(&data)
3403    }
3404
3405    /// Given the original text `old`, and an encoded string which describes the
3406    /// operations required to transform text1 into text2, compute the full diff.
3407    ///
3408    /// # Example
3409    ///
3410    /// ```
3411    /// # use diff_match_patch_rs::{DiffMatchPatch, Error, Compat, PatchInput};
3412    /// # fn main() -> Result<(), Error> {
3413    ///
3414    /// let dmp = DiffMatchPatch::new();
3415    ///
3416    /// // let's create some diffs
3417    /// let diffs = dmp.diff_main::<Compat>("The old house and the new dog!", "The old man and the new dog!")?;
3418    /// // now, you can create a `delta` string which can be used to re-create the diffs
3419    /// let delta = dmp.diff_to_delta(&diffs)?;
3420    /// println!("{delta:?}");
3421    /// // You should see something like the following
3422    /// // "=8\t-5\t+man\t=17"
3423    ///
3424    /// // now you can use the `diff_from_delta()` to recover the diffs
3425    /// let diffs_later = dmp.diff_from_delta::<Compat>("The old house and the new dog!", &delta);
3426    /// // Now, you can use these diffs to apply patches to the source text
3427    /// let patches = dmp.patch_make(PatchInput::new_text_diffs("The old house and the new dog!", &diffs))?;
3428    /// let (new_from_old, _) = dmp.patch_apply(&patches, "The old house and the new dog!")?;
3429    ///
3430    ///
3431    /// assert_eq!("The old man and the new dog!", &new_from_old);
3432    ///
3433    /// # Ok(())
3434    /// # }
3435    /// ```
3436    pub fn diff_from_delta<T: DType>(
3437        &self,
3438        old: &str,
3439        delta: &str,
3440    ) -> Result<Vec<Diff<T>>, crate::errors::Error> {
3441        let mut pointer = 0; // cursor to text
3442        let mut diffs = vec![];
3443
3444        let old = T::from_str(old);
3445        let delta = T::from_str(delta);
3446
3447        for token in delta.split(|&k| k == T::from_char('\t')) {
3448            if token.is_empty() {
3449                continue;
3450            }
3451
3452            // Each token begins with a one character parameter which specifies the
3453            // operation of this token (delete, insert, equality).
3454            let opcode = token.first();
3455            let param = &token[1..];
3456
3457            if opcode == Some(&T::from_char('+')) {
3458                let param = T::percent_decode(param);
3459                diffs.push(Diff::insert(&param));
3460            } else if opcode == Some(&T::from_char('-')) || opcode == Some(&T::from_char('=')) {
3461                let n = T::to_string(param)?
3462                    .parse::<isize>()
3463                    .map_err(|_| Error::Utf8Error)?;
3464                if n < 0 {
3465                    return Err(crate::errors::Error::InvalidInput);
3466                }
3467
3468                let n = n as usize;
3469                let new_pointer = pointer + n;
3470                if new_pointer > old.len() {
3471                    return Err(crate::errors::Error::InvalidInput);
3472                }
3473
3474                let txt = &old[pointer..new_pointer];
3475                pointer = new_pointer;
3476
3477                if opcode == Some(&T::from_char('=')) {
3478                    diffs.push(Diff::equal(txt))
3479                } else {
3480                    diffs.push(Diff::delete(txt))
3481                }
3482            } else {
3483                return Err(crate::errors::Error::InvalidInput);
3484            }
3485        }
3486
3487        if pointer != old.len() {
3488            return Err(crate::errors::Error::InvalidInput);
3489        }
3490
3491        Ok(diffs)
3492    }
3493
3494    /// Applies a list of patches to `source_txt`. The first element of the return value is the newly patched text.
3495    /// The second element is an array of true/false values indicating which of the patches were successfully applied.
3496    /// [Note that this second element is not too useful since large patches may get broken up internally, resulting in a longer results list than the input with no way to figure out which patch succeeded or failed.
3497    /// A more informative API is in development.]
3498    ///
3499    /// The `match_distance` and `match_threshold` properties are used to evaluate patch application on text which does not match exactly.
3500    /// In addition, the `DiffMatchPatch.delete_threshold` property determines how closely the text within a major (~64 character) delete needs to match the expected text.
3501    /// If `delete_threshold` is closer to 0, then the deleted text must match the expected text more closely.
3502    /// If `delete_threshold` is closer to 1, then the deleted text may contain anything.
3503    /// In most use cases `delete_threshold` should just be set to the same value as `match_threshold`. Both values default to `0.5`
3504    ///
3505    /// # Example
3506    /// # Example
3507    /// ```
3508    /// # use diff_match_patch_rs::{DiffMatchPatch, Error, Efficient, PatchInput};
3509    ///
3510    /// # fn main() -> Result<(), Error> {
3511    /// let dmp = DiffMatchPatch::new();
3512    ///
3513    /// // You can also make patches from the old and new string directly
3514    /// let patches = dmp.patch_make::<Efficient>(PatchInput::new_text_text("Apples are a fruit.", "Bananas are also fruit"))?;
3515    /// let (new_from_old, _) = dmp.patch_apply(&patches, "Apples are a fruit.")?;
3516    /// assert_eq!("Bananas are also fruit", new_from_old);
3517    ///
3518    /// // Or, create some diffs in `Efficient` or `Compact` mode
3519    /// let diffs = dmp.diff_main::<Efficient>("Apples are a fruit.", "Bananas are also fruit")?;
3520    /// // Now, lets convert the diffs to a bunch of patches - you can use an existing set of diffs to create patches
3521    /// let patches = dmp.patch_make(PatchInput::new_diffs(&diffs))?;
3522    /// let (new_from_old, _) = dmp.patch_apply(&patches, "Apples are a fruit.")?;
3523    /// assert_eq!("Bananas are also fruit", new_from_old);
3524    ///
3525    /// // Or, from the source texts and diffs
3526    /// let patches = dmp.patch_make(PatchInput::new_text_diffs("Apples are a fruit.", &diffs))?;
3527    /// let (new_from_old, _) = dmp.patch_apply(&patches, "Apples are a fruit.")?;
3528    /// assert_eq!("Bananas are also fruit", new_from_old);
3529    /// # Ok(())
3530    /// # }
3531    /// ```
3532    pub fn patch_apply<T: DType>(
3533        &self,
3534        patches: &Patches<T>,
3535        source_txt: &str,
3536    ) -> Result<(String, Vec<bool>), crate::errors::Error> {
3537        let (str_data, results) = self.patch_apply_internal(patches, &T::from_str(source_txt))?;
3538
3539        Ok((
3540            T::to_string(&str_data).map_err(|_| crate::errors::Error::Utf8Error)?,
3541            results,
3542        ))
3543    }
3544}
3545
3546#[cfg(test)]
3547mod tests {
3548    use std::collections::HashMap;
3549
3550    use crate::{
3551        dmp::{Diff, HalfMatch, LineToChars},
3552        Compat, DiffMatchPatch, Efficient, Error, Patch, PatchInput,
3553    };
3554
3555    #[test]
3556    fn test_prefix() {
3557        // Detect any common prefix.
3558        // Null case.
3559        assert_eq!(
3560            0,
3561            DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), false)
3562        );
3563        assert_eq!(
3564            0,
3565            DiffMatchPatch::common_prefix(
3566                &"abc".chars().collect::<Vec<_>>()[..],
3567                &"xyz".chars().collect::<Vec<_>>()[..],
3568                false
3569            )
3570        );
3571
3572        // Non-null case.
3573        assert_eq!(
3574            4,
3575            DiffMatchPatch::common_prefix("1234abcdef".as_bytes(), "1234xyz".as_bytes(), false)
3576        );
3577        assert_eq!(
3578            4,
3579            DiffMatchPatch::common_prefix(
3580                &"1234abcdef".chars().collect::<Vec<_>>()[..],
3581                &"1234xyz".chars().collect::<Vec<_>>()[..],
3582                false
3583            )
3584        );
3585
3586        // Whole case.
3587        assert_eq!(
3588            4,
3589            DiffMatchPatch::common_prefix("1234".as_bytes(), "1234xyz".as_bytes(), false)
3590        );
3591        assert_eq!(
3592            4,
3593            DiffMatchPatch::common_prefix(
3594                &"1234".chars().collect::<Vec<_>>()[..],
3595                &"1234xyz".chars().collect::<Vec<_>>()[..],
3596                false
3597            )
3598        );
3599
3600        // Unicode - difference between `u8`(Efficient) & `char`(Compat)
3601        // first 3 bytes same
3602        assert_eq!(
3603            3,
3604            DiffMatchPatch::common_prefix("🤪".as_bytes(), "🤔".as_bytes(), false)
3605        );
3606        assert_eq!(
3607            0,
3608            DiffMatchPatch::common_prefix(
3609                &"🤪".chars().collect::<Vec<_>>()[..],
3610                &"🤔".chars().collect::<Vec<_>>()[..],
3611                false
3612            )
3613        );
3614        // first 2 bytes same
3615        assert_eq!(
3616            2,
3617            DiffMatchPatch::common_prefix("🍊".as_bytes(), "🌊".as_bytes(), false)
3618        );
3619        assert_eq!(
3620            0,
3621            DiffMatchPatch::common_prefix(
3622                &"🍊".chars().collect::<Vec<_>>()[..],
3623                &"🌊".chars().collect::<Vec<_>>()[..],
3624                false
3625            )
3626        );
3627    }
3628
3629    #[test]
3630    fn test_suffix() {
3631        // Detect any common suffix.
3632        // Null case.
3633        assert_eq!(
3634            0,
3635            DiffMatchPatch::common_prefix("abc".as_bytes(), "xyz".as_bytes(), true)
3636        );
3637        assert_eq!(
3638            0,
3639            DiffMatchPatch::common_prefix(
3640                &"abc".chars().collect::<Vec<_>>()[..],
3641                &"xyz".chars().collect::<Vec<_>>()[..],
3642                true
3643            )
3644        );
3645
3646        // Non-null case.
3647        assert_eq!(
3648            4,
3649            DiffMatchPatch::common_prefix("abcdef1234".as_bytes(), "xyz1234".as_bytes(), true)
3650        );
3651        assert_eq!(
3652            4,
3653            DiffMatchPatch::common_prefix(
3654                &"abcdef1234".chars().collect::<Vec<_>>()[..],
3655                &"xyz1234".chars().collect::<Vec<_>>()[..],
3656                true
3657            )
3658        );
3659
3660        // Whole case.
3661        assert_eq!(
3662            4,
3663            DiffMatchPatch::common_prefix("1234".as_bytes(), "xyz1234".as_bytes(), true)
3664        );
3665        assert_eq!(
3666            4,
3667            DiffMatchPatch::common_prefix(
3668                &"1234".chars().collect::<Vec<_>>()[..],
3669                &"xyz1234".chars().collect::<Vec<_>>()[..],
3670                true
3671            )
3672        );
3673
3674        // Unicode - difference between `u8`(Efficient) & `char`(Compat)
3675        // No common suffix
3676        assert_eq!(
3677            0,
3678            DiffMatchPatch::common_prefix("🍌".as_bytes(), "🌍".as_bytes(), true)
3679        );
3680        assert_eq!(
3681            0,
3682            DiffMatchPatch::common_prefix(
3683                &"🍌".chars().collect::<Vec<_>>()[..],
3684                &"🌍".chars().collect::<Vec<_>>()[..],
3685                true
3686            )
3687        );
3688        // Last bytes same
3689        assert_eq!(
3690            1,
3691            DiffMatchPatch::common_prefix("🍊".as_bytes(), "🌊".as_bytes(), true)
3692        );
3693        assert_eq!(
3694            0,
3695            DiffMatchPatch::common_prefix(
3696                &"🍊".chars().collect::<Vec<_>>()[..],
3697                &"🌊".chars().collect::<Vec<_>>()[..],
3698                true
3699            )
3700        );
3701        assert_eq!(
3702            6,
3703            DiffMatchPatch::common_prefix("🍊 nah!".as_bytes(), "🌊 nah!".as_bytes(), true)
3704        );
3705        assert_eq!(
3706            5,
3707            DiffMatchPatch::common_prefix(
3708                &"🍊 nah!".chars().collect::<Vec<_>>()[..],
3709                &"🌊 nah!".chars().collect::<Vec<_>>()[..],
3710                true
3711            )
3712        );
3713    }
3714
3715    #[test]
3716    fn test_diff_lines_to_chars() {
3717        // Convert lines down to characters.
3718        assert_eq!(
3719            LineToChars {
3720                chars_old: vec![0_usize, 1, 0],
3721                chars_new: vec![1_usize, 0, 1],
3722                lines: vec![b"alpha\n", b"beta\n"]
3723            },
3724            DiffMatchPatch::lines_to_chars(b"alpha\nbeta\nalpha\n", b"beta\nalpha\nbeta\n")
3725        );
3726        assert_eq!(
3727            LineToChars {
3728                chars_old: vec![0_usize, 1, 0],
3729                chars_new: vec![1_usize, 0, 1],
3730                lines: vec![
3731                    &"alpha\n".chars().collect::<Vec<_>>()[..],
3732                    &"beta\n".chars().collect::<Vec<_>>()[..]
3733                ]
3734            },
3735            DiffMatchPatch::lines_to_chars(
3736                &"alpha\nbeta\nalpha\n".chars().collect::<Vec<_>>()[..],
3737                &"beta\nalpha\nbeta\n".chars().collect::<Vec<_>>()[..]
3738            )
3739        );
3740        assert_eq!(
3741            LineToChars {
3742                chars_old: vec![],
3743                chars_new: vec![0_usize, 1, 2, 2],
3744                lines: vec![b"alpha\r\n", b"beta\r\n", b"\r\n"]
3745            },
3746            DiffMatchPatch::lines_to_chars(b"", b"alpha\r\nbeta\r\n\r\n\r\n")
3747        );
3748        assert_eq!(
3749            LineToChars {
3750                chars_old: vec![],
3751                chars_new: vec![0_usize, 1, 2, 2],
3752                lines: vec![
3753                    &"alpha\r\n".chars().collect::<Vec<_>>()[..],
3754                    &"beta\r\n".chars().collect::<Vec<_>>()[..],
3755                    &"\r\n".chars().collect::<Vec<_>>()[..]
3756                ]
3757            },
3758            DiffMatchPatch::lines_to_chars(
3759                &[],
3760                &"alpha\r\nbeta\r\n\r\n\r\n".chars().collect::<Vec<_>>()[..]
3761            )
3762        );
3763        assert_eq!(
3764            LineToChars {
3765                chars_old: vec![0_usize],
3766                chars_new: vec![1_usize],
3767                lines: vec![b"a", b"b"]
3768            },
3769            DiffMatchPatch::lines_to_chars(b"a", b"b")
3770        );
3771        assert_eq!(
3772            LineToChars {
3773                chars_old: vec![0_usize],
3774                chars_new: vec![1_usize],
3775                lines: vec![&['a'], &['b']]
3776            },
3777            DiffMatchPatch::lines_to_chars(&['a'], &['b'])
3778        );
3779
3780        // More than 256 to reveal any 8-bit limitations.
3781        const TLIMIT: usize = 300;
3782        let linestr = (0..TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
3783        let linechars = (0..TLIMIT)
3784            .map(|i| format!("{i}\n").chars().collect::<Vec<_>>())
3785            .collect::<Vec<_>>();
3786        let linelist_u8: Vec<&[u8]> = (0..TLIMIT).map(|i| linestr[i].as_bytes()).collect();
3787        let charlist = (0..TLIMIT).collect::<Vec<_>>();
3788        let linestring = linestr.join("");
3789        let res = DiffMatchPatch::lines_to_chars(linestring.as_bytes(), b"");
3790        let src = LineToChars {
3791            chars_old: charlist.clone(),
3792            chars_new: vec![],
3793            lines: linelist_u8,
3794        };
3795        assert_eq!(res.chars_new, src.chars_new);
3796        assert_eq!(res.lines, src.lines);
3797        assert_eq!(res.chars_old, src.chars_old);
3798
3799        let mut linelist_ch: Vec<&[char]> = vec![];
3800        for lc in linechars.iter() {
3801            linelist_ch.push(&lc[..]);
3802        }
3803
3804        let linestringchars = linestring.chars().collect::<Vec<_>>();
3805        let res = DiffMatchPatch::lines_to_chars(&linestringchars[..], &[]);
3806        let src = LineToChars {
3807            chars_old: charlist,
3808            chars_new: vec![],
3809            lines: linelist_ch,
3810        };
3811        assert_eq!(res.chars_new, src.chars_new);
3812        assert_eq!(res.lines, src.lines);
3813        assert_eq!(res.chars_old, src.chars_old);
3814    }
3815
3816    #[test]
3817    fn test_diff_chars_to_lines() {
3818        // Convert chars up to lines.
3819        let d1 = [0_usize, 1, 0].to_vec();
3820        let d2 = [1_usize, 0, 1].to_vec();
3821        let diffs = [Diff::equal(&d1), Diff::insert(&d2)];
3822
3823        let diffs = DiffMatchPatch::chars_to_lines(&diffs, &[b"alpha\n", b"beta\n"]);
3824
3825        assert_eq!(
3826            vec![
3827                Diff::equal(b"alpha\nbeta\nalpha\n"),
3828                Diff::insert(b"beta\nalpha\nbeta\n")
3829            ],
3830            diffs
3831        );
3832
3833        let diffs = [Diff::equal(&d1), Diff::insert(&d2)];
3834        let diffs = DiffMatchPatch::chars_to_lines(
3835            &diffs,
3836            &[
3837                &"alpha\n".chars().collect::<Vec<_>>()[..],
3838                &"beta\n".chars().collect::<Vec<_>>()[..],
3839            ],
3840        );
3841
3842        assert_eq!(
3843            vec![
3844                Diff::equal(&"alpha\nbeta\nalpha\n".chars().collect::<Vec<_>>()[..]),
3845                Diff::insert(&"beta\nalpha\nbeta\n".chars().collect::<Vec<_>>()[..])
3846            ],
3847            diffs
3848        );
3849
3850        // More than 256 to reveal any 8-bit limitations.
3851        const TLIMIT: usize = 300;
3852
3853        let charlist = (0..TLIMIT).collect::<Vec<_>>();
3854        let linestr = (0..TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
3855        let linelist: Vec<&[u8]> = (0..TLIMIT).map(|i| linestr[i].as_bytes()).collect();
3856
3857        let diffs = [Diff::delete(&charlist)];
3858        let diffs = DiffMatchPatch::chars_to_lines(&diffs, &linelist[..]);
3859
3860        assert_eq!(vec![Diff::delete(linestr.join("").as_bytes())], diffs);
3861
3862        let charlist = (0..TLIMIT).collect::<Vec<_>>();
3863        let linestr = (0..TLIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
3864        let linelist: Vec<Vec<char>> = (0..TLIMIT)
3865            .map(|i| linestr[i].chars().collect::<Vec<_>>())
3866            .collect();
3867
3868        let diffs = [Diff::delete(&charlist)];
3869        let mut linelistchars = vec![];
3870        for ll in linelist.iter() {
3871            linelistchars.push(&ll[..]);
3872        }
3873        let diffs = DiffMatchPatch::chars_to_lines(&diffs, &linelistchars);
3874
3875        assert_eq!(
3876            vec![Diff::delete(
3877                &linestr.join("").chars().collect::<Vec<_>>()[..]
3878            )],
3879            diffs
3880        );
3881
3882        // More than 65536 to verify any 16-bit limitation.
3883        const ULIMIT: usize = 10;
3884        let linestr = (0..ULIMIT).map(|i| format!("{i}\n")).collect::<Vec<_>>();
3885        let lines = linestr.join("");
3886        let l2c = DiffMatchPatch::lines_to_chars(lines.as_bytes(), b"");
3887
3888        let diffs = [Diff::insert(&l2c.chars_old)];
3889        let diffs = DiffMatchPatch::chars_to_lines(&diffs, &l2c.lines);
3890
3891        assert_eq!(lines.as_bytes(), diffs[0].data());
3892
3893        let lchars = lines.chars().collect::<Vec<_>>();
3894        let l2c = DiffMatchPatch::lines_to_chars(&lchars[..], &[]);
3895
3896        let diffs = [Diff::insert(&l2c.chars_old)];
3897        let diffs = DiffMatchPatch::chars_to_lines(&diffs, &l2c.lines);
3898
3899        assert_eq!(&lines.chars().collect::<Vec<_>>()[..], diffs[0].data());
3900    }
3901
3902    #[test]
3903    fn test_diff_cleanup_merge() {
3904        // Cleanup a messy diff.
3905        // Null case.
3906        let mut diffs = vec![];
3907        let test: Vec<Diff<Efficient>> = vec![];
3908        DiffMatchPatch::cleanup_merge(&mut diffs);
3909        assert_eq!(test, diffs);
3910
3911        let mut diffs = vec![];
3912        let test: Vec<Diff<Compat>> = vec![];
3913        DiffMatchPatch::cleanup_merge(&mut diffs);
3914        assert_eq!(test, diffs);
3915
3916        // No change case
3917        let mut diffs = vec![Diff::equal(b"a"), Diff::delete(b"b"), Diff::insert(b"c")];
3918        let test = vec![Diff::equal(b"a"), Diff::delete(b"b"), Diff::insert(b"c")];
3919        DiffMatchPatch::cleanup_merge(&mut diffs);
3920        assert_eq!(test, diffs);
3921
3922        let mut diffs = vec![
3923            Diff::equal(&['a']),
3924            Diff::delete(&['b']),
3925            Diff::insert(&['c']),
3926        ];
3927        let test = vec![
3928            Diff::equal(&['a']),
3929            Diff::delete(&['b']),
3930            Diff::insert(&['c']),
3931        ];
3932        DiffMatchPatch::cleanup_merge(&mut diffs);
3933        assert_eq!(test, diffs);
3934
3935        // Merge equalities.
3936        let mut diffs = vec![Diff::equal(b"a"), Diff::equal(b"b"), Diff::equal(b"c")];
3937        let test = vec![Diff::equal(b"abc")];
3938        DiffMatchPatch::cleanup_merge(&mut diffs);
3939        assert_eq!(test, diffs);
3940
3941        let mut diffs = vec![
3942            Diff::equal(&['a']),
3943            Diff::equal(&['b']),
3944            Diff::equal(&['c']),
3945        ];
3946        let test = vec![Diff::equal(&['a', 'b', 'c'])];
3947        DiffMatchPatch::cleanup_merge(&mut diffs);
3948        assert_eq!(test, diffs);
3949
3950        // Merge deletions.
3951        let mut diffs = vec![Diff::delete(b"a"), Diff::delete(b"b"), Diff::delete(b"c")];
3952        let test = vec![Diff::delete(b"abc")];
3953        DiffMatchPatch::cleanup_merge(&mut diffs);
3954        assert_eq!(test, diffs);
3955
3956        let mut diffs = vec![
3957            Diff::delete(&['a']),
3958            Diff::delete(&['b']),
3959            Diff::delete(&['c']),
3960        ];
3961        let test = vec![Diff::delete(&['a', 'b', 'c'])];
3962        DiffMatchPatch::cleanup_merge(&mut diffs);
3963        assert_eq!(test, diffs);
3964
3965        // Merge insertions.
3966        let mut diffs = vec![Diff::insert(b"a"), Diff::insert(b"b"), Diff::insert(b"c")];
3967        let test = vec![Diff::insert(b"abc")];
3968        DiffMatchPatch::cleanup_merge(&mut diffs);
3969        assert_eq!(test, diffs);
3970
3971        let mut diffs = vec![
3972            Diff::insert(&['a']),
3973            Diff::insert(&['b']),
3974            Diff::insert(&['c']),
3975        ];
3976        let test = vec![Diff::insert(&['a', 'b', 'c'])];
3977        DiffMatchPatch::cleanup_merge(&mut diffs);
3978        assert_eq!(test, diffs);
3979
3980        // Merge interweave.
3981        let mut diffs = vec![
3982            Diff::delete(b"a"),
3983            Diff::insert(b"b"),
3984            Diff::delete(b"c"),
3985            Diff::insert(b"d"),
3986            Diff::equal(b"e"),
3987            Diff::equal(b"f"),
3988        ];
3989        let test = vec![Diff::delete(b"ac"), Diff::insert(b"bd"), Diff::equal(b"ef")];
3990        DiffMatchPatch::cleanup_merge(&mut diffs);
3991        assert_eq!(test, diffs);
3992
3993        let mut diffs = vec![
3994            Diff::delete(&['a']),
3995            Diff::insert(&['b']),
3996            Diff::delete(&['c']),
3997            Diff::insert(&['d']),
3998            Diff::equal(&['e']),
3999            Diff::equal(&['f']),
4000        ];
4001        let test = vec![
4002            Diff::delete(&['a', 'c']),
4003            Diff::insert(&['b', 'd']),
4004            Diff::equal(&['e', 'f']),
4005        ];
4006        DiffMatchPatch::cleanup_merge(&mut diffs);
4007        assert_eq!(test, diffs);
4008
4009        // Prefix and suffix detection.
4010        let mut diffs = vec![
4011            Diff::delete(b"a"),
4012            Diff::insert(b"abc"),
4013            Diff::delete(b"dc"),
4014        ];
4015        let test = vec![
4016            Diff::equal(b"a"),
4017            Diff::delete(b"d"),
4018            Diff::insert(b"b"),
4019            Diff::equal(b"c"),
4020        ];
4021        DiffMatchPatch::cleanup_merge(&mut diffs);
4022        assert_eq!(test, diffs);
4023
4024        let mut diffs = vec![
4025            Diff::delete(&['a']),
4026            Diff::insert(&['a', 'b', 'c']),
4027            Diff::delete(&['d', 'c']),
4028        ];
4029        let test = vec![
4030            Diff::equal(&['a']),
4031            Diff::delete(&['d']),
4032            Diff::insert(&['b']),
4033            Diff::equal(&['c']),
4034        ];
4035        DiffMatchPatch::cleanup_merge(&mut diffs);
4036        assert_eq!(test, diffs);
4037
4038        // Prefix and suffix detection with equalities.
4039        let mut diffs = vec![
4040            Diff::equal(b"x"),
4041            Diff::delete(b"a"),
4042            Diff::insert(b"abc"),
4043            Diff::delete(b"dc"),
4044            Diff::equal(b"y"),
4045        ];
4046        let test = vec![
4047            Diff::equal(b"xa"),
4048            Diff::delete(b"d"),
4049            Diff::insert(b"b"),
4050            Diff::equal(b"cy"),
4051        ];
4052        DiffMatchPatch::cleanup_merge(&mut diffs);
4053        assert_eq!(test, diffs);
4054        let mut diffs = vec![
4055            Diff::equal(&['x']),
4056            Diff::delete(&['a']),
4057            Diff::insert(&['a', 'b', 'c']),
4058            Diff::delete(&['d', 'c']),
4059            Diff::equal(&['y']),
4060        ];
4061        let test = vec![
4062            Diff::equal(&['x', 'a']),
4063            Diff::delete(&['d']),
4064            Diff::insert(&['b']),
4065            Diff::equal(&['c', 'y']),
4066        ];
4067        DiffMatchPatch::cleanup_merge(&mut diffs);
4068        assert_eq!(test, diffs);
4069
4070        // Slide edit left.
4071        let mut diffs = vec![Diff::equal(b"a"), Diff::insert(b"ba"), Diff::equal(b"c")];
4072        let test = vec![Diff::insert(b"ab"), Diff::equal(b"ac")];
4073        DiffMatchPatch::cleanup_merge(&mut diffs);
4074        assert_eq!(test, diffs);
4075
4076        let mut diffs = vec![
4077            Diff::equal(&['a']),
4078            Diff::insert(&['b', 'a']),
4079            Diff::equal(&['c']),
4080        ];
4081        let test = vec![Diff::insert(&['a', 'b']), Diff::equal(&['a', 'c'])];
4082        DiffMatchPatch::cleanup_merge(&mut diffs);
4083        assert_eq!(test, diffs);
4084
4085        // Slide edit right
4086        let mut diffs = vec![Diff::equal(b"c"), Diff::insert(b"ab"), Diff::equal(b"a")];
4087        let test = vec![Diff::equal(b"ca"), Diff::insert(b"ba")];
4088        DiffMatchPatch::cleanup_merge(&mut diffs);
4089        assert_eq!(test, diffs);
4090
4091        let mut diffs = vec![
4092            Diff::equal(&['c']),
4093            Diff::insert(&['a', 'b']),
4094            Diff::equal(&['a']),
4095        ];
4096        let test = vec![Diff::equal(&['c', 'a']), Diff::insert(&['b', 'a'])];
4097        DiffMatchPatch::cleanup_merge(&mut diffs);
4098        assert_eq!(test, diffs);
4099
4100        // Slide edit left recursive.
4101        let mut diffs = vec![
4102            Diff::equal(b"a"),
4103            Diff::delete(b"b"),
4104            Diff::equal(b"c"),
4105            Diff::delete(b"ac"),
4106            Diff::equal(b"x"),
4107        ];
4108        let test = vec![Diff::delete(b"abc"), Diff::equal(b"acx")];
4109        DiffMatchPatch::cleanup_merge(&mut diffs);
4110        assert_eq!(test, diffs);
4111        let mut diffs = vec![
4112            Diff::equal(&['a']),
4113            Diff::delete(&['b']),
4114            Diff::equal(&['c']),
4115            Diff::delete(&['a', 'c']),
4116            Diff::equal(&['x']),
4117        ];
4118        let test = vec![
4119            Diff::delete(&['a', 'b', 'c']),
4120            Diff::equal(&['a', 'c', 'x']),
4121        ];
4122        DiffMatchPatch::cleanup_merge(&mut diffs);
4123        assert_eq!(test, diffs);
4124
4125        // Slide edit right recursive.
4126        let mut diffs = vec![
4127            Diff::equal(b"x"),
4128            Diff::delete(b"ca"),
4129            Diff::equal(b"c"),
4130            Diff::delete(b"b"),
4131            Diff::equal(b"a"),
4132        ];
4133        let test = vec![Diff::equal(b"xca"), Diff::delete(b"cba")];
4134        DiffMatchPatch::cleanup_merge(&mut diffs);
4135        assert_eq!(test, diffs);
4136
4137        let mut diffs = vec![
4138            Diff::equal(&['x']),
4139            Diff::delete(&['c', 'a']),
4140            Diff::equal(&['c']),
4141            Diff::delete(&['b']),
4142            Diff::equal(&['a']),
4143        ];
4144        let test = vec![
4145            Diff::equal(&['x', 'c', 'a']),
4146            Diff::delete(&['c', 'b', 'a']),
4147        ];
4148        DiffMatchPatch::cleanup_merge(&mut diffs);
4149        assert_eq!(test, diffs);
4150
4151        // Empty merge.
4152        let mut diffs = vec![Diff::delete(b"b"), Diff::insert(b"ab"), Diff::equal(b"c")];
4153        let test = vec![Diff::insert(b"a"), Diff::equal(b"bc")];
4154        DiffMatchPatch::cleanup_merge(&mut diffs);
4155        assert_eq!(test, diffs);
4156
4157        let mut diffs = vec![
4158            Diff::delete(&['b']),
4159            Diff::insert(&['a', 'b']),
4160            Diff::equal(&['c']),
4161        ];
4162        let test = vec![Diff::insert(&['a']), Diff::equal(&['b', 'c'])];
4163        DiffMatchPatch::cleanup_merge(&mut diffs);
4164        assert_eq!(test, diffs);
4165
4166        // Empty equality.
4167        let mut diffs = vec![Diff::equal(b""), Diff::insert(b"a"), Diff::equal(b"b")];
4168        let test = vec![Diff::insert(b"a"), Diff::equal(b"b")];
4169        DiffMatchPatch::cleanup_merge(&mut diffs);
4170        assert_eq!(test, diffs);
4171
4172        let mut diffs = vec![Diff::equal(&[]), Diff::insert(&['a']), Diff::equal(&['b'])];
4173        let test = vec![Diff::insert(&['a']), Diff::equal(&['b'])];
4174        DiffMatchPatch::cleanup_merge(&mut diffs);
4175        assert_eq!(test, diffs);
4176    }
4177
4178    #[test]
4179    fn test_diff_cleanup_semantic_overall() {
4180        // Cleanup semantically trivial equalities.
4181        // Null case.
4182        let mut diffs = vec![];
4183        let test: Vec<Diff<u8>> = vec![];
4184        DiffMatchPatch::cleanup_semantic(&mut diffs);
4185        assert_eq!(test, diffs);
4186
4187        // No elimination #1.
4188        let mut diffs = vec![
4189            Diff::delete(b"ab"),
4190            Diff::insert(b"cd"),
4191            Diff::equal(b"12"),
4192            Diff::delete(b"e"),
4193        ];
4194        let test: Vec<Diff<u8>> = vec![
4195            Diff::delete(b"ab"),
4196            Diff::insert(b"cd"),
4197            Diff::equal(b"12"),
4198            Diff::delete(b"e"),
4199        ];
4200        DiffMatchPatch::cleanup_semantic(&mut diffs);
4201        assert_eq!(test, diffs);
4202
4203        let mut diffs = vec![
4204            Diff::delete(&['a', 'b']),
4205            Diff::insert(&['c', 'd']),
4206            Diff::equal(&['1', '2']),
4207            Diff::delete(&['e']),
4208        ];
4209        let test = vec![
4210            Diff::delete(&['a', 'b']),
4211            Diff::insert(&['c', 'd']),
4212            Diff::equal(&['1', '2']),
4213            Diff::delete(&['e']),
4214        ];
4215        DiffMatchPatch::cleanup_semantic(&mut diffs);
4216        assert_eq!(test, diffs);
4217
4218        // No elimination #2.
4219        let mut diffs = vec![
4220            Diff::delete(b"abc"),
4221            Diff::insert(b"ABC"),
4222            Diff::equal(b"1234"),
4223            Diff::delete(b"wxyz"),
4224        ];
4225        let test: Vec<Diff<u8>> = vec![
4226            Diff::delete(b"abc"),
4227            Diff::insert(b"ABC"),
4228            Diff::equal(b"1234"),
4229            Diff::delete(b"wxyz"),
4230        ];
4231        DiffMatchPatch::cleanup_semantic(&mut diffs);
4232        assert_eq!(test, diffs);
4233
4234        let mut diffs = vec![
4235            Diff::delete(&['a', 'b', 'c']),
4236            Diff::insert(&['A', 'B', 'C']),
4237            Diff::equal(&['1', '2', '3', '4']),
4238            Diff::delete(&['w', 'x', 'y', 'z']),
4239        ];
4240        let test = vec![
4241            Diff::delete(&['a', 'b', 'c']),
4242            Diff::insert(&['A', 'B', 'C']),
4243            Diff::equal(&['1', '2', '3', '4']),
4244            Diff::delete(&['w', 'x', 'y', 'z']),
4245        ];
4246        DiffMatchPatch::cleanup_semantic(&mut diffs);
4247        assert_eq!(test, diffs);
4248
4249        // Simple elimination.
4250        let mut diffs = vec![Diff::delete(b"a"), Diff::equal(b"b"), Diff::delete(b"c")];
4251        let test: Vec<Diff<u8>> = vec![Diff::delete(b"abc"), Diff::insert(b"b")];
4252        DiffMatchPatch::cleanup_semantic(&mut diffs);
4253        assert_eq!(test, diffs);
4254
4255        let mut diffs = vec![
4256            Diff::delete(&['a']),
4257            Diff::equal(&['b']),
4258            Diff::delete(&['c']),
4259        ];
4260        let test = vec![Diff::delete(&['a', 'b', 'c']), Diff::insert(&['b'])];
4261        DiffMatchPatch::cleanup_semantic(&mut diffs);
4262        assert_eq!(test, diffs);
4263
4264        // Backpass elimination.
4265        let mut diffs = vec![
4266            Diff::delete(b"ab"),
4267            Diff::equal(b"cd"),
4268            Diff::delete(b"e"),
4269            Diff::equal(b"f"),
4270            Diff::insert(b"g"),
4271        ];
4272        let test: Vec<Diff<u8>> = vec![Diff::delete(b"abcdef"), Diff::insert(b"cdfg")];
4273        DiffMatchPatch::cleanup_semantic(&mut diffs);
4274        assert_eq!(test, diffs);
4275
4276        let mut diffs = vec![
4277            Diff::delete(&['a', 'b']),
4278            Diff::equal(&['c', 'd']),
4279            Diff::delete(&['e']),
4280            Diff::equal(&['f']),
4281            Diff::insert(&['g']),
4282        ];
4283        let test = vec![
4284            Diff::delete(&['a', 'b', 'c', 'd', 'e', 'f']),
4285            Diff::insert(&['c', 'd', 'f', 'g']),
4286        ];
4287        DiffMatchPatch::cleanup_semantic(&mut diffs);
4288        assert_eq!(test, diffs);
4289
4290        // Multiple eliminations.
4291        let mut diffs = vec![
4292            Diff::insert(b"1"),
4293            Diff::equal(b"A"),
4294            Diff::delete(b"B"),
4295            Diff::insert(b"2"),
4296            Diff::equal(b"_"),
4297            Diff::insert(b"1"),
4298            Diff::equal(b"A"),
4299            Diff::delete(b"B"),
4300            Diff::insert(b"2"),
4301        ];
4302        let test: Vec<Diff<u8>> = vec![Diff::delete(b"AB_AB"), Diff::insert(b"1A2_1A2")];
4303        DiffMatchPatch::cleanup_semantic(&mut diffs);
4304        assert_eq!(test, diffs);
4305
4306        let mut diffs = vec![
4307            Diff::insert(&['1']),
4308            Diff::equal(&['A']),
4309            Diff::delete(&['B']),
4310            Diff::insert(&['2']),
4311            Diff::equal(&['_']),
4312            Diff::insert(&['1']),
4313            Diff::equal(&['A']),
4314            Diff::delete(&['B']),
4315            Diff::insert(&['2']),
4316        ];
4317        let test = vec![
4318            Diff::delete(&['A', 'B', '_', 'A', 'B']),
4319            Diff::insert(&['1', 'A', '2', '_', '1', 'A', '2']),
4320        ];
4321        DiffMatchPatch::cleanup_semantic(&mut diffs);
4322        assert_eq!(test, diffs);
4323
4324        // Word boundaries.
4325        let mut diffs = vec![
4326            Diff::equal(b"The c"),
4327            Diff::delete(b"ow and the c"),
4328            Diff::equal(b"at."),
4329        ];
4330        let test: Vec<Diff<u8>> = vec![
4331            Diff::equal(b"The "),
4332            Diff::delete(b"cow and the "),
4333            Diff::equal(b"cat."),
4334        ];
4335        DiffMatchPatch::cleanup_semantic(&mut diffs);
4336        assert_eq!(test, diffs);
4337
4338        let mut diffs = vec![
4339            Diff::equal(&"The c".chars().collect::<Vec<_>>()[..]),
4340            Diff::delete(&"ow and the c".chars().collect::<Vec<_>>()[..]),
4341            Diff::equal(&"at.".chars().collect::<Vec<_>>()[..]),
4342        ];
4343        let test = vec![
4344            Diff::equal(&"The ".chars().collect::<Vec<_>>()[..]),
4345            Diff::delete(&"cow and the ".chars().collect::<Vec<_>>()[..]),
4346            Diff::equal(&"cat.".chars().collect::<Vec<_>>()[..]),
4347        ];
4348        DiffMatchPatch::cleanup_semantic(&mut diffs);
4349        assert_eq!(test, diffs);
4350
4351        // No overlap elimination.
4352        let mut diffs = vec![Diff::delete(b"abcxx"), Diff::insert(b"xxdef")];
4353        let test = vec![Diff::delete(b"abcxx"), Diff::insert(b"xxdef")];
4354        DiffMatchPatch::cleanup_semantic(&mut diffs);
4355        assert_eq!(test, diffs);
4356
4357        let mut diffs = vec![
4358            Diff::delete(&"abcxx".chars().collect::<Vec<_>>()[..]),
4359            Diff::insert(&"xxdef".chars().collect::<Vec<_>>()[..]),
4360        ];
4361        let test = vec![
4362            Diff::delete(&"abcxx".chars().collect::<Vec<_>>()[..]),
4363            Diff::insert(&"xxdef".chars().collect::<Vec<_>>()[..]),
4364        ];
4365        DiffMatchPatch::cleanup_semantic(&mut diffs);
4366        assert_eq!(test, diffs);
4367
4368        // Overlap elimination.
4369        let mut diffs = vec![Diff::delete(b"abcxxx"), Diff::insert(b"xxxdef")];
4370        let test = vec![
4371            Diff::delete(b"abc"),
4372            Diff::equal(b"xxx"),
4373            Diff::insert(b"def"),
4374        ];
4375        DiffMatchPatch::cleanup_semantic(&mut diffs);
4376        assert_eq!(test, diffs);
4377
4378        let mut diffs = vec![
4379            Diff::delete(&"abcxxx".chars().collect::<Vec<_>>()[..]),
4380            Diff::insert(&"xxxdef".chars().collect::<Vec<_>>()[..]),
4381        ];
4382        let test = vec![
4383            Diff::delete(&"abc".chars().collect::<Vec<_>>()[..]),
4384            Diff::equal(&"xxx".chars().collect::<Vec<_>>()[..]),
4385            Diff::insert(&"def".chars().collect::<Vec<_>>()[..]),
4386        ];
4387        DiffMatchPatch::cleanup_semantic(&mut diffs);
4388        assert_eq!(test, diffs);
4389
4390        // Reverse overlap elimination.
4391        let mut diffs = vec![
4392            Diff::delete(&"xxxabc".chars().collect::<Vec<_>>()[..]),
4393            Diff::insert(&"defxxx".chars().collect::<Vec<_>>()[..]),
4394        ];
4395        let test = vec![
4396            Diff::insert(&"def".chars().collect::<Vec<_>>()[..]),
4397            Diff::equal(&"xxx".chars().collect::<Vec<_>>()[..]),
4398            Diff::delete(&"abc".chars().collect::<Vec<_>>()[..]),
4399        ];
4400        DiffMatchPatch::cleanup_semantic(&mut diffs);
4401        assert_eq!(test, diffs);
4402
4403        let mut diffs = vec![Diff::delete(b"xxxabc"), Diff::insert(b"defxxx")];
4404        let test = vec![
4405            Diff::insert(b"def"),
4406            Diff::equal(b"xxx"),
4407            Diff::delete(b"abc"),
4408        ];
4409        DiffMatchPatch::cleanup_semantic(&mut diffs);
4410        assert_eq!(test, diffs);
4411
4412        let mut diffs = vec![
4413            Diff::delete(&"xxxabc".chars().collect::<Vec<_>>()[..]),
4414            Diff::insert(&"defxxx".chars().collect::<Vec<_>>()[..]),
4415        ];
4416        let test = vec![
4417            Diff::insert(&"def".chars().collect::<Vec<_>>()[..]),
4418            Diff::equal(&"xxx".chars().collect::<Vec<_>>()[..]),
4419            Diff::delete(&"abc".chars().collect::<Vec<_>>()[..]),
4420        ];
4421        DiffMatchPatch::cleanup_semantic(&mut diffs);
4422        assert_eq!(test, diffs);
4423
4424        // Two overlap eliminations.
4425        let mut diffs = vec![
4426            Diff::delete(b"abcd1212"),
4427            Diff::insert(b"1212efghi"),
4428            Diff::equal(b"----"),
4429            Diff::delete(b"A3"),
4430            Diff::insert(b"3BC"),
4431        ];
4432        let test = vec![
4433            Diff::delete(b"abcd"),
4434            Diff::equal(b"1212"),
4435            Diff::insert(b"efghi"),
4436            Diff::equal(b"----"),
4437            Diff::delete(b"A"),
4438            Diff::equal(b"3"),
4439            Diff::insert(b"BC"),
4440        ];
4441        DiffMatchPatch::cleanup_semantic(&mut diffs);
4442        assert_eq!(test, diffs);
4443
4444        let mut diffs = vec![
4445            Diff::delete(&"abcd1212".chars().collect::<Vec<_>>()[..]),
4446            Diff::insert(&"1212efghi".chars().collect::<Vec<_>>()[..]),
4447            Diff::equal(&"----".chars().collect::<Vec<_>>()[..]),
4448            Diff::delete(&"A3".chars().collect::<Vec<_>>()[..]),
4449            Diff::insert(&"3BC".chars().collect::<Vec<_>>()[..]),
4450        ];
4451        let test = vec![
4452            Diff::delete(&"abcd".chars().collect::<Vec<_>>()[..]),
4453            Diff::equal(&"1212".chars().collect::<Vec<_>>()[..]),
4454            Diff::insert(&"efghi".chars().collect::<Vec<_>>()[..]),
4455            Diff::equal(&"----".chars().collect::<Vec<_>>()[..]),
4456            Diff::delete(&['A']),
4457            Diff::equal(&['3']),
4458            Diff::insert(&['B', 'C']),
4459        ];
4460        DiffMatchPatch::cleanup_semantic(&mut diffs);
4461        assert_eq!(test, diffs);
4462
4463        // overlap that fully eliminates a delete at the front of an insert
4464        let mut diffs = vec![
4465            Diff::delete(&"abcd".chars().collect::<Vec<_>>()[..]),
4466            Diff::insert(&"abcd1212".chars().collect::<Vec<_>>()[..]),
4467            Diff::equal(&"wxyz".chars().collect::<Vec<_>>()[..]),
4468        ];
4469        let test = vec![
4470            Diff::delete(&"".chars().collect::<Vec<_>>()[..]),
4471            Diff::equal(&"abcd".chars().collect::<Vec<_>>()[..]),
4472            Diff::insert(&"1212".chars().collect::<Vec<_>>()[..]),
4473            Diff::equal(&"wxyz".chars().collect::<Vec<_>>()[..]),
4474        ];
4475        DiffMatchPatch::cleanup_semantic(&mut diffs);
4476        assert_eq!(test, diffs);
4477
4478        // overlap that fully eliminates a delete at the tail of an insert
4479        let mut diffs = vec![
4480            Diff::delete(&"abcd".chars().collect::<Vec<_>>()[..]),
4481            Diff::insert(&"1212abcd".chars().collect::<Vec<_>>()[..]),
4482            Diff::delete(&"2323".chars().collect::<Vec<_>>()[..]),
4483        ];
4484        let test = vec![
4485            Diff::insert(&"1212".chars().collect::<Vec<_>>()[..]),
4486            Diff::equal(&"abcd".chars().collect::<Vec<_>>()[..]),
4487            Diff::delete(&"".chars().collect::<Vec<_>>()[..]),
4488            Diff::delete(&"2323".chars().collect::<Vec<_>>()[..]),
4489        ];
4490        DiffMatchPatch::cleanup_semantic(&mut diffs);
4491        assert_eq!(test, diffs);
4492
4493        // overlap that fully eliminates an insert
4494        let mut diffs = vec![
4495            Diff::delete(&"abcd1212".chars().collect::<Vec<_>>()[..]),
4496            Diff::insert(&"1212".chars().collect::<Vec<_>>()[..]),
4497            Diff::equal(&"wxyz".chars().collect::<Vec<_>>()[..]),
4498        ];
4499        let test = vec![
4500            Diff::delete(&"abcd".chars().collect::<Vec<_>>()[..]),
4501            Diff::equal(&"1212".chars().collect::<Vec<_>>()[..]),
4502            Diff::insert(&"".chars().collect::<Vec<_>>()[..]),
4503            Diff::equal(&"wxyz".chars().collect::<Vec<_>>()[..]),
4504        ];
4505        DiffMatchPatch::cleanup_semantic(&mut diffs);
4506        assert_eq!(test, diffs);
4507    }
4508
4509    #[test]
4510    fn test_diff_cleanup_semantic_lossless() {
4511        // Slide diffs to match logical boundaries.
4512        // Null case.
4513        let mut diffs: Vec<Diff<u8>> = vec![];
4514        let test: Vec<Diff<u8>> = vec![];
4515        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4516        assert_eq!(test, diffs);
4517
4518        let mut diffs: Vec<Diff<char>> = vec![];
4519        let test: Vec<Diff<char>> = vec![];
4520        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4521        assert_eq!(test, diffs);
4522
4523        // Blank lines.
4524        let mut diffs: Vec<Diff<u8>> = vec![
4525            Diff::equal(b"AAA\r\n\r\nBBB"),
4526            Diff::insert(b"\r\nDDD\r\n\r\nBBB"),
4527            Diff::equal(b"\r\nEEE"),
4528        ];
4529        let test: Vec<Diff<u8>> = vec![
4530            Diff::equal(b"AAA\r\n\r\n"),
4531            Diff::insert(b"BBB\r\nDDD\r\n\r\n"),
4532            Diff::equal(b"BBB\r\nEEE"),
4533        ];
4534        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4535        assert_eq!(test, diffs);
4536
4537        let mut diffs = vec![
4538            Diff::equal(&"AAA\r\n\r\nBBB".chars().collect::<Vec<_>>()[..]),
4539            Diff::insert(&"\r\nDDD\r\n\r\nBBB".chars().collect::<Vec<_>>()[..]),
4540            Diff::equal(&"\r\nEEE".chars().collect::<Vec<_>>()[..]),
4541        ];
4542        let test = vec![
4543            Diff::equal(&"AAA\r\n\r\n".chars().collect::<Vec<_>>()[..]),
4544            Diff::insert(&"BBB\r\nDDD\r\n\r\n".chars().collect::<Vec<_>>()[..]),
4545            Diff::equal(&"BBB\r\nEEE".chars().collect::<Vec<_>>()[..]),
4546        ];
4547        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4548        assert_eq!(test, diffs);
4549
4550        // Line boundaries.
4551        let mut diffs = vec![
4552            Diff::equal(&"AAA\r\nBBB".chars().collect::<Vec<_>>()[..]),
4553            Diff::insert(&" DDD\r\nBBB".chars().collect::<Vec<_>>()[..]),
4554            Diff::equal(&" EEE".chars().collect::<Vec<_>>()[..]),
4555        ];
4556        let test = vec![
4557            Diff::equal(&"AAA\r\n".chars().collect::<Vec<_>>()[..]),
4558            Diff::insert(&"BBB DDD\r\n".chars().collect::<Vec<_>>()[..]),
4559            Diff::equal(&"BBB EEE".chars().collect::<Vec<_>>()[..]),
4560        ];
4561        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4562        assert_eq!(test, diffs);
4563
4564        let mut diffs = vec![
4565            Diff::equal(b"AAA\r\nBBB"),
4566            Diff::insert(b" DDD\r\nBBB"),
4567            Diff::equal(b" EEE"),
4568        ];
4569        let test = vec![
4570            Diff::equal(b"AAA\r\n"),
4571            Diff::insert(b"BBB DDD\r\n"),
4572            Diff::equal(b"BBB EEE"),
4573        ];
4574        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4575        assert_eq!(test, diffs);
4576
4577        let mut diffs = vec![
4578            Diff::equal(&"AAA\r\nBBB".chars().collect::<Vec<_>>()[..]),
4579            Diff::insert(&" DDD\r\nBBB".chars().collect::<Vec<_>>()[..]),
4580            Diff::equal(&" EEE".chars().collect::<Vec<_>>()[..]),
4581        ];
4582        let test = vec![
4583            Diff::equal(&"AAA\r\n".chars().collect::<Vec<_>>()[..]),
4584            Diff::insert(&"BBB DDD\r\n".chars().collect::<Vec<_>>()[..]),
4585            Diff::equal(&"BBB EEE".chars().collect::<Vec<_>>()[..]),
4586        ];
4587        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4588        assert_eq!(test, diffs);
4589
4590        // Word boundaries.
4591        let mut diffs = vec![
4592            Diff::equal(b"The c"),
4593            Diff::insert(b"ow and the c"),
4594            Diff::equal(b"at."),
4595        ];
4596        let test = vec![
4597            Diff::equal(b"The "),
4598            Diff::insert(b"cow and the "),
4599            Diff::equal(b"cat."),
4600        ];
4601        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4602        assert_eq!(test, diffs);
4603
4604        let mut diffs = vec![
4605            Diff::equal(&"The c".chars().collect::<Vec<_>>()[..]),
4606            Diff::insert(&"ow and the c".chars().collect::<Vec<_>>()[..]),
4607            Diff::equal(&"at.".chars().collect::<Vec<_>>()[..]),
4608        ];
4609        let test = vec![
4610            Diff::equal(&"The ".chars().collect::<Vec<_>>()[..]),
4611            Diff::insert(&"cow and the ".chars().collect::<Vec<_>>()[..]),
4612            Diff::equal(&"cat.".chars().collect::<Vec<_>>()[..]),
4613        ];
4614        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4615        assert_eq!(test, diffs);
4616
4617        // Alphanumeric boundaries.
4618        let mut diffs = vec![
4619            Diff::equal(b"The-c"),
4620            Diff::insert(b"ow-and-the-c"),
4621            Diff::equal(b"at."),
4622        ];
4623        let test = vec![
4624            Diff::equal(b"The-"),
4625            Diff::insert(b"cow-and-the-"),
4626            Diff::equal(b"cat."),
4627        ];
4628        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4629        assert_eq!(test, diffs);
4630
4631        let mut diffs = vec![
4632            Diff::equal(&"The-c".chars().collect::<Vec<_>>()[..]),
4633            Diff::insert(&"ow-and-the-c".chars().collect::<Vec<_>>()[..]),
4634            Diff::equal(&"at.".chars().collect::<Vec<_>>()[..]),
4635        ];
4636        let test = vec![
4637            Diff::equal(&"The-".chars().collect::<Vec<_>>()[..]),
4638            Diff::insert(&"cow-and-the-".chars().collect::<Vec<_>>()[..]),
4639            Diff::equal(&"cat.".chars().collect::<Vec<_>>()[..]),
4640        ];
4641        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4642        assert_eq!(test, diffs);
4643
4644        // Hitting the start.
4645        let mut diffs = vec![Diff::equal(b"a"), Diff::delete(b"a"), Diff::equal(b"ax")];
4646        let test = vec![Diff::delete(b"a"), Diff::equal(b"aax")];
4647        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4648        assert_eq!(test, diffs);
4649
4650        let mut diffs = vec![
4651            Diff::equal(&['a']),
4652            Diff::delete(&['a']),
4653            Diff::equal(&['a', 'x']),
4654        ];
4655        let test = vec![Diff::delete(&['a']), Diff::equal(&['a', 'a', 'x'])];
4656        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4657        assert_eq!(test, diffs);
4658
4659        // Hitting the end.
4660        let mut diffs = vec![Diff::equal(b"xa"), Diff::delete(b"a"), Diff::equal(b"a")];
4661        let test = vec![Diff::equal(b"xaa"), Diff::delete(b"a")];
4662        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4663        assert_eq!(test, diffs);
4664
4665        let mut diffs = vec![
4666            Diff::equal(&['x', 'a']),
4667            Diff::delete(&['a']),
4668            Diff::equal(&['a']),
4669        ];
4670        let test = vec![Diff::equal(&['x', 'a', 'a']), Diff::delete(&['a'])];
4671        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4672        assert_eq!(test, diffs);
4673
4674        // Sentence boundaries.
4675        let mut diffs = vec![
4676            Diff::equal(b"The xxx. The "),
4677            Diff::insert(b"zzz. The "),
4678            Diff::equal(b"yyy."),
4679        ];
4680        let test = vec![
4681            Diff::equal(b"The xxx."),
4682            Diff::insert(b" The zzz."),
4683            Diff::equal(b" The yyy."),
4684        ];
4685        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4686        assert_eq!(test, diffs);
4687
4688        let mut diffs = vec![
4689            Diff::equal(&"The xxx. The ".chars().collect::<Vec<_>>()[..]),
4690            Diff::insert(&"zzz. The ".chars().collect::<Vec<_>>()[..]),
4691            Diff::equal(&"yyy.".chars().collect::<Vec<_>>()[..]),
4692        ];
4693        let test = vec![
4694            Diff::equal(&"The xxx.".chars().collect::<Vec<_>>()[..]),
4695            Diff::insert(&" The zzz.".chars().collect::<Vec<_>>()[..]),
4696            Diff::equal(&" The yyy.".chars().collect::<Vec<_>>()[..]),
4697        ];
4698        DiffMatchPatch::cleanup_semantic_lossless(&mut diffs);
4699        assert_eq!(test, diffs);
4700    }
4701
4702    #[test]
4703    fn test_diff_cleanup_effitiency() {
4704        // testDiffCleanupEfficiency
4705        // Cleanup operationally trivial equalities.
4706        let mut dmp = DiffMatchPatch {
4707            edit_cost: 4,
4708            ..Default::default()
4709        };
4710
4711        // Nullcase
4712        let mut diffs = vec![];
4713        dmp.cleanup_efficiency::<Efficient>(&mut diffs);
4714        assert!(diffs.is_empty());
4715
4716        let mut diffs = vec![];
4717        dmp.cleanup_efficiency::<Compat>(&mut diffs);
4718        assert!(diffs.is_empty());
4719
4720        // No eliminations
4721        let mut diffs = vec![
4722            Diff::delete(b"ab"),
4723            Diff::insert(b"12"),
4724            Diff::equal(b"wxyz"),
4725            Diff::delete(b"cd"),
4726            Diff::insert(b"34"),
4727        ];
4728
4729        dmp.cleanup_efficiency(&mut diffs);
4730        assert_eq!(
4731            vec![
4732                Diff::delete(b"ab"),
4733                Diff::insert(b"12"),
4734                Diff::equal(b"wxyz"),
4735                Diff::delete(b"cd"),
4736                Diff::insert(b"34")
4737            ],
4738            diffs
4739        );
4740
4741        let mut diffs = vec![
4742            Diff::delete(&['a', 'b']),
4743            Diff::insert(&['1', '2']),
4744            Diff::equal(&['w', 'x', 'y', 'z']),
4745            Diff::delete(&['c', 'd']),
4746            Diff::insert(&['3', '4']),
4747        ];
4748
4749        dmp.cleanup_efficiency(&mut diffs);
4750        assert_eq!(
4751            vec![
4752                Diff::delete(&['a', 'b']),
4753                Diff::insert(&['1', '2']),
4754                Diff::equal(&['w', 'x', 'y', 'z']),
4755                Diff::delete(&['c', 'd']),
4756                Diff::insert(&['3', '4'])
4757            ],
4758            diffs
4759        );
4760
4761        let mut diffs = vec![
4762            Diff::delete("😀".as_bytes()),
4763            Diff::insert(b"12"),
4764            Diff::equal(b"wxyz"),
4765            Diff::delete("❤️‍🔥".as_bytes()),
4766            Diff::insert(b"34"),
4767        ];
4768
4769        dmp.cleanup_efficiency(&mut diffs);
4770        assert_eq!(
4771            vec![
4772                Diff::delete("😀".as_bytes()),
4773                Diff::insert(b"12"),
4774                Diff::equal(b"wxyz"),
4775                Diff::delete("❤️‍🔥".as_bytes()),
4776                Diff::insert(b"34")
4777            ],
4778            diffs
4779        );
4780
4781        let mut diffs = vec![
4782            Diff::delete(&['😀']),
4783            Diff::insert(&['1', '2']),
4784            Diff::equal(&['w', 'x', 'y', 'z']),
4785            Diff::delete(&"❤️‍🔥".chars().collect::<Vec<_>>()[..]),
4786            Diff::insert(&['3', '4']),
4787        ];
4788
4789        dmp.cleanup_efficiency(&mut diffs);
4790        assert_eq!(
4791            vec![
4792                Diff::delete(&['😀']),
4793                Diff::insert(&['1', '2']),
4794                Diff::equal(&['w', 'x', 'y', 'z']),
4795                Diff::delete(&"❤️‍🔥".chars().collect::<Vec<_>>()[..]),
4796                Diff::insert(&['3', '4'])
4797            ],
4798            diffs
4799        );
4800
4801        // Four edit eliminations
4802        let mut diffs = vec![
4803            Diff::delete("😀".as_bytes()),
4804            Diff::insert(b"12"),
4805            Diff::equal(b"xyz"),
4806            Diff::delete("❤️‍🔥".as_bytes()),
4807            Diff::insert(b"34"),
4808        ];
4809
4810        dmp.cleanup_efficiency(&mut diffs);
4811        assert_eq!(
4812            vec![Diff::delete("😀xyz❤️‍🔥".as_bytes()), Diff::insert(b"12xyz34"),],
4813            diffs
4814        );
4815
4816        let mut diffs = vec![
4817            Diff::delete(&['😀']),
4818            Diff::insert(&['1', '2']),
4819            Diff::equal(&['x', 'y', 'z']),
4820            Diff::delete(&"❤️‍🔥".chars().collect::<Vec<_>>()),
4821            Diff::insert(&['3', '4']),
4822        ];
4823
4824        dmp.cleanup_efficiency(&mut diffs);
4825        assert_eq!(
4826            vec![
4827                Diff::delete(&"😀xyz❤️‍🔥".chars().collect::<Vec<_>>()),
4828                Diff::insert(&['1', '2', 'x', 'y', 'z', '3', '4']),
4829            ],
4830            diffs
4831        );
4832
4833        let mut diffs = vec![
4834            Diff::delete("ab".as_bytes()),
4835            Diff::insert(b"12"),
4836            Diff::equal(b"xyz"),
4837            Diff::delete("cd".as_bytes()),
4838            Diff::insert(b"34"),
4839        ];
4840
4841        dmp.cleanup_efficiency(&mut diffs);
4842        assert_eq!(
4843            vec![Diff::delete("abxyzcd".as_bytes()), Diff::insert(b"12xyz34"),],
4844            diffs
4845        );
4846
4847        let mut diffs = vec![
4848            Diff::delete(&['a', 'b']),
4849            Diff::insert(&['1', '2']),
4850            Diff::equal(&['x', 'y', 'z']),
4851            Diff::delete(&['c', 'd']),
4852            Diff::insert(&['3', '4']),
4853        ];
4854
4855        dmp.cleanup_efficiency(&mut diffs);
4856        assert_eq!(
4857            vec![
4858                Diff::delete(&"abxyzcd".chars().collect::<Vec<_>>()),
4859                Diff::insert(&['1', '2', 'x', 'y', 'z', '3', '4']),
4860            ],
4861            diffs
4862        );
4863
4864        // 3 edit eliminations
4865        let mut diffs = vec![
4866            Diff::insert("😀".as_bytes()),
4867            Diff::equal(b"x"),
4868            Diff::delete("❤️‍🔥".as_bytes()),
4869            Diff::insert(b"34"),
4870        ];
4871
4872        dmp.cleanup_efficiency(&mut diffs);
4873        assert_eq!(
4874            vec![
4875                Diff::delete("x❤️‍🔥".as_bytes()),
4876                Diff::insert("😀x34".as_bytes()),
4877            ],
4878            diffs
4879        );
4880
4881        let mut diffs = vec![
4882            Diff::insert(&['😀']),
4883            Diff::equal(&['x']),
4884            Diff::delete(&"❤️‍🔥".chars().collect::<Vec<_>>()),
4885            Diff::insert(&['3', '4']),
4886        ];
4887
4888        dmp.cleanup_efficiency(&mut diffs);
4889        assert_eq!(
4890            vec![
4891                Diff::delete(&"x❤️‍🔥".chars().collect::<Vec<_>>()),
4892                Diff::insert(&['😀', 'x', '3', '4']),
4893            ],
4894            diffs
4895        );
4896
4897        let mut diffs = vec![
4898            Diff::insert(b"12"),
4899            Diff::equal(b"x"),
4900            Diff::delete("cd".as_bytes()),
4901            Diff::insert(b"34"),
4902        ];
4903
4904        dmp.cleanup_efficiency(&mut diffs);
4905        assert_eq!(
4906            vec![Diff::delete("xcd".as_bytes()), Diff::insert(b"12x34"),],
4907            diffs
4908        );
4909
4910        let mut diffs = vec![
4911            Diff::insert(&['1', '2']),
4912            Diff::equal(&['x']),
4913            Diff::delete(&['c', 'd']),
4914            Diff::insert(&['3', '4']),
4915        ];
4916
4917        dmp.cleanup_efficiency(&mut diffs);
4918        assert_eq!(
4919            vec![
4920                Diff::delete(&"xcd".chars().collect::<Vec<_>>()),
4921                Diff::insert(&['1', '2', 'x', '3', '4']),
4922            ],
4923            diffs
4924        );
4925
4926        // backpass eliminations
4927        let mut diffs = vec![
4928            Diff::delete(b"ab"),
4929            Diff::insert(b"12"),
4930            Diff::equal(b"xy"),
4931            Diff::insert(b"34"),
4932            Diff::equal(b"z"),
4933            Diff::delete(b"cd"),
4934            Diff::insert(b"56"),
4935        ];
4936
4937        dmp.cleanup_efficiency(&mut diffs);
4938        assert_eq!(
4939            vec![
4940                Diff::delete("abxyzcd".as_bytes()),
4941                Diff::insert(b"12xy34z56"),
4942            ],
4943            diffs
4944        );
4945
4946        let mut diffs = vec![
4947            Diff::delete(&['a', 'b']),
4948            Diff::insert(&['1', '2']),
4949            Diff::equal(&['x', 'y']),
4950            Diff::insert(&['3', '4']),
4951            Diff::equal(&['z']),
4952            Diff::delete(&['c', 'd']),
4953            Diff::insert(&['5', '6']),
4954        ];
4955
4956        dmp.cleanup_efficiency(&mut diffs);
4957        assert_eq!(
4958            vec![
4959                Diff::delete(&"abxyzcd".chars().collect::<Vec<_>>()[..]),
4960                Diff::insert(&"12xy34z56".chars().collect::<Vec<_>>()[..]),
4961            ],
4962            diffs
4963        );
4964
4965        let mut diffs = vec![
4966            Diff::delete("🩵".as_bytes()),
4967            Diff::insert(b"12"),
4968            Diff::equal(b"xy"),
4969            Diff::insert("💨".as_bytes()),
4970            Diff::equal(b"z"),
4971            Diff::delete(b"cd"),
4972            Diff::insert(b"56"),
4973        ];
4974
4975        dmp.cleanup_efficiency(&mut diffs);
4976        assert_eq!(
4977            vec![
4978                Diff::delete("🩵xyzcd".as_bytes()),
4979                Diff::insert("12xy💨z56".as_bytes()),
4980            ],
4981            diffs
4982        );
4983
4984        let mut diffs = vec![
4985            Diff::delete(&"🩵".chars().collect::<Vec<_>>()[..]),
4986            Diff::insert(&['1', '2']),
4987            Diff::equal(&['x', 'y']),
4988            Diff::insert(&"💨".chars().collect::<Vec<_>>()[..]),
4989            Diff::equal(&['z']),
4990            Diff::delete(&['c', 'd']),
4991            Diff::insert(&['5', '6']),
4992        ];
4993
4994        dmp.cleanup_efficiency(&mut diffs);
4995        assert_eq!(
4996            vec![
4997                Diff::delete(&"🩵xyzcd".chars().collect::<Vec<_>>()[..]),
4998                Diff::insert(&"12xy💨z56".chars().collect::<Vec<_>>()[..]),
4999            ],
5000            diffs
5001        );
5002
5003        // High cost estimation
5004        dmp.set_edit_cost(5);
5005        let mut diffs = vec![
5006            Diff::delete(b"ab"),
5007            Diff::insert(b"12"),
5008            Diff::equal(b"wxyz"),
5009            Diff::delete(b"cd"),
5010            Diff::insert(b"34"),
5011        ];
5012
5013        dmp.cleanup_efficiency(&mut diffs);
5014        assert_eq!(
5015            vec![
5016                Diff::delete("abwxyzcd".as_bytes()),
5017                Diff::insert(b"12wxyz34"),
5018            ],
5019            diffs
5020        );
5021    }
5022
5023    #[test]
5024    fn test_diff_x_index() {
5025        // Translate a location in text1 to text2.
5026        let diffs = vec![
5027            Diff::delete(b"a"),
5028            Diff::insert(b"1234"),
5029            Diff::equal(b"xyz"),
5030        ];
5031        assert_eq!(5, DiffMatchPatch::x_index(&diffs, 2));
5032
5033        let diffs = vec![
5034            Diff::equal(b"a"),
5035            Diff::delete(b"1234"),
5036            Diff::equal(b"xyz"),
5037        ];
5038        assert_eq!(1, DiffMatchPatch::x_index(&diffs, 3));
5039
5040        let diffs = vec![
5041            Diff::delete(&['a']),
5042            Diff::insert(&['1', '2', '3', '4']),
5043            Diff::equal(&['x', 'y', 'z']),
5044        ];
5045        assert_eq!(5, DiffMatchPatch::x_index(&diffs, 2));
5046
5047        let diffs = vec![
5048            Diff::equal(&['a']),
5049            Diff::delete(&['1', '2', '3', '4']),
5050            Diff::equal(&['x', 'y', 'z']),
5051        ];
5052        assert_eq!(1, DiffMatchPatch::x_index(&diffs, 3));
5053    }
5054
5055    #[test]
5056    fn test_diff_common_overlap() {
5057        // Detect any suffix/prefix overlap.
5058        // Null case
5059        assert_eq!(0, DiffMatchPatch::common_overlap(b"", b"abcd"));
5060        assert_eq!(
5061            0,
5062            DiffMatchPatch::common_overlap(&[], &"abcd".chars().collect::<Vec<_>>()[..])
5063        );
5064
5065        // Whole case.
5066        assert_eq!(3, DiffMatchPatch::common_overlap(b"abc", b"abcd"));
5067        assert_eq!(
5068            3,
5069            DiffMatchPatch::common_overlap(
5070                &"abc".chars().collect::<Vec<_>>()[..],
5071                &"abcd".chars().collect::<Vec<_>>()[..]
5072            )
5073        );
5074
5075        // No overlap.
5076        assert_eq!(0, DiffMatchPatch::common_overlap(b"123456", b"abcd"));
5077        assert_eq!(
5078            0,
5079            DiffMatchPatch::common_overlap(
5080                &"123456".chars().collect::<Vec<_>>()[..],
5081                &"abcd".chars().collect::<Vec<_>>()[..]
5082            )
5083        );
5084
5085        // Overlap.
5086        assert_eq!(3, DiffMatchPatch::common_overlap(b"123456xxx", b"xxxabcd"));
5087        assert_eq!(
5088            3,
5089            DiffMatchPatch::common_overlap(
5090                &"123456xxx".chars().collect::<Vec<_>>()[..],
5091                &"xxxabcd".chars().collect::<Vec<_>>()[..]
5092            )
5093        );
5094
5095        // Unicode.
5096        // Some overly clever languages (C#) may treat ligatures as equal to their
5097        // component letters.  E.g. U+FB01 == 'fi'
5098        assert_eq!(
5099            0,
5100            DiffMatchPatch::common_overlap(b"fi", "\u{7FFF}".as_bytes())
5101        );
5102        assert_eq!(
5103            0,
5104            DiffMatchPatch::common_overlap(
5105                &['f', 'i'],
5106                &"\u{7FFF}".chars().collect::<Vec<_>>()[..]
5107            )
5108        );
5109
5110        // Complete overlap
5111        assert_eq!(
5112            6,
5113            DiffMatchPatch::common_overlap("❤️".as_bytes(), "❤️".as_bytes())
5114        );
5115        assert_eq!(
5116            2,
5117            DiffMatchPatch::common_overlap(
5118                &"❤️".chars().collect::<Vec<_>>()[..],
5119                &"❤️".chars().collect::<Vec<_>>()[..]
5120            )
5121        );
5122
5123        // Partial unicode overlap
5124        assert_eq!(
5125            0,
5126            DiffMatchPatch::common_overlap("ஆ".as_bytes(), "இ".as_bytes())
5127        );
5128        assert_eq!(
5129            0,
5130            DiffMatchPatch::common_overlap(
5131                &"ஆ".chars().collect::<Vec<_>>()[..],
5132                &"இ".chars().collect::<Vec<_>>()[..]
5133            )
5134        );
5135
5136        assert_eq!(
5137            3,
5138            DiffMatchPatch::common_overlap("ஆஇ".as_bytes(), "இ".as_bytes())
5139        );
5140        assert_eq!(
5141            1,
5142            DiffMatchPatch::common_overlap(
5143                &"ஆஇ".chars().collect::<Vec<_>>()[..],
5144                &"இ".chars().collect::<Vec<_>>()[..]
5145            )
5146        );
5147    }
5148
5149    #[test]
5150    fn test_diff_half_match() {
5151        let mut dmp = DiffMatchPatch::default();
5152
5153        // No match
5154        assert!(dmp
5155            .half_match("1234567890".as_bytes(), "abcdef".as_bytes())
5156            .is_none());
5157        assert!(dmp
5158            .half_match(
5159                &"1234567890".chars().collect::<Vec<_>>()[..],
5160                &"abcdef".chars().collect::<Vec<_>>()[..]
5161            )
5162            .is_none());
5163        assert!(dmp
5164            .half_match("12345".as_bytes(), "23".as_bytes())
5165            .is_none());
5166        assert!(dmp
5167            .half_match(
5168                &"12345".chars().collect::<Vec<_>>()[..],
5169                &"23".chars().collect::<Vec<_>>()[..]
5170            )
5171            .is_none());
5172
5173        // Single Match.
5174        assert_eq!(
5175            Some(HalfMatch {
5176                prefix_long: "12".as_bytes(),
5177                suffix_long: "90".as_bytes(),
5178                prefix_short: "a".as_bytes(),
5179                suffix_short: "z".as_bytes(),
5180                common: "345678".as_bytes()
5181            }),
5182            dmp.half_match("1234567890".as_bytes(), "a345678z".as_bytes())
5183        );
5184        assert_eq!(
5185            Some(HalfMatch {
5186                prefix_long: &['1', '2'],
5187                suffix_long: &['9', '0'],
5188                prefix_short: &['a'],
5189                suffix_short: &['z'],
5190                common: &['3', '4', '5', '6', '7', '8']
5191            }),
5192            dmp.half_match(
5193                &"1234567890".chars().collect::<Vec<_>>()[..],
5194                &"a345678z".chars().collect::<Vec<_>>()[..]
5195            )
5196        );
5197        assert_eq!(
5198            Some(HalfMatch {
5199                prefix_long: "a".as_bytes(),
5200                suffix_long: "z".as_bytes(),
5201                prefix_short: "12".as_bytes(),
5202                suffix_short: "90".as_bytes(),
5203                common: "345678".as_bytes()
5204            }),
5205            dmp.half_match("a345678z".as_bytes(), "1234567890".as_bytes())
5206        );
5207        assert_eq!(
5208            Some(HalfMatch {
5209                prefix_long: &['a'],
5210                suffix_long: &['z'],
5211                prefix_short: &['1', '2'],
5212                suffix_short: &['9', '0'],
5213                common: &"345678".chars().collect::<Vec<_>>()[..]
5214            }),
5215            dmp.half_match(
5216                &"a345678z".chars().collect::<Vec<_>>()[..],
5217                &"1234567890".chars().collect::<Vec<_>>()[..]
5218            )
5219        );
5220        assert_eq!(
5221            Some(HalfMatch {
5222                prefix_long: "abc".as_bytes(),
5223                suffix_long: "z".as_bytes(),
5224                prefix_short: "1234".as_bytes(),
5225                suffix_short: "0".as_bytes(),
5226                common: "56789".as_bytes()
5227            }),
5228            dmp.half_match("abc56789z".as_bytes(), "1234567890".as_bytes())
5229        );
5230        assert_eq!(
5231            Some(HalfMatch {
5232                prefix_long: &"abc".chars().collect::<Vec<_>>()[..],
5233                suffix_long: &"z".chars().collect::<Vec<_>>()[..],
5234                prefix_short: &"1234".chars().collect::<Vec<_>>()[..],
5235                suffix_short: &"0".chars().collect::<Vec<_>>()[..],
5236                common: &"56789".chars().collect::<Vec<_>>()[..]
5237            }),
5238            dmp.half_match(
5239                &"abc56789z".chars().collect::<Vec<_>>()[..],
5240                &"1234567890".chars().collect::<Vec<_>>()[..]
5241            )
5242        );
5243        assert_eq!(
5244            Some(HalfMatch {
5245                prefix_long: "a".as_bytes(),
5246                suffix_long: "xyz".as_bytes(),
5247                prefix_short: "1".as_bytes(),
5248                suffix_short: "7890".as_bytes(),
5249                common: "23456".as_bytes()
5250            }),
5251            dmp.half_match("a23456xyz".as_bytes(), "1234567890".as_bytes())
5252        );
5253        assert_eq!(
5254            Some(HalfMatch {
5255                prefix_long: &"a".chars().collect::<Vec<_>>()[..],
5256                suffix_long: &"xyz".chars().collect::<Vec<_>>()[..],
5257                prefix_short: &"1".chars().collect::<Vec<_>>()[..],
5258                suffix_short: &"7890".chars().collect::<Vec<_>>()[..],
5259                common: &"23456".chars().collect::<Vec<_>>()[..]
5260            }),
5261            dmp.half_match(
5262                &"a23456xyz".chars().collect::<Vec<_>>()[..],
5263                &"1234567890".chars().collect::<Vec<_>>()[..]
5264            )
5265        );
5266
5267        // Multiple Matches.
5268        assert_eq!(
5269            Some(HalfMatch {
5270                prefix_long: "12123".as_bytes(),
5271                suffix_long: "123121".as_bytes(),
5272                prefix_short: "a".as_bytes(),
5273                suffix_short: "z".as_bytes(),
5274                common: "1234123451234".as_bytes()
5275            }),
5276            dmp.half_match(
5277                "121231234123451234123121".as_bytes(),
5278                "a1234123451234z".as_bytes()
5279            )
5280        );
5281        assert_eq!(
5282            Some(HalfMatch {
5283                prefix_long: &"12123".chars().collect::<Vec<_>>()[..],
5284                suffix_long: &"123121".chars().collect::<Vec<_>>()[..],
5285                prefix_short: &"a".chars().collect::<Vec<_>>()[..],
5286                suffix_short: &"z".chars().collect::<Vec<_>>()[..],
5287                common: &"1234123451234".chars().collect::<Vec<_>>()[..]
5288            }),
5289            dmp.half_match(
5290                &"121231234123451234123121".chars().collect::<Vec<_>>()[..],
5291                &"a1234123451234z".chars().collect::<Vec<_>>()[..]
5292            )
5293        );
5294        assert_eq!(
5295            Some(HalfMatch {
5296                prefix_long: "".as_bytes(),
5297                suffix_long: "-=-=-=-=-=".as_bytes(),
5298                prefix_short: "x".as_bytes(),
5299                suffix_short: "".as_bytes(),
5300                common: "x-=-=-=-=-=-=-=".as_bytes()
5301            }),
5302            dmp.half_match(
5303                "x-=-=-=-=-=-=-=-=-=-=-=-=".as_bytes(),
5304                "xx-=-=-=-=-=-=-=".as_bytes()
5305            )
5306        );
5307        assert_eq!(
5308            Some(HalfMatch {
5309                prefix_long: &"".chars().collect::<Vec<_>>()[..],
5310                suffix_long: &"-=-=-=-=-=".chars().collect::<Vec<_>>()[..],
5311                prefix_short: &"x".chars().collect::<Vec<_>>()[..],
5312                suffix_short: &[],
5313                common: &"x-=-=-=-=-=-=-=".chars().collect::<Vec<_>>()[..]
5314            }),
5315            dmp.half_match(
5316                &"x-=-=-=-=-=-=-=-=-=-=-=-=".chars().collect::<Vec<_>>()[..],
5317                &"xx-=-=-=-=-=-=-=".chars().collect::<Vec<_>>()[..]
5318            )
5319        );
5320        assert_eq!(
5321            Some(HalfMatch {
5322                prefix_long: "-=-=-=-=-=".as_bytes(),
5323                suffix_long: "".as_bytes(),
5324                prefix_short: "".as_bytes(),
5325                suffix_short: "y".as_bytes(),
5326                common: "-=-=-=-=-=-=-=y".as_bytes()
5327            }),
5328            dmp.half_match(
5329                "-=-=-=-=-=-=-=-=-=-=-=-=y".as_bytes(),
5330                "-=-=-=-=-=-=-=yy".as_bytes()
5331            )
5332        );
5333        assert_eq!(
5334            Some(HalfMatch {
5335                prefix_long: &"-=-=-=-=-=".chars().collect::<Vec<_>>()[..],
5336                suffix_long: &[],
5337                prefix_short: &[],
5338                suffix_short: &"y".chars().collect::<Vec<_>>()[..],
5339                common: &"-=-=-=-=-=-=-=y".chars().collect::<Vec<_>>()[..]
5340            }),
5341            dmp.half_match(
5342                &"-=-=-=-=-=-=-=-=-=-=-=-=y".chars().collect::<Vec<_>>()[..],
5343                &"-=-=-=-=-=-=-=yy".chars().collect::<Vec<_>>()[..]
5344            )
5345        );
5346
5347        // Non-optimal halfmatch.
5348        // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy
5349        assert_eq!(
5350            Some(HalfMatch {
5351                prefix_long: "qHillo".as_bytes(),
5352                suffix_long: "w".as_bytes(),
5353                prefix_short: "x".as_bytes(),
5354                suffix_short: "Hulloy".as_bytes(),
5355                common: "HelloHe".as_bytes()
5356            }),
5357            dmp.half_match("qHilloHelloHew".as_bytes(), "xHelloHeHulloy".as_bytes())
5358        );
5359        assert_eq!(
5360            Some(HalfMatch {
5361                prefix_long: &"qHillo".chars().collect::<Vec<_>>()[..],
5362                suffix_long: &"w".chars().collect::<Vec<_>>()[..],
5363                prefix_short: &"x".chars().collect::<Vec<_>>()[..],
5364                suffix_short: &"Hulloy".chars().collect::<Vec<_>>()[..],
5365                common: &"HelloHe".chars().collect::<Vec<_>>()[..]
5366            }),
5367            dmp.half_match(
5368                &"qHilloHelloHew".chars().collect::<Vec<_>>()[..],
5369                &"xHelloHeHulloy".chars().collect::<Vec<_>>()[..]
5370            )
5371        );
5372
5373        // Optimal no halfmatch.
5374        dmp.set_timeout(None);
5375        assert!(dmp
5376            .half_match(
5377                &"qHilloHelloHew".chars().collect::<Vec<_>>()[..],
5378                &"xHelloHeHulloy".chars().collect::<Vec<_>>()[..]
5379            )
5380            .is_none());
5381        assert!(dmp
5382            .half_match(
5383                &"qHilloHelloHew".chars().collect::<Vec<_>>()[..],
5384                &"xHelloHeHulloy".chars().collect::<Vec<_>>()[..]
5385            )
5386            .is_none());
5387    }
5388
5389    #[test]
5390    fn test_patch_obj() {
5391        let p = Patch {
5392            start1: 20,
5393            start2: 21,
5394            length1: 18,
5395            length2: 17,
5396            diffs: vec![
5397                Diff::equal(b"jump"),
5398                Diff::delete(b"s"),
5399                Diff::insert(b"ed"),
5400                Diff::equal(b" over "),
5401                Diff::delete(b"the"),
5402                Diff::insert(b"a"),
5403                Diff::equal(b"\nlaz"),
5404            ],
5405        };
5406        assert_eq!(
5407            "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n  over \n-the\n+a\n %0Alaz\n",
5408            p.to_string()
5409        );
5410
5411        let p = Patch {
5412            start1: 20,
5413            start2: 21,
5414            length1: 18,
5415            length2: 17,
5416            diffs: vec![
5417                Diff::equal(&"jump".chars().collect::<Vec<_>>()[..]),
5418                Diff::delete(&"s".chars().collect::<Vec<_>>()[..]),
5419                Diff::insert(&"ed".chars().collect::<Vec<_>>()[..]),
5420                Diff::equal(&" over ".chars().collect::<Vec<_>>()[..]),
5421                Diff::delete(&"the".chars().collect::<Vec<_>>()[..]),
5422                Diff::insert(&"a".chars().collect::<Vec<_>>()[..]),
5423                Diff::equal(&"\nlaz".chars().collect::<Vec<_>>()[..]),
5424            ],
5425        };
5426        assert_eq!(
5427            "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n  over \n-the\n+a\n %0Alaz\n",
5428            p.to_string()
5429        );
5430    }
5431
5432    #[test]
5433    fn test_patch_add_context() -> Result<(), Error> {
5434        let dmp = DiffMatchPatch::default();
5435
5436        let mut ps = dmp.patch_from_text("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")?;
5437        let p = ps.first_mut().unwrap();
5438        dmp.patch_add_context(p, b"The quick brown fox jumps over the lazy dog.");
5439        assert_eq!(
5440            "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n",
5441            p.to_string()
5442        );
5443
5444        let mut ps = dmp.patch_from_text("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")?;
5445        let p = ps.first_mut().unwrap();
5446        dmp.patch_add_context(
5447            p,
5448            &"The quick brown fox jumps over the lazy dog."
5449                .chars()
5450                .collect::<Vec<_>>()[..],
5451        );
5452        assert_eq!(
5453            "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n",
5454            p.to_string()
5455        );
5456
5457        // Same, but not enough trailing context.
5458        let mut ps = dmp.patch_from_text("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")?;
5459        let p = ps.first_mut().unwrap();
5460        dmp.patch_add_context(p, b"The quick brown fox jumps.");
5461        assert_eq!(
5462            "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n",
5463            p.to_string()
5464        );
5465
5466        let mut ps = dmp.patch_from_text("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")?;
5467        let p = ps.first_mut().unwrap();
5468        dmp.patch_add_context(
5469            p,
5470            &"The quick brown fox jumps.".chars().collect::<Vec<_>>()[..],
5471        );
5472        assert_eq!(
5473            "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n",
5474            p.to_string()
5475        );
5476
5477        // Same, but not enough leading context.
5478        let mut ps = dmp.patch_from_text("@@ -3 +3,2 @@\n-e\n+at\n")?;
5479        let p = ps.first_mut().unwrap();
5480        dmp.patch_add_context(p, b"The quick brown fox jumps.");
5481        assert_eq!("@@ -1,7 +1,8 @@\n Th\n-e\n+at\n  qui\n", p.to_string());
5482
5483        let mut ps = dmp.patch_from_text("@@ -3 +3,2 @@\n-e\n+at\n")?;
5484        let p = ps.first_mut().unwrap();
5485        dmp.patch_add_context(
5486            p,
5487            &"The quick brown fox jumps.".chars().collect::<Vec<_>>()[..],
5488        );
5489        assert_eq!("@@ -1,7 +1,8 @@\n Th\n-e\n+at\n  qui\n", p.to_string());
5490
5491        // Same, but with ambiguity.
5492        let mut ps = dmp.patch_from_text("@@ -3 +3,2 @@\n-e\n+at\n")?;
5493        let p = ps.first_mut().unwrap();
5494        dmp.patch_add_context(
5495            p,
5496            b"The quick brown fox jumps.  The quick brown fox crashes.",
5497        );
5498        assert_eq!(
5499            "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n  quick brown fox jumps. \n",
5500            p.to_string()
5501        );
5502
5503        let mut ps = dmp.patch_from_text("@@ -3 +3,2 @@\n-e\n+at\n")?;
5504        let p = ps.first_mut().unwrap();
5505        dmp.patch_add_context(
5506            p,
5507            &"The quick brown fox jumps.  The quick brown fox crashes."
5508                .chars()
5509                .collect::<Vec<_>>()[..],
5510        );
5511        assert_eq!(
5512            "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n  quick brown fox jumps. \n",
5513            p.to_string()
5514        );
5515
5516        // Unicode?
5517        // Investigate: fails in both JS and Rust
5518        // let mut ps = dmp.patch_from_text("@@ -3 +3,2 @@\n-e\n+at\n")?;
5519        // let p = ps.first_mut().unwrap();
5520        // dmp.patch_add_context(
5521        //     p,
5522        //     &"The quick brown fox jumps🤩.  The quick brown fox crashes.".chars().collect::<Vec<_>>()[..],
5523        // );
5524        // assert_eq!(
5525        //     "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n  quick brown fox jumps🤩\n\n",
5526        //     p.to_string()
5527        // );
5528
5529        Ok(())
5530    }
5531
5532    #[test]
5533    fn test_parse_patch_header() {
5534        assert_eq!(
5535            Some((21, Some(4), 21, Some(10))),
5536            DiffMatchPatch::parse_patch_header("@@ -21,4 +21,10 @@".as_bytes())
5537        );
5538        assert_eq!(
5539            Some((3, None, 3, Some(2))),
5540            DiffMatchPatch::parse_patch_header("@@ -3 +3,2 @@".as_bytes())
5541        );
5542
5543        // Bad cases
5544        assert!(DiffMatchPatch::parse_patch_header("@@  +3,2 @@".as_bytes()).is_none());
5545        assert!(DiffMatchPatch::parse_patch_header("@@ 2046 +3,2 @@".as_bytes()).is_none());
5546
5547        assert_eq!(
5548            Some((21, Some(4), 21, Some(10))),
5549            DiffMatchPatch::parse_patch_header(
5550                &"@@ -21,4 +21,10 @@".chars().collect::<Vec<_>>()[..]
5551            )
5552        );
5553        assert_eq!(
5554            Some((3, None, 3, Some(2))),
5555            DiffMatchPatch::parse_patch_header(&"@@ -3 +3,2 @@".chars().collect::<Vec<_>>()[..])
5556        );
5557
5558        // Bad cases
5559        assert!(
5560            DiffMatchPatch::parse_patch_header(&"@@  +3,2 @@".chars().collect::<Vec<_>>()[..])
5561                .is_none()
5562        );
5563        assert!(DiffMatchPatch::parse_patch_header(
5564            &"@@ 2046 +3,2 @@".chars().collect::<Vec<_>>()[..]
5565        )
5566        .is_none());
5567    }
5568
5569    #[test]
5570    fn test_patch_add_padding() -> Result<(), Error> {
5571        let dmp = DiffMatchPatch::default();
5572        // Both edges full.
5573        let mut patches = dmp.patch_make(PatchInput::Texts::<Efficient>("", "test"))?;
5574        assert_eq!("@@ -0,0 +1,4 @@\n+test\n", dmp.patch_to_text(&patches));
5575
5576        dmp.patch_add_padding(&mut patches);
5577        assert_eq!(
5578            "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n",
5579            dmp.patch_to_text(&patches)
5580        );
5581
5582        let mut patches = dmp.patch_make(PatchInput::Texts::<Compat>("", "test"))?;
5583        assert_eq!("@@ -0,0 +1,4 @@\n+test\n", dmp.patch_to_text(&patches));
5584
5585        dmp.patch_add_padding(&mut patches);
5586        assert_eq!(
5587            "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n",
5588            dmp.patch_to_text(&patches)
5589        );
5590
5591        // Both edges partial.
5592        let mut patches = dmp.patch_make(PatchInput::Texts::<Efficient>("XY", "XtestY"))?;
5593        assert_eq!(
5594            "@@ -1,2 +1,6 @@\n X\n+test\n Y\n",
5595            dmp.patch_to_text(&patches)
5596        );
5597        dmp.patch_add_padding(&mut patches);
5598        assert_eq!(
5599            "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n",
5600            dmp.patch_to_text(&patches)
5601        );
5602
5603        let mut patches = dmp.patch_make(PatchInput::Texts::<Compat>("XY", "XtestY"))?;
5604        assert_eq!(
5605            "@@ -1,2 +1,6 @@\n X\n+test\n Y\n",
5606            dmp.patch_to_text(&patches)
5607        );
5608        dmp.patch_add_padding(&mut patches);
5609        assert_eq!(
5610            "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n",
5611            dmp.patch_to_text(&patches)
5612        );
5613
5614        // Both edges none.
5615        let mut patches =
5616            dmp.patch_make(PatchInput::Texts::<Efficient>("XXXXYYYY", "XXXXtestYYYY"))?;
5617        assert_eq!(
5618            "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n",
5619            dmp.patch_to_text(&patches)
5620        );
5621        dmp.patch_add_padding(&mut patches);
5622        assert_eq!(
5623            percent_encoding::percent_decode(b"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n")
5624                .decode_utf8()
5625                .unwrap(),
5626            dmp.patch_to_text(&patches)
5627        );
5628
5629        let mut patches =
5630            dmp.patch_make(PatchInput::Texts::<Compat>("XXXXYYYY", "XXXXtestYYYY"))?;
5631        assert_eq!(
5632            "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n",
5633            dmp.patch_to_text(&patches)
5634        );
5635        dmp.patch_add_padding(&mut patches);
5636        assert_eq!(
5637            percent_encoding::percent_decode(b"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n")
5638                .decode_utf8()
5639                .unwrap(),
5640            dmp.patch_to_text(&patches)
5641        );
5642
5643        // Unicode
5644        let mut patches =
5645            dmp.patch_make(PatchInput::Texts::<Efficient>("XXXXYYYY", "XXXX🤩YYYY"))?;
5646
5647        dmp.patch_add_padding(&mut patches);
5648        assert_eq!(
5649            "@@ -5,8 +5,12 @@\n XXXX\n+🤩\n YYYY\n",
5650            percent_encoding::percent_decode(dmp.patch_to_text(&patches).as_bytes())
5651                .decode_utf8()
5652                .unwrap()
5653        );
5654
5655        let mut patches = dmp.patch_make(PatchInput::Texts::<Compat>("XXXXYYYY", "XXXX🤩YYYY"))?;
5656
5657        dmp.patch_add_padding(&mut patches);
5658        assert_eq!(
5659            "@@ -5,8 +5,9 @@\n XXXX\n+🤩\n YYYY\n",
5660            percent_encoding::percent_decode(dmp.patch_to_text(&patches).as_bytes())
5661                .decode_utf8()
5662                .unwrap()
5663        );
5664
5665        Ok(())
5666    }
5667
5668    #[test]
5669    fn test_patch_split_max() -> Result<(), Error> {
5670        let dmp = DiffMatchPatch::default();
5671
5672        // Assumes that dmp.Match_MaxBits is 32.
5673        let mut patches = dmp.patch_make(PatchInput::Texts::<Efficient>(
5674            "abcdefghijklmnopqrstuvwxyz01234567890",
5675            "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0",
5676        ))?;
5677        dmp.split_max(&mut patches);
5678        assert_eq!(
5679            "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n",
5680            dmp.patch_to_text(&patches)
5681        );
5682
5683        let mut patches = dmp.patch_make(PatchInput::Texts::<Compat>(
5684            "abcdefghijklmnopqrstuvwxyz01234567890",
5685            "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0",
5686        ))?;
5687        dmp.split_max(&mut patches);
5688        assert_eq!(
5689            "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n",
5690            dmp.patch_to_text(&patches)
5691        );
5692
5693        let mut patches = dmp.patch_make(PatchInput::Texts::<Efficient>(
5694            "abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz",
5695            "abcdefuvwxyz",
5696        ))?;
5697        let p2t = dmp.patch_to_text(&patches);
5698        dmp.split_max(&mut patches);
5699        assert_eq!(p2t, dmp.patch_to_text(&patches));
5700
5701        let mut patches = dmp.patch_make(PatchInput::Texts::<Compat>(
5702            "abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz",
5703            "abcdefuvwxyz",
5704        ))?;
5705
5706        let p2t = dmp.patch_to_text(&patches);
5707        dmp.split_max(&mut patches);
5708        assert_eq!(p2t, dmp.patch_to_text(&patches));
5709
5710        let mut patches = dmp.patch_make(PatchInput::Texts::<Efficient>(
5711            "1234567890123456789012345678901234567890123456789012345678901234567890",
5712            "abc",
5713        ))?;
5714        dmp.split_max(&mut patches);
5715        assert_eq!(
5716            "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n",
5717            dmp.patch_to_text(&patches)
5718        );
5719
5720        let p2t = dmp.patch_to_text(&patches);
5721        dmp.split_max(&mut patches);
5722        assert_eq!(p2t, dmp.patch_to_text(&patches));
5723
5724        let mut patches = dmp.patch_make(PatchInput::Texts::<Compat>(
5725            "1234567890123456789012345678901234567890123456789012345678901234567890",
5726            "abc",
5727        ))?;
5728        dmp.split_max(&mut patches);
5729        assert_eq!(
5730            "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n",
5731            dmp.patch_to_text(&patches)
5732        );
5733
5734        let mut patches = dmp.patch_make(PatchInput::Texts::<Efficient>(
5735            "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1",
5736            "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1",
5737        ))?;
5738        dmp.split_max(&mut patches);
5739        assert_eq!(
5740            "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n  , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n  , t : 1 abcdef\n",
5741            dmp.patch_to_text(&patches)
5742        );
5743
5744        let mut patches = dmp.patch_make(PatchInput::Texts::<Compat>(
5745            "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1",
5746            "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1",
5747        ))?;
5748        dmp.split_max(&mut patches);
5749        assert_eq!(
5750            "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n  , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n  , t : 1 abcdef\n",
5751            dmp.patch_to_text(&patches)
5752        );
5753
5754        Ok(())
5755    }
5756
5757    #[test]
5758    fn test_match_alphabet() {
5759        // Initialise the bitmasks for Bitap.
5760        // Unique.
5761        assert_eq!(
5762            HashMap::from([(b'a', 4), (b'b', 2), (b'c', 1)]),
5763            DiffMatchPatch::match_alphabet(b"abc")
5764        );
5765        assert_eq!(
5766            HashMap::from([('a', 4), ('b', 2), ('c', 1)]),
5767            DiffMatchPatch::match_alphabet(&['a', 'b', 'c'])
5768        );
5769
5770        // Duplicates.
5771        assert_eq!(
5772            HashMap::from([('a', 37), ('b', 18), ('c', 8)]),
5773            DiffMatchPatch::match_alphabet(&"abcaba".chars().collect::<Vec<_>>()[..])
5774        )
5775    }
5776
5777    #[test]
5778    fn test_match_bitap() {
5779        // Bitap algorithm.
5780        let mut dmp = DiffMatchPatch {
5781            match_distance: 100,
5782            ..Default::default()
5783        };
5784
5785        // Exact matches.
5786        assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 5));
5787        assert_eq!(Some(5), dmp.match_bitap(b"abcdefghijk", b"fgh", 0));
5788
5789        // Fuzzy matches.
5790        assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxhi", 0));
5791        assert_eq!(Some(2), dmp.match_bitap(b"abcdefghijk", b"cdefxyhijk", 5));
5792        assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"bxy", 1));
5793
5794        // Overflow.
5795        assert_eq!(Some(2), dmp.match_bitap(b"123456789xx0", b"3456789x0", 2));
5796
5797        // Threshold test.
5798        dmp.match_threshold = 0.4;
5799        assert_eq!(Some(4), dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1));
5800
5801        // dmp.`match_threshold` = 0.3;
5802        dmp.match_threshold = 0.3;
5803        assert_eq!(None, dmp.match_bitap(b"abcdefghijk", b"efxyhi", 1));
5804
5805        dmp.match_threshold = 0.;
5806        assert_eq!(Some(1), dmp.match_bitap(b"abcdefghijk", b"bcdef", 1));
5807
5808        dmp.match_threshold = 0.5;
5809
5810        // Multiple select.
5811        assert_eq!(Some(0), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 3));
5812        assert_eq!(Some(8), dmp.match_bitap(b"abcdexyzabcde", b"abccde", 5));
5813
5814        // Distance test.
5815        dmp.match_distance = 10;
5816        assert_eq!(
5817            None,
5818            dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24)
5819        );
5820        assert_eq!(
5821            Some(0),
5822            dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdxxefg", 1)
5823        );
5824
5825        dmp.match_distance = 1000;
5826        assert_eq!(
5827            Some(0),
5828            dmp.match_bitap(b"abcdefghijklmnopqrstuvwxyz", b"abcdefg", 24)
5829        );
5830    }
5831}