diffy_fork_filenames/
range.rs

1use std::{cmp, fmt::Debug, ops};
2
3// Range type inspired by the Range type used in [dissimilar](https://docs.rs/dissimilar)
4#[derive(Debug)]
5pub struct Range<'a, T: ?Sized> {
6    inner: &'a T,
7    offset: usize,
8    len: usize,
9}
10
11impl<T: ?Sized> Copy for Range<'_, T> {}
12
13impl<T: ?Sized> Clone for Range<'_, T> {
14    fn clone(&self) -> Self {
15        *self
16    }
17}
18
19impl<'a, T: ?Sized> Range<'a, T> {
20    pub fn is_empty(&self) -> bool {
21        self.len == 0
22    }
23
24    pub fn inner(&self) -> &'a T {
25        self.inner
26    }
27
28    pub fn len(&self) -> usize {
29        self.len
30    }
31
32    pub fn offset(&self) -> usize {
33        self.offset
34    }
35
36    #[allow(dead_code)]
37    pub fn range(&self) -> ops::Range<usize> {
38        self.offset..self.offset + self.len
39    }
40
41    pub fn grow_up(&mut self, adjust: usize) {
42        self.offset -= adjust;
43        self.len += adjust;
44    }
45
46    pub fn grow_down(&mut self, adjust: usize) {
47        self.len += adjust;
48    }
49
50    pub fn shrink_front(&mut self, adjust: usize) {
51        self.offset += adjust;
52        self.len -= adjust;
53    }
54
55    pub fn shrink_back(&mut self, adjust: usize) {
56        self.len -= adjust;
57    }
58
59    pub fn shift_up(&mut self, adjust: usize) {
60        self.offset -= adjust
61    }
62
63    pub fn shift_down(&mut self, adjust: usize) {
64        self.offset += adjust;
65    }
66
67    pub fn slice(&self, bounds: impl RangeBounds) -> Self {
68        let (offset, len) = bounds.index(self.len);
69        Range {
70            inner: self.inner,
71            offset: self.offset + offset,
72            len,
73        }
74    }
75
76    pub fn get(&self, bounds: impl RangeBounds) -> Option<Self> {
77        let (offset, len) = bounds.try_index(self.len)?;
78        Some(Range {
79            inner: self.inner,
80            offset: self.offset + offset,
81            len,
82        })
83    }
84
85    pub fn split_at(&self, mid: usize) -> (Self, Self) {
86        (self.slice(..mid), self.slice(mid..))
87    }
88}
89
90impl<'a, T> Range<'a, T>
91where
92    T: ?Sized + SliceLike,
93{
94    pub fn new(inner: &'a T, bounds: impl RangeBounds) -> Self {
95        let (offset, len) = bounds.index(inner.len());
96        Range { inner, offset, len }
97    }
98
99    #[allow(dead_code)]
100    pub fn empty() -> Range<'a, T> {
101        Range {
102            inner: T::empty(),
103            offset: 0,
104            len: 0,
105        }
106    }
107
108    pub fn as_slice(&self) -> &'a T {
109        self.inner.as_slice(self.offset..self.offset + self.len)
110    }
111
112    pub fn common_prefix_len(&self, other: Range<'_, T>) -> usize {
113        self.as_slice().common_prefix_len(other.as_slice())
114    }
115
116    pub fn common_suffix_len(&self, other: Range<'_, T>) -> usize {
117        self.as_slice().common_suffix_len(other.as_slice())
118    }
119
120    #[allow(dead_code)]
121    pub fn common_overlap_len(&self, other: Range<'_, T>) -> usize {
122        self.as_slice().common_overlap_len(other.as_slice())
123    }
124
125    #[allow(dead_code)]
126    pub fn starts_with(&self, prefix: Range<'_, T>) -> bool {
127        self.as_slice().starts_with(prefix.as_slice())
128    }
129
130    #[allow(dead_code)]
131    pub fn ends_with(&self, suffix: Range<'_, T>) -> bool {
132        self.as_slice().ends_with(suffix.as_slice())
133    }
134}
135
136pub trait RangeBounds: Sized + Clone + Debug {
137    // Returns (offset, len).
138    fn try_index(self, len: usize) -> Option<(usize, usize)>;
139
140    fn index(self, len: usize) -> (usize, usize) {
141        match self.clone().try_index(len) {
142            Some(range) => range,
143            None => panic!("index out of range, index={:?}, len={}", self, len),
144        }
145    }
146}
147
148impl RangeBounds for ops::Range<usize> {
149    fn try_index(self, len: usize) -> Option<(usize, usize)> {
150        if self.start <= self.end && self.end <= len {
151            Some((self.start, self.end - self.start))
152        } else {
153            None
154        }
155    }
156}
157
158impl RangeBounds for ops::RangeFrom<usize> {
159    fn try_index(self, len: usize) -> Option<(usize, usize)> {
160        if self.start <= len {
161            Some((self.start, len - self.start))
162        } else {
163            None
164        }
165    }
166}
167
168impl RangeBounds for ops::RangeTo<usize> {
169    fn try_index(self, len: usize) -> Option<(usize, usize)> {
170        if self.end <= len {
171            Some((0, self.end))
172        } else {
173            None
174        }
175    }
176}
177
178impl RangeBounds for ops::RangeFull {
179    fn try_index(self, len: usize) -> Option<(usize, usize)> {
180        Some((0, len))
181    }
182}
183
184pub trait SliceLike: ops::Index<ops::Range<usize>> {
185    fn len(&self) -> usize;
186    fn empty<'a>() -> &'a Self;
187    fn as_slice(&self, range: ops::Range<usize>) -> &Self;
188    fn common_prefix_len(&self, other: &Self) -> usize;
189    fn common_suffix_len(&self, other: &Self) -> usize;
190    fn common_overlap_len(&self, other: &Self) -> usize;
191    fn starts_with(&self, prefix: &Self) -> bool;
192    fn ends_with(&self, suffix: &Self) -> bool;
193}
194
195impl SliceLike for str {
196    fn len(&self) -> usize {
197        self.len()
198    }
199
200    fn empty<'a>() -> &'a str {
201        ""
202    }
203
204    fn as_slice(&self, range: ops::Range<usize>) -> &str {
205        &self[range]
206    }
207
208    fn common_prefix_len(&self, other: &str) -> usize {
209        for ((i, ch1), ch2) in self.char_indices().zip(other.chars()) {
210            if ch1 != ch2 {
211                return i;
212            }
213        }
214        cmp::min(self.len(), other.len())
215    }
216
217    fn common_suffix_len(&self, other: &str) -> usize {
218        for ((i, ch1), ch2) in self.char_indices().rev().zip(other.chars().rev()) {
219            if ch1 != ch2 {
220                return self.len() - i - ch1.len_utf8();
221            }
222        }
223        cmp::min(self.len(), other.len())
224    }
225
226    // returns length of overlap of prefix of `self` with suffic of `other`
227    fn common_overlap_len(&self, mut other: &str) -> usize {
228        let mut this = self;
229        // Eliminate the null case
230        if this.is_empty() || other.is_empty() {
231            return 0;
232        }
233
234        match this.len().cmp(&other.len()) {
235            cmp::Ordering::Greater => {
236                let mut end = other.len();
237                while !this.is_char_boundary(end) {
238                    end -= 1;
239                }
240
241                this = &this[..end];
242            }
243            cmp::Ordering::Less => {
244                let mut start = other.len() - this.len();
245                while !other.is_char_boundary(start) {
246                    start += 1;
247                }
248
249                other = &other[start..]
250            }
251            cmp::Ordering::Equal => {}
252        }
253
254        // Quick check for the worst case.
255        if this == other {
256            return this.len();
257        }
258
259        // Start by looking for a single character match
260        // and increase length until no match is found.
261        // Performance analysis: https://neil.fraser.name/news/2010/11/04/
262        let mut best = 0;
263        let mut length = 0;
264        for (i, c) in other.char_indices().rev() {
265            let pattern = &other[i..];
266            let found = match this.find(pattern) {
267                Some(found) => found,
268                None => return best,
269            };
270
271            length += c.len_utf8();
272            if found == 0 {
273                best = length;
274            }
275        }
276
277        best
278    }
279
280    fn starts_with(&self, prefix: &str) -> bool {
281        self.starts_with(prefix)
282    }
283
284    fn ends_with(&self, suffix: &str) -> bool {
285        self.ends_with(suffix)
286    }
287}
288
289impl<T> SliceLike for [T]
290where
291    T: PartialEq,
292{
293    fn len(&self) -> usize {
294        self.len()
295    }
296
297    fn empty<'a>() -> &'a [T] {
298        &[]
299    }
300
301    fn as_slice(&self, range: ops::Range<usize>) -> &[T] {
302        &self[range]
303    }
304
305    fn common_prefix_len(&self, other: &[T]) -> usize {
306        for (i, (item1, item2)) in self.iter().zip(other.iter()).enumerate() {
307            if item1 != item2 {
308                return i;
309            }
310        }
311        cmp::min(self.len(), other.len())
312    }
313
314    fn common_suffix_len(&self, other: &[T]) -> usize {
315        for (i, (item1, item2)) in self.iter().rev().zip(other.iter().rev()).enumerate() {
316            if item1 != item2 {
317                return i;
318            }
319        }
320        cmp::min(self.len(), other.len())
321    }
322
323    // returns length of overlap of prefix of `self` with suffic of `other`
324    //TODO make a more efficient solution
325    fn common_overlap_len(&self, other: &[T]) -> usize {
326        let mut len = cmp::min(self.len(), other.len());
327
328        while len > 0 {
329            if self[..len] == other[other.len() - len..] {
330                break;
331            }
332            len -= 1;
333        }
334
335        len
336    }
337
338    fn starts_with(&self, prefix: &Self) -> bool {
339        self.starts_with(prefix)
340    }
341
342    fn ends_with(&self, suffix: &Self) -> bool {
343        self.ends_with(suffix)
344    }
345}
346
347#[derive(Debug)]
348pub enum DiffRange<'a, 'b, T: ?Sized> {
349    Equal(Range<'a, T>, Range<'b, T>),
350    Delete(Range<'a, T>),
351    Insert(Range<'b, T>),
352}
353
354impl<T: ?Sized> Copy for DiffRange<'_, '_, T> {}
355
356impl<T: ?Sized> Clone for DiffRange<'_, '_, T> {
357    fn clone(&self) -> Self {
358        *self
359    }
360}
361
362impl<'tmp, 'a: 'tmp, 'b: 'tmp, T> DiffRange<'a, 'b, T>
363where
364    T: ?Sized + SliceLike,
365{
366    pub fn inner(&self) -> Range<'tmp, T> {
367        match *self {
368            DiffRange::Equal(range, _) | DiffRange::Delete(range) | DiffRange::Insert(range) => {
369                range
370            }
371        }
372    }
373
374    pub fn is_empty(&self) -> bool {
375        self.inner().is_empty()
376    }
377
378    pub fn len(&self) -> usize {
379        self.inner().len()
380    }
381
382    pub fn grow_up(&mut self, adjust: usize) {
383        self.for_each(|range| range.grow_up(adjust));
384    }
385
386    pub fn grow_down(&mut self, adjust: usize) {
387        self.for_each(|range| range.grow_down(adjust));
388    }
389
390    pub fn shrink_front(&mut self, adjust: usize) {
391        self.for_each(|range| range.shrink_front(adjust));
392    }
393
394    pub fn shrink_back(&mut self, adjust: usize) {
395        self.for_each(|range| range.shrink_back(adjust));
396    }
397
398    pub fn shift_up(&mut self, adjust: usize) {
399        self.for_each(|range| range.shift_up(adjust));
400    }
401
402    pub fn shift_down(&mut self, adjust: usize) {
403        self.for_each(|range| range.shift_down(adjust));
404    }
405
406    fn for_each(&mut self, f: impl Fn(&mut Range<'_, T>)) {
407        match self {
408            DiffRange::Equal(range1, range2) => {
409                f(range1);
410                f(range2);
411            }
412            DiffRange::Delete(range) => f(range),
413            DiffRange::Insert(range) => f(range),
414        }
415    }
416}
417
418impl<'a, 'b> DiffRange<'a, 'b, [u8]> {
419    pub fn to_str(self, text1: &'a str, text2: &'b str) -> DiffRange<'a, 'b, str> {
420        fn boundary_down(text: &str, pos: usize) -> usize {
421            let mut adjust = 0;
422            while !text.is_char_boundary(pos - adjust) {
423                adjust += 1;
424            }
425            adjust
426        }
427
428        fn boundary_up(text: &str, pos: usize) -> usize {
429            let mut adjust = 0;
430            while !text.is_char_boundary(pos + adjust) {
431                adjust += 1;
432            }
433            adjust
434        }
435
436        match self {
437            DiffRange::Equal(range1, range2) => {
438                debug_assert_eq!(range1.inner().as_ptr(), text1.as_ptr());
439                debug_assert_eq!(range2.inner().as_ptr(), text2.as_ptr());
440                let mut offset1 = range1.offset();
441                let mut len1 = range1.len();
442                let mut offset2 = range2.offset();
443                let mut len2 = range2.len();
444
445                let adjust = boundary_up(text1, offset1);
446                offset1 += adjust;
447                len1 -= adjust;
448                offset2 += adjust;
449                len2 -= adjust;
450                let adjust = boundary_down(text1, offset1 + len1);
451                len1 -= adjust;
452                len2 -= adjust;
453
454                DiffRange::Equal(
455                    Range::new(text1, offset1..offset1 + len1),
456                    Range::new(text2, offset2..offset2 + len2),
457                )
458            }
459            DiffRange::Delete(range) => {
460                debug_assert_eq!(range.inner().as_ptr(), text1.as_ptr());
461                let mut offset = range.offset();
462                let mut len = range.len();
463                let adjust = boundary_down(text1, offset);
464                offset -= adjust;
465                len += adjust;
466                let adjust = boundary_up(text1, offset + len);
467                len += adjust;
468                DiffRange::Delete(Range::new(text1, offset..offset + len))
469            }
470            DiffRange::Insert(range) => {
471                debug_assert_eq!(range.inner().as_ptr(), text2.as_ptr());
472                let mut offset = range.offset();
473                let mut len = range.len();
474                let adjust = boundary_down(text2, offset);
475                offset -= adjust;
476                len += adjust;
477                let adjust = boundary_up(text2, offset + len);
478                len += adjust;
479                DiffRange::Insert(Range::new(text2, offset..offset + len))
480            }
481        }
482    }
483}
484
485#[cfg(test)]
486mod tests {
487    use super::*;
488
489    #[test]
490    fn test_common_prefix() {
491        let text1 = Range::new("abc", ..);
492        let text2 = Range::new("xyz", ..);
493        assert_eq!(0, text1.common_prefix_len(text2), "Null case");
494        let text1 = Range::new(b"abc".as_ref(), ..);
495        let text2 = Range::new(b"xyz".as_ref(), ..);
496        assert_eq!(0, text1.common_prefix_len(text2), "Null case");
497
498        let text1 = Range::new("1234abcdef", ..);
499        let text2 = Range::new("1234xyz", ..);
500        assert_eq!(4, text1.common_prefix_len(text2), "Non-null case");
501        let text1 = Range::new(b"1234abcdef".as_ref(), ..);
502        let text2 = Range::new(b"1234xyz".as_ref(), ..);
503        assert_eq!(4, text1.common_prefix_len(text2), "Non-null case");
504
505        let text1 = Range::new("1234", ..);
506        let text2 = Range::new("1234xyz", ..);
507        assert_eq!(4, text1.common_prefix_len(text2), "Whole case");
508
509        let text1 = Range::new(b"1234".as_ref(), ..);
510        let text2 = Range::new(b"1234xyz".as_ref(), ..);
511        assert_eq!(4, text1.common_prefix_len(text2), "Whole case");
512
513        let snowman = "\u{2603}";
514        let comet = "\u{2604}";
515        let text1 = Range::new(snowman, ..);
516        let text2 = Range::new(comet, ..);
517        assert_eq!(0, text1.common_prefix_len(text2), "Unicode case");
518        let text1 = Range::new(snowman.as_bytes(), ..);
519        let text2 = Range::new(comet.as_bytes(), ..);
520        assert_eq!(2, text1.common_prefix_len(text2), "Unicode case");
521    }
522
523    #[test]
524    fn test_common_suffix() {
525        let text1 = Range::new("abc", ..);
526        let text2 = Range::new("xyz", ..);
527        assert_eq!(0, text1.common_suffix_len(text2), "Null case");
528        let text1 = Range::new(b"abc".as_ref(), ..);
529        let text2 = Range::new(b"xyz".as_ref(), ..);
530        assert_eq!(0, text1.common_suffix_len(text2), "Null case");
531
532        let text1 = Range::new("abcdef1234", ..);
533        let text2 = Range::new("xyz1234", ..);
534        assert_eq!(4, text1.common_suffix_len(text2), "Non-null case");
535        let text1 = Range::new(b"abcdef1234".as_ref(), ..);
536        let text2 = Range::new(b"xyz1234".as_ref(), ..);
537        assert_eq!(4, text1.common_suffix_len(text2), "Non-null case");
538
539        let text1 = Range::new("1234", ..);
540        let text2 = Range::new("xyz1234", ..);
541        assert_eq!(4, text1.common_suffix_len(text2), "Whole case");
542        let text1 = Range::new(b"1234".as_ref(), ..);
543        let text2 = Range::new(b"xyz1234".as_ref(), ..);
544        assert_eq!(4, text1.common_suffix_len(text2), "Whole case");
545    }
546
547    #[test]
548    fn test_common_overlap() {
549        let text1 = Range::empty();
550        let text2 = Range::new("abcd", ..);
551        assert_eq!(0, text1.common_overlap_len(text2), "Null case");
552        let text1 = Range::empty();
553        let text2 = Range::new(b"abcd".as_ref(), ..);
554        assert_eq!(0, text1.common_overlap_len(text2), "Null case");
555
556        let text1 = Range::new("abcd", ..);
557        let text2 = Range::new("abc", ..);
558        assert_eq!(3, text1.common_overlap_len(text2), "Whole case");
559        let text1 = Range::new(b"abcd".as_ref(), ..);
560        let text2 = Range::new(b"abc".as_ref(), ..);
561        assert_eq!(3, text1.common_overlap_len(text2), "Whole case");
562
563        let text1 = Range::new("123456", ..);
564        let text2 = Range::new("abcd", ..);
565        assert_eq!(0, text1.common_overlap_len(text2), "No overlap");
566        let text1 = Range::new(b"123456".as_ref(), ..);
567        let text2 = Range::new(b"abcd".as_ref(), ..);
568        assert_eq!(0, text1.common_overlap_len(text2), "No overlap");
569
570        let text1 = Range::new("xxxabcd", ..);
571        let text2 = Range::new("123456xxx", ..);
572        assert_eq!(3, text1.common_overlap_len(text2), "Overlap");
573        let text1 = Range::new(b"xxxabcd".as_ref(), ..);
574        let text2 = Range::new(b"123456xxx".as_ref(), ..);
575        assert_eq!(3, text1.common_overlap_len(text2), "Overlap");
576
577        // Some overly clever languages (C#) may treat ligatures as equal to their
578        // component letters. E.g. U+FB01 == 'fi'
579        let text1 = Range::new("fi", ..);
580        let text2 = Range::new("\u{fb01}i", ..);
581        assert_eq!(0, text1.common_overlap_len(text2), "Unicode");
582    }
583}