imara_diff/histogram/lcs.rs

use crate::histogram::{Histogram, MAX_CHAIN_LEN};
use crate::intern::Token;

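/// Searches for the longest common subsequence of `before` and `after`,
/// anchored on tokens whose occurrence chains are recorded in `histogram`.
/// If the two sequences share no token at all, an empty [`Lcs`] is returned.
/// `None` is returned when common tokens were seen but the rarest token of
/// the best subsequence still occurs more than `MAX_CHAIN_LEN` times.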
pub(super) fn find_lcs(
    before: &[Token],
    after: &[Token],
    histogram: &mut Histogram,
) -> Option<Lcs> {
    let mut search = LcsSearch {
        lcs: Lcs::default(),
        min_occurrences: MAX_CHAIN_LEN + 1,
        found_cs: false,
    };
    search.run(before, after, histogram);
    if search.success() {
        Some(search.lcs)
    } else {
        None
    }
}

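/// A common subsequence located by [`find_lcs`]: `len` tokens starting at
/// index `before_start` of `before` and index `after_start` of `after`.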
#[derive(Default, Debug)]
pub struct Lcs {
    pub before_start: u32,
    pub after_start: u32,
    pub len: u32,
}

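/// State of a single LCS search: the best subsequence found so far, the
/// occurrence count of the rarest token it contains, and whether any common
/// token has been seen at all.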
pub struct LcsSearch {
    lcs: Lcs,
    min_occurrences: u32,
    found_cs: bool,
}

impl LcsSearch {
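    /// Scans `after` from left to right. Every token that also occurs in
    /// `before` (non-zero histogram count) and occurs no more often than
    /// `min_occurrences` is used as an anchor for `update_lcs`, which returns
    /// the position at which to resume the scan. The histogram is cleared
    /// once the scan is finished.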
    fn run(&mut self, before: &[Token], after: &[Token], histogram: &mut Histogram) {
        let mut pos = 0;
        while let Some(&token) = after.get(pos as usize) {
            if histogram.num_token_occurrences(token) != 0 {
                self.found_cs = true;
                if histogram.num_token_occurrences(token) <= self.min_occurrences {
                    pos = self.update_lcs(pos, token, histogram, before, after);
                    continue;
                }
            }

            pos += 1;
        }

        histogram.clear();
    }

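    /// The search succeeded if either no common token was found at all, or
    /// the rarest token of the chosen subsequence occurs at most
    /// `MAX_CHAIN_LEN` times.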
    fn success(&mut self) -> bool {
        !self.found_cs || self.min_occurrences <= MAX_CHAIN_LEN
    }

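    /// Grows a common subsequence around the recorded occurrences of `token`
    /// in `before`, each paired with position `after_pos` in `after`. Every
    /// match is extended backwards and forwards while the tokens stay equal,
    /// tracking the occurrence count of the rarest token it contains. The
    /// current best is replaced whenever the new match is longer or anchored
    /// on a rarer token. Returns the position in `after` at which the caller
    /// should resume scanning.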
    fn update_lcs(
        &mut self,
        after_pos: u32,
        token: Token,
        histogram: &Histogram,
        before: &[Token],
        after: &[Token],
    ) -> u32 {
        let mut next_token_idx2 = after_pos + 1;
        let mut occurrences_iter = histogram.token_occurrences(token).iter().copied();
        let mut token_idx1 = occurrences_iter.next().unwrap();

        'occurrences_iter: loop {
            let mut occurrences = histogram.num_token_occurrences(token);
            let mut start1 = token_idx1;
            let mut start2 = after_pos;
            // Extend the match backwards while the preceding tokens are equal,
            // tracking the occurrence count of the rarest token inside it.
            loop {
                if start1 == 0 || start2 == 0 {
                    break;
                }
                let token1 = before.get(start1 as usize - 1);
                let token2 = after.get(start2 as usize - 1);
                if matches!((token1, token2), (Some(token1), Some(token2)) if token1 == token2) {
                    start1 -= 1;
                    start2 -= 1;
                    let new_occurrences = histogram.num_token_occurrences(before[start1 as usize]);
                    occurrences = occurrences.min(new_occurrences);
                } else {
                    break;
                }
            }

            // Extend the match forwards in the same fashion.
            let mut end1 = token_idx1 + 1;
            let mut end2 = after_pos + 1;
            loop {
                let token1 = before.get(end1 as usize);
                let token2 = after.get(end2 as usize);
                if matches!((token1, token2), (Some(token1), Some(token2)) if token1 == token2) {
                    let new_occurrences = histogram.num_token_occurrences(before[end1 as usize]);
                    occurrences = occurrences.min(new_occurrences);
                    end1 += 1;
                    end2 += 1;
                } else {
                    break;
                }
            }

            // The caller resumes its scan of `after` behind the furthest match
            // end seen so far.
            if next_token_idx2 < end2 {
                next_token_idx2 = end2;
            }

            let len = end2 - start2;
            debug_assert_eq!(len, end1 - start1);
            // Keep the new match if it is longer than the current best or
            // anchored on a rarer token.
            if self.lcs.len < len || self.min_occurrences > occurrences {
                self.min_occurrences = occurrences;
                self.lcs = Lcs {
                    before_start: start1,
                    after_start: start2,
                    len,
                };
            }

            // Advance to the next recorded occurrence of `token` past `end2`;
            // once the chain is exhausted, the search for this token is done.
            loop {
                if let Some(next_token_idx) = occurrences_iter.next() {
                    if next_token_idx > end2 {
                        token_idx1 = next_token_idx;
                        break;
                    }
                } else {
                    break 'occurrences_iter;
                }
            }
        }

        next_token_idx2
    }
}