imara_diff/
histogram.rs

1use crate::histogram::lcs::find_lcs;
2use crate::histogram::list_pool::{ListHandle, ListPool};
3use crate::intern::Token;
4use crate::myers;
5
6mod lcs;
7mod list_pool;
8
9const MAX_CHAIN_LEN: u32 = 63;
10
11struct Histogram {
12    token_occurrences: Vec<ListHandle>,
13    pool: ListPool,
14}
15
16pub fn diff(
17    before: &[Token],
18    after: &[Token],
19    removed: &mut [bool],
20    added: &mut [bool],
21    num_tokens: u32,
22) {
23    let mut histogram = Histogram::new(num_tokens);
24    histogram.run(before, after, removed, added);
25}
26
27impl Histogram {
28    fn new(num_buckets: u32) -> Histogram {
29        Histogram {
30            token_occurrences: vec![ListHandle::default(); num_buckets as usize],
31            pool: ListPool::new(2 * num_buckets),
32        }
33    }
34
35    fn clear(&mut self) {
36        self.pool.clear();
37    }
38
39    fn token_occurrences(&self, token: Token) -> &[u32] {
40        self.token_occurrences[token.0 as usize].as_slice(&self.pool)
41    }
42
43    fn num_token_occurrences(&self, token: Token) -> u32 {
44        self.token_occurrences[token.0 as usize].len(&self.pool)
45    }
46
47    fn populate(&mut self, file: &[Token]) {
48        for (i, &token) in file.iter().enumerate() {
49            self.token_occurrences[token.0 as usize].push(i as u32, &mut self.pool);
50        }
51    }
52
53    fn run(
54        &mut self,
55        mut before: &[Token],
56        mut after: &[Token],
57        mut removed: &mut [bool],
58        mut added: &mut [bool],
59    ) {
60        loop {
61            if before.is_empty() {
62                added.fill(true);
63                return;
64            } else if after.is_empty() {
65                removed.fill(true);
66                return;
67            }
68
69            self.populate(before);
70            match find_lcs(before, after, self) {
71                // no lcs was found, that means that file1 and file2 two have nothing in common
72                Some(lcs) if lcs.len == 0 => {
73                    added.fill(true);
74                    removed.fill(true);
75                    return;
76                }
77                Some(lcs) => {
78                    self.run(
79                        &before[..lcs.before_start as usize],
80                        &after[..lcs.after_start as usize],
81                        &mut removed[..lcs.before_start as usize],
82                        &mut added[..lcs.after_start as usize],
83                    );
84
85                    // this is equivalent to (tail) recursion but implement as a loop for efficeny reasons
86                    let before_end = lcs.before_start + lcs.len;
87                    before = &before[before_end as usize..];
88                    removed = &mut removed[before_end as usize..];
89
90                    let after_end = lcs.after_start + lcs.len;
91                    after = &after[after_end as usize..];
92                    added = &mut added[after_end as usize..];
93                }
94                None => {
95                    // we are diffing two extremely large repetitive files
96                    // this is a worst case for histogram diff with O(N^2) performance
97                    // fallback to myers to maintain linear time complxity
98                    myers::diff(before, after, removed, added, false);
99                    return;
100                }
101            }
102        }
103    }
104}