// imara_diff/myers/preprocess.rs
use crate::intern::Token;
use crate::myers::sqrt;

4pub fn preprocess<'a>(
5 before: &[Token],
6 after: &[Token],
7 removed: &'a mut [bool],
8 added: &'a mut [bool],
9) -> (PreprocessedFile, PreprocessedFile) {
10 let (occurances_before, occurances_after) = token_occurrences(before, after);
11 let file1 = PreprocessedFile::new(&occurances_before, before, removed);
12 let file2 = PreprocessedFile::new(&occurances_after, after, added);
13 (file1, file2)
14}
15
16fn token_occurrences(file1: &[Token], file2: &[Token]) -> (Vec<Occurrences>, Vec<Occurrences>) {
17 const MAX_EQLIMIT: u32 = 1024;
18
19 let eqlimit1 = sqrt(file1.len()).min(MAX_EQLIMIT);
21 let eqlimit2 = sqrt(file2.len()).min(MAX_EQLIMIT);
22
23 let mut occurrences1 = Vec::new();
25 for token in file1 {
26 let bucket = token.0 as usize;
27 if bucket >= occurrences1.len() {
28 occurrences1.resize(bucket + 1, 0u32);
29 }
30 occurrences1[bucket] += 1;
31 }
32
33 let mut occurrences2 = Vec::new();
35 let token_occurrences2: Vec<_> = file2
36 .iter()
37 .map(|token| {
38 let bucket = token.0 as usize;
39 if bucket >= occurrences2.len() {
40 occurrences2.resize(bucket + 1, 0);
41 }
42 occurrences2[bucket] += 1;
43 let occurrences1 = *occurrences1.get(bucket).unwrap_or(&0);
44 Occurrences::from_occurrences(occurrences1, eqlimit2)
45 })
46 .collect();
47
48 let token_occurrences1: Vec<_> = file1
49 .iter()
50 .map(|token| {
51 let bucket = token.0 as usize;
52 let occurrences2 = *occurrences2.get(bucket).unwrap_or(&0);
53 Occurrences::from_occurrences(occurrences2, eqlimit1)
54 })
55 .collect();
56
57 (token_occurrences1, token_occurrences2)
58}
59
/// Coarse classification of how often a token occurs in the other file.
#[derive(Clone, Copy, Debug)]
enum Occurrences {
    /// Token never occurs in the other file; it can never match.
    None,
    /// Token occurs in the other file, but fewer than `eqlimit` times.
    Some,
    /// Token occurs at least `eqlimit` times in the other file and is
    /// treated as too common to anchor a match.
    Common,
}
70
71impl Occurrences {
72 pub fn from_occurrences(occurrences: u32, eqlimit: u32) -> Occurrences {
73 if occurrences == 0 {
74 Occurrences::None
75 } else if occurrences >= eqlimit {
76 Occurrences::Common
77 } else {
78 Occurrences::Some
79 }
80 }
81}
82
/// A file reduced to the tokens that can still participate in a match.
#[derive(Debug)]
pub struct PreprocessedFile {
    // Original position (index into the unpruned token list) of each
    // entry of `tokens`.
    pub indices: Vec<u32>,
    // The retained (non-pruned) tokens, in their original order.
    pub tokens: Vec<Token>,
}
88
89impl PreprocessedFile {
90 fn new(
91 token_occurrences: &[Occurrences],
92 tokens: &[Token],
93 changed: &mut [bool],
94 ) -> PreprocessedFile {
95 let (tokens, indices) = prune_unmatched_tokens(tokens, token_occurrences, changed);
96 PreprocessedFile { indices, tokens }
97 }
98}
99
100fn prune_unmatched_tokens(
101 file: &[Token],
102 token_status: &[Occurrences],
103 changed: &mut [bool],
104) -> (Vec<Token>, Vec<u32>) {
105 assert_eq!(token_status.len(), file.len());
106 file.iter()
107 .zip(token_status)
108 .enumerate()
109 .filter_map(|(i, (&token, &status))| {
110 let prune = match status {
111 Occurrences::None => true,
112 Occurrences::Some => false,
113 Occurrences::Common => should_prune_common_line(token_status, i),
114 };
115 if prune {
116 changed[i] = true;
117 None
118 } else {
119 Some((token, i as u32))
120 }
121 })
122 .unzip()
123}
124
125fn should_prune_common_line(token_status: &[Occurrences], pos: usize) -> bool {
127 const WINDOW_SIZE: usize = 100;
128
129 let mut unmatched_before = 0;
130 let mut common_before = 0;
131
132 let start = if pos > WINDOW_SIZE { WINDOW_SIZE } else { 0 };
133 for status in token_status[start..pos].iter().rev() {
134 match status {
135 Occurrences::None => {
136 unmatched_before += 1;
137 }
138 Occurrences::Common => {
139 common_before += 1;
140 }
141 Occurrences::Some => break,
142 }
143 }
144
145 if unmatched_before == 0 {
146 return false;
147 }
148
149 let end = token_status.len().min(pos + WINDOW_SIZE);
150 let mut unmatched_after = 0;
151 let mut common_after = 0;
152 for status in token_status[pos..end].iter() {
153 match status {
154 Occurrences::None => {
155 unmatched_after += 1;
156 }
157 Occurrences::Common => {
158 common_after += 1;
159 }
160 Occurrences::Some => break,
161 }
162 }
163
164 if unmatched_after == 0 {
165 return false;
166 }
167
168 let common = common_before + common_after;
169 let unmatched = unmatched_before + unmatched_after;
170
171 unmatched > 3 * common
172}