//! Myers diff implementation (`imara_diff/myers.rs`).
1use std::ptr::NonNull;
2
3use crate::intern::Token;
4use crate::myers::middle_snake::{MiddleSnakeSearch, SearchResult};
5use crate::myers::slice::FileSlice;
6use crate::util::sqrt;
7
8mod middle_snake;
9mod preprocess;
10mod slice;
11
12pub fn diff(
13 before: &[Token],
14 after: &[Token],
15 removed: &mut [bool],
16 added: &mut [bool],
17 minimal: bool,
18) {
19 // Preprocess the files by removing parts of the file that are not contained in the other file at all.
20 // This process remaps the token indices so we have to account for that during the rest of the diff
21 let (before, after) = preprocess::preprocess(before, after, removed, added);
22
23 // Perform the actual diff
24 Myers::new(before.tokens.len(), after.tokens.len()).run(
25 FileSlice::new(&before, removed),
26 FileSlice::new(&after, added),
27 minimal,
28 );
29}
30
// Edit cost above which `Myers::split` may take the snake heuristic and
// return a (possibly suboptimal) early split.
const HEUR_MIN_COST: u32 = 256;
// Lower bound for `Myers::max_cost`: the search budget never drops below
// this many edit-cost steps, regardless of input size.
const MAX_COST_MIN: u32 = 256;
33
/// Working state for the Myers diff: a single heap allocation (`kvec`)
/// that backs the k-vectors of the forward and backward middle-snake
/// searches.
pub struct Myers {
    // Owning pointer to the single allocation; freed in `Drop`.
    kvec: NonNull<[i32]>,
    // Points into `kvec` at the base of the forward search's k array
    // (offset `len2 + 1`, see `Myers::new`).
    kforward: NonNull<i32>,
    // Points into `kvec` at the base of the backward search's k array
    // (offset `ndiags + len2 + 1`, see `Myers::new`).
    kbackward: NonNull<i32>,
    // Maximum edit cost explored by `split` before it falls back to a
    // heuristic split (not enforced when a minimal diff is requested).
    max_cost: u32,
}
40
impl Drop for Myers {
    fn drop(&mut self) {
        // SAFETY: `kvec` was produced by `Box::into_raw` in `Myers::new` and
        // is reconstituted only here, exactly once, so the allocation is
        // freed without a double free or leak.
        unsafe { drop(Box::from_raw(self.kvec.as_ptr())) }
    }
}
46
47impl Myers {
48 fn new(len1: usize, len2: usize) -> Self {
49 let ndiags = len1 + len2 + 3;
50 let kvec: *mut [i32] = Box::into_raw(vec![0; 2 * ndiags + 2].into_boxed_slice());
51 let (kforward, kbackward) = unsafe {
52 (
53 NonNull::new_unchecked((kvec as *mut i32).add(len2 + 1)),
54 NonNull::new_unchecked((kvec as *mut i32).add(ndiags + len2 + 1)),
55 )
56 };
57 Self {
58 kvec: unsafe { NonNull::new_unchecked(kvec) },
59 kforward,
60 kbackward,
61 max_cost: sqrt(ndiags).max(MAX_COST_MIN),
62 }
63 }
64
65 fn run<'f>(&mut self, mut file1: FileSlice<'f>, mut file2: FileSlice<'f>, mut need_min: bool) {
66 loop {
67 file1.strip_common(&mut file2);
68
69 if file1.is_empty() {
70 file2.mark_changed();
71 return;
72 } else if file2.is_empty() {
73 file1.mark_changed();
74 return;
75 }
76
77 let split = self.split(&file1, &file2, need_min);
78 self.run(
79 file1.borrow().slice(..split.token_idx1 as u32),
80 file2.borrow().slice(..split.token_idx2 as u32),
81 split.minimized_lo,
82 );
83
84 file1 = file1.slice(split.token_idx1 as u32..);
85 file2 = file2.slice(split.token_idx2 as u32..);
86 need_min = split.minimized_hi
87 }
88 }
89
90 /// See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
91 /// Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
92 /// the forward diagonal starting from (off1, off2) and the backward diagonal
93 /// starting from (lim1, lim2). If the K values on the same diagonal crosses
94 /// returns the furthest point of reach. We might encounter expensive edge cases
95 /// using this algorithm, so a little bit of heuristic is needed to cut the
96 /// search and to return a suboptimal point.
97 fn split(&mut self, file1: &FileSlice, file2: &FileSlice, need_min: bool) -> Split {
98 let mut forward_search =
99 unsafe { MiddleSnakeSearch::<false>::new(self.kforward, file1, file2) };
100 let mut backwards_search =
101 unsafe { MiddleSnakeSearch::<true>::new(self.kbackward, file1, file2) };
102 let is_odd = (file2.len() - file2.len()) & 1 != 0;
103
104 let mut ec = 0;
105
106 while ec <= self.max_cost {
107 let mut found_snake = false;
108 forward_search.next_d();
109 if is_odd {
110 if let Some(res) = forward_search.run(file1, file2, |k, token_idx1| {
111 backwards_search.contains(k)
112 && backwards_search.x_pos_at_diagonal(k) <= token_idx1
113 }) {
114 match res {
115 SearchResult::Snake => found_snake = true,
116 SearchResult::Found {
117 token_idx1,
118 token_idx2,
119 } => {
120 return Split {
121 token_idx1,
122 token_idx2,
123 minimized_lo: true,
124 minimized_hi: true,
125 };
126 }
127 }
128 }
129 } else {
130 found_snake |= forward_search.run(file1, file2, |_, _| false).is_some()
131 };
132
133 backwards_search.next_d();
134 if !is_odd {
135 if let Some(res) = backwards_search.run(file1, file2, |k, token_idx1| {
136 forward_search.contains(k) && token_idx1 <= forward_search.x_pos_at_diagonal(k)
137 }) {
138 match res {
139 SearchResult::Snake => found_snake = true,
140 SearchResult::Found {
141 token_idx1,
142 token_idx2,
143 } => {
144 return Split {
145 token_idx1,
146 token_idx2,
147 minimized_lo: true,
148 minimized_hi: true,
149 };
150 }
151 }
152 }
153 } else {
154 found_snake |= backwards_search.run(file1, file2, |_, _| false).is_some()
155 };
156
157 if need_min {
158 continue;
159 }
160
161 // If the edit cost is above the heuristic trigger and if
162 // we got a good snake, we sample current diagonals to see
163 // if some of them have reached an "interesting" path. Our
164 // measure is a function of the distance from the diagonal
165 // corner (i1 + i2) penalized with the distance from the
166 // mid diagonal itself. If this value is above the current
167 // edit cost times a magic factor (XDL_K_HEUR) we consider
168 // it interesting.
169 if found_snake && ec > HEUR_MIN_COST {
170 if let Some((token_idx1, token_idx2)) = forward_search.found_snake(ec, file1, file2)
171 {
172 return Split {
173 token_idx1,
174 token_idx2,
175 minimized_lo: true,
176 minimized_hi: false,
177 };
178 }
179
180 if let Some((token_idx1, token_idx2)) =
181 backwards_search.found_snake(ec, file1, file2)
182 {
183 return Split {
184 token_idx1,
185 token_idx2,
186 minimized_lo: false,
187 minimized_hi: true,
188 };
189 }
190 }
191
192 ec += 1;
193 }
194
195 let (distance_forward, token_idx1_forward) = forward_search.best_position(file1, file2);
196 let (distance_backwards, token_idx1_backwards) =
197 backwards_search.best_position(file1, file2);
198 if distance_forward > file1.len() as isize + file2.len() as isize - distance_backwards {
199 Split {
200 token_idx1: token_idx1_forward,
201 token_idx2: (distance_forward - token_idx1_forward as isize) as i32,
202 minimized_lo: true,
203 minimized_hi: false,
204 }
205 } else {
206 Split {
207 token_idx1: token_idx1_backwards,
208 token_idx2: (distance_backwards - token_idx1_backwards as isize) as i32,
209 minimized_lo: false,
210 minimized_hi: true,
211 }
212 }
213 }
214}
215
/// Result of `Myers::split`: where to cut the two files and whether each
/// half was produced by an exhaustive (minimal) search or a heuristic one.
#[derive(Debug)]
struct Split {
    // Split position in `file1`; tokens before it form the lower half.
    token_idx1: i32,
    // Split position in `file2`; tokens before it form the lower half.
    token_idx2: i32,
    // True if the lower half came from an exhaustive (minimal) search.
    minimized_lo: bool,
    // True if the upper half came from an exhaustive (minimal) search.
    minimized_hi: bool,
}
223
224// /// the mapping performed during preprocessing makes it impossible to directly call
225// /// the `sink` during the diff itself. Instead `file.changed` is set to true for all
226// /// tokens that are changed
227// /// below these arrays are used to call the sink function
228// fn process_changes_with_sink(
229// before: &PreprocessedFile,
230// after: &PreprocessedFile,
231// sink: &mut impl Sink,
232// ) {
233// let before_end = before.changed.len() as u32 + before.offset;
234// let after_end = after.changed.len() as u32 + after.offset;
235
236// let mut before = before
237// .changed
238// .iter()
239// .enumerate()
240// .map(|(i, removed)| (i as u32 + before.offset, *removed));
241
242// let mut after = after
243// .changed
244// .iter()
245// .enumerate()
246// .map(|(i, inserted)| (i as u32 + after.offset, *inserted));
247
248// let mut next1 = before.next();
249// let mut next2 = after.next();
250
251// while let (Some((before_pos, removed)), Some((after_pos, inserted))) = (next1, next2) {
252// if !(removed | inserted) {
253// next1 = before.next();
254// next2 = after.next();
255// continue;
256// }
257
258// let mut hunk_before = before_pos..before_pos;
259// let mut hunk_after = after_pos..after_pos;
260// if removed {
261// let end = before.find(|(_, changed)| !changed);
262// next1 = end.map(|(end, _)| (end, false));
263// hunk_before.end = end.map_or(before_end, |(end, _)| end);
264// };
265
266// if inserted {
267// let end = after.find(|(_, changed)| !changed);
268// next2 = end.map(|(end, _)| (end, false));
269// hunk_after.end = end.map_or(after_end, |(end, _)| end);
270// }
271
272// sink.process_change(hunk_before, hunk_after);
273// }
274
275// if let Some((before_pos, _)) = next1 {
276// sink.process_change(before_pos..before_end, after_end..after_end);
277// } else if let Some((after_pos, _)) = next2 {
278// sink.process_change(before_end..before_end, after_pos..after_end);
279// }
280// }