imara_diff/
myers.rs

1use std::ptr::NonNull;
2
3use crate::intern::Token;
4use crate::myers::middle_snake::{MiddleSnakeSearch, SearchResult};
5use crate::myers::slice::FileSlice;
6use crate::util::sqrt;
7
8mod middle_snake;
9mod preprocess;
10mod slice;
11
12pub fn diff(
13    before: &[Token],
14    after: &[Token],
15    removed: &mut [bool],
16    added: &mut [bool],
17    minimal: bool,
18) {
19    // Preprocess the files by removing parts of the file that are not contained in the other file at all.
20    // This process remaps the token indices so we have to account for that during the rest of the diff
21    let (before, after) = preprocess::preprocess(before, after, removed, added);
22
23    // Perform the actual diff
24    Myers::new(before.tokens.len(), after.tokens.len()).run(
25        FileSlice::new(&before, removed),
26        FileSlice::new(&after, added),
27        minimal,
28    );
29}
30
31const HEUR_MIN_COST: u32 = 256;
32const MAX_COST_MIN: u32 = 256;
33
34pub struct Myers {
35    kvec: NonNull<[i32]>,
36    kforward: NonNull<i32>,
37    kbackward: NonNull<i32>,
38    max_cost: u32,
39}
40
41impl Drop for Myers {
42    fn drop(&mut self) {
43        unsafe { drop(Box::from_raw(self.kvec.as_ptr())) }
44    }
45}
46
47impl Myers {
48    fn new(len1: usize, len2: usize) -> Self {
49        let ndiags = len1 + len2 + 3;
50        let kvec: *mut [i32] = Box::into_raw(vec![0; 2 * ndiags + 2].into_boxed_slice());
51        let (kforward, kbackward) = unsafe {
52            (
53                NonNull::new_unchecked((kvec as *mut i32).add(len2 + 1)),
54                NonNull::new_unchecked((kvec as *mut i32).add(ndiags + len2 + 1)),
55            )
56        };
57        Self {
58            kvec: unsafe { NonNull::new_unchecked(kvec) },
59            kforward,
60            kbackward,
61            max_cost: sqrt(ndiags).max(MAX_COST_MIN),
62        }
63    }
64
65    fn run<'f>(&mut self, mut file1: FileSlice<'f>, mut file2: FileSlice<'f>, mut need_min: bool) {
66        loop {
67            file1.strip_common(&mut file2);
68
69            if file1.is_empty() {
70                file2.mark_changed();
71                return;
72            } else if file2.is_empty() {
73                file1.mark_changed();
74                return;
75            }
76
77            let split = self.split(&file1, &file2, need_min);
78            self.run(
79                file1.borrow().slice(..split.token_idx1 as u32),
80                file2.borrow().slice(..split.token_idx2 as u32),
81                split.minimized_lo,
82            );
83
84            file1 = file1.slice(split.token_idx1 as u32..);
85            file2 = file2.slice(split.token_idx2 as u32..);
86            need_min = split.minimized_hi
87        }
88    }
89
90    /// See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
91    /// Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
92    /// the forward diagonal starting from (off1, off2) and the backward diagonal
93    /// starting from (lim1, lim2). If the K values on the same diagonal crosses
94    /// returns the furthest point of reach. We might encounter expensive edge cases
95    /// using this algorithm, so a little bit of heuristic is needed to cut the
96    /// search and to return a suboptimal point.
97    fn split(&mut self, file1: &FileSlice, file2: &FileSlice, need_min: bool) -> Split {
98        let mut forward_search =
99            unsafe { MiddleSnakeSearch::<false>::new(self.kforward, file1, file2) };
100        let mut backwards_search =
101            unsafe { MiddleSnakeSearch::<true>::new(self.kbackward, file1, file2) };
102        let is_odd = (file2.len() - file2.len()) & 1 != 0;
103
104        let mut ec = 0;
105
106        while ec <= self.max_cost {
107            let mut found_snake = false;
108            forward_search.next_d();
109            if is_odd {
110                if let Some(res) = forward_search.run(file1, file2, |k, token_idx1| {
111                    backwards_search.contains(k)
112                        && backwards_search.x_pos_at_diagonal(k) <= token_idx1
113                }) {
114                    match res {
115                        SearchResult::Snake => found_snake = true,
116                        SearchResult::Found {
117                            token_idx1,
118                            token_idx2,
119                        } => {
120                            return Split {
121                                token_idx1,
122                                token_idx2,
123                                minimized_lo: true,
124                                minimized_hi: true,
125                            };
126                        }
127                    }
128                }
129            } else {
130                found_snake |= forward_search.run(file1, file2, |_, _| false).is_some()
131            };
132
133            backwards_search.next_d();
134            if !is_odd {
135                if let Some(res) = backwards_search.run(file1, file2, |k, token_idx1| {
136                    forward_search.contains(k) && token_idx1 <= forward_search.x_pos_at_diagonal(k)
137                }) {
138                    match res {
139                        SearchResult::Snake => found_snake = true,
140                        SearchResult::Found {
141                            token_idx1,
142                            token_idx2,
143                        } => {
144                            return Split {
145                                token_idx1,
146                                token_idx2,
147                                minimized_lo: true,
148                                minimized_hi: true,
149                            };
150                        }
151                    }
152                }
153            } else {
154                found_snake |= backwards_search.run(file1, file2, |_, _| false).is_some()
155            };
156
157            if need_min {
158                continue;
159            }
160
161            // If the edit cost is above the heuristic trigger and if
162            // we got a good snake, we sample current diagonals to see
163            // if some of them have reached an "interesting" path. Our
164            // measure is a function of the distance from the diagonal
165            // corner (i1 + i2) penalized with the distance from the
166            // mid diagonal itself. If this value is above the current
167            // edit cost times a magic factor (XDL_K_HEUR) we consider
168            // it interesting.
169            if found_snake && ec > HEUR_MIN_COST {
170                if let Some((token_idx1, token_idx2)) = forward_search.found_snake(ec, file1, file2)
171                {
172                    return Split {
173                        token_idx1,
174                        token_idx2,
175                        minimized_lo: true,
176                        minimized_hi: false,
177                    };
178                }
179
180                if let Some((token_idx1, token_idx2)) =
181                    backwards_search.found_snake(ec, file1, file2)
182                {
183                    return Split {
184                        token_idx1,
185                        token_idx2,
186                        minimized_lo: false,
187                        minimized_hi: true,
188                    };
189                }
190            }
191
192            ec += 1;
193        }
194
195        let (distance_forward, token_idx1_forward) = forward_search.best_position(file1, file2);
196        let (distance_backwards, token_idx1_backwards) =
197            backwards_search.best_position(file1, file2);
198        if distance_forward > file1.len() as isize + file2.len() as isize - distance_backwards {
199            Split {
200                token_idx1: token_idx1_forward,
201                token_idx2: (distance_forward - token_idx1_forward as isize) as i32,
202                minimized_lo: true,
203                minimized_hi: false,
204            }
205        } else {
206            Split {
207                token_idx1: token_idx1_backwards,
208                token_idx2: (distance_backwards - token_idx1_backwards as isize) as i32,
209                minimized_lo: false,
210                minimized_hi: true,
211            }
212        }
213    }
214}
215
216#[derive(Debug)]
217struct Split {
218    token_idx1: i32,
219    token_idx2: i32,
220    minimized_lo: bool,
221    minimized_hi: bool,
222}
223
224// /// the mapping performed during preprocessing makes it impossible to directly call
225// /// the `sink` during the diff itself. Instead `file.changed` is set to true for all
226// /// tokens that are changed
227// /// below these arrays are used to call the sink function
228// fn process_changes_with_sink(
229//     before: &PreprocessedFile,
230//     after: &PreprocessedFile,
231//     sink: &mut impl Sink,
232// ) {
233//     let before_end = before.changed.len() as u32 + before.offset;
234//     let after_end = after.changed.len() as u32 + after.offset;
235
236//     let mut before = before
237//         .changed
238//         .iter()
239//         .enumerate()
240//         .map(|(i, removed)| (i as u32 + before.offset, *removed));
241
242//     let mut after = after
243//         .changed
244//         .iter()
245//         .enumerate()
246//         .map(|(i, inserted)| (i as u32 + after.offset, *inserted));
247
248//     let mut next1 = before.next();
249//     let mut next2 = after.next();
250
251//     while let (Some((before_pos, removed)), Some((after_pos, inserted))) = (next1, next2) {
252//         if !(removed | inserted) {
253//             next1 = before.next();
254//             next2 = after.next();
255//             continue;
256//         }
257
258//         let mut hunk_before = before_pos..before_pos;
259//         let mut hunk_after = after_pos..after_pos;
260//         if removed {
261//             let end = before.find(|(_, changed)| !changed);
262//             next1 = end.map(|(end, _)| (end, false));
263//             hunk_before.end = end.map_or(before_end, |(end, _)| end);
264//         };
265
266//         if inserted {
267//             let end = after.find(|(_, changed)| !changed);
268//             next2 = end.map(|(end, _)| (end, false));
269//             hunk_after.end = end.map_or(after_end, |(end, _)| end);
270//         }
271
272//         sink.process_change(hunk_before, hunk_after);
273//     }
274
275//     if let Some((before_pos, _)) = next1 {
276//         sink.process_change(before_pos..before_end, after_end..after_end);
277//     } else if let Some((after_pos, _)) = next2 {
278//         sink.process_change(before_end..before_end, after_pos..after_end);
279//     }
280// }