imara_diff/myers/
middle_snake.rs

1use std::ptr::NonNull;
2
3use crate::myers::slice::FileSlice;
4use crate::util::{common_postfix, common_prefix};
5
6const SNAKE_CNT: u32 = 20;
7const K_HEUR: u32 = 4;
8
9pub struct MiddleSnakeSearch<const BACK: bool> {
10    kvec: NonNull<i32>,
11    kmin: i32,
12    kmax: i32,
13    dmin: i32,
14    dmax: i32,
15}
16
17impl<const BACK: bool> MiddleSnakeSearch<BACK> {
18    /// # Safety
19    /// `data` must be valid for reads and writes between `-file2.len() - 1` and `file1.len() + 1`
20    pub unsafe fn new(data: NonNull<i32>, file1: &FileSlice, file2: &FileSlice) -> Self {
21        let dmin = -(file2.len() as i32);
22        let dmax = file1.len() as i32;
23        let kmid = if BACK { dmin + dmax } else { 0 };
24        let mut res = Self {
25            kvec: data,
26            kmin: kmid,
27            kmax: kmid,
28            dmin,
29            dmax,
30        };
31        let init = if BACK { file1.len() as i32 } else { 0 };
32        res.write_xpos_at_diagonal(kmid, init);
33        res
34    }
35
36    pub fn contains(&self, k: i32) -> bool {
37        (self.kmin..=self.kmax).contains(&k)
38    }
39
40    pub fn bounds_check(&self, k: i32) {
41        debug_assert!((self.dmin - 1..=self.dmax + 1).contains(&k));
42    }
43
44    fn write_xpos_at_diagonal(&mut self, k: i32, token_idx1: i32) {
45        self.bounds_check(k);
46        unsafe { self.kvec.as_ptr().offset(k as isize).write(token_idx1) }
47    }
48
49    pub fn x_pos_at_diagonal(&self, diagonal: i32) -> i32 {
50        self.bounds_check(diagonal);
51        unsafe { self.kvec.as_ptr().offset(diagonal as isize).read() }
52    }
53
54    pub fn pos_at_diagonal(&self, diagonal: i32) -> (i32, i32) {
55        self.bounds_check(diagonal);
56        let token_idx1 = unsafe { self.kvec.as_ptr().offset(diagonal as isize).read() };
57        let token_idx2 = token_idx1 - diagonal;
58        (token_idx1, token_idx2)
59    }
60
61    /// We need to extend the diagonal "domain" by one. If the next
62    /// values exits the box boundaries we need to change it in the
63    /// opposite direction because (max - min) must be a power of
64    /// two.
65    ///
66    /// Also we initialize the external K value to -1 so that we can
67    /// avoid extra conditions in the check inside the core loop.
68    pub fn next_d(&mut self) {
69        let init_val = if BACK {
70            // value should always be larger then bounds
71            i32::MAX
72        } else {
73            // value should always be smaller then bounds
74            i32::MIN
75        };
76
77        if self.kmin > self.dmin {
78            self.kmin -= 1;
79            self.write_xpos_at_diagonal(self.kmin - 1, init_val);
80        } else {
81            self.kmin += 1;
82        }
83
84        if self.kmax < self.dmax {
85            self.kmax += 1;
86            self.write_xpos_at_diagonal(self.kmax + 1, init_val);
87        } else {
88            self.kmax -= 1;
89        }
90    }
91
92    pub fn run(
93        &mut self,
94        file1: &FileSlice,
95        file2: &FileSlice,
96        mut f: impl FnMut(i32, i32) -> bool,
97    ) -> Option<SearchResult> {
98        let mut res = None;
99        let mut k = self.kmax;
100        while k >= self.kmin {
101            let mut token_idx1 = if BACK {
102                if self.x_pos_at_diagonal(k - 1) < self.x_pos_at_diagonal(k + 1) {
103                    self.x_pos_at_diagonal(k - 1)
104                } else {
105                    self.x_pos_at_diagonal(k + 1) - 1
106                }
107            } else if self.x_pos_at_diagonal(k - 1) >= self.x_pos_at_diagonal(k + 1) {
108                self.x_pos_at_diagonal(k - 1) + 1
109            } else {
110                self.x_pos_at_diagonal(k + 1)
111            };
112
113            let mut token_idx2 = token_idx1 - k;
114            let off = if BACK {
115                if token_idx1 > 0 && token_idx2 > 0 {
116                    let tokens1 = &file1.tokens[..token_idx1 as usize];
117                    let tokens2 = &file2.tokens[..token_idx2 as usize];
118                    common_postfix(tokens1, tokens2)
119                } else {
120                    0
121                }
122            } else if token_idx1 < file1.len() as i32 && token_idx2 < file2.len() as i32 {
123                let tokens1 = &file1.tokens[token_idx1 as usize..];
124                let tokens2 = &file2.tokens[token_idx2 as usize..];
125                common_prefix(tokens1, tokens2)
126            } else {
127                0
128            };
129
130            if off > SNAKE_CNT {
131                res = Some(SearchResult::Snake)
132            }
133
134            if BACK {
135                token_idx1 -= off as i32;
136                token_idx2 -= off as i32;
137            } else {
138                token_idx1 += off as i32;
139                token_idx2 += off as i32;
140            }
141            self.write_xpos_at_diagonal(k, token_idx1);
142
143            if f(k, token_idx1) {
144                return Some(SearchResult::Found {
145                    token_idx1,
146                    token_idx2,
147                });
148            }
149
150            k -= 2;
151        }
152
153        res
154    }
155
156    pub fn best_position(&self, file1: &FileSlice, file2: &FileSlice) -> (isize, i32) {
157        let mut best_distance: isize = if BACK { isize::MAX } else { -1 };
158        let mut best_token_idx1 = if BACK { i32::MAX } else { -1 };
159        let mut k = self.kmax;
160        while k >= self.kmin {
161            let mut token_idx1 = self.x_pos_at_diagonal(k);
162            if BACK {
163                token_idx1 = token_idx1.max(0);
164            } else {
165                token_idx1 = token_idx1.min(file1.len() as i32);
166            }
167            let mut token_idx2 = token_idx1 - k;
168            if BACK {
169                if token_idx2 < 0 {
170                    token_idx1 = k;
171                    token_idx2 = 0;
172                }
173            } else if token_idx2 > file2.len() as i32 {
174                token_idx1 = file2.len() as i32 + k;
175                token_idx2 = file2.len() as i32;
176            }
177
178            let distance = token_idx1 as isize + token_idx2 as isize;
179            if BACK && distance < best_distance || !BACK && distance > best_distance {
180                best_distance = distance;
181                best_token_idx1 = token_idx1;
182            }
183
184            k -= 2;
185        }
186        (best_distance, best_token_idx1)
187    }
188
189    pub fn found_snake(&self, ec: u32, file1: &FileSlice, file2: &FileSlice) -> Option<(i32, i32)> {
190        let mut best_score = 0;
191        let mut best_token_idx1 = 0;
192        let mut best_token_idx2 = 0;
193        let mut k = self.kmax;
194        while k >= self.kmin {
195            let (token_idx1, token_idx2) = self.pos_at_diagonal(k);
196            if BACK {
197                if !(0..file1.len() as i32 - SNAKE_CNT as i32).contains(&token_idx1) {
198                    k -= 2;
199                    continue;
200                }
201                if !(0..file2.len() as i32 - SNAKE_CNT as i32).contains(&token_idx2) {
202                    k -= 2;
203                    continue;
204                }
205            } else {
206                if !(SNAKE_CNT as i32..file1.len() as i32).contains(&token_idx1) {
207                    k -= 2;
208                    continue;
209                }
210                if !(SNAKE_CNT as i32..file2.len() as i32).contains(&token_idx2) {
211                    k -= 2;
212                    continue;
213                }
214            }
215
216            let main_diagonal_distance = k.unsigned_abs() as usize;
217            let distance = if BACK {
218                (file1.len() - token_idx1 as u32) + (file2.len() - token_idx2 as u32)
219            } else {
220                token_idx1 as u32 + token_idx2 as u32
221            };
222            let score = distance as usize + main_diagonal_distance;
223            if score > (K_HEUR * ec) as usize && score > best_score {
224                let is_snake = if BACK {
225                    file1.tokens[token_idx1 as usize..]
226                        .iter()
227                        .zip(&file2.tokens[token_idx2 as usize..])
228                        .take(SNAKE_CNT as usize)
229                        .all(|(token1, token2)| token1 == token2)
230                } else {
231                    file1.tokens[..token_idx1 as usize]
232                        .iter()
233                        .zip(&file2.tokens[..token_idx2 as usize])
234                        .rev()
235                        .take(SNAKE_CNT as usize)
236                        .all(|(token1, token2)| token1 == token2)
237                };
238                if is_snake {
239                    best_token_idx1 = token_idx1;
240                    best_token_idx2 = token_idx2;
241                    best_score = score
242                }
243            }
244
245            k -= 2;
246        }
247
248        (best_score > 0).then_some((best_token_idx1, best_token_idx2))
249    }
250}
251
252pub enum SearchResult {
253    Snake,
254    Found { token_idx1: i32, token_idx2: i32 },
255}