imara_diff/myers/
middle_snake.rs
1use std::ptr::NonNull;
2
3use crate::myers::slice::FileSlice;
4use crate::util::{common_postfix, common_prefix};
5
6const SNAKE_CNT: u32 = 20;
7const K_HEUR: u32 = 4;
8
9pub struct MiddleSnakeSearch<const BACK: bool> {
10 kvec: NonNull<i32>,
11 kmin: i32,
12 kmax: i32,
13 dmin: i32,
14 dmax: i32,
15}
16
17impl<const BACK: bool> MiddleSnakeSearch<BACK> {
18 pub unsafe fn new(data: NonNull<i32>, file1: &FileSlice, file2: &FileSlice) -> Self {
21 let dmin = -(file2.len() as i32);
22 let dmax = file1.len() as i32;
23 let kmid = if BACK { dmin + dmax } else { 0 };
24 let mut res = Self {
25 kvec: data,
26 kmin: kmid,
27 kmax: kmid,
28 dmin,
29 dmax,
30 };
31 let init = if BACK { file1.len() as i32 } else { 0 };
32 res.write_xpos_at_diagonal(kmid, init);
33 res
34 }
35
36 pub fn contains(&self, k: i32) -> bool {
37 (self.kmin..=self.kmax).contains(&k)
38 }
39
40 pub fn bounds_check(&self, k: i32) {
41 debug_assert!((self.dmin - 1..=self.dmax + 1).contains(&k));
42 }
43
44 fn write_xpos_at_diagonal(&mut self, k: i32, token_idx1: i32) {
45 self.bounds_check(k);
46 unsafe { self.kvec.as_ptr().offset(k as isize).write(token_idx1) }
47 }
48
49 pub fn x_pos_at_diagonal(&self, diagonal: i32) -> i32 {
50 self.bounds_check(diagonal);
51 unsafe { self.kvec.as_ptr().offset(diagonal as isize).read() }
52 }
53
54 pub fn pos_at_diagonal(&self, diagonal: i32) -> (i32, i32) {
55 self.bounds_check(diagonal);
56 let token_idx1 = unsafe { self.kvec.as_ptr().offset(diagonal as isize).read() };
57 let token_idx2 = token_idx1 - diagonal;
58 (token_idx1, token_idx2)
59 }
60
61 pub fn next_d(&mut self) {
69 let init_val = if BACK {
70 i32::MAX
72 } else {
73 i32::MIN
75 };
76
77 if self.kmin > self.dmin {
78 self.kmin -= 1;
79 self.write_xpos_at_diagonal(self.kmin - 1, init_val);
80 } else {
81 self.kmin += 1;
82 }
83
84 if self.kmax < self.dmax {
85 self.kmax += 1;
86 self.write_xpos_at_diagonal(self.kmax + 1, init_val);
87 } else {
88 self.kmax -= 1;
89 }
90 }
91
92 pub fn run(
93 &mut self,
94 file1: &FileSlice,
95 file2: &FileSlice,
96 mut f: impl FnMut(i32, i32) -> bool,
97 ) -> Option<SearchResult> {
98 let mut res = None;
99 let mut k = self.kmax;
100 while k >= self.kmin {
101 let mut token_idx1 = if BACK {
102 if self.x_pos_at_diagonal(k - 1) < self.x_pos_at_diagonal(k + 1) {
103 self.x_pos_at_diagonal(k - 1)
104 } else {
105 self.x_pos_at_diagonal(k + 1) - 1
106 }
107 } else if self.x_pos_at_diagonal(k - 1) >= self.x_pos_at_diagonal(k + 1) {
108 self.x_pos_at_diagonal(k - 1) + 1
109 } else {
110 self.x_pos_at_diagonal(k + 1)
111 };
112
113 let mut token_idx2 = token_idx1 - k;
114 let off = if BACK {
115 if token_idx1 > 0 && token_idx2 > 0 {
116 let tokens1 = &file1.tokens[..token_idx1 as usize];
117 let tokens2 = &file2.tokens[..token_idx2 as usize];
118 common_postfix(tokens1, tokens2)
119 } else {
120 0
121 }
122 } else if token_idx1 < file1.len() as i32 && token_idx2 < file2.len() as i32 {
123 let tokens1 = &file1.tokens[token_idx1 as usize..];
124 let tokens2 = &file2.tokens[token_idx2 as usize..];
125 common_prefix(tokens1, tokens2)
126 } else {
127 0
128 };
129
130 if off > SNAKE_CNT {
131 res = Some(SearchResult::Snake)
132 }
133
134 if BACK {
135 token_idx1 -= off as i32;
136 token_idx2 -= off as i32;
137 } else {
138 token_idx1 += off as i32;
139 token_idx2 += off as i32;
140 }
141 self.write_xpos_at_diagonal(k, token_idx1);
142
143 if f(k, token_idx1) {
144 return Some(SearchResult::Found {
145 token_idx1,
146 token_idx2,
147 });
148 }
149
150 k -= 2;
151 }
152
153 res
154 }
155
156 pub fn best_position(&self, file1: &FileSlice, file2: &FileSlice) -> (isize, i32) {
157 let mut best_distance: isize = if BACK { isize::MAX } else { -1 };
158 let mut best_token_idx1 = if BACK { i32::MAX } else { -1 };
159 let mut k = self.kmax;
160 while k >= self.kmin {
161 let mut token_idx1 = self.x_pos_at_diagonal(k);
162 if BACK {
163 token_idx1 = token_idx1.max(0);
164 } else {
165 token_idx1 = token_idx1.min(file1.len() as i32);
166 }
167 let mut token_idx2 = token_idx1 - k;
168 if BACK {
169 if token_idx2 < 0 {
170 token_idx1 = k;
171 token_idx2 = 0;
172 }
173 } else if token_idx2 > file2.len() as i32 {
174 token_idx1 = file2.len() as i32 + k;
175 token_idx2 = file2.len() as i32;
176 }
177
178 let distance = token_idx1 as isize + token_idx2 as isize;
179 if BACK && distance < best_distance || !BACK && distance > best_distance {
180 best_distance = distance;
181 best_token_idx1 = token_idx1;
182 }
183
184 k -= 2;
185 }
186 (best_distance, best_token_idx1)
187 }
188
189 pub fn found_snake(&self, ec: u32, file1: &FileSlice, file2: &FileSlice) -> Option<(i32, i32)> {
190 let mut best_score = 0;
191 let mut best_token_idx1 = 0;
192 let mut best_token_idx2 = 0;
193 let mut k = self.kmax;
194 while k >= self.kmin {
195 let (token_idx1, token_idx2) = self.pos_at_diagonal(k);
196 if BACK {
197 if !(0..file1.len() as i32 - SNAKE_CNT as i32).contains(&token_idx1) {
198 k -= 2;
199 continue;
200 }
201 if !(0..file2.len() as i32 - SNAKE_CNT as i32).contains(&token_idx2) {
202 k -= 2;
203 continue;
204 }
205 } else {
206 if !(SNAKE_CNT as i32..file1.len() as i32).contains(&token_idx1) {
207 k -= 2;
208 continue;
209 }
210 if !(SNAKE_CNT as i32..file2.len() as i32).contains(&token_idx2) {
211 k -= 2;
212 continue;
213 }
214 }
215
216 let main_diagonal_distance = k.unsigned_abs() as usize;
217 let distance = if BACK {
218 (file1.len() - token_idx1 as u32) + (file2.len() - token_idx2 as u32)
219 } else {
220 token_idx1 as u32 + token_idx2 as u32
221 };
222 let score = distance as usize + main_diagonal_distance;
223 if score > (K_HEUR * ec) as usize && score > best_score {
224 let is_snake = if BACK {
225 file1.tokens[token_idx1 as usize..]
226 .iter()
227 .zip(&file2.tokens[token_idx2 as usize..])
228 .take(SNAKE_CNT as usize)
229 .all(|(token1, token2)| token1 == token2)
230 } else {
231 file1.tokens[..token_idx1 as usize]
232 .iter()
233 .zip(&file2.tokens[..token_idx2 as usize])
234 .rev()
235 .take(SNAKE_CNT as usize)
236 .all(|(token1, token2)| token1 == token2)
237 };
238 if is_snake {
239 best_token_idx1 = token_idx1;
240 best_token_idx2 = token_idx2;
241 best_score = score
242 }
243 }
244
245 k -= 2;
246 }
247
248 (best_score > 0).then_some((best_token_idx1, best_token_idx2))
249 }
250}
251
252pub enum SearchResult {
253 Snake,
254 Found { token_idx1: i32, token_idx2: i32 },
255}