1use foldhash::HashMap;
2use foldhash::HashSet;
3
4use mago_atom::empty_atom;
5use mago_database::file::FileId;
6
7use crate::diff::CodebaseDiff;
8use crate::diff::DeletionRange;
9use crate::diff::DiffHunk;
10use crate::signature::DefSignatureNode;
11use crate::signature::FileSignature;
12
13#[must_use]
33pub fn compute_file_diff(
34 file_id: FileId,
35 old_signature: Option<&FileSignature>,
36 new_signature: Option<&FileSignature>,
37) -> CodebaseDiff {
38 match (old_signature, new_signature) {
39 (None, None) => CodebaseDiff::new(),
40 (Some(old_sig), None) => mark_all_as_changed(old_sig),
41 (None, Some(new_sig)) => mark_all_as_changed(new_sig),
42 (Some(old_sig), Some(new_sig)) => myers_diff(file_id, old_sig, new_sig),
43 }
44}
45
46fn mark_all_as_changed(signature: &FileSignature) -> CodebaseDiff {
48 let mut changed = HashSet::default();
49
50 for node in &signature.ast_nodes {
51 changed.insert((node.name, empty_atom()));
53
54 for child in &node.children {
56 changed.insert((node.name, child.name));
57 }
58 }
59
60 CodebaseDiff::new().with_changed(changed)
61}
62
63fn myers_diff(file_id: FileId, old_signature: &FileSignature, new_signature: &FileSignature) -> CodebaseDiff {
88 let mut keep = HashSet::default();
89 let mut changed = HashSet::default();
90 let mut file_diffs: Vec<DiffHunk> = vec![];
91 let mut deletion_ranges: Vec<DeletionRange> = vec![];
92
93 let Ok((trace, x, y)) = calculate_trace(&old_signature.ast_nodes, &new_signature.ast_nodes) else {
94 tracing::warn!("Myers diff algorithm failed to converge for file {file_id:?}, marking all symbols as changed");
95
96 return mark_all_as_changed(new_signature);
97 };
98
99 let diff = extract_diff(&trace, x, y, &old_signature.ast_nodes, &new_signature.ast_nodes);
100
101 for diff_elem in diff {
102 match diff_elem {
103 AstDiffElem::Keep(a, b) => {
104 let mut has_child_sig_change = false;
105
106 let Ok((class_trace, class_x, class_y)) = calculate_trace(&a.children, &b.children) else {
107 changed.insert((a.name, empty_atom()));
108 for child in &a.children {
109 changed.insert((a.name, child.name));
110 }
111
112 for child in &b.children {
113 changed.insert((b.name, child.name));
114 }
115
116 continue;
117 };
118
119 let class_diff = extract_diff(&class_trace, class_x, class_y, &a.children, &b.children);
120
121 for class_diff_elem in class_diff {
122 match class_diff_elem {
123 AstDiffElem::Keep(a_child, b_child) => {
124 if a_child.signature_hash == b_child.signature_hash {
127 keep.insert((a.name, a_child.name));
128 } else {
129 has_child_sig_change = true;
130 changed.insert((a.name, a_child.name));
131 }
132
133 if b_child.start_offset != a_child.start_offset || b_child.start_line != a_child.start_line
135 {
136 file_diffs.push((
137 a_child.start_offset as usize,
138 a_child.end_offset as usize,
139 b_child.start_offset as isize - a_child.start_offset as isize,
140 b_child.start_line as isize - a_child.start_line as isize,
141 ));
142 }
143 }
144 AstDiffElem::Remove(child_node) => {
145 has_child_sig_change = true;
146 changed.insert((a.name, child_node.name));
147 deletion_ranges.push((child_node.start_offset as usize, child_node.end_offset as usize));
148 }
149 AstDiffElem::Add(child_node) => {
150 has_child_sig_change = true;
151 changed.insert((a.name, child_node.name));
152 }
153 }
154 }
155
156 if has_child_sig_change || a.signature_hash != b.signature_hash {
157 changed.insert((a.name, empty_atom()));
158 } else {
159 keep.insert((a.name, empty_atom()));
160
161 if b.start_offset != a.start_offset || b.start_line != a.start_line {
163 file_diffs.push((
164 a.start_offset as usize,
165 a.end_offset as usize,
166 b.start_offset as isize - a.start_offset as isize,
167 b.start_line as isize - a.start_line as isize,
168 ));
169 }
170 }
171 }
172 AstDiffElem::Remove(node) => {
173 changed.insert((node.name, empty_atom()));
174 deletion_ranges.push((node.start_offset as usize, node.end_offset as usize));
175
176 for child in &node.children {
178 changed.insert((node.name, child.name));
179 }
180 }
181 AstDiffElem::Add(node) => {
182 changed.insert((node.name, empty_atom()));
183
184 for child in &node.children {
186 changed.insert((node.name, child.name));
187 }
188 }
189 }
190 }
191
192 let mut diff = CodebaseDiff::new().with_keep(keep).with_changed(changed);
193
194 if !file_diffs.is_empty() {
195 diff.add_diff_map_entry(file_id, file_diffs);
196 }
197
198 if !deletion_ranges.is_empty() {
199 diff.add_deletion_ranges_entry(file_id, deletion_ranges);
200 }
201
202 diff
203}
204
205type DiffTrace = (Vec<HashMap<isize, usize>>, usize, usize);
211
212fn calculate_trace(a_nodes: &[DefSignatureNode], b_nodes: &[DefSignatureNode]) -> Result<DiffTrace, &'static str> {
228 let n = a_nodes.len();
229 let m = b_nodes.len();
230 let max = n + m;
231 let mut v: HashMap<isize, usize> = HashMap::default();
232 v.insert(1, 0);
233 let mut trace = vec![];
234
235 for d in 0..=(max as isize) {
236 trace.push(v.clone());
237 let mut k = -d;
238 while k <= d {
239 let mut x = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { v[&(k + 1)] } else { v[&(k - 1)] + 1 };
240
241 let mut y = (x as isize - k) as usize;
242
243 while x < n && y < m && is_equal(&a_nodes[x], &b_nodes[y]) {
245 x += 1;
246 y += 1;
247 }
248
249 v.insert(k, x);
250
251 if x >= n && y >= m {
253 return Ok((trace, x, y));
254 }
255
256 k += 2;
257 }
258 }
259
260 Err("Myers diff algorithm failed to converge")
261}
262
263fn is_equal(a_node: &DefSignatureNode, b_node: &DefSignatureNode) -> bool {
273 a_node.name == b_node.name && a_node.is_function == b_node.is_function
274}
275
276fn extract_diff<'a>(
280 trace: &[HashMap<isize, usize>],
281 mut x: usize,
282 mut y: usize,
283 a_nodes: &'a [DefSignatureNode],
284 b_nodes: &'a [DefSignatureNode],
285) -> Vec<AstDiffElem<'a>> {
286 let mut result = vec![];
287 let mut d = trace.len() as isize - 1;
288
289 while d >= 0 {
290 let v = &trace[d as usize];
291 let k = (x as isize) - (y as isize);
292
293 let prev_k = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { k + 1 } else { k - 1 };
294
295 let prev_x = v[&prev_k];
296 let prev_y = prev_x as isize - prev_k;
297
298 while x > prev_x && y as isize > prev_y {
300 result.push(AstDiffElem::Keep(&a_nodes[x - 1], &b_nodes[y - 1]));
301 x -= 1;
302 y -= 1;
303 }
304
305 if d == 0 {
306 break;
307 }
308
309 while x > prev_x {
311 result.push(AstDiffElem::Remove(&a_nodes[x - 1]));
312 x -= 1;
313 }
314
315 while y as isize > prev_y {
317 result.push(AstDiffElem::Add(&b_nodes[y - 1]));
318 y -= 1;
319 }
320
321 d -= 1;
322 }
323
324 result.reverse();
325 result
326}
327
328#[derive(Debug)]
330enum AstDiffElem<'a> {
331 Keep(&'a DefSignatureNode, &'a DefSignatureNode),
333 Remove(&'a DefSignatureNode),
335 Add(&'a DefSignatureNode),
337}