mago_codex/differ.rs
1use ahash::HashMap;
2use ahash::HashSet;
3
4use mago_atom::empty_atom;
5use mago_database::file::FileId;
6
7use crate::diff::CodebaseDiff;
8use crate::diff::DeletionRange;
9use crate::diff::DiffHunk;
10use crate::signature::DefSignatureNode;
11use crate::signature::FileSignature;
12
13/// Computes the difference between an old file signature and a new file signature.
14///
15/// This function uses the Myers diff algorithm to efficiently identify changes between
16/// two versions of a file's AST. Unlike Hakana which differentiates between signature
17/// and body changes, we use a single hash approach: any change triggers re-analysis.
18///
19/// # Arguments
20///
21/// * `file_id` - The identifier of the file being compared (used for diff_map)
22/// * `old_signature` - The previous file signature (None if this is a new file)
23/// * `new_signature` - The current file signature
24///
25/// # Returns
26///
27/// A `CodebaseDiff` containing:
28/// - `keep`: Symbols that are unchanged
29/// - `changed`: Symbols that are new, deleted, or modified
30/// - `diff_map`: Position mappings for symbols that moved
31/// - `deletion_ranges_map`: Ranges of deleted code
32pub fn compute_file_diff(
33 file_id: FileId,
34 old_signature: Option<&FileSignature>,
35 new_signature: Option<&FileSignature>,
36) -> CodebaseDiff {
37 let Some(new_signature) = new_signature else {
38 return CodebaseDiff::new();
39 };
40
41 match old_signature {
42 None => {
43 // New file - all symbols are changed
44 mark_all_as_changed(new_signature)
45 }
46 Some(old_sig) => {
47 // Existing file - use Myers diff
48 myers_diff(file_id, old_sig, new_signature)
49 }
50 }
51}
52
53/// Marks all symbols in a file signature as changed (used for new files).
54fn mark_all_as_changed(signature: &FileSignature) -> CodebaseDiff {
55 let mut changed = HashSet::default();
56
57 for node in &signature.ast_nodes {
58 // Add top-level symbol
59 changed.insert((node.name, empty_atom()));
60
61 // Add all children (methods, properties, etc.)
62 for child in &node.children {
63 changed.insert((node.name, child.name));
64 }
65 }
66
67 CodebaseDiff::new().with_changed(changed)
68}
69
70/// Computes a detailed diff between two file signatures using Myers algorithm.
71///
72/// This function implements a two-level Myers diff:
73/// 1. Top-level diff: compares classes, functions, and constants
74/// 2. Member-level diff: compares methods, properties, and class constants within each class
75///
76/// For each symbol, it checks both structural changes (add/remove/keep) and content changes
77/// (hash comparison) to determine if re-analysis is needed.
78///
79/// # Implementation
80///
81/// Adapted from Hakana's implementation, but uses a single fingerprint hash per symbol
82/// instead of separate signature/body hashes. Any hash change triggers re-analysis.
83///
84/// See: https://github.com/slackhq/hakana/blob/35890f99ded7897e4203a896fd1636bda300bad6/src/orchestrator/ast_differ.rs#L10-L13
85///
86/// # Arguments
87///
88/// * `file_id` - File being compared (for tracking position changes)
89/// * `old_signature` - Previous file signature
90/// * `new_signature` - Current file signature
91///
92/// # Returns
93///
94/// A `CodebaseDiff` with:
95/// - `keep`: Unchanged symbols
96/// - `changed`: Added, removed, or modified symbols
97/// - `diff_map`: Position changes for issue mapping
98/// - `deletion_ranges_map`: Deleted code ranges for issue filtering
99fn myers_diff(file_id: FileId, old_signature: &FileSignature, new_signature: &FileSignature) -> CodebaseDiff {
100 let mut keep = HashSet::default();
101 let mut changed = HashSet::default();
102 let mut file_diffs: Vec<DiffHunk> = vec![];
103 let mut deletion_ranges: Vec<DeletionRange> = vec![];
104
105 let Ok((trace, x, y)) = calculate_trace(&old_signature.ast_nodes, &new_signature.ast_nodes) else {
106 tracing::warn!("Myers diff algorithm failed to converge for file {file_id:?}, marking all symbols as changed");
107
108 return mark_all_as_changed(new_signature);
109 };
110
111 let diff = extract_diff(trace, x, y, &old_signature.ast_nodes, &new_signature.ast_nodes);
112
113 for diff_elem in diff {
114 match diff_elem {
115 AstDiffElem::Keep(a, b) => {
116 let mut has_child_change = false;
117
118 let Ok((class_trace, class_x, class_y)) = calculate_trace(&a.children, &b.children) else {
119 changed.insert((a.name, empty_atom()));
120 for child in &a.children {
121 changed.insert((a.name, child.name));
122 }
123
124 for child in &b.children {
125 changed.insert((b.name, child.name));
126 }
127
128 continue;
129 };
130
131 let class_diff = extract_diff(class_trace, class_x, class_y, &a.children, &b.children);
132
133 for class_diff_elem in class_diff {
134 match class_diff_elem {
135 AstDiffElem::Keep(a_child, b_child) => {
136 // Check if the child's hash changed
137 if a_child.hash != b_child.hash {
138 has_child_change = true;
139 changed.insert((a.name, a_child.name));
140 } else {
141 keep.insert((a.name, a_child.name));
142 }
143
144 // Track position changes for issue mapping
145 if b_child.start_offset != a_child.start_offset || b_child.start_line != a_child.start_line
146 {
147 file_diffs.push((
148 a_child.start_offset as usize,
149 a_child.end_offset as usize,
150 b_child.start_offset as isize - a_child.start_offset as isize,
151 b_child.start_line as isize - a_child.start_line as isize,
152 ));
153 }
154 }
155 AstDiffElem::Remove(child_node) => {
156 has_child_change = true;
157 changed.insert((a.name, child_node.name));
158 deletion_ranges.push((child_node.start_offset as usize, child_node.end_offset as usize));
159 }
160 AstDiffElem::Add(child_node) => {
161 has_child_change = true;
162 changed.insert((a.name, child_node.name));
163 }
164 }
165 }
166
167 // Check if parent's hash changed or if any child changed
168 if has_child_change || a.hash != b.hash {
169 changed.insert((a.name, empty_atom()));
170 } else {
171 keep.insert((a.name, empty_atom()));
172
173 // Track position changes for issue mapping
174 if b.start_offset != a.start_offset || b.start_line != a.start_line {
175 file_diffs.push((
176 a.start_offset as usize,
177 a.end_offset as usize,
178 b.start_offset as isize - a.start_offset as isize,
179 b.start_line as isize - a.start_line as isize,
180 ));
181 }
182 }
183 }
184 AstDiffElem::Remove(node) => {
185 changed.insert((node.name, empty_atom()));
186 deletion_ranges.push((node.start_offset as usize, node.end_offset as usize));
187
188 // Also mark all children as removed
189 for child in &node.children {
190 changed.insert((node.name, child.name));
191 }
192 }
193 AstDiffElem::Add(node) => {
194 changed.insert((node.name, empty_atom()));
195
196 // Also mark all children as added
197 for child in &node.children {
198 changed.insert((node.name, child.name));
199 }
200 }
201 }
202 }
203
204 let mut diff = CodebaseDiff::new().with_keep(keep).with_changed(changed);
205
206 if !file_diffs.is_empty() {
207 diff.add_diff_map_entry(file_id, file_diffs);
208 }
209
210 if !deletion_ranges.is_empty() {
211 diff.add_deletion_ranges_entry(file_id, deletion_ranges);
212 }
213
214 diff
215}
216
217/// Type alias for the Myers diff trace structure.
218///
219/// - Vec<HashMap<isize, usize>>: The trace of the search path
220/// - usize: Final position in the old sequence
221/// - usize: Final position in the new sequence
222type DiffTrace = (Vec<HashMap<isize, usize>>, usize, usize);
223
224/// Implements the Myers diff algorithm.
225///
226/// Borrows from:
227/// - https://github.com/nikic/PHP-Parser/blob/master/lib/PhpParser/Internal/Differ.php
228/// - https://github.com/slackhq/hakana/blob/35890f99ded7897e4203a896fd1636bda300bad6/src/orchestrator/ast_differ.rs#L151-L159
229///
230/// Myers, Eugene W. "An O(ND) difference algorithm and its variations."
231/// Algorithmica 1.1 (1986): 251-266.
232///
233/// Returns a Result containing a tuple of (trace, x, y) where:
234/// - trace: A vector of hash maps representing the search path
235/// - x: Final position in the old sequence
236/// - y: Final position in the new sequence
237///
238/// Returns Err if the algorithm fails to converge (theoretically impossible but handled gracefully).
239fn calculate_trace(a_nodes: &[DefSignatureNode], b_nodes: &[DefSignatureNode]) -> Result<DiffTrace, &'static str> {
240 let n = a_nodes.len();
241 let m = b_nodes.len();
242 let max = n + m;
243 let mut v: HashMap<isize, usize> = HashMap::default();
244 v.insert(1, 0);
245 let mut trace = vec![];
246
247 for d in 0..=(max as isize) {
248 trace.push(v.clone());
249 let mut k = -d;
250 while k <= d {
251 let mut x = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { v[&(k + 1)] } else { v[&(k - 1)] + 1 };
252
253 let mut y = (x as isize - k) as usize;
254
255 // Advance along the diagonal while nodes are equal
256 while x < n && y < m && is_equal(&a_nodes[x], &b_nodes[y]) {
257 x += 1;
258 y += 1;
259 }
260
261 v.insert(k, x);
262
263 // Found the end
264 if x >= n && y >= m {
265 return Ok((trace, x, y));
266 }
267
268 k += 2;
269 }
270 }
271
272 Err("Myers diff algorithm failed to converge")
273}
274
275/// Checks if two DefSignatureNode instances can be matched for diffing.
276///
277/// Two nodes are considered matchable if they have the same:
278/// - name
279/// - is_function flag
280///
281/// We don't check hash here because we want to match nodes even if their
282/// content changed. The hash difference will be detected later to determine
283/// if they belong in the "keep" or "changed" set.
284fn is_equal(a_node: &DefSignatureNode, b_node: &DefSignatureNode) -> bool {
285 a_node.name == b_node.name && a_node.is_function == b_node.is_function
286}
287
288/// Extracts the diff elements from the Myers trace.
289///
290/// Walks backward through the trace to build a sequence of Keep, Remove, and Add operations.
291fn extract_diff<'a>(
292 trace: Vec<HashMap<isize, usize>>,
293 mut x: usize,
294 mut y: usize,
295 a_nodes: &'a [DefSignatureNode],
296 b_nodes: &'a [DefSignatureNode],
297) -> Vec<AstDiffElem<'a>> {
298 let mut result = vec![];
299 let mut d = trace.len() as isize - 1;
300
301 while d >= 0 {
302 let v = &trace[d as usize];
303 let k = (x as isize) - (y as isize);
304
305 let prev_k = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { k + 1 } else { k - 1 };
306
307 let prev_x = v[&prev_k];
308 let prev_y = prev_x as isize - prev_k;
309
310 // Walk diagonals (unchanged elements)
311 while x > prev_x && y as isize > prev_y {
312 result.push(AstDiffElem::Keep(&a_nodes[x - 1], &b_nodes[y - 1]));
313 x -= 1;
314 y -= 1;
315 }
316
317 if d == 0 {
318 break;
319 }
320
321 // Deletions
322 while x > prev_x {
323 result.push(AstDiffElem::Remove(&a_nodes[x - 1]));
324 x -= 1;
325 }
326
327 // Additions
328 while y as isize > prev_y {
329 result.push(AstDiffElem::Add(&b_nodes[y - 1]));
330 y -= 1;
331 }
332
333 d -= 1;
334 }
335
336 result.reverse();
337 result
338}
339
340/// Represents a single element in the AST diff.
341#[derive(Debug)]
342enum AstDiffElem<'a> {
343 /// Node unchanged in both old and new versions
344 Keep(&'a DefSignatureNode, &'a DefSignatureNode),
345 /// Node was removed in the new version
346 Remove(&'a DefSignatureNode),
347 /// Node was added in the new version
348 Add(&'a DefSignatureNode),
349}