mago_codex/differ.rs
1use ahash::HashMap;
2use ahash::HashSet;
3
4use mago_atom::empty_atom;
5use mago_database::file::FileId;
6
7use crate::diff::CodebaseDiff;
8use crate::diff::DeletionRange;
9use crate::diff::DiffHunk;
10use crate::signature::DefSignatureNode;
11use crate::signature::FileSignature;
12
13/// Computes the difference between an old file signature and a new file signature.
14///
15/// This function uses the Myers diff algorithm to efficiently identify changes between
16/// two versions of a file's AST. Unlike Hakana which differentiates between signature
17/// and body changes, we use a single hash approach: any change triggers re-analysis.
18///
19/// # Arguments
20///
21/// * `file_id` - The identifier of the file being compared (used for diff_map)
22/// * `old_signature` - The previous file signature (None if this is a new file)
23/// * `new_signature` - The current file signature
24///
25/// # Returns
26///
27/// A `CodebaseDiff` containing:
28/// - `keep`: Symbols that are unchanged
29/// - `changed`: Symbols that are new, deleted, or modified
30/// - `diff_map`: Position mappings for symbols that moved
31/// - `deletion_ranges_map`: Ranges of deleted code
32pub fn compute_file_diff(
33 file_id: FileId,
34 old_signature: Option<&FileSignature>,
35 new_signature: Option<&FileSignature>,
36) -> CodebaseDiff {
37 let Some(new_signature) = new_signature else {
38 return CodebaseDiff::new();
39 };
40
41 match old_signature {
42 None => {
43 // New file - all symbols are changed
44 mark_all_as_changed(new_signature)
45 }
46 Some(old_sig) => {
47 // Existing file - use Myers diff
48 myers_diff(file_id, old_sig, new_signature)
49 }
50 }
51}
52
53/// Marks all symbols in a file signature as changed (used for new files).
54fn mark_all_as_changed(signature: &FileSignature) -> CodebaseDiff {
55 let mut changed = HashSet::default();
56
57 for node in &signature.ast_nodes {
58 // Add top-level symbol
59 changed.insert((node.name, empty_atom()));
60
61 // Add all children (methods, properties, etc.)
62 for child in &node.children {
63 changed.insert((node.name, child.name));
64 }
65 }
66
67 CodebaseDiff::new().with_changed(changed)
68}
69
70/// Computes a detailed diff between two file signatures using Myers algorithm.
71///
72/// This function implements a two-level Myers diff:
73/// 1. Top-level diff: compares classes, functions, and constants
74/// 2. Member-level diff: compares methods, properties, and class constants within each class
75///
76/// For each symbol, it checks both structural changes (add/remove/keep) and content changes
77/// (hash comparison) to determine if re-analysis is needed.
78///
79/// # Implementation
80///
81/// Adapted from Hakana's implementation, but uses a single fingerprint hash per symbol
82/// instead of separate signature/body hashes. Any hash change triggers re-analysis.
83///
84/// See: https://github.com/slackhq/hakana/blob/35890f99ded7897e4203a896fd1636bda300bad6/src/orchestrator/ast_differ.rs#L10-L13
85///
86/// # Arguments
87///
88/// * `file_id` - File being compared (for tracking position changes)
89/// * `old_signature` - Previous file signature
90/// * `new_signature` - Current file signature
91///
92/// # Returns
93///
94/// A `CodebaseDiff` with:
95/// - `keep`: Unchanged symbols
96/// - `changed`: Added, removed, or modified symbols
97/// - `diff_map`: Position changes for issue mapping
98/// - `deletion_ranges_map`: Deleted code ranges for issue filtering
99fn myers_diff(file_id: FileId, old_signature: &FileSignature, new_signature: &FileSignature) -> CodebaseDiff {
100 let mut keep = HashSet::default();
101 let mut changed = HashSet::default();
102 let mut file_diffs: Vec<DiffHunk> = vec![];
103 let mut deletion_ranges: Vec<DeletionRange> = vec![];
104
105 let (trace, x, y) = calculate_trace(&old_signature.ast_nodes, &new_signature.ast_nodes);
106
107 let diff = extract_diff(trace, x, y, &old_signature.ast_nodes, &new_signature.ast_nodes);
108
109 for diff_elem in diff {
110 match diff_elem {
111 AstDiffElem::Keep(a, b) => {
112 let mut has_child_change = false;
113
114 // Compare children (methods, properties within classes)
115 let (class_trace, class_x, class_y) = calculate_trace(&a.children, &b.children);
116
117 let class_diff = extract_diff(class_trace, class_x, class_y, &a.children, &b.children);
118
119 for class_diff_elem in class_diff {
120 match class_diff_elem {
121 AstDiffElem::Keep(a_child, b_child) => {
122 // Check if the child's hash changed
123 if a_child.hash != b_child.hash {
124 has_child_change = true;
125 changed.insert((a.name, a_child.name));
126 } else {
127 keep.insert((a.name, a_child.name));
128 }
129
130 // Track position changes for issue mapping
131 if b_child.start_offset != a_child.start_offset || b_child.start_line != a_child.start_line
132 {
133 file_diffs.push((
134 a_child.start_offset as usize,
135 a_child.end_offset as usize,
136 b_child.start_offset as isize - a_child.start_offset as isize,
137 b_child.start_line as isize - a_child.start_line as isize,
138 ));
139 }
140 }
141 AstDiffElem::Remove(child_node) => {
142 has_child_change = true;
143 changed.insert((a.name, child_node.name));
144 deletion_ranges.push((child_node.start_offset as usize, child_node.end_offset as usize));
145 }
146 AstDiffElem::Add(child_node) => {
147 has_child_change = true;
148 changed.insert((a.name, child_node.name));
149 }
150 }
151 }
152
153 // Check if parent's hash changed or if any child changed
154 if has_child_change || a.hash != b.hash {
155 changed.insert((a.name, empty_atom()));
156 } else {
157 keep.insert((a.name, empty_atom()));
158
159 // Track position changes for issue mapping
160 if b.start_offset != a.start_offset || b.start_line != a.start_line {
161 file_diffs.push((
162 a.start_offset as usize,
163 a.end_offset as usize,
164 b.start_offset as isize - a.start_offset as isize,
165 b.start_line as isize - a.start_line as isize,
166 ));
167 }
168 }
169 }
170 AstDiffElem::Remove(node) => {
171 changed.insert((node.name, empty_atom()));
172 deletion_ranges.push((node.start_offset as usize, node.end_offset as usize));
173
174 // Also mark all children as removed
175 for child in &node.children {
176 changed.insert((node.name, child.name));
177 }
178 }
179 AstDiffElem::Add(node) => {
180 changed.insert((node.name, empty_atom()));
181
182 // Also mark all children as added
183 for child in &node.children {
184 changed.insert((node.name, child.name));
185 }
186 }
187 }
188 }
189
190 let mut diff = CodebaseDiff::new().with_keep(keep).with_changed(changed);
191
192 if !file_diffs.is_empty() {
193 diff.add_diff_map_entry(file_id, file_diffs);
194 }
195
196 if !deletion_ranges.is_empty() {
197 diff.add_deletion_ranges_entry(file_id, deletion_ranges);
198 }
199
200 diff
201}
202
203/**
204 * Implements the Myers diff algorithm.
205 *
206 * Borrows from:
207 * - https://github.com/nikic/PHP-Parser/blob/master/lib/PhpParser/Internal/Differ.php
208 * - https://github.com/slackhq/hakana/blob/35890f99ded7897e4203a896fd1636bda300bad6/src/orchestrator/ast_differ.rs#L151-L159
209 *
210 * Myers, Eugene W. "An O(ND) difference algorithm and its variations."
211 * Algorithmica 1.1 (1986): 251-266.
212 *
213 * Returns a tuple of (trace, x, y) where:
214 * - trace: A vector of hash maps representing the search path
215 * - x: Final position in the old sequence
216 * - y: Final position in the new sequence
217 */
218fn calculate_trace(
219 a_nodes: &[DefSignatureNode],
220 b_nodes: &[DefSignatureNode],
221) -> (Vec<HashMap<isize, usize>>, usize, usize) {
222 let n = a_nodes.len();
223 let m = b_nodes.len();
224 let max = n + m;
225 let mut v: HashMap<isize, usize> = HashMap::default();
226 v.insert(1, 0);
227 let mut trace = vec![];
228
229 for d in 0..=(max as isize) {
230 trace.push(v.clone());
231 let mut k = -d;
232 while k <= d {
233 let mut x = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { v[&(k + 1)] } else { v[&(k - 1)] + 1 };
234
235 let mut y = (x as isize - k) as usize;
236
237 // Advance along the diagonal while nodes are equal
238 while x < n && y < m && is_equal(&a_nodes[x], &b_nodes[y]) {
239 x += 1;
240 y += 1;
241 }
242
243 v.insert(k, x);
244
245 // Found the end
246 if x >= n && y >= m {
247 return (trace, x, y);
248 }
249 k += 2;
250 }
251 }
252
253 panic!("Myers diff algorithm failed to converge");
254}
255
256/// Checks if two DefSignatureNode instances can be matched for diffing.
257///
258/// Two nodes are considered matchable if they have the same:
259/// - name
260/// - is_function flag
261///
262/// We don't check hash here because we want to match nodes even if their
263/// content changed. The hash difference will be detected later to determine
264/// if they belong in the "keep" or "changed" set.
265fn is_equal(a_node: &DefSignatureNode, b_node: &DefSignatureNode) -> bool {
266 a_node.name == b_node.name && a_node.is_function == b_node.is_function
267}
268
269/// Extracts the diff elements from the Myers trace.
270///
271/// Walks backward through the trace to build a sequence of Keep, Remove, and Add operations.
272fn extract_diff<'a>(
273 trace: Vec<HashMap<isize, usize>>,
274 mut x: usize,
275 mut y: usize,
276 a_nodes: &'a [DefSignatureNode],
277 b_nodes: &'a [DefSignatureNode],
278) -> Vec<AstDiffElem<'a>> {
279 let mut result = vec![];
280 let mut d = trace.len() as isize - 1;
281
282 while d >= 0 {
283 let v = &trace[d as usize];
284 let k = (x as isize) - (y as isize);
285
286 let prev_k = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { k + 1 } else { k - 1 };
287
288 let prev_x = v[&prev_k];
289 let prev_y = prev_x as isize - prev_k;
290
291 // Walk diagonals (unchanged elements)
292 while x > prev_x && y as isize > prev_y {
293 result.push(AstDiffElem::Keep(&a_nodes[x - 1], &b_nodes[y - 1]));
294 x -= 1;
295 y -= 1;
296 }
297
298 if d == 0 {
299 break;
300 }
301
302 // Deletions
303 while x > prev_x {
304 result.push(AstDiffElem::Remove(&a_nodes[x - 1]));
305 x -= 1;
306 }
307
308 // Additions
309 while y as isize > prev_y {
310 result.push(AstDiffElem::Add(&b_nodes[y - 1]));
311 y -= 1;
312 }
313
314 d -= 1;
315 }
316
317 result.reverse();
318 result
319}
320
321/// Represents a single element in the AST diff.
322#[derive(Debug)]
323enum AstDiffElem<'a> {
324 /// Node unchanged in both old and new versions
325 Keep(&'a DefSignatureNode, &'a DefSignatureNode),
326 /// Node was removed in the new version
327 Remove(&'a DefSignatureNode),
328 /// Node was added in the new version
329 Add(&'a DefSignatureNode),
330}