mago_codex/differ.rs
1use ahash::HashMap;
2use ahash::HashSet;
3
4use mago_atom::empty_atom;
5use mago_database::file::FileId;
6
7use crate::diff::CodebaseDiff;
8use crate::diff::DeletionRange;
9use crate::diff::DiffHunk;
10use crate::signature::DefSignatureNode;
11use crate::signature::FileSignature;
12
13/// Computes the difference between an old file signature and a new file signature.
14///
15/// This function uses the Myers diff algorithm to efficiently identify changes between
16/// two versions of a file's AST. Unlike Hakana which differentiates between signature
17/// and body changes, we use a single hash approach: any change triggers re-analysis.
18///
19/// # Arguments
20///
21/// * `file_id` - The identifier of the file being compared (used for `diff_map`)
22/// * `old_signature` - The previous file signature (None if this is a new file)
23/// * `new_signature` - The current file signature
24///
25/// # Returns
26///
27/// A `CodebaseDiff` containing:
28/// - `keep`: Symbols that are unchanged
29/// - `changed`: Symbols that are new, deleted, or modified
30/// - `diff_map`: Position mappings for symbols that moved
31/// - `deletion_ranges_map`: Ranges of deleted code
32#[must_use]
33pub fn compute_file_diff(
34 file_id: FileId,
35 old_signature: Option<&FileSignature>,
36 new_signature: Option<&FileSignature>,
37) -> CodebaseDiff {
38 let Some(new_signature) = new_signature else {
39 return CodebaseDiff::new();
40 };
41
42 match old_signature {
43 None => {
44 // New file - all symbols are changed
45 mark_all_as_changed(new_signature)
46 }
47 Some(old_sig) => {
48 // Existing file - use Myers diff
49 myers_diff(file_id, old_sig, new_signature)
50 }
51 }
52}
53
54/// Marks all symbols in a file signature as changed (used for new files).
55fn mark_all_as_changed(signature: &FileSignature) -> CodebaseDiff {
56 let mut changed = HashSet::default();
57
58 for node in &signature.ast_nodes {
59 // Add top-level symbol
60 changed.insert((node.name, empty_atom()));
61
62 // Add all children (methods, properties, etc.)
63 for child in &node.children {
64 changed.insert((node.name, child.name));
65 }
66 }
67
68 CodebaseDiff::new().with_changed(changed)
69}
70
71/// Computes a detailed diff between two file signatures using Myers algorithm.
72///
73/// This function implements a two-level Myers diff:
74/// 1. Top-level diff: compares classes, functions, and constants
75/// 2. Member-level diff: compares methods, properties, and class constants within each class
76///
77/// For each symbol, it checks both structural changes (add/remove/keep) and content changes
78/// (hash comparison) to determine if re-analysis is needed.
79///
80/// # Implementation
81///
82/// Adapted from Hakana's implementation, but uses a single fingerprint hash per symbol
83/// instead of separate signature/body hashes. Any hash change triggers re-analysis.
84///
85/// See: <https://github.com/slackhq/hakana/blob/35890f99ded7897e4203a896fd1636bda300bad6/src/orchestrator/ast_differ.rs#L10-L13>
86///
87/// # Arguments
88///
89/// * `file_id` - File being compared (for tracking position changes)
90/// * `old_signature` - Previous file signature
91/// * `new_signature` - Current file signature
92///
93/// # Returns
94///
95/// A `CodebaseDiff` with:
96/// - `keep`: Unchanged symbols
97/// - `changed`: Added, removed, or modified symbols
98/// - `diff_map`: Position changes for issue mapping
99/// - `deletion_ranges_map`: Deleted code ranges for issue filtering
100fn myers_diff(file_id: FileId, old_signature: &FileSignature, new_signature: &FileSignature) -> CodebaseDiff {
101 let mut keep = HashSet::default();
102 let mut changed = HashSet::default();
103 let mut file_diffs: Vec<DiffHunk> = vec![];
104 let mut deletion_ranges: Vec<DeletionRange> = vec![];
105
106 let Ok((trace, x, y)) = calculate_trace(&old_signature.ast_nodes, &new_signature.ast_nodes) else {
107 tracing::warn!("Myers diff algorithm failed to converge for file {file_id:?}, marking all symbols as changed");
108
109 return mark_all_as_changed(new_signature);
110 };
111
112 let diff = extract_diff(&trace, x, y, &old_signature.ast_nodes, &new_signature.ast_nodes);
113
114 for diff_elem in diff {
115 match diff_elem {
116 AstDiffElem::Keep(a, b) => {
117 let mut has_child_change = false;
118
119 let Ok((class_trace, class_x, class_y)) = calculate_trace(&a.children, &b.children) else {
120 changed.insert((a.name, empty_atom()));
121 for child in &a.children {
122 changed.insert((a.name, child.name));
123 }
124
125 for child in &b.children {
126 changed.insert((b.name, child.name));
127 }
128
129 continue;
130 };
131
132 let class_diff = extract_diff(&class_trace, class_x, class_y, &a.children, &b.children);
133
134 for class_diff_elem in class_diff {
135 match class_diff_elem {
136 AstDiffElem::Keep(a_child, b_child) => {
137 // Check if the child's hash changed
138 if a_child.hash == b_child.hash {
139 keep.insert((a.name, a_child.name));
140 } else {
141 has_child_change = true;
142 changed.insert((a.name, a_child.name));
143 }
144
145 // Track position changes for issue mapping
146 if b_child.start_offset != a_child.start_offset || b_child.start_line != a_child.start_line
147 {
148 file_diffs.push((
149 a_child.start_offset as usize,
150 a_child.end_offset as usize,
151 b_child.start_offset as isize - a_child.start_offset as isize,
152 b_child.start_line as isize - a_child.start_line as isize,
153 ));
154 }
155 }
156 AstDiffElem::Remove(child_node) => {
157 has_child_change = true;
158 changed.insert((a.name, child_node.name));
159 deletion_ranges.push((child_node.start_offset as usize, child_node.end_offset as usize));
160 }
161 AstDiffElem::Add(child_node) => {
162 has_child_change = true;
163 changed.insert((a.name, child_node.name));
164 }
165 }
166 }
167
168 // Check if parent's hash changed or if any child changed
169 if has_child_change || a.hash != b.hash {
170 changed.insert((a.name, empty_atom()));
171 } else {
172 keep.insert((a.name, empty_atom()));
173
174 // Track position changes for issue mapping
175 if b.start_offset != a.start_offset || b.start_line != a.start_line {
176 file_diffs.push((
177 a.start_offset as usize,
178 a.end_offset as usize,
179 b.start_offset as isize - a.start_offset as isize,
180 b.start_line as isize - a.start_line as isize,
181 ));
182 }
183 }
184 }
185 AstDiffElem::Remove(node) => {
186 changed.insert((node.name, empty_atom()));
187 deletion_ranges.push((node.start_offset as usize, node.end_offset as usize));
188
189 // Also mark all children as removed
190 for child in &node.children {
191 changed.insert((node.name, child.name));
192 }
193 }
194 AstDiffElem::Add(node) => {
195 changed.insert((node.name, empty_atom()));
196
197 // Also mark all children as added
198 for child in &node.children {
199 changed.insert((node.name, child.name));
200 }
201 }
202 }
203 }
204
205 let mut diff = CodebaseDiff::new().with_keep(keep).with_changed(changed);
206
207 if !file_diffs.is_empty() {
208 diff.add_diff_map_entry(file_id, file_diffs);
209 }
210
211 if !deletion_ranges.is_empty() {
212 diff.add_deletion_ranges_entry(file_id, deletion_ranges);
213 }
214
215 diff
216}
217
218/// Type alias for the Myers diff trace structure.
219///
220/// - Vec<`HashMap`<isize, usize>>: The trace of the search path
221/// - usize: Final position in the old sequence
222/// - usize: Final position in the new sequence
223type DiffTrace = (Vec<HashMap<isize, usize>>, usize, usize);
224
225/// Implements the Myers diff algorithm.
226///
227/// Borrows from:
228/// - <https://github.com/nikic/PHP-Parser/blob/master/lib/PhpParser/Internal/Differ.php>
229/// - <https://github.com/slackhq/hakana/blob/35890f99ded7897e4203a896fd1636bda300bad6/src/orchestrator/ast_differ.rs#L151-L159>
230///
231/// Myers, Eugene W. "An O(ND) difference algorithm and its variations."
232/// Algorithmica 1.1 (1986): 251-266.
233///
234/// Returns a Result containing a tuple of (trace, x, y) where:
235/// - trace: A vector of hash maps representing the search path
236/// - x: Final position in the old sequence
237/// - y: Final position in the new sequence
238///
239/// Returns Err if the algorithm fails to converge (theoretically impossible but handled gracefully).
240fn calculate_trace(a_nodes: &[DefSignatureNode], b_nodes: &[DefSignatureNode]) -> Result<DiffTrace, &'static str> {
241 let n = a_nodes.len();
242 let m = b_nodes.len();
243 let max = n + m;
244 let mut v: HashMap<isize, usize> = HashMap::default();
245 v.insert(1, 0);
246 let mut trace = vec![];
247
248 for d in 0..=(max as isize) {
249 trace.push(v.clone());
250 let mut k = -d;
251 while k <= d {
252 let mut x = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { v[&(k + 1)] } else { v[&(k - 1)] + 1 };
253
254 let mut y = (x as isize - k) as usize;
255
256 // Advance along the diagonal while nodes are equal
257 while x < n && y < m && is_equal(&a_nodes[x], &b_nodes[y]) {
258 x += 1;
259 y += 1;
260 }
261
262 v.insert(k, x);
263
264 // Found the end
265 if x >= n && y >= m {
266 return Ok((trace, x, y));
267 }
268
269 k += 2;
270 }
271 }
272
273 Err("Myers diff algorithm failed to converge")
274}
275
276/// Checks if two `DefSignatureNode` instances can be matched for diffing.
277///
278/// Two nodes are considered matchable if they have the same:
279/// - name
280/// - `is_function` flag
281///
282/// We don't check hash here because we want to match nodes even if their
283/// content changed. The hash difference will be detected later to determine
284/// if they belong in the "keep" or "changed" set.
285fn is_equal(a_node: &DefSignatureNode, b_node: &DefSignatureNode) -> bool {
286 a_node.name == b_node.name && a_node.is_function == b_node.is_function
287}
288
289/// Extracts the diff elements from the Myers trace.
290///
291/// Walks backward through the trace to build a sequence of Keep, Remove, and Add operations.
292fn extract_diff<'a>(
293 trace: &[HashMap<isize, usize>],
294 mut x: usize,
295 mut y: usize,
296 a_nodes: &'a [DefSignatureNode],
297 b_nodes: &'a [DefSignatureNode],
298) -> Vec<AstDiffElem<'a>> {
299 let mut result = vec![];
300 let mut d = trace.len() as isize - 1;
301
302 while d >= 0 {
303 let v = &trace[d as usize];
304 let k = (x as isize) - (y as isize);
305
306 let prev_k = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { k + 1 } else { k - 1 };
307
308 let prev_x = v[&prev_k];
309 let prev_y = prev_x as isize - prev_k;
310
311 // Walk diagonals (unchanged elements)
312 while x > prev_x && y as isize > prev_y {
313 result.push(AstDiffElem::Keep(&a_nodes[x - 1], &b_nodes[y - 1]));
314 x -= 1;
315 y -= 1;
316 }
317
318 if d == 0 {
319 break;
320 }
321
322 // Deletions
323 while x > prev_x {
324 result.push(AstDiffElem::Remove(&a_nodes[x - 1]));
325 x -= 1;
326 }
327
328 // Additions
329 while y as isize > prev_y {
330 result.push(AstDiffElem::Add(&b_nodes[y - 1]));
331 y -= 1;
332 }
333
334 d -= 1;
335 }
336
337 result.reverse();
338 result
339}
340
341/// Represents a single element in the AST diff.
342#[derive(Debug)]
343enum AstDiffElem<'a> {
344 /// Node unchanged in both old and new versions
345 Keep(&'a DefSignatureNode, &'a DefSignatureNode),
346 /// Node was removed in the new version
347 Remove(&'a DefSignatureNode),
348 /// Node was added in the new version
349 Add(&'a DefSignatureNode),
350}