mago_codex/
differ.rs

1use ahash::HashMap;
2use ahash::HashSet;
3
4use mago_atom::empty_atom;
5use mago_database::file::FileId;
6
7use crate::diff::CodebaseDiff;
8use crate::diff::DeletionRange;
9use crate::diff::DiffHunk;
10use crate::signature::DefSignatureNode;
11use crate::signature::FileSignature;
12
13/// Computes the difference between an old file signature and a new file signature.
14///
15/// This function uses the Myers diff algorithm to efficiently identify changes between
16/// two versions of a file's AST. Unlike Hakana which differentiates between signature
17/// and body changes, we use a single hash approach: any change triggers re-analysis.
18///
19/// # Arguments
20///
21/// * `file_id` - The identifier of the file being compared (used for diff_map)
22/// * `old_signature` - The previous file signature (None if this is a new file)
23/// * `new_signature` - The current file signature
24///
25/// # Returns
26///
27/// A `CodebaseDiff` containing:
28/// - `keep`: Symbols that are unchanged
29/// - `changed`: Symbols that are new, deleted, or modified
30/// - `diff_map`: Position mappings for symbols that moved
31/// - `deletion_ranges_map`: Ranges of deleted code
32pub fn compute_file_diff(
33    file_id: FileId,
34    old_signature: Option<&FileSignature>,
35    new_signature: Option<&FileSignature>,
36) -> CodebaseDiff {
37    let Some(new_signature) = new_signature else {
38        return CodebaseDiff::new();
39    };
40
41    match old_signature {
42        None => {
43            // New file - all symbols are changed
44            mark_all_as_changed(new_signature)
45        }
46        Some(old_sig) => {
47            // Existing file - use Myers diff
48            myers_diff(file_id, old_sig, new_signature)
49        }
50    }
51}
52
53/// Marks all symbols in a file signature as changed (used for new files).
54fn mark_all_as_changed(signature: &FileSignature) -> CodebaseDiff {
55    let mut changed = HashSet::default();
56
57    for node in &signature.ast_nodes {
58        // Add top-level symbol
59        changed.insert((node.name, empty_atom()));
60
61        // Add all children (methods, properties, etc.)
62        for child in &node.children {
63            changed.insert((node.name, child.name));
64        }
65    }
66
67    CodebaseDiff::new().with_changed(changed)
68}
69
70/// Computes a detailed diff between two file signatures using Myers algorithm.
71///
72/// This function implements a two-level Myers diff:
73/// 1. Top-level diff: compares classes, functions, and constants
74/// 2. Member-level diff: compares methods, properties, and class constants within each class
75///
76/// For each symbol, it checks both structural changes (add/remove/keep) and content changes
77/// (hash comparison) to determine if re-analysis is needed.
78///
79/// # Implementation
80///
81/// Adapted from Hakana's implementation, but uses a single fingerprint hash per symbol
82/// instead of separate signature/body hashes. Any hash change triggers re-analysis.
83///
84/// See: https://github.com/slackhq/hakana/blob/35890f99ded7897e4203a896fd1636bda300bad6/src/orchestrator/ast_differ.rs#L10-L13
85///
86/// # Arguments
87///
88/// * `file_id` - File being compared (for tracking position changes)
89/// * `old_signature` - Previous file signature
90/// * `new_signature` - Current file signature
91///
92/// # Returns
93///
94/// A `CodebaseDiff` with:
95/// - `keep`: Unchanged symbols
96/// - `changed`: Added, removed, or modified symbols
97/// - `diff_map`: Position changes for issue mapping
98/// - `deletion_ranges_map`: Deleted code ranges for issue filtering
99fn myers_diff(file_id: FileId, old_signature: &FileSignature, new_signature: &FileSignature) -> CodebaseDiff {
100    let mut keep = HashSet::default();
101    let mut changed = HashSet::default();
102    let mut file_diffs: Vec<DiffHunk> = vec![];
103    let mut deletion_ranges: Vec<DeletionRange> = vec![];
104
105    let (trace, x, y) = calculate_trace(&old_signature.ast_nodes, &new_signature.ast_nodes);
106
107    let diff = extract_diff(trace, x, y, &old_signature.ast_nodes, &new_signature.ast_nodes);
108
109    for diff_elem in diff {
110        match diff_elem {
111            AstDiffElem::Keep(a, b) => {
112                let mut has_child_change = false;
113
114                // Compare children (methods, properties within classes)
115                let (class_trace, class_x, class_y) = calculate_trace(&a.children, &b.children);
116
117                let class_diff = extract_diff(class_trace, class_x, class_y, &a.children, &b.children);
118
119                for class_diff_elem in class_diff {
120                    match class_diff_elem {
121                        AstDiffElem::Keep(a_child, b_child) => {
122                            // Check if the child's hash changed
123                            if a_child.hash != b_child.hash {
124                                has_child_change = true;
125                                changed.insert((a.name, a_child.name));
126                            } else {
127                                keep.insert((a.name, a_child.name));
128                            }
129
130                            // Track position changes for issue mapping
131                            if b_child.start_offset != a_child.start_offset || b_child.start_line != a_child.start_line
132                            {
133                                file_diffs.push((
134                                    a_child.start_offset as usize,
135                                    a_child.end_offset as usize,
136                                    b_child.start_offset as isize - a_child.start_offset as isize,
137                                    b_child.start_line as isize - a_child.start_line as isize,
138                                ));
139                            }
140                        }
141                        AstDiffElem::Remove(child_node) => {
142                            has_child_change = true;
143                            changed.insert((a.name, child_node.name));
144                            deletion_ranges.push((child_node.start_offset as usize, child_node.end_offset as usize));
145                        }
146                        AstDiffElem::Add(child_node) => {
147                            has_child_change = true;
148                            changed.insert((a.name, child_node.name));
149                        }
150                    }
151                }
152
153                // Check if parent's hash changed or if any child changed
154                if has_child_change || a.hash != b.hash {
155                    changed.insert((a.name, empty_atom()));
156                } else {
157                    keep.insert((a.name, empty_atom()));
158
159                    // Track position changes for issue mapping
160                    if b.start_offset != a.start_offset || b.start_line != a.start_line {
161                        file_diffs.push((
162                            a.start_offset as usize,
163                            a.end_offset as usize,
164                            b.start_offset as isize - a.start_offset as isize,
165                            b.start_line as isize - a.start_line as isize,
166                        ));
167                    }
168                }
169            }
170            AstDiffElem::Remove(node) => {
171                changed.insert((node.name, empty_atom()));
172                deletion_ranges.push((node.start_offset as usize, node.end_offset as usize));
173
174                // Also mark all children as removed
175                for child in &node.children {
176                    changed.insert((node.name, child.name));
177                }
178            }
179            AstDiffElem::Add(node) => {
180                changed.insert((node.name, empty_atom()));
181
182                // Also mark all children as added
183                for child in &node.children {
184                    changed.insert((node.name, child.name));
185                }
186            }
187        }
188    }
189
190    let mut diff = CodebaseDiff::new().with_keep(keep).with_changed(changed);
191
192    if !file_diffs.is_empty() {
193        diff.add_diff_map_entry(file_id, file_diffs);
194    }
195
196    if !deletion_ranges.is_empty() {
197        diff.add_deletion_ranges_entry(file_id, deletion_ranges);
198    }
199
200    diff
201}
202
203/**
204 * Implements the Myers diff algorithm.
205 *
206 * Borrows from:
207 * - https://github.com/nikic/PHP-Parser/blob/master/lib/PhpParser/Internal/Differ.php
208 * - https://github.com/slackhq/hakana/blob/35890f99ded7897e4203a896fd1636bda300bad6/src/orchestrator/ast_differ.rs#L151-L159
209 *
210 * Myers, Eugene W. "An O(ND) difference algorithm and its variations."
211 * Algorithmica 1.1 (1986): 251-266.
212 *
213 * Returns a tuple of (trace, x, y) where:
214 * - trace: A vector of hash maps representing the search path
215 * - x: Final position in the old sequence
216 * - y: Final position in the new sequence
217 */
218fn calculate_trace(
219    a_nodes: &[DefSignatureNode],
220    b_nodes: &[DefSignatureNode],
221) -> (Vec<HashMap<isize, usize>>, usize, usize) {
222    let n = a_nodes.len();
223    let m = b_nodes.len();
224    let max = n + m;
225    let mut v: HashMap<isize, usize> = HashMap::default();
226    v.insert(1, 0);
227    let mut trace = vec![];
228
229    for d in 0..=(max as isize) {
230        trace.push(v.clone());
231        let mut k = -d;
232        while k <= d {
233            let mut x = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { v[&(k + 1)] } else { v[&(k - 1)] + 1 };
234
235            let mut y = (x as isize - k) as usize;
236
237            // Advance along the diagonal while nodes are equal
238            while x < n && y < m && is_equal(&a_nodes[x], &b_nodes[y]) {
239                x += 1;
240                y += 1;
241            }
242
243            v.insert(k, x);
244
245            // Found the end
246            if x >= n && y >= m {
247                return (trace, x, y);
248            }
249            k += 2;
250        }
251    }
252
253    panic!("Myers diff algorithm failed to converge");
254}
255
256/// Checks if two DefSignatureNode instances can be matched for diffing.
257///
258/// Two nodes are considered matchable if they have the same:
259/// - name
260/// - is_function flag
261///
262/// We don't check hash here because we want to match nodes even if their
263/// content changed. The hash difference will be detected later to determine
264/// if they belong in the "keep" or "changed" set.
265fn is_equal(a_node: &DefSignatureNode, b_node: &DefSignatureNode) -> bool {
266    a_node.name == b_node.name && a_node.is_function == b_node.is_function
267}
268
269/// Extracts the diff elements from the Myers trace.
270///
271/// Walks backward through the trace to build a sequence of Keep, Remove, and Add operations.
272fn extract_diff<'a>(
273    trace: Vec<HashMap<isize, usize>>,
274    mut x: usize,
275    mut y: usize,
276    a_nodes: &'a [DefSignatureNode],
277    b_nodes: &'a [DefSignatureNode],
278) -> Vec<AstDiffElem<'a>> {
279    let mut result = vec![];
280    let mut d = trace.len() as isize - 1;
281
282    while d >= 0 {
283        let v = &trace[d as usize];
284        let k = (x as isize) - (y as isize);
285
286        let prev_k = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { k + 1 } else { k - 1 };
287
288        let prev_x = v[&prev_k];
289        let prev_y = prev_x as isize - prev_k;
290
291        // Walk diagonals (unchanged elements)
292        while x > prev_x && y as isize > prev_y {
293            result.push(AstDiffElem::Keep(&a_nodes[x - 1], &b_nodes[y - 1]));
294            x -= 1;
295            y -= 1;
296        }
297
298        if d == 0 {
299            break;
300        }
301
302        // Deletions
303        while x > prev_x {
304            result.push(AstDiffElem::Remove(&a_nodes[x - 1]));
305            x -= 1;
306        }
307
308        // Additions
309        while y as isize > prev_y {
310            result.push(AstDiffElem::Add(&b_nodes[y - 1]));
311            y -= 1;
312        }
313
314        d -= 1;
315    }
316
317    result.reverse();
318    result
319}
320
321/// Represents a single element in the AST diff.
322#[derive(Debug)]
323enum AstDiffElem<'a> {
324    /// Node unchanged in both old and new versions
325    Keep(&'a DefSignatureNode, &'a DefSignatureNode),
326    /// Node was removed in the new version
327    Remove(&'a DefSignatureNode),
328    /// Node was added in the new version
329    Add(&'a DefSignatureNode),
330}