Skip to main content

mago_codex/
differ.rs

1use ahash::HashMap;
2use ahash::HashSet;
3
4use mago_atom::empty_atom;
5use mago_database::file::FileId;
6
7use crate::diff::CodebaseDiff;
8use crate::diff::DeletionRange;
9use crate::diff::DiffHunk;
10use crate::signature::DefSignatureNode;
11use crate::signature::FileSignature;
12
13/// Computes the difference between an old file signature and a new file signature.
14///
15/// This function uses the Myers diff algorithm to efficiently identify changes between
16/// two versions of a file's AST. Unlike Hakana which differentiates between signature
17/// and body changes, we use a single hash approach: any change triggers re-analysis.
18///
19/// # Arguments
20///
21/// * `file_id` - The identifier of the file being compared (used for `diff_map`)
22/// * `old_signature` - The previous file signature (None if this is a new file)
23/// * `new_signature` - The current file signature
24///
25/// # Returns
26///
27/// A `CodebaseDiff` containing:
28/// - `keep`: Symbols that are unchanged
29/// - `changed`: Symbols that are new, deleted, or modified
30/// - `diff_map`: Position mappings for symbols that moved
31/// - `deletion_ranges_map`: Ranges of deleted code
32#[must_use]
33pub fn compute_file_diff(
34    file_id: FileId,
35    old_signature: Option<&FileSignature>,
36    new_signature: Option<&FileSignature>,
37) -> CodebaseDiff {
38    let Some(new_signature) = new_signature else {
39        return CodebaseDiff::new();
40    };
41
42    match old_signature {
43        None => {
44            // New file - all symbols are changed
45            mark_all_as_changed(new_signature)
46        }
47        Some(old_sig) => {
48            // Existing file - use Myers diff
49            myers_diff(file_id, old_sig, new_signature)
50        }
51    }
52}
53
54/// Marks all symbols in a file signature as changed (used for new files).
55fn mark_all_as_changed(signature: &FileSignature) -> CodebaseDiff {
56    let mut changed = HashSet::default();
57
58    for node in &signature.ast_nodes {
59        // Add top-level symbol
60        changed.insert((node.name, empty_atom()));
61
62        // Add all children (methods, properties, etc.)
63        for child in &node.children {
64            changed.insert((node.name, child.name));
65        }
66    }
67
68    CodebaseDiff::new().with_changed(changed)
69}
70
71/// Computes a detailed diff between two file signatures using Myers algorithm.
72///
73/// This function implements a two-level Myers diff:
74/// 1. Top-level diff: compares classes, functions, and constants
75/// 2. Member-level diff: compares methods, properties, and class constants within each class
76///
77/// For each symbol, it checks both structural changes (add/remove/keep) and content changes
78/// (hash comparison) to determine if re-analysis is needed.
79///
80/// # Implementation
81///
82/// Adapted from Hakana's implementation, but uses a single fingerprint hash per symbol
83/// instead of separate signature/body hashes. Any hash change triggers re-analysis.
84///
85/// See: <https://github.com/slackhq/hakana/blob/35890f99ded7897e4203a896fd1636bda300bad6/src/orchestrator/ast_differ.rs#L10-L13>
86///
87/// # Arguments
88///
89/// * `file_id` - File being compared (for tracking position changes)
90/// * `old_signature` - Previous file signature
91/// * `new_signature` - Current file signature
92///
93/// # Returns
94///
95/// A `CodebaseDiff` with:
96/// - `keep`: Unchanged symbols
97/// - `changed`: Added, removed, or modified symbols
98/// - `diff_map`: Position changes for issue mapping
99/// - `deletion_ranges_map`: Deleted code ranges for issue filtering
100fn myers_diff(file_id: FileId, old_signature: &FileSignature, new_signature: &FileSignature) -> CodebaseDiff {
101    let mut keep = HashSet::default();
102    let mut changed = HashSet::default();
103    let mut file_diffs: Vec<DiffHunk> = vec![];
104    let mut deletion_ranges: Vec<DeletionRange> = vec![];
105
106    let Ok((trace, x, y)) = calculate_trace(&old_signature.ast_nodes, &new_signature.ast_nodes) else {
107        tracing::warn!("Myers diff algorithm failed to converge for file {file_id:?}, marking all symbols as changed");
108
109        return mark_all_as_changed(new_signature);
110    };
111
112    let diff = extract_diff(&trace, x, y, &old_signature.ast_nodes, &new_signature.ast_nodes);
113
114    for diff_elem in diff {
115        match diff_elem {
116            AstDiffElem::Keep(a, b) => {
117                let mut has_child_change = false;
118
119                let Ok((class_trace, class_x, class_y)) = calculate_trace(&a.children, &b.children) else {
120                    changed.insert((a.name, empty_atom()));
121                    for child in &a.children {
122                        changed.insert((a.name, child.name));
123                    }
124
125                    for child in &b.children {
126                        changed.insert((b.name, child.name));
127                    }
128
129                    continue;
130                };
131
132                let class_diff = extract_diff(&class_trace, class_x, class_y, &a.children, &b.children);
133
134                for class_diff_elem in class_diff {
135                    match class_diff_elem {
136                        AstDiffElem::Keep(a_child, b_child) => {
137                            // Check if the child's hash changed
138                            if a_child.hash == b_child.hash {
139                                keep.insert((a.name, a_child.name));
140                            } else {
141                                has_child_change = true;
142                                changed.insert((a.name, a_child.name));
143                            }
144
145                            // Track position changes for issue mapping
146                            if b_child.start_offset != a_child.start_offset || b_child.start_line != a_child.start_line
147                            {
148                                file_diffs.push((
149                                    a_child.start_offset as usize,
150                                    a_child.end_offset as usize,
151                                    b_child.start_offset as isize - a_child.start_offset as isize,
152                                    b_child.start_line as isize - a_child.start_line as isize,
153                                ));
154                            }
155                        }
156                        AstDiffElem::Remove(child_node) => {
157                            has_child_change = true;
158                            changed.insert((a.name, child_node.name));
159                            deletion_ranges.push((child_node.start_offset as usize, child_node.end_offset as usize));
160                        }
161                        AstDiffElem::Add(child_node) => {
162                            has_child_change = true;
163                            changed.insert((a.name, child_node.name));
164                        }
165                    }
166                }
167
168                // Check if parent's hash changed or if any child changed
169                if has_child_change || a.hash != b.hash {
170                    changed.insert((a.name, empty_atom()));
171                } else {
172                    keep.insert((a.name, empty_atom()));
173
174                    // Track position changes for issue mapping
175                    if b.start_offset != a.start_offset || b.start_line != a.start_line {
176                        file_diffs.push((
177                            a.start_offset as usize,
178                            a.end_offset as usize,
179                            b.start_offset as isize - a.start_offset as isize,
180                            b.start_line as isize - a.start_line as isize,
181                        ));
182                    }
183                }
184            }
185            AstDiffElem::Remove(node) => {
186                changed.insert((node.name, empty_atom()));
187                deletion_ranges.push((node.start_offset as usize, node.end_offset as usize));
188
189                // Also mark all children as removed
190                for child in &node.children {
191                    changed.insert((node.name, child.name));
192                }
193            }
194            AstDiffElem::Add(node) => {
195                changed.insert((node.name, empty_atom()));
196
197                // Also mark all children as added
198                for child in &node.children {
199                    changed.insert((node.name, child.name));
200                }
201            }
202        }
203    }
204
205    let mut diff = CodebaseDiff::new().with_keep(keep).with_changed(changed);
206
207    if !file_diffs.is_empty() {
208        diff.add_diff_map_entry(file_id, file_diffs);
209    }
210
211    if !deletion_ranges.is_empty() {
212        diff.add_deletion_ranges_entry(file_id, deletion_ranges);
213    }
214
215    diff
216}
217
218/// Type alias for the Myers diff trace structure.
219///
220/// - Vec<`HashMap`<isize, usize>>: The trace of the search path
221/// - usize: Final position in the old sequence
222/// - usize: Final position in the new sequence
223type DiffTrace = (Vec<HashMap<isize, usize>>, usize, usize);
224
225/// Implements the Myers diff algorithm.
226///
227/// Borrows from:
228/// - <https://github.com/nikic/PHP-Parser/blob/master/lib/PhpParser/Internal/Differ.php>
229/// - <https://github.com/slackhq/hakana/blob/35890f99ded7897e4203a896fd1636bda300bad6/src/orchestrator/ast_differ.rs#L151-L159>
230///
231/// Myers, Eugene W. "An O(ND) difference algorithm and its variations."
232/// Algorithmica 1.1 (1986): 251-266.
233///
234/// Returns a Result containing a tuple of (trace, x, y) where:
235/// - trace: A vector of hash maps representing the search path
236/// - x: Final position in the old sequence
237/// - y: Final position in the new sequence
238///
239/// Returns Err if the algorithm fails to converge (theoretically impossible but handled gracefully).
240fn calculate_trace(a_nodes: &[DefSignatureNode], b_nodes: &[DefSignatureNode]) -> Result<DiffTrace, &'static str> {
241    let n = a_nodes.len();
242    let m = b_nodes.len();
243    let max = n + m;
244    let mut v: HashMap<isize, usize> = HashMap::default();
245    v.insert(1, 0);
246    let mut trace = vec![];
247
248    for d in 0..=(max as isize) {
249        trace.push(v.clone());
250        let mut k = -d;
251        while k <= d {
252            let mut x = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { v[&(k + 1)] } else { v[&(k - 1)] + 1 };
253
254            let mut y = (x as isize - k) as usize;
255
256            // Advance along the diagonal while nodes are equal
257            while x < n && y < m && is_equal(&a_nodes[x], &b_nodes[y]) {
258                x += 1;
259                y += 1;
260            }
261
262            v.insert(k, x);
263
264            // Found the end
265            if x >= n && y >= m {
266                return Ok((trace, x, y));
267            }
268
269            k += 2;
270        }
271    }
272
273    Err("Myers diff algorithm failed to converge")
274}
275
276/// Checks if two `DefSignatureNode` instances can be matched for diffing.
277///
278/// Two nodes are considered matchable if they have the same:
279/// - name
280/// - `is_function` flag
281///
282/// We don't check hash here because we want to match nodes even if their
283/// content changed. The hash difference will be detected later to determine
284/// if they belong in the "keep" or "changed" set.
285fn is_equal(a_node: &DefSignatureNode, b_node: &DefSignatureNode) -> bool {
286    a_node.name == b_node.name && a_node.is_function == b_node.is_function
287}
288
289/// Extracts the diff elements from the Myers trace.
290///
291/// Walks backward through the trace to build a sequence of Keep, Remove, and Add operations.
292fn extract_diff<'a>(
293    trace: &[HashMap<isize, usize>],
294    mut x: usize,
295    mut y: usize,
296    a_nodes: &'a [DefSignatureNode],
297    b_nodes: &'a [DefSignatureNode],
298) -> Vec<AstDiffElem<'a>> {
299    let mut result = vec![];
300    let mut d = trace.len() as isize - 1;
301
302    while d >= 0 {
303        let v = &trace[d as usize];
304        let k = (x as isize) - (y as isize);
305
306        let prev_k = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { k + 1 } else { k - 1 };
307
308        let prev_x = v[&prev_k];
309        let prev_y = prev_x as isize - prev_k;
310
311        // Walk diagonals (unchanged elements)
312        while x > prev_x && y as isize > prev_y {
313            result.push(AstDiffElem::Keep(&a_nodes[x - 1], &b_nodes[y - 1]));
314            x -= 1;
315            y -= 1;
316        }
317
318        if d == 0 {
319            break;
320        }
321
322        // Deletions
323        while x > prev_x {
324            result.push(AstDiffElem::Remove(&a_nodes[x - 1]));
325            x -= 1;
326        }
327
328        // Additions
329        while y as isize > prev_y {
330            result.push(AstDiffElem::Add(&b_nodes[y - 1]));
331            y -= 1;
332        }
333
334        d -= 1;
335    }
336
337    result.reverse();
338    result
339}
340
341/// Represents a single element in the AST diff.
342#[derive(Debug)]
343enum AstDiffElem<'a> {
344    /// Node unchanged in both old and new versions
345    Keep(&'a DefSignatureNode, &'a DefSignatureNode),
346    /// Node was removed in the new version
347    Remove(&'a DefSignatureNode),
348    /// Node was added in the new version
349    Add(&'a DefSignatureNode),
350}