Skip to main content

mago_codex/
differ.rs

1use foldhash::HashMap;
2use foldhash::HashSet;
3
4use mago_atom::empty_atom;
5use mago_database::file::FileId;
6
7use crate::diff::CodebaseDiff;
8use crate::diff::DeletionRange;
9use crate::diff::DiffHunk;
10use crate::signature::DefSignatureNode;
11use crate::signature::FileSignature;
12
13/// Computes the difference between an old file signature and a new file signature.
14///
15/// This function uses the Myers diff algorithm to efficiently identify changes between
16/// two versions of a file's AST. Unlike Hakana which differentiates between signature
17/// and body changes, we use a single hash approach: any change triggers re-analysis.
18///
19/// # Arguments
20///
21/// * `file_id` - The identifier of the file being compared (used for `diff_map`)
22/// * `old_signature` - The previous file signature (None if this is a new file)
23/// * `new_signature` - The current file signature
24///
25/// # Returns
26///
27/// A `CodebaseDiff` containing:
28/// - `keep`: Symbols that are unchanged
29/// - `changed`: Symbols that are new, deleted, or modified
30/// - `diff_map`: Position mappings for symbols that moved
31/// - `deletion_ranges_map`: Ranges of deleted code
32#[must_use]
33pub fn compute_file_diff(
34    file_id: FileId,
35    old_signature: Option<&FileSignature>,
36    new_signature: Option<&FileSignature>,
37) -> CodebaseDiff {
38    match (old_signature, new_signature) {
39        (None, None) => CodebaseDiff::new(),
40        (Some(old_sig), None) => mark_all_as_changed(old_sig),
41        (None, Some(new_sig)) => mark_all_as_changed(new_sig),
42        (Some(old_sig), Some(new_sig)) => myers_diff(file_id, old_sig, new_sig),
43    }
44}
45
46/// Marks all symbols in a file signature as changed (used for new files).
47fn mark_all_as_changed(signature: &FileSignature) -> CodebaseDiff {
48    let mut changed = HashSet::default();
49
50    for node in &signature.ast_nodes {
51        // Add top-level symbol
52        changed.insert((node.name, empty_atom()));
53
54        // Add all children (methods, properties, etc.)
55        for child in &node.children {
56            changed.insert((node.name, child.name));
57        }
58    }
59
60    CodebaseDiff::new().with_changed(changed)
61}
62
63/// Computes a detailed diff between two file signatures using Myers algorithm.
64///
65/// This function implements a two-level Myers diff:
66/// 1. Top-level diff: compares classes, functions, and constants
67/// 2. Member-level diff: compares methods, properties, and class constants within each class
68///
69/// Uses `signature_hash` (which excludes function/method bodies) for the keep/changed
70/// decision. This means body-only changes to a function or method do NOT mark it as
71/// "changed" in the diff, so they don't trigger cascade invalidation of dependents.
72/// The changed file itself is still re-analyzed because its content hash changed.
73///
74/// # Arguments
75///
76/// * `file_id` - File being compared (for tracking position changes)
77/// * `old_signature` - Previous file signature
78/// * `new_signature` - Current file signature
79///
80/// # Returns
81///
82/// A `CodebaseDiff` with:
83/// - `keep`: Symbols whose signatures are unchanged
84/// - `changed`: Added, removed, or signature-modified symbols
85/// - `diff_map`: Position changes for issue mapping
86/// - `deletion_ranges_map`: Deleted code ranges for issue filtering
87fn myers_diff(file_id: FileId, old_signature: &FileSignature, new_signature: &FileSignature) -> CodebaseDiff {
88    let mut keep = HashSet::default();
89    let mut changed = HashSet::default();
90    let mut file_diffs: Vec<DiffHunk> = vec![];
91    let mut deletion_ranges: Vec<DeletionRange> = vec![];
92
93    let Ok((trace, x, y)) = calculate_trace(&old_signature.ast_nodes, &new_signature.ast_nodes) else {
94        tracing::warn!("Myers diff algorithm failed to converge for file {file_id:?}, marking all symbols as changed");
95
96        return mark_all_as_changed(new_signature);
97    };
98
99    let diff = extract_diff(&trace, x, y, &old_signature.ast_nodes, &new_signature.ast_nodes);
100
101    for diff_elem in diff {
102        match diff_elem {
103            AstDiffElem::Keep(a, b) => {
104                let mut has_child_sig_change = false;
105
106                let Ok((class_trace, class_x, class_y)) = calculate_trace(&a.children, &b.children) else {
107                    changed.insert((a.name, empty_atom()));
108                    for child in &a.children {
109                        changed.insert((a.name, child.name));
110                    }
111
112                    for child in &b.children {
113                        changed.insert((b.name, child.name));
114                    }
115
116                    continue;
117                };
118
119                let class_diff = extract_diff(&class_trace, class_x, class_y, &a.children, &b.children);
120
121                for class_diff_elem in class_diff {
122                    match class_diff_elem {
123                        AstDiffElem::Keep(a_child, b_child) => {
124                            // Use signature_hash for cascade decision: body-only changes
125                            // don't cascade to dependents, only signature changes do.
126                            if a_child.signature_hash == b_child.signature_hash {
127                                keep.insert((a.name, a_child.name));
128                            } else {
129                                has_child_sig_change = true;
130                                changed.insert((a.name, a_child.name));
131                            }
132
133                            // Track position changes for issue mapping
134                            if b_child.start_offset != a_child.start_offset || b_child.start_line != a_child.start_line
135                            {
136                                file_diffs.push((
137                                    a_child.start_offset as usize,
138                                    a_child.end_offset as usize,
139                                    b_child.start_offset as isize - a_child.start_offset as isize,
140                                    b_child.start_line as isize - a_child.start_line as isize,
141                                ));
142                            }
143                        }
144                        AstDiffElem::Remove(child_node) => {
145                            has_child_sig_change = true;
146                            changed.insert((a.name, child_node.name));
147                            deletion_ranges.push((child_node.start_offset as usize, child_node.end_offset as usize));
148                        }
149                        AstDiffElem::Add(child_node) => {
150                            has_child_sig_change = true;
151                            changed.insert((a.name, child_node.name));
152                        }
153                    }
154                }
155
156                if has_child_sig_change || a.signature_hash != b.signature_hash {
157                    changed.insert((a.name, empty_atom()));
158                } else {
159                    keep.insert((a.name, empty_atom()));
160
161                    // Track position changes for issue mapping
162                    if b.start_offset != a.start_offset || b.start_line != a.start_line {
163                        file_diffs.push((
164                            a.start_offset as usize,
165                            a.end_offset as usize,
166                            b.start_offset as isize - a.start_offset as isize,
167                            b.start_line as isize - a.start_line as isize,
168                        ));
169                    }
170                }
171            }
172            AstDiffElem::Remove(node) => {
173                changed.insert((node.name, empty_atom()));
174                deletion_ranges.push((node.start_offset as usize, node.end_offset as usize));
175
176                // Also mark all children as removed
177                for child in &node.children {
178                    changed.insert((node.name, child.name));
179                }
180            }
181            AstDiffElem::Add(node) => {
182                changed.insert((node.name, empty_atom()));
183
184                // Also mark all children as added
185                for child in &node.children {
186                    changed.insert((node.name, child.name));
187                }
188            }
189        }
190    }
191
192    let mut diff = CodebaseDiff::new().with_keep(keep).with_changed(changed);
193
194    if !file_diffs.is_empty() {
195        diff.add_diff_map_entry(file_id, file_diffs);
196    }
197
198    if !deletion_ranges.is_empty() {
199        diff.add_deletion_ranges_entry(file_id, deletion_ranges);
200    }
201
202    diff
203}
204
205/// Type alias for the Myers diff trace structure.
206///
207/// - Vec<`HashMap`<isize, usize>>: The trace of the search path
208/// - usize: Final position in the old sequence
209/// - usize: Final position in the new sequence
210type DiffTrace = (Vec<HashMap<isize, usize>>, usize, usize);
211
212/// Implements the Myers diff algorithm.
213///
214/// Borrows from:
215/// - <https://github.com/nikic/PHP-Parser/blob/master/lib/PhpParser/Internal/Differ.php>
216/// - <https://github.com/slackhq/hakana/blob/35890f99ded7897e4203a896fd1636bda300bad6/src/orchestrator/ast_differ.rs#L151-L159>
217///
218/// Myers, Eugene W. "An O(ND) difference algorithm and its variations."
219/// Algorithmica 1.1 (1986): 251-266.
220///
221/// Returns a Result containing a tuple of (trace, x, y) where:
222/// - trace: A vector of hash maps representing the search path
223/// - x: Final position in the old sequence
224/// - y: Final position in the new sequence
225///
226/// Returns Err if the algorithm fails to converge (theoretically impossible but handled gracefully).
227fn calculate_trace(a_nodes: &[DefSignatureNode], b_nodes: &[DefSignatureNode]) -> Result<DiffTrace, &'static str> {
228    let n = a_nodes.len();
229    let m = b_nodes.len();
230    let max = n + m;
231    let mut v: HashMap<isize, usize> = HashMap::default();
232    v.insert(1, 0);
233    let mut trace = vec![];
234
235    for d in 0..=(max as isize) {
236        trace.push(v.clone());
237        let mut k = -d;
238        while k <= d {
239            let mut x = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { v[&(k + 1)] } else { v[&(k - 1)] + 1 };
240
241            let mut y = (x as isize - k) as usize;
242
243            // Advance along the diagonal while nodes are equal
244            while x < n && y < m && is_equal(&a_nodes[x], &b_nodes[y]) {
245                x += 1;
246                y += 1;
247            }
248
249            v.insert(k, x);
250
251            // Found the end
252            if x >= n && y >= m {
253                return Ok((trace, x, y));
254            }
255
256            k += 2;
257        }
258    }
259
260    Err("Myers diff algorithm failed to converge")
261}
262
263/// Checks if two `DefSignatureNode` instances can be matched for diffing.
264///
265/// Two nodes are considered matchable if they have the same:
266/// - name
267/// - `is_function` flag
268///
269/// We don't check hash here because we want to match nodes even if their
270/// content changed. The hash difference will be detected later to determine
271/// if they belong in the "keep" or "changed" set.
272fn is_equal(a_node: &DefSignatureNode, b_node: &DefSignatureNode) -> bool {
273    a_node.name == b_node.name && a_node.is_function == b_node.is_function
274}
275
276/// Extracts the diff elements from the Myers trace.
277///
278/// Walks backward through the trace to build a sequence of Keep, Remove, and Add operations.
279fn extract_diff<'a>(
280    trace: &[HashMap<isize, usize>],
281    mut x: usize,
282    mut y: usize,
283    a_nodes: &'a [DefSignatureNode],
284    b_nodes: &'a [DefSignatureNode],
285) -> Vec<AstDiffElem<'a>> {
286    let mut result = vec![];
287    let mut d = trace.len() as isize - 1;
288
289    while d >= 0 {
290        let v = &trace[d as usize];
291        let k = (x as isize) - (y as isize);
292
293        let prev_k = if k == -d || (k != d && v[&(k - 1)] < v[&(k + 1)]) { k + 1 } else { k - 1 };
294
295        let prev_x = v[&prev_k];
296        let prev_y = prev_x as isize - prev_k;
297
298        // Walk diagonals (unchanged elements)
299        while x > prev_x && y as isize > prev_y {
300            result.push(AstDiffElem::Keep(&a_nodes[x - 1], &b_nodes[y - 1]));
301            x -= 1;
302            y -= 1;
303        }
304
305        if d == 0 {
306            break;
307        }
308
309        // Deletions
310        while x > prev_x {
311            result.push(AstDiffElem::Remove(&a_nodes[x - 1]));
312            x -= 1;
313        }
314
315        // Additions
316        while y as isize > prev_y {
317            result.push(AstDiffElem::Add(&b_nodes[y - 1]));
318            y -= 1;
319        }
320
321        d -= 1;
322    }
323
324    result.reverse();
325    result
326}
327
328/// Represents a single element in the AST diff.
329#[derive(Debug)]
330enum AstDiffElem<'a> {
331    /// Node unchanged in both old and new versions
332    Keep(&'a DefSignatureNode, &'a DefSignatureNode),
333    /// Node was removed in the new version
334    Remove(&'a DefSignatureNode),
335    /// Node was added in the new version
336    Add(&'a DefSignatureNode),
337}