Skip to main content

mago_codex/
diff.rs

1use foldhash::HashMap;
2use foldhash::HashSet;
3use serde::Deserialize;
4use serde::Serialize;
5
6use mago_database::file::FileId;
7
8use crate::differ::compute_file_diff;
9use crate::metadata::CodebaseMetadata;
10use crate::symbol::SymbolIdentifier;
11
/// A single text diff hunk, carrying position and offset information.
///
/// Tuple layout: `(old_start, old_length, line_offset, column_offset)`
/// - `old_start`: byte offset in the old version at which the changed region begins
/// - `old_length`: size of the changed region, in bytes
/// - `line_offset`: line number delta (`new_line` - `old_line`)
/// - `column_offset`: column number delta (`new_column` - `old_column`)
pub type DiffHunk = (usize, usize, isize, isize);
20
/// A span of code that was deleted.
///
/// Tuple layout: `(start_offset, end_offset)`
/// - `start_offset`: byte offset at which the deletion starts
/// - `end_offset`: byte offset at which the deletion ends
pub type DeletionRange = (usize, usize);
27
28/// Represents the differences between two states of a codebase, typically used for incremental analysis.
29///
30/// This structure uses a single fingerprint hash per symbol to determine changes. Any change to a symbol
31/// (signature, body, modifiers, attributes) produces a different hash, triggering re-analysis.
32///
33/// Provides a comprehensive API for modification and querying following established conventions.
34#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
35pub struct CodebaseDiff {
36    /// Set of `(Symbol, Member)` pairs whose fingerprint hash is UNCHANGED.
37    /// These symbols can be safely skipped during re-analysis.
38    /// Member is empty for top-level symbols.
39    keep: HashSet<SymbolIdentifier>,
40
41    /// Set of `(Symbol, Member)` pairs that are new, deleted, or have a different fingerprint hash.
42    /// These symbols MUST be re-analyzed.
43    /// Member is empty for top-level symbols.
44    changed: HashSet<SymbolIdentifier>,
45
46    /// Map from source file identifier to a vector of text diff hunks.
47    /// Used for mapping issue positions between old and new code.
48    diff_map: HashMap<FileId, Vec<DiffHunk>>,
49
50    /// Map from source file identifier to a vector of deleted code ranges.
51    /// Used for filtering out issues in deleted code regions.
52    deletion_ranges_map: HashMap<FileId, Vec<DeletionRange>>,
53}
54
55impl CodebaseDiff {
56    #[inline]
57    #[must_use]
58    pub fn new() -> Self {
59        Self::default()
60    }
61
62    /// Computes the `CodebaseDiff` between two `CodebaseMetadata` instances.
63    ///
64    /// This method compares the metadata of the old and new codebases to determine which symbols have changed,
65    /// which can be kept unchanged, and what text diffs exist for source files.
66    ///
67    /// It aggregates this information into a `CodebaseDiff` instance that can be used for incremental analysis.
68    pub fn between(old_metadata: &CodebaseMetadata, new_metadata: &CodebaseMetadata) -> Self {
69        let mut aggregate_diff = CodebaseDiff::new();
70
71        let mut all_file_ids = old_metadata.get_all_file_ids();
72        all_file_ids.extend(new_metadata.get_all_file_ids());
73        all_file_ids.sort();
74        all_file_ids.dedup();
75
76        for file_id in all_file_ids {
77            let old_sig = old_metadata.get_file_signature(&file_id);
78            let new_sig = new_metadata.get_file_signature(&file_id);
79
80            let file_diff = compute_file_diff(file_id, old_sig, new_sig);
81
82            aggregate_diff.extend(file_diff);
83        }
84
85        aggregate_diff
86    }
87
88    /// Merges changes from another `CodebaseDiff` into this one.
89    #[inline]
90    pub fn extend(&mut self, other: Self) {
91        self.keep.extend(other.keep);
92        self.changed.extend(other.changed);
93        for (source, diffs) in other.diff_map {
94            self.diff_map.entry(source).or_default().extend(diffs);
95        }
96        for (source, ranges) in other.deletion_ranges_map {
97            self.deletion_ranges_map.entry(source).or_default().extend(ranges);
98        }
99    }
100
101    /// Returns a reference to the set of symbols/members to keep unchanged.
102    #[inline]
103    #[must_use]
104    pub fn get_keep(&self) -> &HashSet<SymbolIdentifier> {
105        &self.keep
106    }
107
108    /// Returns a reference to the set of changed symbols/members.
109    #[inline]
110    #[must_use]
111    pub fn get_changed(&self) -> &HashSet<SymbolIdentifier> {
112        &self.changed
113    }
114
115    /// Returns a reference to the map of source files to text diff hunks.
116    #[inline]
117    #[must_use]
118    pub fn get_diff_map(&self) -> &HashMap<FileId, Vec<DiffHunk>> {
119        &self.diff_map
120    }
121
122    /// Returns a reference to the map of source files to deletion ranges.
123    #[inline]
124    #[must_use]
125    pub fn get_deletion_ranges_map(&self) -> &HashMap<FileId, Vec<DeletionRange>> {
126        &self.deletion_ranges_map
127    }
128
129    /// Sets the 'keep' set, replacing the existing one.
130    #[inline]
131    pub fn set_keep(&mut self, keep_set: impl IntoIterator<Item = SymbolIdentifier>) {
132        self.keep = keep_set.into_iter().collect();
133    }
134
135    /// Returns a new instance with the 'keep' set replaced.
136    #[inline]
137    pub fn with_keep(mut self, keep_set: impl IntoIterator<Item = SymbolIdentifier>) -> Self {
138        self.set_keep(keep_set);
139        self
140    }
141
142    /// Adds a single entry to the 'keep' set. Returns `true` if the entry was not already present.
143    #[inline]
144    pub fn add_keep_entry(&mut self, entry: SymbolIdentifier) -> bool {
145        self.keep.insert(entry)
146    }
147
148    /// Returns a new instance with the entry added to the 'keep' set.
149    #[inline]
150    #[must_use]
151    pub fn with_added_keep_entry(mut self, entry: SymbolIdentifier) -> Self {
152        self.add_keep_entry(entry);
153        self
154    }
155
156    /// Adds multiple entries to the 'keep' set.
157    #[inline]
158    pub fn add_keep_entries(&mut self, entries: impl IntoIterator<Item = SymbolIdentifier>) {
159        self.keep.extend(entries);
160    }
161
162    /// Returns a new instance with multiple entries added to the 'keep' set.
163    #[inline]
164    pub fn with_added_keep_entries(mut self, entries: impl IntoIterator<Item = SymbolIdentifier>) -> Self {
165        self.add_keep_entries(entries);
166        self
167    }
168
169    /// Clears the 'keep' set.
170    #[inline]
171    pub fn unset_keep(&mut self) {
172        self.keep.clear();
173    }
174
175    /// Returns a new instance with an empty 'keep' set.
176    #[inline]
177    #[must_use]
178    pub fn without_keep(mut self) -> Self {
179        self.unset_keep();
180        self
181    }
182
183    /// Sets the 'changed' set, replacing the existing one.
184    #[inline]
185    pub fn set_changed(&mut self, change_set: impl IntoIterator<Item = SymbolIdentifier>) {
186        self.changed = change_set.into_iter().collect();
187    }
188
189    /// Returns a new instance with the 'changed' set replaced.
190    #[inline]
191    pub fn with_changed(mut self, change_set: impl IntoIterator<Item = SymbolIdentifier>) -> Self {
192        self.set_changed(change_set);
193        self
194    }
195
196    /// Adds a single entry to the 'changed' set. Returns `true` if the entry was not already present.
197    #[inline]
198    pub fn add_changed_entry(&mut self, entry: SymbolIdentifier) -> bool {
199        self.changed.insert(entry)
200    }
201
202    /// Checks if the 'changed' set contains a specific entry.
203    #[inline]
204    #[must_use]
205    pub fn contains_changed_entry(&self, entry: &SymbolIdentifier) -> bool {
206        self.changed.contains(entry)
207    }
208
209    /// Returns a new instance with the entry added to the 'changed' set.
210    #[inline]
211    #[must_use]
212    pub fn with_added_changed_entry(mut self, entry: SymbolIdentifier) -> Self {
213        self.add_changed_entry(entry);
214        self
215    }
216
217    /// Adds multiple entries to the 'changed' set.
218    #[inline]
219    pub fn add_changed_entries(&mut self, entries: impl IntoIterator<Item = SymbolIdentifier>) {
220        self.changed.extend(entries);
221    }
222
223    /// Returns a new instance with multiple entries added to the 'changed' set.
224    #[inline]
225    pub fn with_added_changed_entries(mut self, entries: impl IntoIterator<Item = SymbolIdentifier>) -> Self {
226        self.add_changed_entries(entries);
227        self
228    }
229
230    /// Clears the 'changed' set.
231    #[inline]
232    pub fn unset_changed(&mut self) {
233        self.changed.clear();
234    }
235
236    /// Returns a new instance with an empty 'changed' set.
237    #[inline]
238    #[must_use]
239    pub fn without_changed(mut self) -> Self {
240        self.unset_changed();
241        self
242    }
243
244    /// Sets the diff map, replacing the existing one.
245    #[inline]
246    pub fn set_diff_map(&mut self, map: HashMap<FileId, Vec<DiffHunk>>) {
247        self.diff_map = map;
248    }
249
250    /// Returns a new instance with the diff map replaced.
251    #[inline]
252    #[must_use]
253    pub fn with_diff_map(mut self, map: HashMap<FileId, Vec<DiffHunk>>) -> Self {
254        self.set_diff_map(map);
255        self
256    }
257
258    /// Adds or replaces the diff hunks for a specific source file. Returns previous hunks if any.
259    #[inline]
260    pub fn add_diff_map_entry(&mut self, source: FileId, diffs: Vec<DiffHunk>) -> Option<Vec<DiffHunk>> {
261        self.diff_map.insert(source, diffs)
262    }
263
264    /// Returns a new instance with the diff hunks for the source file added or updated.
265    #[inline]
266    #[must_use]
267    pub fn with_added_diff_map_entry(mut self, source: FileId, diffs: Vec<DiffHunk>) -> Self {
268        self.add_diff_map_entry(source, diffs);
269        self
270    }
271
272    /// Extends the diff hunks for a specific source file.
273    #[inline]
274    pub fn add_diffs_for_source(&mut self, source: FileId, diffs: impl IntoIterator<Item = DiffHunk>) {
275        self.diff_map.entry(source).or_default().extend(diffs);
276    }
277
278    /// Returns a new instance with the diff hunks for the source file extended.
279    #[inline]
280    pub fn with_added_diffs_for_source(mut self, source: FileId, diffs: impl IntoIterator<Item = DiffHunk>) -> Self {
281        self.add_diffs_for_source(source, diffs);
282        self
283    }
284
285    /// Clears the diff map.
286    #[inline]
287    pub fn unset_diff_map(&mut self) {
288        self.diff_map.clear();
289    }
290
291    /// Returns a new instance with an empty diff map.
292    #[inline]
293    #[must_use]
294    pub fn without_diff_map(mut self) -> Self {
295        self.unset_diff_map();
296        self
297    }
298
299    /// Sets the deletion ranges map, replacing the existing one.
300    #[inline]
301    pub fn set_deletion_ranges_map(&mut self, map: HashMap<FileId, Vec<DeletionRange>>) {
302        self.deletion_ranges_map = map;
303    }
304
305    /// Returns a new instance with the deletion ranges map replaced.
306    #[inline]
307    #[must_use]
308    pub fn with_deletion_ranges_map(mut self, map: HashMap<FileId, Vec<DeletionRange>>) -> Self {
309        self.set_deletion_ranges_map(map);
310        self
311    }
312
313    /// Adds or replaces the deletion ranges for a specific source file. Returns previous ranges if any.
314    #[inline]
315    pub fn add_deletion_ranges_entry(
316        &mut self,
317        source: FileId,
318        ranges: Vec<DeletionRange>,
319    ) -> Option<Vec<DeletionRange>> {
320        self.deletion_ranges_map.insert(source, ranges)
321    }
322
323    /// Returns a new instance with the deletion ranges for the source file added or updated.
324    #[inline]
325    #[must_use]
326    pub fn with_added_deletion_ranges_entry(mut self, file: FileId, ranges: Vec<DeletionRange>) -> Self {
327        self.add_deletion_ranges_entry(file, ranges);
328        self
329    }
330
331    /// Extends the deletion ranges for a specific source file.
332    #[inline]
333    pub fn add_deletion_ranges_for_source(&mut self, file: FileId, ranges: impl IntoIterator<Item = (usize, usize)>) {
334        self.deletion_ranges_map.entry(file).or_default().extend(ranges);
335    }
336
337    /// Returns a new instance with the deletion ranges for the source file extended.
338    #[inline]
339    pub fn with_added_deletion_ranges_for_source(
340        mut self,
341        file: FileId,
342        ranges: impl IntoIterator<Item = (usize, usize)>,
343    ) -> Self {
344        self.add_deletion_ranges_for_source(file, ranges);
345        self
346    }
347
348    /// Clears the deletion ranges map.
349    #[inline]
350    pub fn unset_deletion_ranges_map(&mut self) {
351        self.deletion_ranges_map.clear();
352    }
353
354    /// Returns a new instance with an empty deletion ranges map.
355    #[inline]
356    #[must_use]
357    pub fn without_deletion_ranges_map(mut self) -> Self {
358        self.unset_deletion_ranges_map();
359        self
360    }
361}