mago_codex/
diff.rs

1use ahash::HashMap;
2use ahash::HashSet;
3use serde::Deserialize;
4use serde::Serialize;
5
6use mago_database::file::FileId;
7
8use crate::symbol::SymbolIdentifier;
9
/// A single text diff hunk, carrying position and offset information.
///
/// The tuple is laid out as `(old_start, old_length, line_offset, column_offset)`:
///
/// - `old_start`: starting byte offset of the hunk in the old version
/// - `old_length`: length, in bytes, of the changed region
/// - `line_offset`: line number change (`new_line` - `old_line`)
/// - `column_offset`: column number change (`new_column` - `old_column`)
///
/// The two offsets are signed because a change can move positions in either direction.
pub type DiffHunk = (usize, usize, isize, isize);
18
/// A range of deleted code.
///
/// The tuple is laid out as `(start_offset, end_offset)`:
///
/// - `start_offset`: starting byte offset of the deletion
/// - `end_offset`: ending byte offset of the deletion
///
/// NOTE(review): whether `end_offset` is inclusive or exclusive is not established in this
/// file; confirm against the code that produces and consumes these ranges.
pub type DeletionRange = (usize, usize);
25
26/// Represents the differences between two states of a codebase, typically used for incremental analysis.
27///
28/// This structure uses a single fingerprint hash per symbol to determine changes. Any change to a symbol
29/// (signature, body, modifiers, attributes) produces a different hash, triggering re-analysis.
30///
31/// Provides a comprehensive API for modification and querying following established conventions.
32#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
33pub struct CodebaseDiff {
34    /// Set of `(Symbol, Member)` pairs whose fingerprint hash is UNCHANGED.
35    /// These symbols can be safely skipped during re-analysis.
36    /// Member is empty for top-level symbols.
37    keep: HashSet<SymbolIdentifier>,
38
39    /// Set of `(Symbol, Member)` pairs that are new, deleted, or have a different fingerprint hash.
40    /// These symbols MUST be re-analyzed.
41    /// Member is empty for top-level symbols.
42    changed: HashSet<SymbolIdentifier>,
43
44    /// Map from source file identifier to a vector of text diff hunks.
45    /// Used for mapping issue positions between old and new code.
46    diff_map: HashMap<FileId, Vec<DiffHunk>>,
47
48    /// Map from source file identifier to a vector of deleted code ranges.
49    /// Used for filtering out issues in deleted code regions.
50    deletion_ranges_map: HashMap<FileId, Vec<DeletionRange>>,
51}
52
53impl CodebaseDiff {
54    #[inline]
55    #[must_use]
56    pub fn new() -> Self {
57        Self::default()
58    }
59
60    /// Merges changes from another `CodebaseDiff` into this one.
61    #[inline]
62    pub fn extend(&mut self, other: Self) {
63        self.keep.extend(other.keep);
64        self.changed.extend(other.changed);
65        for (source, diffs) in other.diff_map {
66            self.diff_map.entry(source).or_default().extend(diffs);
67        }
68        for (source, ranges) in other.deletion_ranges_map {
69            self.deletion_ranges_map.entry(source).or_default().extend(ranges);
70        }
71    }
72
73    /// Returns a reference to the set of symbols/members to keep unchanged.
74    #[inline]
75    #[must_use]
76    pub fn get_keep(&self) -> &HashSet<SymbolIdentifier> {
77        &self.keep
78    }
79
80    /// Returns a reference to the set of changed symbols/members.
81    #[inline]
82    #[must_use]
83    pub fn get_changed(&self) -> &HashSet<SymbolIdentifier> {
84        &self.changed
85    }
86
87    /// Returns a reference to the map of source files to text diff hunks.
88    #[inline]
89    #[must_use]
90    pub fn get_diff_map(&self) -> &HashMap<FileId, Vec<DiffHunk>> {
91        &self.diff_map
92    }
93
94    /// Returns a reference to the map of source files to deletion ranges.
95    #[inline]
96    #[must_use]
97    pub fn get_deletion_ranges_map(&self) -> &HashMap<FileId, Vec<DeletionRange>> {
98        &self.deletion_ranges_map
99    }
100
101    /// Sets the 'keep' set, replacing the existing one.
102    #[inline]
103    pub fn set_keep(&mut self, keep_set: impl IntoIterator<Item = SymbolIdentifier>) {
104        self.keep = keep_set.into_iter().collect();
105    }
106
107    /// Returns a new instance with the 'keep' set replaced.
108    #[inline]
109    pub fn with_keep(mut self, keep_set: impl IntoIterator<Item = SymbolIdentifier>) -> Self {
110        self.set_keep(keep_set);
111        self
112    }
113
114    /// Adds a single entry to the 'keep' set. Returns `true` if the entry was not already present.
115    #[inline]
116    pub fn add_keep_entry(&mut self, entry: SymbolIdentifier) -> bool {
117        self.keep.insert(entry)
118    }
119
120    /// Returns a new instance with the entry added to the 'keep' set.
121    #[inline]
122    #[must_use]
123    pub fn with_added_keep_entry(mut self, entry: SymbolIdentifier) -> Self {
124        self.add_keep_entry(entry);
125        self
126    }
127
128    /// Adds multiple entries to the 'keep' set.
129    #[inline]
130    pub fn add_keep_entries(&mut self, entries: impl IntoIterator<Item = SymbolIdentifier>) {
131        self.keep.extend(entries);
132    }
133
134    /// Returns a new instance with multiple entries added to the 'keep' set.
135    #[inline]
136    pub fn with_added_keep_entries(mut self, entries: impl IntoIterator<Item = SymbolIdentifier>) -> Self {
137        self.add_keep_entries(entries);
138        self
139    }
140
141    /// Clears the 'keep' set.
142    #[inline]
143    pub fn unset_keep(&mut self) {
144        self.keep.clear();
145    }
146
147    /// Returns a new instance with an empty 'keep' set.
148    #[inline]
149    #[must_use]
150    pub fn without_keep(mut self) -> Self {
151        self.unset_keep();
152        self
153    }
154
155    /// Sets the 'changed' set, replacing the existing one.
156    #[inline]
157    pub fn set_changed(&mut self, change_set: impl IntoIterator<Item = SymbolIdentifier>) {
158        self.changed = change_set.into_iter().collect();
159    }
160
161    /// Returns a new instance with the 'changed' set replaced.
162    #[inline]
163    pub fn with_changed(mut self, change_set: impl IntoIterator<Item = SymbolIdentifier>) -> Self {
164        self.set_changed(change_set);
165        self
166    }
167
168    /// Adds a single entry to the 'changed' set. Returns `true` if the entry was not already present.
169    #[inline]
170    pub fn add_changed_entry(&mut self, entry: SymbolIdentifier) -> bool {
171        self.changed.insert(entry)
172    }
173
174    /// Checks if the 'changed' set contains a specific entry.
175    #[inline]
176    #[must_use]
177    pub fn contains_changed_entry(&self, entry: &SymbolIdentifier) -> bool {
178        self.changed.contains(entry)
179    }
180
181    /// Returns a new instance with the entry added to the 'changed' set.
182    #[inline]
183    #[must_use]
184    pub fn with_added_changed_entry(mut self, entry: SymbolIdentifier) -> Self {
185        self.add_changed_entry(entry);
186        self
187    }
188
189    /// Adds multiple entries to the 'changed' set.
190    #[inline]
191    pub fn add_changed_entries(&mut self, entries: impl IntoIterator<Item = SymbolIdentifier>) {
192        self.changed.extend(entries);
193    }
194
195    /// Returns a new instance with multiple entries added to the 'changed' set.
196    #[inline]
197    pub fn with_added_changed_entries(mut self, entries: impl IntoIterator<Item = SymbolIdentifier>) -> Self {
198        self.add_changed_entries(entries);
199        self
200    }
201
202    /// Clears the 'changed' set.
203    #[inline]
204    pub fn unset_changed(&mut self) {
205        self.changed.clear();
206    }
207
208    /// Returns a new instance with an empty 'changed' set.
209    #[inline]
210    #[must_use]
211    pub fn without_changed(mut self) -> Self {
212        self.unset_changed();
213        self
214    }
215
216    /// Sets the diff map, replacing the existing one.
217    #[inline]
218    pub fn set_diff_map(&mut self, map: HashMap<FileId, Vec<DiffHunk>>) {
219        self.diff_map = map;
220    }
221
222    /// Returns a new instance with the diff map replaced.
223    #[inline]
224    #[must_use]
225    pub fn with_diff_map(mut self, map: HashMap<FileId, Vec<DiffHunk>>) -> Self {
226        self.set_diff_map(map);
227        self
228    }
229
230    /// Adds or replaces the diff hunks for a specific source file. Returns previous hunks if any.
231    #[inline]
232    pub fn add_diff_map_entry(&mut self, source: FileId, diffs: Vec<DiffHunk>) -> Option<Vec<DiffHunk>> {
233        self.diff_map.insert(source, diffs)
234    }
235
236    /// Returns a new instance with the diff hunks for the source file added or updated.
237    #[inline]
238    #[must_use]
239    pub fn with_added_diff_map_entry(mut self, source: FileId, diffs: Vec<DiffHunk>) -> Self {
240        self.add_diff_map_entry(source, diffs);
241        self
242    }
243
244    /// Extends the diff hunks for a specific source file.
245    #[inline]
246    pub fn add_diffs_for_source(&mut self, source: FileId, diffs: impl IntoIterator<Item = DiffHunk>) {
247        self.diff_map.entry(source).or_default().extend(diffs);
248    }
249
250    /// Returns a new instance with the diff hunks for the source file extended.
251    #[inline]
252    pub fn with_added_diffs_for_source(mut self, source: FileId, diffs: impl IntoIterator<Item = DiffHunk>) -> Self {
253        self.add_diffs_for_source(source, diffs);
254        self
255    }
256
257    /// Clears the diff map.
258    #[inline]
259    pub fn unset_diff_map(&mut self) {
260        self.diff_map.clear();
261    }
262
263    /// Returns a new instance with an empty diff map.
264    #[inline]
265    #[must_use]
266    pub fn without_diff_map(mut self) -> Self {
267        self.unset_diff_map();
268        self
269    }
270
271    /// Sets the deletion ranges map, replacing the existing one.
272    #[inline]
273    pub fn set_deletion_ranges_map(&mut self, map: HashMap<FileId, Vec<DeletionRange>>) {
274        self.deletion_ranges_map = map;
275    }
276
277    /// Returns a new instance with the deletion ranges map replaced.
278    #[inline]
279    #[must_use]
280    pub fn with_deletion_ranges_map(mut self, map: HashMap<FileId, Vec<DeletionRange>>) -> Self {
281        self.set_deletion_ranges_map(map);
282        self
283    }
284
285    /// Adds or replaces the deletion ranges for a specific source file. Returns previous ranges if any.
286    #[inline]
287    pub fn add_deletion_ranges_entry(
288        &mut self,
289        source: FileId,
290        ranges: Vec<DeletionRange>,
291    ) -> Option<Vec<DeletionRange>> {
292        self.deletion_ranges_map.insert(source, ranges)
293    }
294
295    /// Returns a new instance with the deletion ranges for the source file added or updated.
296    #[inline]
297    #[must_use]
298    pub fn with_added_deletion_ranges_entry(mut self, file: FileId, ranges: Vec<DeletionRange>) -> Self {
299        self.add_deletion_ranges_entry(file, ranges);
300        self
301    }
302
303    /// Extends the deletion ranges for a specific source file.
304    #[inline]
305    pub fn add_deletion_ranges_for_source(&mut self, file: FileId, ranges: impl IntoIterator<Item = (usize, usize)>) {
306        self.deletion_ranges_map.entry(file).or_default().extend(ranges);
307    }
308
309    /// Returns a new instance with the deletion ranges for the source file extended.
310    #[inline]
311    pub fn with_added_deletion_ranges_for_source(
312        mut self,
313        file: FileId,
314        ranges: impl IntoIterator<Item = (usize, usize)>,
315    ) -> Self {
316        self.add_deletion_ranges_for_source(file, ranges);
317        self
318    }
319
320    /// Clears the deletion ranges map.
321    #[inline]
322    pub fn unset_deletion_ranges_map(&mut self) {
323        self.deletion_ranges_map.clear();
324    }
325
326    /// Returns a new instance with an empty deletion ranges map.
327    #[inline]
328    #[must_use]
329    pub fn without_deletion_ranges_map(mut self) -> Self {
330        self.unset_deletion_ranges_map();
331        self
332    }
333}