mago_codex/
diff.rs

1use ahash::HashMap;
2use ahash::HashSet;
3use serde::Deserialize;
4use serde::Serialize;
5
6use mago_database::file::FileId;
7
8use crate::symbol::SymbolIdentifier;
9
/// One hunk of a text diff, carrying both position and offset information.
///
/// Tuple layout: `(old_start, old_length, line_offset, column_offset)`
///
/// 1. `old_start`: byte offset at which the changed region begins in the old version.
/// 2. `old_length`: size of the changed region, in bytes.
/// 3. `line_offset`: shift in line number, computed as `new_line - old_line`.
/// 4. `column_offset`: shift in column number, computed as `new_column - old_column`.
pub type DiffHunk = (usize, usize, isize, isize);
18
/// A span of code that was deleted.
///
/// Tuple layout: `(start_offset, end_offset)`
///
/// 1. `start_offset`: byte offset at which the deletion starts.
/// 2. `end_offset`: byte offset at which the deletion ends.
pub type DeletionRange = (usize, usize);
25
26/// Represents the differences between two states of a codebase, typically used for incremental analysis.
27///
28/// This structure uses a single fingerprint hash per symbol to determine changes. Any change to a symbol
29/// (signature, body, modifiers, attributes) produces a different hash, triggering re-analysis.
30///
31/// Provides a comprehensive API for modification and querying following established conventions.
32#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
33pub struct CodebaseDiff {
34    /// Set of `(Symbol, Member)` pairs whose fingerprint hash is UNCHANGED.
35    /// These symbols can be safely skipped during re-analysis.
36    /// Member is empty for top-level symbols.
37    keep: HashSet<SymbolIdentifier>,
38
39    /// Set of `(Symbol, Member)` pairs that are new, deleted, or have a different fingerprint hash.
40    /// These symbols MUST be re-analyzed.
41    /// Member is empty for top-level symbols.
42    changed: HashSet<SymbolIdentifier>,
43
44    /// Map from source file identifier to a vector of text diff hunks.
45    /// Used for mapping issue positions between old and new code.
46    diff_map: HashMap<FileId, Vec<DiffHunk>>,
47
48    /// Map from source file identifier to a vector of deleted code ranges.
49    /// Used for filtering out issues in deleted code regions.
50    deletion_ranges_map: HashMap<FileId, Vec<DeletionRange>>,
51}
52
53impl CodebaseDiff {
54    #[inline]
55    pub fn new() -> Self {
56        Self::default()
57    }
58
59    /// Merges changes from another `CodebaseDiff` into this one.
60    #[inline]
61    pub fn extend(&mut self, other: Self) {
62        self.keep.extend(other.keep);
63        self.changed.extend(other.changed);
64        for (source, diffs) in other.diff_map {
65            self.diff_map.entry(source).or_default().extend(diffs);
66        }
67        for (source, ranges) in other.deletion_ranges_map {
68            self.deletion_ranges_map.entry(source).or_default().extend(ranges);
69        }
70    }
71
72    /// Returns a reference to the set of symbols/members to keep unchanged.
73    #[inline]
74    pub fn get_keep(&self) -> &HashSet<SymbolIdentifier> {
75        &self.keep
76    }
77
78    /// Returns a reference to the set of changed symbols/members.
79    #[inline]
80    pub fn get_changed(&self) -> &HashSet<SymbolIdentifier> {
81        &self.changed
82    }
83
84    /// Returns a reference to the map of source files to text diff hunks.
85    #[inline]
86    pub fn get_diff_map(&self) -> &HashMap<FileId, Vec<DiffHunk>> {
87        &self.diff_map
88    }
89
90    /// Returns a reference to the map of source files to deletion ranges.
91    #[inline]
92    pub fn get_deletion_ranges_map(&self) -> &HashMap<FileId, Vec<DeletionRange>> {
93        &self.deletion_ranges_map
94    }
95
96    /// Sets the 'keep' set, replacing the existing one.
97    #[inline]
98    pub fn set_keep(&mut self, keep_set: impl IntoIterator<Item = SymbolIdentifier>) {
99        self.keep = keep_set.into_iter().collect();
100    }
101
102    /// Returns a new instance with the 'keep' set replaced.
103    #[inline]
104    pub fn with_keep(mut self, keep_set: impl IntoIterator<Item = SymbolIdentifier>) -> Self {
105        self.set_keep(keep_set);
106        self
107    }
108
109    /// Adds a single entry to the 'keep' set. Returns `true` if the entry was not already present.
110    #[inline]
111    pub fn add_keep_entry(&mut self, entry: SymbolIdentifier) -> bool {
112        self.keep.insert(entry)
113    }
114
115    /// Returns a new instance with the entry added to the 'keep' set.
116    #[inline]
117    pub fn with_added_keep_entry(mut self, entry: SymbolIdentifier) -> Self {
118        self.add_keep_entry(entry);
119        self
120    }
121
122    /// Adds multiple entries to the 'keep' set.
123    #[inline]
124    pub fn add_keep_entries(&mut self, entries: impl IntoIterator<Item = SymbolIdentifier>) {
125        self.keep.extend(entries);
126    }
127
128    /// Returns a new instance with multiple entries added to the 'keep' set.
129    #[inline]
130    pub fn with_added_keep_entries(mut self, entries: impl IntoIterator<Item = SymbolIdentifier>) -> Self {
131        self.add_keep_entries(entries);
132        self
133    }
134
135    /// Clears the 'keep' set.
136    #[inline]
137    pub fn unset_keep(&mut self) {
138        self.keep.clear();
139    }
140
141    /// Returns a new instance with an empty 'keep' set.
142    #[inline]
143    pub fn without_keep(mut self) -> Self {
144        self.unset_keep();
145        self
146    }
147
148    /// Sets the 'changed' set, replacing the existing one.
149    #[inline]
150    pub fn set_changed(&mut self, change_set: impl IntoIterator<Item = SymbolIdentifier>) {
151        self.changed = change_set.into_iter().collect();
152    }
153
154    /// Returns a new instance with the 'changed' set replaced.
155    #[inline]
156    pub fn with_changed(mut self, change_set: impl IntoIterator<Item = SymbolIdentifier>) -> Self {
157        self.set_changed(change_set);
158        self
159    }
160
161    /// Adds a single entry to the 'changed' set. Returns `true` if the entry was not already present.
162    #[inline]
163    pub fn add_changed_entry(&mut self, entry: SymbolIdentifier) -> bool {
164        self.changed.insert(entry)
165    }
166
167    /// Checks if the 'changed' set contains a specific entry.
168    #[inline]
169    pub fn contains_changed_entry(&self, entry: &SymbolIdentifier) -> bool {
170        self.changed.contains(entry)
171    }
172
173    /// Returns a new instance with the entry added to the 'changed' set.
174    #[inline]
175    pub fn with_added_changed_entry(mut self, entry: SymbolIdentifier) -> Self {
176        self.add_changed_entry(entry);
177        self
178    }
179
180    /// Adds multiple entries to the 'changed' set.
181    #[inline]
182    pub fn add_changed_entries(&mut self, entries: impl IntoIterator<Item = SymbolIdentifier>) {
183        self.changed.extend(entries);
184    }
185
186    /// Returns a new instance with multiple entries added to the 'changed' set.
187    #[inline]
188    pub fn with_added_changed_entries(mut self, entries: impl IntoIterator<Item = SymbolIdentifier>) -> Self {
189        self.add_changed_entries(entries);
190        self
191    }
192
193    /// Clears the 'changed' set.
194    #[inline]
195    pub fn unset_changed(&mut self) {
196        self.changed.clear();
197    }
198
199    /// Returns a new instance with an empty 'changed' set.
200    #[inline]
201    pub fn without_changed(mut self) -> Self {
202        self.unset_changed();
203        self
204    }
205
206    /// Sets the diff map, replacing the existing one.
207    #[inline]
208    pub fn set_diff_map(&mut self, map: HashMap<FileId, Vec<DiffHunk>>) {
209        self.diff_map = map;
210    }
211
212    /// Returns a new instance with the diff map replaced.
213    #[inline]
214    pub fn with_diff_map(mut self, map: HashMap<FileId, Vec<DiffHunk>>) -> Self {
215        self.set_diff_map(map);
216        self
217    }
218
219    /// Adds or replaces the diff hunks for a specific source file. Returns previous hunks if any.
220    #[inline]
221    pub fn add_diff_map_entry(&mut self, source: FileId, diffs: Vec<DiffHunk>) -> Option<Vec<DiffHunk>> {
222        self.diff_map.insert(source, diffs)
223    }
224
225    /// Returns a new instance with the diff hunks for the source file added or updated.
226    #[inline]
227    pub fn with_added_diff_map_entry(mut self, source: FileId, diffs: Vec<DiffHunk>) -> Self {
228        self.add_diff_map_entry(source, diffs);
229        self
230    }
231
232    /// Extends the diff hunks for a specific source file.
233    #[inline]
234    pub fn add_diffs_for_source(&mut self, source: FileId, diffs: impl IntoIterator<Item = DiffHunk>) {
235        self.diff_map.entry(source).or_default().extend(diffs);
236    }
237
238    /// Returns a new instance with the diff hunks for the source file extended.
239    #[inline]
240    pub fn with_added_diffs_for_source(mut self, source: FileId, diffs: impl IntoIterator<Item = DiffHunk>) -> Self {
241        self.add_diffs_for_source(source, diffs);
242        self
243    }
244
245    /// Clears the diff map.
246    #[inline]
247    pub fn unset_diff_map(&mut self) {
248        self.diff_map.clear();
249    }
250
251    /// Returns a new instance with an empty diff map.
252    #[inline]
253    pub fn without_diff_map(mut self) -> Self {
254        self.unset_diff_map();
255        self
256    }
257
258    /// Sets the deletion ranges map, replacing the existing one.
259    #[inline]
260    pub fn set_deletion_ranges_map(&mut self, map: HashMap<FileId, Vec<DeletionRange>>) {
261        self.deletion_ranges_map = map;
262    }
263
264    /// Returns a new instance with the deletion ranges map replaced.
265    #[inline]
266    pub fn with_deletion_ranges_map(mut self, map: HashMap<FileId, Vec<DeletionRange>>) -> Self {
267        self.set_deletion_ranges_map(map);
268        self
269    }
270
271    /// Adds or replaces the deletion ranges for a specific source file. Returns previous ranges if any.
272    #[inline]
273    pub fn add_deletion_ranges_entry(
274        &mut self,
275        source: FileId,
276        ranges: Vec<DeletionRange>,
277    ) -> Option<Vec<DeletionRange>> {
278        self.deletion_ranges_map.insert(source, ranges)
279    }
280
281    /// Returns a new instance with the deletion ranges for the source file added or updated.
282    #[inline]
283    pub fn with_added_deletion_ranges_entry(mut self, file: FileId, ranges: Vec<DeletionRange>) -> Self {
284        self.add_deletion_ranges_entry(file, ranges);
285        self
286    }
287
288    /// Extends the deletion ranges for a specific source file.
289    #[inline]
290    pub fn add_deletion_ranges_for_source(&mut self, file: FileId, ranges: impl IntoIterator<Item = (usize, usize)>) {
291        self.deletion_ranges_map.entry(file).or_default().extend(ranges);
292    }
293
294    /// Returns a new instance with the deletion ranges for the source file extended.
295    #[inline]
296    pub fn with_added_deletion_ranges_for_source(
297        mut self,
298        file: FileId,
299        ranges: impl IntoIterator<Item = (usize, usize)>,
300    ) -> Self {
301        self.add_deletion_ranges_for_source(file, ranges);
302        self
303    }
304
305    /// Clears the deletion ranges map.
306    #[inline]
307    pub fn unset_deletion_ranges_map(&mut self) {
308        self.deletion_ranges_map.clear();
309    }
310
311    /// Returns a new instance with an empty deletion ranges map.
312    #[inline]
313    pub fn without_deletion_ranges_map(mut self) -> Self {
314        self.unset_deletion_ranges_map();
315        self
316    }
317}