Skip to main content

perl_parser_core/syntax/
edit.rs

1//! Edit tracking for incremental parsing
2//!
3//! This module provides types and algorithms for tracking edits to source code
4//! and applying them to an existing parse tree.
5
6use perl_position_tracking::{Position, Range};
7
8/// Represents an edit to the source text
9#[derive(Debug, Clone, PartialEq)]
10pub struct Edit {
11    /// Start byte offset of the edit
12    pub start_byte: usize,
13    /// End byte offset of the text being replaced (in old source)
14    pub old_end_byte: usize,
15    /// End byte offset after the edit (in new source)
16    pub new_end_byte: usize,
17    /// Start position (line/column)
18    pub start_position: Position,
19    /// Old end position before edit
20    pub old_end_position: Position,
21    /// New end position after edit
22    pub new_end_position: Position,
23}
24
25impl Edit {
26    /// Create a new edit
27    pub fn new(
28        start_byte: usize,
29        old_end_byte: usize,
30        new_end_byte: usize,
31        start_position: Position,
32        old_end_position: Position,
33        new_end_position: Position,
34    ) -> Self {
35        Edit {
36            start_byte,
37            old_end_byte,
38            new_end_byte,
39            start_position,
40            old_end_position,
41            new_end_position,
42        }
43    }
44
45    /// Calculate the byte shift caused by this edit
46    pub fn byte_shift(&self) -> isize {
47        self.new_end_byte as isize - self.old_end_byte as isize
48    }
49
50    /// Calculate the line shift caused by this edit
51    pub fn line_shift(&self) -> i32 {
52        self.new_end_position.line as i32 - self.old_end_position.line as i32
53    }
54
55    /// Check if a byte position is affected by this edit
56    pub fn affects_byte(&self, byte: usize) -> bool {
57        byte >= self.start_byte
58    }
59
60    /// Check if a range overlaps with this edit
61    pub fn overlaps_range(&self, range: &Range) -> bool {
62        range.start.byte < self.old_end_byte && range.end.byte > self.start_byte
63    }
64
65    /// Apply this edit to a position
66    pub fn apply_to_position(&self, pos: Position) -> Option<Position> {
67        if pos.byte < self.start_byte {
68            // Position is before the edit - unchanged
69            Some(pos)
70        } else if pos.byte >= self.old_end_byte {
71            // Position is after the edit - shift it
72            Some(Position {
73                byte: (pos.byte as isize + self.byte_shift()) as usize,
74                line: (pos.line as i32 + self.line_shift()) as u32,
75                column: if pos.line == self.old_end_position.line {
76                    // Same line as edit end - adjust column
77                    let col_shift =
78                        self.new_end_position.column as i32 - self.old_end_position.column as i32;
79                    (pos.column as i32 + col_shift) as u32
80                } else {
81                    // Different line - column unchanged
82                    pos.column
83                },
84            })
85        } else {
86            // Position is within the edit - invalidate
87            None
88        }
89    }
90
91    /// Apply this edit to a range
92    pub fn apply_to_range(&self, range: &Range) -> Option<Range> {
93        let new_start = self.apply_to_position(range.start)?;
94        let new_end = self.apply_to_position(range.end)?;
95        Some(Range::new(new_start, new_end))
96    }
97}
98
99/// Collection of edits that can be applied together
100#[derive(Debug, Clone, Default)]
101pub struct EditSet {
102    pub(crate) edits: Vec<Edit>,
103}
104
105impl EditSet {
106    /// Create a new empty edit set
107    pub fn new() -> Self {
108        EditSet { edits: Vec::new() }
109    }
110
111    /// Add an edit to the set
112    pub fn add(&mut self, edit: Edit) {
113        // Keep edits sorted by start position
114        let pos = self
115            .edits
116            .iter()
117            .position(|e| e.start_byte > edit.start_byte)
118            .unwrap_or(self.edits.len());
119        self.edits.insert(pos, edit);
120    }
121
122    /// Apply all edits to a position
123    pub fn apply_to_position(&self, mut pos: Position) -> Option<Position> {
124        for edit in &self.edits {
125            pos = edit.apply_to_position(pos)?;
126        }
127        Some(pos)
128    }
129
130    /// Apply all edits to a range
131    pub fn apply_to_range(&self, mut range: Range) -> Option<Range> {
132        for edit in &self.edits {
133            range = edit.apply_to_range(&range)?;
134        }
135        Some(range)
136    }
137
138    /// Returns the number of edits in the set.
139    pub fn len(&self) -> usize {
140        self.edits.len()
141    }
142
143    /// Returns true if the edit set is empty.
144    pub fn is_empty(&self) -> bool {
145        self.edits.is_empty()
146    }
147
148    /// Provides read-only access to the underlying edits.
149    pub fn edits(&self) -> &[Edit] {
150        &self.edits
151    }
152
153    /// Check if a range is affected by any edit
154    pub fn affects_range(&self, range: &Range) -> bool {
155        self.edits.iter().any(|edit| edit.overlaps_range(range))
156    }
157
158    /// Get the total byte shift at a given position
159    pub fn byte_shift_at(&self, byte: usize) -> isize {
160        self.edits
161            .iter()
162            .filter(|edit| edit.old_end_byte <= byte)
163            .map(|edit| edit.byte_shift())
164            .sum()
165    }
166
167    /// Get all ranges affected by the edits
168    pub fn affected_ranges(&self) -> Vec<Range> {
169        self.edits
170            .iter()
171            .map(|edit| {
172                let mut range = Range::new(edit.start_position, edit.old_end_position);
173                if range.is_empty() {
174                    // Pure insertions have an empty old range, but incremental reuse still
175                    // needs a small invalidation window around the insertion boundary.
176                    //
177                    // Use byte-only context here: overlap checks in incremental reuse are
178                    // byte-based, so line/column fields are not consulted.
179                    let start_byte = range.start.byte.saturating_sub(1);
180                    let end_byte = range.end.byte.saturating_add(1);
181                    range = Range::new(
182                        Position::new(start_byte, range.start.line, range.start.column),
183                        Position::new(end_byte, range.end.line, range.end.column),
184                    );
185                }
186                range
187            })
188            .collect()
189    }
190
191    /// Get affected ranges with overlapping and adjacent regions coalesced.
192    ///
193    /// This is useful for incremental parsing workflows that only need the
194    /// minimal set of invalidation windows.
195    pub fn coalesced_affected_ranges(&self) -> Vec<Range> {
196        let mut ranges = self.affected_ranges();
197        if ranges.len() <= 1 {
198            return ranges;
199        }
200
201        ranges.sort_by_key(|range| range.start.byte);
202
203        let mut merged = Vec::with_capacity(ranges.len());
204        let mut current = ranges[0];
205        for range in ranges.into_iter().skip(1) {
206            if range.start.byte <= current.end.byte {
207                if range.end.byte > current.end.byte {
208                    current.end = range.end;
209                }
210            } else {
211                merged.push(current);
212                current = range;
213            }
214        }
215        merged.push(current);
216        merged
217    }
218}
219
220#[cfg(test)]
221mod tests {
222    use super::*;
223    use perl_tdd_support::must_some;
224
225    #[test]
226    fn test_simple_edit() {
227        // Replace "hello" with "goodbye" at position 10
228        let edit = Edit::new(
229            10,
230            15,
231            17,
232            Position::new(10, 2, 5),
233            Position::new(15, 2, 10),
234            Position::new(17, 2, 12),
235        );
236
237        assert_eq!(edit.byte_shift(), 2);
238        assert_eq!(edit.line_shift(), 0);
239
240        // Position before edit - unchanged
241        let pos = Position::new(5, 1, 5);
242        assert_eq!(edit.apply_to_position(pos), Some(pos));
243
244        // Position after edit - shifted
245        let pos = Position::new(20, 2, 15);
246        let new_pos = must_some(edit.apply_to_position(pos));
247        assert_eq!(new_pos.byte, 22);
248        assert_eq!(new_pos.column, 17);
249    }
250
251    #[test]
252    fn test_multiline_edit() {
253        // Replace multiple lines
254        let edit = Edit::new(
255            10,
256            30,
257            20,
258            Position::new(10, 2, 5),
259            Position::new(30, 4, 10),
260            Position::new(20, 2, 15),
261        );
262
263        assert_eq!(edit.byte_shift(), -10);
264        assert_eq!(edit.line_shift(), -2);
265
266        // Position on later line - shifted
267        let pos = Position::new(50, 6, 5);
268        let new_pos = must_some(edit.apply_to_position(pos));
269        assert_eq!(new_pos.byte, 40);
270        assert_eq!(new_pos.line, 4);
271        assert_eq!(new_pos.column, 5);
272    }
273
274    #[test]
275    fn test_edit_set() {
276        let mut edits = EditSet::new();
277
278        // Add two non-overlapping edits
279        edits.add(Edit::new(
280            10,
281            15,
282            17,
283            Position::new(10, 2, 5),
284            Position::new(15, 2, 10),
285            Position::new(17, 2, 12),
286        ));
287
288        edits.add(Edit::new(
289            30,
290            35,
291            40,
292            Position::new(30, 3, 5),
293            Position::new(35, 3, 10),
294            Position::new(40, 3, 15),
295        ));
296
297        // Check cumulative shift
298        assert_eq!(edits.byte_shift_at(50), 7); // +2 from first, +5 from second
299    }
300
301    #[test]
302    fn test_coalesced_affected_ranges() {
303        let mut edits = EditSet::new();
304        edits.add(Edit::new(
305            10,
306            15,
307            17,
308            Position::new(10, 1, 0),
309            Position::new(15, 1, 5),
310            Position::new(17, 1, 7),
311        ));
312        edits.add(Edit::new(
313            14,
314            20,
315            21,
316            Position::new(14, 1, 4),
317            Position::new(20, 1, 10),
318            Position::new(21, 1, 11),
319        ));
320        edits.add(Edit::new(
321            30,
322            35,
323            36,
324            Position::new(30, 2, 0),
325            Position::new(35, 2, 5),
326            Position::new(36, 2, 6),
327        ));
328
329        let ranges = edits.coalesced_affected_ranges();
330        assert_eq!(ranges.len(), 2);
331        assert_eq!(ranges[0].start.byte, 10);
332        assert_eq!(ranges[0].end.byte, 20);
333        assert_eq!(ranges[1].start.byte, 30);
334        assert_eq!(ranges[1].end.byte, 35);
335    }
336
337    #[test]
338    fn test_affected_ranges_expands_zero_length_insertions() {
339        let mut edits = EditSet::new();
340        edits.add(Edit::new(
341            10,
342            10,
343            12,
344            Position::new(10, 1, 2),
345            Position::new(10, 1, 2),
346            Position::new(12, 1, 4),
347        ));
348
349        let ranges = edits.affected_ranges();
350        assert_eq!(ranges.len(), 1);
351        assert_eq!(ranges[0].start.byte, 9);
352        assert_eq!(ranges[0].end.byte, 11);
353    }
354
355    #[test]
356    fn test_affected_ranges_expands_zero_length_insertions_at_start() {
357        let mut edits = EditSet::new();
358        edits.add(Edit::new(
359            0,
360            0,
361            1,
362            Position::new(0, 0, 0),
363            Position::new(0, 0, 0),
364            Position::new(1, 0, 1),
365        ));
366
367        let ranges = edits.affected_ranges();
368        assert_eq!(ranges.len(), 1);
369        assert_eq!(ranges[0].start.byte, 0);
370        assert_eq!(ranges[0].end.byte, 1);
371    }
372}