Skip to main content

perl_parser/incremental/
incremental_edit.rs

1//! Enhanced edit structure for incremental parsing with text content
2//!
3//! This module provides an extended Edit type that includes the new text
4//! being inserted, enabling efficient incremental parsing with subtree reuse.
5
6use perl_parser_core::position::Position;
7
8/// Validation failures for an [`IncrementalEditSet`] batch.
9#[derive(Debug, Clone, PartialEq, Eq)]
10pub enum IncrementalEditBatchError {
11    /// An edit uses a backward range (`start_byte > old_end_byte`).
12    ///
13    /// `index` is the position of the offending edit in the **original,
14    /// unsorted** order at the time `normalize_and_validate` was called.
15    BackwardRange { index: usize, start_byte: usize, old_end_byte: usize },
16    /// Two normalized edits overlap in the old-source byte space.
17    ///
18    /// `left_index` and `right_index` are positions in the **post-sort**
19    /// order produced by [`IncrementalEditSet::sort_reverse_deterministic`],
20    /// not in the original insertion order.
21    OverlappingEdits { left_index: usize, right_index: usize },
22}
23
24/// Enhanced edit with text content for incremental parsing
25#[derive(Debug, Clone, PartialEq)]
26pub struct IncrementalEdit {
27    /// Start byte offset of the edit
28    pub start_byte: usize,
29    /// End byte offset of the text being replaced (in old source)
30    pub old_end_byte: usize,
31    /// The new text being inserted
32    pub new_text: String,
33    /// Start position (line/column)
34    pub start_position: Position,
35    /// Old end position before edit
36    pub old_end_position: Position,
37}
38
39impl IncrementalEdit {
40    /// Create a new incremental edit
41    pub fn new(start_byte: usize, old_end_byte: usize, new_text: String) -> Self {
42        IncrementalEdit {
43            start_byte,
44            old_end_byte,
45            new_text,
46            start_position: Position::new(start_byte, 0, 0),
47            old_end_position: Position::new(old_end_byte, 0, 0),
48        }
49    }
50
51    /// Create with position information
52    pub fn with_positions(
53        start_byte: usize,
54        old_end_byte: usize,
55        new_text: String,
56        start_position: Position,
57        old_end_position: Position,
58    ) -> Self {
59        IncrementalEdit { start_byte, old_end_byte, new_text, start_position, old_end_position }
60    }
61
62    /// Get the new end byte after applying this edit
63    pub fn new_end_byte(&self) -> usize {
64        self.start_byte + self.new_text.len()
65    }
66
67    /// Calculate the byte shift caused by this edit
68    pub fn byte_shift(&self) -> isize {
69        self.new_text.len() as isize - (self.old_end_byte - self.start_byte) as isize
70    }
71
72    /// Check if this edit overlaps with a byte range
73    pub fn overlaps(&self, start: usize, end: usize) -> bool {
74        self.start_byte < end && self.old_end_byte > start
75    }
76
77    /// Check if this edit is entirely before a position
78    pub fn is_before(&self, pos: usize) -> bool {
79        self.old_end_byte <= pos
80    }
81
82    /// Check if this edit is entirely after a position
83    pub fn is_after(&self, pos: usize) -> bool {
84        self.start_byte >= pos
85    }
86}
87
88/// Collection of incremental edits
89#[derive(Debug, Clone, Default)]
90pub struct IncrementalEditSet {
91    pub edits: Vec<IncrementalEdit>,
92}
93
94impl IncrementalEditSet {
95    /// Create a new empty edit set
96    pub fn new() -> Self {
97        IncrementalEditSet { edits: Vec::new() }
98    }
99
100    /// Add an edit to the set
101    pub fn add(&mut self, edit: IncrementalEdit) {
102        self.edits.push(edit);
103    }
104
105    /// Normalize edit ordering for deterministic reverse-application and validate batch shape.
106    ///
107    /// Normalization behavior:
108    /// - removes obvious no-op edits when `filter_no_ops` is `true`
109    /// - sorts edits in deterministic reverse-application order (highest byte offset first)
110    ///
111    /// Validation behavior:
112    /// - rejects backward ranges (`start_byte > old_end_byte`)
113    /// - rejects overlaps after normalization unless `allow_overlaps` is `true`
114    pub fn normalize_and_validate(
115        &mut self,
116        allow_overlaps: bool,
117        filter_no_ops: bool,
118    ) -> Result<(), IncrementalEditBatchError> {
119        for (index, edit) in self.edits.iter().enumerate() {
120            if edit.start_byte > edit.old_end_byte {
121                return Err(IncrementalEditBatchError::BackwardRange {
122                    index,
123                    start_byte: edit.start_byte,
124                    old_end_byte: edit.old_end_byte,
125                });
126            }
127        }
128
129        if filter_no_ops {
130            self.edits.retain(|edit| !edit.is_obvious_no_op());
131        }
132
133        self.sort_reverse_deterministic();
134
135        if !allow_overlaps {
136            for left in 0..self.edits.len() {
137                for right in (left + 1)..self.edits.len() {
138                    if self.edits[left]
139                        .overlaps(self.edits[right].start_byte, self.edits[right].old_end_byte)
140                    {
141                        return Err(IncrementalEditBatchError::OverlappingEdits {
142                            left_index: left,
143                            right_index: right,
144                        });
145                    }
146                }
147            }
148        }
149
150        Ok(())
151    }
152
153    /// Sort edits by position (for correct application order)
154    pub fn sort(&mut self) {
155        self.edits.sort_by_key(|e| e.start_byte);
156    }
157
158    /// Sort edits in reverse order (for applying from end to start)
159    pub fn sort_reverse(&mut self) {
160        self.edits.sort_by_key(|e| std::cmp::Reverse(e.start_byte));
161    }
162
163    /// Sort edits in a deterministic reverse-application order.
164    ///
165    /// This keeps behavior stable even when multiple edits share a start byte.
166    pub fn sort_reverse_deterministic(&mut self) {
167        self.edits.sort_by(|left, right| {
168            right
169                .start_byte
170                .cmp(&left.start_byte)
171                .then_with(|| right.old_end_byte.cmp(&left.old_end_byte))
172                .then_with(|| left.new_text.cmp(&right.new_text))
173        });
174    }
175
176    /// Check if the edit set is empty
177    pub fn is_empty(&self) -> bool {
178        self.edits.is_empty()
179    }
180
181    /// Get the total byte shift for all edits
182    pub fn total_byte_shift(&self) -> isize {
183        self.edits.iter().map(|e| e.byte_shift()).sum()
184    }
185
186    /// Apply edits to a string
187    pub fn apply_to_string(&self, source: &str) -> String {
188        if self.edits.is_empty() {
189            return source.to_string();
190        }
191
192        let Some(normalized_edits) = self.normalize_for_source(source) else {
193            return source.to_string();
194        };
195
196        let mut result = source.to_string();
197        for edit in &normalized_edits {
198            result.replace_range(edit.start_byte..edit.old_end_byte, &edit.new_text);
199        }
200
201        result
202    }
203
204    /// Validate and normalize edits for deterministic reverse-order application.
205    ///
206    /// Returns `None` when any edit is not safely mappable for the given source
207    /// (backwards range, out-of-bounds range, or non-UTF-8 boundaries), or when
208    /// non-empty ranges overlap.
209    pub fn normalize_for_source(&self, source: &str) -> Option<Vec<IncrementalEdit>> {
210        let mut indexed = Vec::with_capacity(self.edits.len());
211        for (index, edit) in self.edits.iter().enumerate() {
212            if !Self::is_edit_mappable(source, edit) {
213                return None;
214            }
215            indexed.push((index, edit.clone()));
216        }
217
218        indexed.sort_by(|(_, left), (_, right)| {
219            right
220                .start_byte
221                .cmp(&left.start_byte)
222                .then_with(|| right.old_end_byte.cmp(&left.old_end_byte))
223        });
224
225        let mut previous_non_empty: Option<&IncrementalEdit> = None;
226        for (_, edit) in &indexed {
227            if edit.start_byte == edit.old_end_byte {
228                continue;
229            }
230
231            if let Some(previous) = previous_non_empty {
232                if edit.old_end_byte > previous.start_byte {
233                    return None;
234                }
235            }
236            previous_non_empty = Some(edit);
237        }
238
239        Some(indexed.into_iter().map(|(_, edit)| edit).collect())
240    }
241
242    fn is_edit_mappable(source: &str, edit: &IncrementalEdit) -> bool {
243        if edit.start_byte > edit.old_end_byte {
244            return false;
245        }
246        if edit.old_end_byte > source.len() {
247            return false;
248        }
249        source.is_char_boundary(edit.start_byte) && source.is_char_boundary(edit.old_end_byte)
250    }
251}
252
253impl IncrementalEdit {
254    fn is_obvious_no_op(&self) -> bool {
255        self.start_byte == self.old_end_byte && self.new_text.is_empty()
256    }
257}
258
259#[cfg(test)]
260mod tests {
261    use super::*;
262
263    #[test]
264    fn test_incremental_edit_basic() {
265        let edit = IncrementalEdit::new(5, 10, "hello".to_string());
266        assert_eq!(edit.new_end_byte(), 10);
267        assert_eq!(edit.byte_shift(), 0);
268    }
269
270    #[test]
271    fn test_incremental_edit_insertion() {
272        let edit = IncrementalEdit::new(5, 5, "inserted".to_string());
273        assert_eq!(edit.new_end_byte(), 13);
274        assert_eq!(edit.byte_shift(), 8);
275    }
276
277    #[test]
278    fn test_incremental_edit_deletion() {
279        let edit = IncrementalEdit::new(5, 15, "".to_string());
280        assert_eq!(edit.new_end_byte(), 5);
281        assert_eq!(edit.byte_shift(), -10);
282    }
283
284    #[test]
285    fn test_incremental_edit_replacement() {
286        let edit = IncrementalEdit::new(5, 10, "replaced".to_string());
287        assert_eq!(edit.new_end_byte(), 13);
288        assert_eq!(edit.byte_shift(), 3);
289    }
290
291    #[test]
292    fn test_edit_set_apply() {
293        let mut edits = IncrementalEditSet::new();
294        edits.add(IncrementalEdit::new(0, 5, "Hello".to_string()));
295        edits.add(IncrementalEdit::new(6, 11, "Perl".to_string()));
296
297        let source = "hello world";
298        let result = edits.apply_to_string(source);
299        assert_eq!(result, "Hello Perl");
300    }
301
302    #[test]
303    fn test_apply_to_string_rejects_invalid_char_boundary_edits() {
304        let mut edits = IncrementalEditSet::new();
305        edits.add(IncrementalEdit::new(1, 2, "x".to_string()));
306
307        let source = "éa";
308        let result = edits.apply_to_string(source);
309        assert_eq!(result, source);
310    }
311
312    #[test]
313    fn test_apply_to_string_rejects_overlapping_edits() {
314        let mut edits = IncrementalEditSet::new();
315        edits.add(IncrementalEdit::new(1, 4, "x".to_string()));
316        edits.add(IncrementalEdit::new(3, 5, "y".to_string()));
317
318        let source = "abcdef";
319        let result = edits.apply_to_string(source);
320        assert_eq!(result, source);
321    }
322
323    #[test]
324    fn test_normalize_for_source_rejects_backwards_ranges() {
325        let mut edits = IncrementalEditSet::new();
326        edits.add(IncrementalEdit::new(4, 2, "x".to_string()));
327
328        assert!(edits.normalize_for_source("abcdef").is_none());
329    }
330
331    #[test]
332    fn test_normalize_for_source_rejects_overlapping_non_empty_ranges() {
333        let mut edits = IncrementalEditSet::new();
334        edits.add(IncrementalEdit::new(1, 4, "x".to_string()));
335        edits.add(IncrementalEdit::new(3, 5, "y".to_string()));
336
337        assert!(edits.normalize_for_source("abcdef").is_none());
338    }
339
340    #[test]
341    fn test_normalize_for_source_preserves_zero_width_insertions() {
342        let mut edits = IncrementalEditSet::new();
343        edits.add(IncrementalEdit::new(2, 2, "X".to_string()));
344        edits.add(IncrementalEdit::new(2, 2, "Y".to_string()));
345
346        let normalized = edits.normalize_for_source("abcd");
347        assert!(normalized.is_some());
348        if let Some(normalized) = normalized {
349            assert_eq!(normalized.len(), 2);
350            assert_eq!(normalized[0].start_byte, 2);
351            assert_eq!(normalized[1].start_byte, 2);
352        }
353    }
354}