Skip to main content

perl_parser/incremental/
incremental_edit.rs

1//! Enhanced edit structure for incremental parsing with text content
2//!
3//! This module provides an extended Edit type that includes the new text
4//! being inserted, enabling efficient incremental parsing with subtree reuse.
5
6use perl_parser_core::position::Position;
7
8/// Validation failures for an [`IncrementalEditSet`] batch.
9#[derive(Debug, Clone, PartialEq, Eq)]
10pub enum IncrementalEditBatchError {
11    /// An edit uses a backward range (`start_byte > old_end_byte`).
12    ///
13    /// `index` is the position of the offending edit in the **original,
14    /// unsorted** order at the time `normalize_and_validate` was called.
15    BackwardRange { index: usize, start_byte: usize, old_end_byte: usize },
16    /// Two normalized edits overlap in the old-source byte space.
17    ///
18    /// `left_index` and `right_index` are positions in the **post-sort**
19    /// order produced by [`IncrementalEditSet::sort_reverse_deterministic`],
20    /// not in the original insertion order.
21    OverlappingEdits { left_index: usize, right_index: usize },
22}
23
24/// Enhanced edit with text content for incremental parsing
25#[derive(Debug, Clone, PartialEq)]
26pub struct IncrementalEdit {
27    /// Start byte offset of the edit
28    pub start_byte: usize,
29    /// End byte offset of the text being replaced (in old source)
30    pub old_end_byte: usize,
31    /// The new text being inserted
32    pub new_text: String,
33    /// Start position (line/column)
34    pub start_position: Position,
35    /// Old end position before edit
36    pub old_end_position: Position,
37}
38
39impl IncrementalEdit {
40    /// Create a new incremental edit
41    pub fn new(start_byte: usize, old_end_byte: usize, new_text: String) -> Self {
42        IncrementalEdit {
43            start_byte,
44            old_end_byte,
45            new_text,
46            start_position: Position::new(start_byte, 0, 0),
47            old_end_position: Position::new(old_end_byte, 0, 0),
48        }
49    }
50
51    /// Create with position information
52    pub fn with_positions(
53        start_byte: usize,
54        old_end_byte: usize,
55        new_text: String,
56        start_position: Position,
57        old_end_position: Position,
58    ) -> Self {
59        IncrementalEdit { start_byte, old_end_byte, new_text, start_position, old_end_position }
60    }
61
62    /// Get the new end byte after applying this edit
63    pub fn new_end_byte(&self) -> usize {
64        self.start_byte + self.new_text.len()
65    }
66
67    /// Calculate the byte shift caused by this edit
68    pub fn byte_shift(&self) -> isize {
69        self.new_text.len() as isize - (self.old_end_byte - self.start_byte) as isize
70    }
71
72    /// Check if this edit overlaps with a byte range
73    pub fn overlaps(&self, start: usize, end: usize) -> bool {
74        self.start_byte < end && self.old_end_byte > start
75    }
76
77    /// Check if this edit is entirely before a position
78    pub fn is_before(&self, pos: usize) -> bool {
79        self.old_end_byte <= pos
80    }
81
82    /// Check if this edit is entirely after a position
83    pub fn is_after(&self, pos: usize) -> bool {
84        self.start_byte >= pos
85    }
86}
87
88/// Collection of incremental edits
89#[derive(Debug, Clone, Default)]
90pub struct IncrementalEditSet {
91    pub edits: Vec<IncrementalEdit>,
92}
93
94impl IncrementalEditSet {
95    /// Create a new empty edit set
96    pub fn new() -> Self {
97        IncrementalEditSet { edits: Vec::new() }
98    }
99
100    /// Add an edit to the set
101    pub fn add(&mut self, edit: IncrementalEdit) {
102        self.edits.push(edit);
103    }
104
105    /// Normalize edit ordering for deterministic reverse-application and validate batch shape.
106    ///
107    /// Normalization behavior:
108    /// - removes obvious no-op edits when `filter_no_ops` is `true`
109    /// - sorts edits in deterministic reverse-application order (highest byte offset first)
110    ///
111    /// Validation behavior:
112    /// - rejects backward ranges (`start_byte > old_end_byte`)
113    /// - rejects overlaps after normalization unless `allow_overlaps` is `true`
114    pub fn normalize_and_validate(
115        &mut self,
116        allow_overlaps: bool,
117        filter_no_ops: bool,
118    ) -> Result<(), IncrementalEditBatchError> {
119        for (index, edit) in self.edits.iter().enumerate() {
120            if edit.start_byte > edit.old_end_byte {
121                return Err(IncrementalEditBatchError::BackwardRange {
122                    index,
123                    start_byte: edit.start_byte,
124                    old_end_byte: edit.old_end_byte,
125                });
126            }
127        }
128
129        if filter_no_ops {
130            self.edits.retain(|edit| !edit.is_obvious_no_op());
131        }
132
133        self.sort_reverse_deterministic();
134
135        if !allow_overlaps {
136            for left in 0..self.edits.len() {
137                for right in (left + 1)..self.edits.len() {
138                    if self.edits[left]
139                        .overlaps(self.edits[right].start_byte, self.edits[right].old_end_byte)
140                    {
141                        return Err(IncrementalEditBatchError::OverlappingEdits {
142                            left_index: left,
143                            right_index: right,
144                        });
145                    }
146                }
147            }
148        }
149
150        Ok(())
151    }
152
153    /// Sort edits by position (for correct application order)
154    pub fn sort(&mut self) {
155        self.edits.sort_by_key(|e| e.start_byte);
156    }
157
158    /// Sort edits in reverse order (for applying from end to start)
159    pub fn sort_reverse(&mut self) {
160        self.edits.sort_by_key(|e| std::cmp::Reverse(e.start_byte));
161    }
162
163    /// Sort edits in a deterministic reverse-application order.
164    ///
165    /// This keeps behavior stable even when multiple edits share a start byte.
166    pub fn sort_reverse_deterministic(&mut self) {
167        self.edits.sort_by(|left, right| {
168            right
169                .start_byte
170                .cmp(&left.start_byte)
171                .then_with(|| right.old_end_byte.cmp(&left.old_end_byte))
172                .then_with(|| left.new_text.cmp(&right.new_text))
173        });
174    }
175
176    /// Check if the edit set is empty
177    pub fn is_empty(&self) -> bool {
178        self.edits.is_empty()
179    }
180
181    /// Get the total byte shift for all edits
182    pub fn total_byte_shift(&self) -> isize {
183        self.edits.iter().map(|e| e.byte_shift()).sum()
184    }
185
186    /// Apply edits to a string using deterministic fallback semantics.
187    ///
188    /// This is intentionally more tolerant than [`Self::normalize_for_source`]:
189    /// the incremental fast path rejects overlapping or unmappable batches,
190    /// while this fallback applies every edit that still maps safely in reverse
191    /// byte order and skips edits that cannot be applied without panicking.
192    pub fn apply_to_string(&self, source: &str) -> String {
193        if self.edits.is_empty() {
194            return source.to_string();
195        }
196
197        let mut edits = self.edits.clone();
198        edits.sort_by(|left, right| {
199            right
200                .start_byte
201                .cmp(&left.start_byte)
202                .then_with(|| right.old_end_byte.cmp(&left.old_end_byte))
203                .then_with(|| left.new_text.cmp(&right.new_text))
204        });
205
206        let mut result = source.to_string();
207        for edit in &edits {
208            if !Self::is_edit_mappable(&result, edit) {
209                continue;
210            }
211            result.replace_range(edit.start_byte..edit.old_end_byte, &edit.new_text);
212        }
213
214        result
215    }
216
217    /// Validate and normalize edits for deterministic reverse-order application.
218    ///
219    /// Returns `None` when any edit is not safely mappable for the given source
220    /// (backwards range, out-of-bounds range, or non-UTF-8 boundaries), or when
221    /// non-empty ranges overlap.
222    pub fn normalize_for_source(&self, source: &str) -> Option<Vec<IncrementalEdit>> {
223        let mut indexed = Vec::with_capacity(self.edits.len());
224        for (index, edit) in self.edits.iter().enumerate() {
225            if !Self::is_edit_mappable(source, edit) {
226                return None;
227            }
228            indexed.push((index, edit.clone()));
229        }
230
231        indexed.sort_by(|(_, left), (_, right)| {
232            right
233                .start_byte
234                .cmp(&left.start_byte)
235                .then_with(|| right.old_end_byte.cmp(&left.old_end_byte))
236        });
237
238        let mut previous_non_empty: Option<&IncrementalEdit> = None;
239        for (_, edit) in &indexed {
240            if edit.start_byte == edit.old_end_byte {
241                continue;
242            }
243
244            if let Some(previous) = previous_non_empty {
245                if edit.old_end_byte > previous.start_byte {
246                    return None;
247                }
248            }
249            previous_non_empty = Some(edit);
250        }
251
252        Some(indexed.into_iter().map(|(_, edit)| edit).collect())
253    }
254
255    fn is_edit_mappable(source: &str, edit: &IncrementalEdit) -> bool {
256        if edit.start_byte > edit.old_end_byte {
257            return false;
258        }
259        if edit.old_end_byte > source.len() {
260            return false;
261        }
262        source.is_char_boundary(edit.start_byte) && source.is_char_boundary(edit.old_end_byte)
263    }
264}
265
266impl IncrementalEdit {
267    fn is_obvious_no_op(&self) -> bool {
268        self.start_byte == self.old_end_byte && self.new_text.is_empty()
269    }
270}
271
272#[cfg(test)]
273mod tests {
274    use super::*;
275
276    #[test]
277    fn test_incremental_edit_basic() {
278        let edit = IncrementalEdit::new(5, 10, "hello".to_string());
279        assert_eq!(edit.new_end_byte(), 10);
280        assert_eq!(edit.byte_shift(), 0);
281    }
282
283    #[test]
284    fn test_incremental_edit_insertion() {
285        let edit = IncrementalEdit::new(5, 5, "inserted".to_string());
286        assert_eq!(edit.new_end_byte(), 13);
287        assert_eq!(edit.byte_shift(), 8);
288    }
289
290    #[test]
291    fn test_incremental_edit_deletion() {
292        let edit = IncrementalEdit::new(5, 15, "".to_string());
293        assert_eq!(edit.new_end_byte(), 5);
294        assert_eq!(edit.byte_shift(), -10);
295    }
296
297    #[test]
298    fn test_incremental_edit_replacement() {
299        let edit = IncrementalEdit::new(5, 10, "replaced".to_string());
300        assert_eq!(edit.new_end_byte(), 13);
301        assert_eq!(edit.byte_shift(), 3);
302    }
303
304    #[test]
305    fn test_edit_set_apply() {
306        let mut edits = IncrementalEditSet::new();
307        edits.add(IncrementalEdit::new(0, 5, "Hello".to_string()));
308        edits.add(IncrementalEdit::new(6, 11, "Perl".to_string()));
309
310        let source = "hello world";
311        let result = edits.apply_to_string(source);
312        assert_eq!(result, "Hello Perl");
313    }
314
315    #[test]
316    fn test_apply_to_string_rejects_invalid_char_boundary_edits() {
317        let mut edits = IncrementalEditSet::new();
318        edits.add(IncrementalEdit::new(1, 2, "x".to_string()));
319
320        let source = "éa";
321        let result = edits.apply_to_string(source);
322        assert_eq!(result, source);
323    }
324
325    #[test]
326    fn test_apply_to_string_applies_overlapping_edits_deterministically() {
327        let mut edits = IncrementalEditSet::new();
328        edits.add(IncrementalEdit::new(1, 4, "x".to_string()));
329        edits.add(IncrementalEdit::new(3, 5, "y".to_string()));
330
331        let source = "abcdef";
332        let result = edits.apply_to_string(source);
333        assert_eq!(result, "axf");
334    }
335
336    #[test]
337    fn test_normalize_for_source_rejects_backwards_ranges() {
338        let mut edits = IncrementalEditSet::new();
339        edits.add(IncrementalEdit::new(4, 2, "x".to_string()));
340
341        assert!(edits.normalize_for_source("abcdef").is_none());
342    }
343
344    #[test]
345    fn test_normalize_for_source_rejects_overlapping_non_empty_ranges() {
346        let mut edits = IncrementalEditSet::new();
347        edits.add(IncrementalEdit::new(1, 4, "x".to_string()));
348        edits.add(IncrementalEdit::new(3, 5, "y".to_string()));
349
350        assert!(edits.normalize_for_source("abcdef").is_none());
351    }
352
353    #[test]
354    fn test_normalize_for_source_preserves_zero_width_insertions() {
355        let mut edits = IncrementalEditSet::new();
356        edits.add(IncrementalEdit::new(2, 2, "X".to_string()));
357        edits.add(IncrementalEdit::new(2, 2, "Y".to_string()));
358
359        let normalized = edits.normalize_for_source("abcd");
360        assert!(normalized.is_some());
361        if let Some(normalized) = normalized {
362            assert_eq!(normalized.len(), 2);
363            assert_eq!(normalized[0].start_byte, 2);
364            assert_eq!(normalized[1].start_byte, 2);
365        }
366    }
367}