perl_parser/incremental/mod.rs

#![allow(missing_docs)]

use anyhow::Result;
use lsp_types::Diagnostic;
use lsp_types::TextDocumentContentChangeEvent;
pub use perl_line_index::LineIndex;
use ropey::Rope;
use std::ops::Range;

use perl_lexer::{LexerMode, PerlLexer, Token, TokenType};
use perl_parser_core::ast::{Node, NodeKind, SourceLocation};
use perl_parser_core::parser::Parser;

pub mod incremental_advanced_reuse;
#[cfg(test)]
mod incremental_boundary_regressions;
pub mod incremental_checkpoint;
pub mod incremental_document;
pub mod incremental_edit;
pub mod incremental_handler_v2;
pub mod incremental_integration;
pub mod incremental_simple;
pub mod incremental_v2;

/// Stable restart points to avoid re-lexing the whole document
#[derive(Clone, Copy, Debug)]
pub struct LexCheckpoint {
    pub byte: usize,
    pub mode: LexerMode,
    pub line: usize,
    pub column: usize,
}

/// Scope information at a parse checkpoint
#[derive(Clone, Debug, Default)]
pub struct ScopeSnapshot {
    pub package_name: String,
    pub locals: Vec<String>,
    pub our_vars: Vec<String>,
    pub parent_isa: Vec<String>,
}

/// Parse checkpoint with scope context
#[derive(Clone, Debug)]
pub struct ParseCheckpoint {
    pub byte: usize,
    pub scope_snapshot: ScopeSnapshot,
    pub node_id: usize, // ID of AST node at this point
}

/// Incremental parsing state
#[derive(Clone)]
pub struct IncrementalState {
    pub rope: Rope,
    pub line_index: LineIndex,
    pub lex_checkpoints: Vec<LexCheckpoint>,
    pub parse_checkpoints: Vec<ParseCheckpoint>,
    pub ast: Node,
    pub tokens: Vec<Token>,
    pub source: String,
}
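
// A minimal invariant sketch (illustrative test, not part of the original
// suite): `rope`, `line_index`, and `source` are three views of the same text
// and must stay in sync after construction and after every edit.
#[cfg(test)]
mod state_views_sketch {
    use super::*;

    #[test]
    fn rope_and_source_agree_after_construction() {
        let state = IncrementalState::new("my $x = 1;\n".to_string());
        assert_eq!(state.rope.to_string(), state.source);
        assert_eq!(state.rope.len_bytes(), state.source.len());
    }
}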

impl IncrementalState {
    /// Initialize incremental state by doing a full parse of `source`.
    pub fn new(source: String) -> Self {
        let rope = Rope::from_str(&source);
        let line_index = LineIndex::new(&source);

        // Parse the initial document
        let mut parser = Parser::new(&source);
        let ast = match parser.parse() {
            Ok(ast) => ast,
            Err(e) => Node::new(
                NodeKind::Error {
                    message: e.to_string(),
                    expected: vec![],
                    found: None,
                    partial: None,
                },
                SourceLocation { start: 0, end: source.len() },
            ),
        };

        // Get tokens from lexer
        let mut lexer = PerlLexer::new(&source);
        let mut tokens = Vec::new();
        loop {
            match lexer.next_token() {
                Some(token) => {
                    if token.token_type == TokenType::EOF {
                        break;
                    }
                    tokens.push(token);
                }
                None => break,
            }
        }

        // Create initial checkpoints
        let lex_checkpoints = Self::create_lex_checkpoints(&tokens, &line_index);
        let parse_checkpoints = Self::create_parse_checkpoints(&ast);

        Self { rope, line_index, lex_checkpoints, parse_checkpoints, ast, tokens, source }
    }

    /// Create lexer checkpoints at safe boundaries
    fn create_lex_checkpoints(tokens: &[Token], line_index: &LineIndex) -> Vec<LexCheckpoint> {
        let mut checkpoints =
            vec![LexCheckpoint { byte: 0, mode: LexerMode::ExpectTerm, line: 0, column: 0 }];

        let mut mode = LexerMode::ExpectTerm;

        for token in tokens {
            // Update mode based on token
            mode = match token.token_type {
                TokenType::Semicolon | TokenType::LeftBrace | TokenType::RightBrace => {
                    // Safe boundary - reset to ExpectTerm
                    let (line, column) = line_index.byte_to_position(token.end);
                    checkpoints.push(LexCheckpoint {
                        byte: token.end,
                        mode: LexerMode::ExpectTerm,
                        line,
                        column,
                    });
                    LexerMode::ExpectTerm
                }
                TokenType::Keyword(ref kw) if kw.as_ref() == "sub" || kw.as_ref() == "package" => {
                    let (line, column) = line_index.byte_to_position(token.start);
                    checkpoints.push(LexCheckpoint {
                        byte: token.start,
                        mode: LexerMode::ExpectTerm, // ExpectIdentifier not available
                        line,
                        column,
                    });
                    LexerMode::ExpectTerm // ExpectIdentifier not available
                }
                TokenType::Identifier(_) | TokenType::Number(_) | TokenType::StringLiteral => {
                    LexerMode::ExpectOperator
                }
                TokenType::Operator(_) => LexerMode::ExpectTerm,
                _ => mode,
            };
        }

        checkpoints
    }

    /// Create parse checkpoints at statement boundaries
    fn create_parse_checkpoints(ast: &Node) -> Vec<ParseCheckpoint> {
        let mut checkpoints = vec![];
        let mut scope = ScopeSnapshot::default();

        Self::walk_ast_for_checkpoints(ast, &mut checkpoints, &mut scope, 0);
        checkpoints
    }

    fn walk_ast_for_checkpoints(
        node: &Node,
        checkpoints: &mut Vec<ParseCheckpoint>,
        scope: &mut ScopeSnapshot,
        node_id: usize,
    ) {
        // Process current node
        match &node.kind {
            NodeKind::Package { name, .. } => {
                scope.package_name = name.clone();
                checkpoints.push(ParseCheckpoint {
                    byte: node.location.start,
                    scope_snapshot: scope.clone(),
                    node_id,
                });
            }
            NodeKind::Subroutine { .. } | NodeKind::Block { .. } => {
                checkpoints.push(ParseCheckpoint {
                    byte: node.location.start,
                    scope_snapshot: scope.clone(),
                    node_id,
                });
            }
            NodeKind::VariableDeclaration { variable, .. } => {
                // Extract variable name from the variable node
                if let NodeKind::Variable { name, sigil, .. } = &variable.kind {
                    // Include sigil for proper variable tracking
                    scope.locals.push(format!("{}{}", sigil, name));
                }
            }
            NodeKind::VariableListDeclaration { variables, .. } => {
                // Handle list declarations like my ($x, $y, @z)
                for var in variables {
                    if let NodeKind::Variable { name, sigil, .. } = &var.kind {
                        scope.locals.push(format!("{}{}", sigil, name));
                    }
                }
            }
            _ => {}
        }

        // Recurse into children based on node kind
        match &node.kind {
            NodeKind::Program { statements } => {
                for (i, stmt) in statements.iter().enumerate() {
                    let child_id = node_id.wrapping_mul(101).wrapping_add(i);
                    Self::walk_ast_for_checkpoints(stmt, checkpoints, scope, child_id);
                }
            }
            NodeKind::Block { statements } => {
                // Enter new scope for blocks
                let mut local_scope = scope.clone();
                for (i, stmt) in statements.iter().enumerate() {
                    let child_id = node_id.wrapping_mul(101).wrapping_add(i);
                    Self::walk_ast_for_checkpoints(stmt, checkpoints, &mut local_scope, child_id);
                }
            }
            NodeKind::Subroutine { body, .. } => {
                // Subroutine body is a single node (Block), not Vec<Node>
                let mut local_scope = scope.clone();
                let child_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(body, checkpoints, &mut local_scope, child_id);
            }
            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
                let base_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(condition, checkpoints, scope, base_id);

                // then_branch is Box<Node>, not Option<Box<Node>>
                Self::walk_ast_for_checkpoints(
                    then_branch,
                    checkpoints,
                    scope,
                    base_id.wrapping_add(1),
                );

                // elsif_branches is Vec<(Box<Node>, Box<Node>)>
                for (i, (elsif_cond, elsif_block)) in elsif_branches.iter().enumerate() {
                    let elsif_base = base_id.wrapping_add((i + 2) * 2);
                    Self::walk_ast_for_checkpoints(elsif_cond, checkpoints, scope, elsif_base);
                    Self::walk_ast_for_checkpoints(
                        elsif_block,
                        checkpoints,
                        scope,
                        elsif_base.wrapping_add(1),
                    );
                }
                if let Some(else_br) = else_branch {
                    let else_id = base_id.wrapping_add((elsif_branches.len() + 2) * 2);
                    Self::walk_ast_for_checkpoints(else_br, checkpoints, scope, else_id);
                }
            }
            NodeKind::While { condition, body, .. } => {
                let base_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(condition, checkpoints, scope, base_id);
                // body is Box<Node>, not Option<Box<Node>>
                Self::walk_ast_for_checkpoints(body, checkpoints, scope, base_id.wrapping_add(1));
            }
            NodeKind::For { init, condition, update, body, .. } => {
                let base_id = node_id.wrapping_mul(101);
                let mut offset = 0;
                if let Some(init) = init {
                    Self::walk_ast_for_checkpoints(
                        init,
                        checkpoints,
                        scope,
                        base_id.wrapping_add(offset),
                    );
                    offset += 1;
                }
                if let Some(cond) = condition {
                    Self::walk_ast_for_checkpoints(
                        cond,
                        checkpoints,
                        scope,
                        base_id.wrapping_add(offset),
                    );
                    offset += 1;
                }
                if let Some(upd) = update {
                    Self::walk_ast_for_checkpoints(
                        upd,
                        checkpoints,
                        scope,
                        base_id.wrapping_add(offset),
                    );
                    offset += 1;
                }
                // body is Box<Node>, not Option<Box<Node>>
                Self::walk_ast_for_checkpoints(
                    body,
                    checkpoints,
                    scope,
                    base_id.wrapping_add(offset),
                );
            }
            NodeKind::Binary { left, right, .. } => {
                let base_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(left, checkpoints, scope, base_id);
                Self::walk_ast_for_checkpoints(right, checkpoints, scope, base_id.wrapping_add(1));
            }
            NodeKind::Assignment { lhs, rhs, .. } => {
                let base_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(lhs, checkpoints, scope, base_id);
                Self::walk_ast_for_checkpoints(rhs, checkpoints, scope, base_id.wrapping_add(1));
            }
            NodeKind::VariableDeclaration { initializer, .. } => {
                if let Some(init) = initializer {
                    let child_id = node_id.wrapping_mul(101);
                    Self::walk_ast_for_checkpoints(init, checkpoints, scope, child_id);
                }
            }
            // Add more cases as needed
            _ => {}
        }
    }

    /// Find the nearest lex checkpoint at or before a given byte offset
    pub fn find_lex_checkpoint(&self, byte: usize) -> Option<&LexCheckpoint> {
        self.lex_checkpoints.iter().rev().find(|cp| cp.byte <= byte)
    }

    /// Find the nearest parse checkpoint at or before a given byte offset
    pub fn find_parse_checkpoint(&self, byte: usize) -> Option<&ParseCheckpoint> {
        self.parse_checkpoints.iter().rev().find(|cp| cp.byte <= byte)
    }
}
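
// A minimal usage sketch of checkpoint lookup (illustrative test, not part of
// the original suite): `find_lex_checkpoint` returns the last checkpoint at or
// before the requested byte, so re-lexing can resume from a known mode instead
// of from byte 0.
#[cfg(test)]
mod checkpoint_lookup_sketch {
    use super::*;

    #[test]
    fn lookup_returns_nearest_preceding_checkpoint() {
        let state = IncrementalState::new("my $a = 1;\nmy $b = 2;\n".to_string());
        // Byte 0 is always covered by the initial checkpoint.
        assert_eq!(state.find_lex_checkpoint(0).map(|cp| cp.byte), Some(0));
        // A lookup inside the second statement must resolve to a checkpoint
        // at or before it, never after.
        if let Some(cp) = state.find_lex_checkpoint(15) {
            assert!(cp.byte <= 15);
        }
    }
}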

/// A single text edit, expressed as byte offsets into the old and new text
#[derive(Clone, Debug)]
pub struct Edit {
    pub start_byte: usize,
    pub old_end_byte: usize,
    pub new_end_byte: usize,
    pub new_text: String,
}

impl Edit {
    /// Convert LSP change to Edit
    pub fn from_lsp_change(
        change: &TextDocumentContentChangeEvent,
        line_index: &LineIndex,
        old_text: &str,
    ) -> Option<Self> {
        if let Some(range) = change.range {
            let start_byte = line_index
                .position_to_byte(range.start.line as usize, range.start.character as usize)?;
            let old_end_byte = line_index
                .position_to_byte(range.end.line as usize, range.end.character as usize)?;
            let new_end_byte = start_byte + change.text.len();

            Some(Edit { start_byte, old_end_byte, new_end_byte, new_text: change.text.clone() })
        } else {
            // Full document change
            Some(Edit {
                start_byte: 0,
                old_end_byte: old_text.len(),
                new_end_byte: change.text.len(),
                new_text: change.text.clone(),
            })
        }
    }

    /// Returns the size of text touched by this edit (inserted or deleted).
    ///
    /// This is used for fallback heuristics so that large deletions are treated
    /// the same as large insertions.
    fn touched_bytes(&self) -> usize {
        let replaced_len = self.old_end_byte.saturating_sub(self.start_byte);
        replaced_len.max(self.new_text.len())
    }
}
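
// A minimal sketch (illustrative test, not part of the original suite) of the
// `touched_bytes` symmetry: a pure deletion and a pure insertion of the same
// size cross the fallback thresholds in `apply_edits` identically.
#[cfg(test)]
mod touched_bytes_sketch {
    use super::*;

    #[test]
    fn deletion_and_insertion_touch_the_same_bytes() {
        let deletion =
            Edit { start_byte: 0, old_end_byte: 100, new_end_byte: 0, new_text: String::new() };
        let insertion = Edit {
            start_byte: 0,
            old_end_byte: 0,
            new_end_byte: 100,
            new_text: "x".repeat(100),
        };
        assert_eq!(deletion.touched_bytes(), insertion.touched_bytes());
    }
}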

/// Result of incremental reparse
#[derive(Debug)]
pub struct ReparseResult {
    pub changed_ranges: Vec<Range<usize>>,
    pub diagnostics: Vec<Diagnostic>,
    pub reparsed_bytes: usize,
}

/// Apply edits incrementally
pub fn apply_edits(state: &mut IncrementalState, edits: &[Edit]) -> Result<ReparseResult> {
    // Handle multiple edits by sorting and applying in reverse order
    let mut sorted_edits = edits.to_vec();
    sorted_edits.sort_by_key(|e| e.start_byte);
    sorted_edits.reverse(); // Apply from end to start to avoid offset shifts

    // Check if we should fall back to full reparse
    let total_changed = sorted_edits.iter().map(Edit::touched_bytes).sum::<usize>();

    // Fallback thresholds
    const MAX_EDIT_SIZE: usize = 64 * 1024; // 64KB

    if total_changed > MAX_EDIT_SIZE {
        return full_reparse(state);
    }

    // For MVP, handle single edit with incremental logic
    if sorted_edits.len() == 1 {
        let edit = &sorted_edits[0];

        // Heuristic: if the edit is large (>1KB) or crosses many lines, do a full reparse
        if edit.touched_bytes() > 1024 || edit.new_text.matches('\n').count() > 10 {
            apply_single_edit(state, edit)?;
            return full_reparse(state);
        }

        // Apply the edit with incremental lexing
        let reparsed_range = apply_single_edit(state, edit)?;
        let reparsed_bytes = reparsed_range.end - reparsed_range.start;

        // If we reparsed too much (>20% of the doc), a full parse might be
        // better; for now, trust the incremental result.

        Ok(ReparseResult {
            changed_ranges: vec![reparsed_range],
            diagnostics: vec![],
            reparsed_bytes,
        })
    } else {
        // Multiple edits - apply them in sequence
        for edit in sorted_edits {
            apply_single_edit(state, &edit)?;
        }
        full_reparse(state)
    }
}
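
// A hypothetical caller sketch (the handler name and loop are assumptions, not
// part of this module): an LSP `didChange` handler converts each change event
// against the document state *as of that change* and applies it immediately,
// because LSP changes are expressed sequentially.
#[allow(dead_code)]
fn handle_did_change_sketch(
    state: &mut IncrementalState,
    changes: &[TextDocumentContentChangeEvent],
) -> Result<()> {
    for change in changes {
        // `from_lsp_change` needs the line index and text the change was
        // computed against, i.e. the current (pre-change) state.
        if let Some(edit) = Edit::from_lsp_change(change, &state.line_index, &state.source) {
            apply_edits(state, &[edit])?;
        }
    }
    Ok(())
}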

/// Apply a single edit to the state
fn apply_single_edit(state: &mut IncrementalState, edit: &Edit) -> Result<Range<usize>> {
    // Find checkpoint before edit to resume lexing
    let checkpoint = state
        .find_lex_checkpoint(edit.start_byte)
        .copied()
        .ok_or_else(|| anyhow::anyhow!("No checkpoint found"))?;

    // Apply text edit with safe boundary checking
    let old_end_byte = edit.old_end_byte.min(state.source.len());
    let start_byte = edit.start_byte.min(state.source.len());

    // Compute the byte shift caused by this edit
    let byte_shift: isize = edit.new_text.len() as isize - (old_end_byte - start_byte) as isize;

    let mut new_source = String::with_capacity(
        state.source.len() - (old_end_byte - start_byte) + edit.new_text.len(),
    );
    new_source.push_str(&state.source[..start_byte]);
    new_source.push_str(&edit.new_text);
    new_source.push_str(&state.source[old_end_byte..]);
    state.source = new_source;
    state.rope = Rope::from_str(&state.source);
    state.line_index = LineIndex::new(&state.source);

    // Start lexing from checkpoint
    use perl_lexer::{Checkpointable, LexerCheckpoint, Position};
    let mut lexer = PerlLexer::new(&state.source);
    let mut lex_cp = LexerCheckpoint::new();
    lex_cp.position = checkpoint.byte;
    lex_cp.mode = checkpoint.mode;
    lex_cp.current_pos = Position {
        byte: checkpoint.byte,
        line: (checkpoint.line + 1) as u32,
        column: (checkpoint.column + 1) as u32,
    };
    lexer.restore(&lex_cp);

    // Determine token index to splice
    let start_idx =
        state.tokens.iter().position(|t| t.start >= checkpoint.byte).unwrap_or(state.tokens.len());

    // Build a lookup of old tokens past the edit for synchronisation.
    // Once a newly-lexed token matches an old token (shifted by the byte delta),
    // we can stop re-lexing and reuse the remaining old tokens with adjusted positions.
    let edit_end_in_new = start_byte + edit.new_text.len();

    // Find the first old-token index whose start >= old_end_byte (i.e. fully past the edit)
    let old_sync_start =
        state.tokens.iter().position(|t| t.start >= old_end_byte).unwrap_or(state.tokens.len());

    let mut new_tokens = Vec::new();
    let mut last_token_end = checkpoint.byte;
    let mut synced = false;
    let mut sync_old_idx = state.tokens.len();
    loop {
        match lexer.next_token() {
            Some(token) => {
                if token.token_type == TokenType::EOF {
                    break;
                }
                last_token_end = token.end;

                // Once we are past the edit region, check if the new token matches
                // an old token (shifted by byte_shift). If so, we can stop re-lexing
                // and reuse the rest of the old tokens with position adjustment.
                if token.start >= edit_end_in_new {
                    let mut found_sync = false;
                    for (idx_offset, old_tok) in state.tokens[old_sync_start..].iter().enumerate() {
                        let shifted_start = (old_tok.start as isize + byte_shift) as usize;
                        let shifted_end = (old_tok.end as isize + byte_shift) as usize;
                        if token.start == shifted_start
                            && token.end == shifted_end
                            && token.token_type == old_tok.token_type
                        {
                            // Token synchronised -- reuse the rest
                            found_sync = true;
                            sync_old_idx = old_sync_start + idx_offset + 1;
                            break;
                        }
                    }
                    // Push the token regardless (it's valid either way)
                    new_tokens.push(token);
                    if found_sync {
                        synced = true;
                        break;
                    }
                } else {
                    new_tokens.push(token);
                }
            }
            None => break,
        }
    }

    // If we synchronised, append remaining old tokens with shifted positions
    if synced {
        for old_tok in &state.tokens[sync_old_idx..] {
            let mut adjusted = old_tok.clone();
            adjusted.start = (adjusted.start as isize + byte_shift) as usize;
            adjusted.end = (adjusted.end as isize + byte_shift) as usize;
            last_token_end = adjusted.end;
            new_tokens.push(adjusted);
        }
    }

    state.tokens.splice(start_idx.., new_tokens);

    // Rebuild checkpoints with updated line index
    state.lex_checkpoints =
        IncrementalState::create_lex_checkpoints(&state.tokens, &state.line_index);

    // Return the actual reparsed range (from checkpoint to end of last new token)
    Ok(checkpoint.byte..last_token_end)
}
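
// A minimal sketch (illustrative test, not part of the original suite) of the
// byte-shift arithmetic above: after a small replacement, the text past the
// edit site reappears shifted by exactly `new_text.len() - (old_end - start)`.
#[cfg(test)]
mod byte_shift_sketch {
    use super::*;

    #[test]
    fn text_after_edit_shifts_by_byte_delta() -> Result<()> {
        let mut state = IncrementalState::new("my $a = 1;\nmy $b = 2;\n".to_string());
        // Replace the literal `1` with `100`: a byte shift of +2.
        let pos = state.source.find('1').expect("literal present");
        let edit = Edit {
            start_byte: pos,
            old_end_byte: pos + 1,
            new_end_byte: pos + 3,
            new_text: "100".to_string(),
        };
        apply_edits(&mut state, &[edit])?;
        // The second statement is intact, just moved two bytes right.
        assert_eq!(state.source, "my $a = 100;\nmy $b = 2;\n");
        Ok(())
    }
}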

#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    #[derive(Clone, Debug)]
    struct FuzzEdit {
        start: usize,
        delete_len: usize,
        insert_text: String,
    }

    fn apply_edit_to_ground_truth(source: &mut String, edit: &FuzzEdit) {
        let start = edit.start.min(source.len());
        let old_end = (start + edit.delete_len).min(source.len());
        source.replace_range(start..old_end, &edit.insert_text);
    }

    /// Verify that a small ranged edit re-lexes substantially fewer bytes than
    /// the total document length, confirming the checkpoint fast-path fires.
    #[test]
    fn test_incremental_state_small_edit_uses_checkpoint() -> Result<()> {
        // Build a document large enough that a checkpoint will exist before the edit.
        // We need at least one statement-boundary token (semicolon / brace) to create
        // a checkpoint entry that sits before the edit site.
        let mut lines = Vec::with_capacity(30);
        for i in 0..30usize {
            lines.push(format!("my $var_{i} = {i};"));
        }
        let source = lines.join("\n");
        let doc_len = source.len();

        let mut state = IncrementalState::new(source.clone());

        // Sanity: we should have at least one lex checkpoint from all those semicolons.
        assert!(
            state.lex_checkpoints.len() > 1,
            "expected multiple lex checkpoints, got {}",
            state.lex_checkpoints.len()
        );

        // Edit the LAST line: change `my $var_29 = 29;` -> `my $var_29 = 999;`
        // The checkpoint at some semicolon earlier in the doc should be used, meaning
        // we re-lex far less than the full document.
        let edit_text = "999";
        let edit_start = source.rfind("29;").unwrap_or(source.len() - 3);
        let edit_end = edit_start + 2; // replace "29"

        let edit = Edit {
            start_byte: edit_start,
            old_end_byte: edit_end,
            new_end_byte: edit_start + edit_text.len(),
            new_text: edit_text.to_string(),
        };

        let result = apply_edits(&mut state, &[edit])?;

        // The key assertion: reparsed_bytes must be strictly less than the full
        // document size, proving the incremental path was taken.
        assert!(
            result.reparsed_bytes < doc_len,
            "incremental reparse should cover less than the full document ({} bytes reparsed, {} doc len)",
            result.reparsed_bytes,
            doc_len
        );

        // Source must reflect the edit.
        assert!(state.source.contains("999"), "source must contain the new value after edit");
        assert!(
            !state.source.contains("= 29;"),
            "source must not contain the old value after edit"
        );

        Ok(())
    }

    /// Verify that the full-reparse fallback is triggered for an edit >64KB.
    #[test]
    fn test_incremental_state_large_edit_falls_back_to_full_reparse() -> Result<()> {
        let source = "my $x = 1;\n".repeat(10);
        let mut state = IncrementalState::new(source.clone());

        // A new_text larger than 64KB must trigger full reparse
        let big_text = "x".repeat(65 * 1024);
        let edit = Edit {
            start_byte: 0,
            old_end_byte: 0,
            new_end_byte: big_text.len(),
            new_text: big_text.clone(),
        };

        let result = apply_edits(&mut state, &[edit])?;

        // Full reparse: reparsed_bytes == full (new) source length
        assert_eq!(
            result.reparsed_bytes,
            state.source.len(),
            "large edit must trigger full reparse, reparsed_bytes should equal doc length"
        );

        Ok(())
    }

    /// Verify large deletions trigger full-reparse fallback the same as large insertions.
    #[test]
    fn test_incremental_state_large_deletion_falls_back_to_full_reparse() -> Result<()> {
        let source = "my $x = 1;\n".repeat(10_000);
        let mut state = IncrementalState::new(source);

        // Delete almost the entire document; this should use full-reparse fallback
        // to keep incremental behavior predictable for large edits.
        let old_end_byte = state.source.len().saturating_sub(1);
        let edit = Edit { start_byte: 0, old_end_byte, new_end_byte: 0, new_text: String::new() };

        let result = apply_edits(&mut state, &[edit])?;

        assert_eq!(
            result.reparsed_bytes,
            state.source.len(),
            "large deletion must trigger full reparse, reparsed_bytes should equal doc length"
        );

        Ok(())
    }

    proptest! {
        #[test]
        fn prop_incremental_apply_edits_matches_ground_truth(
            edits in prop::collection::vec(
                (
                    0usize..900usize,
                    0usize..80usize,
                    "[a-zA-Z0-9_ \\n;\\$\\(\\)\\{\\}\\[\\],]{0,40}"
                ),
                1..60
            )
        ) {
            let mut state = IncrementalState::new("my $seed = 0;\n".repeat(80));
            let mut expected_source = state.source.clone();

            for (start, delete_len, insert_text) in edits {
                let fuzz_edit = FuzzEdit { start, delete_len, insert_text };
                let start_byte = fuzz_edit.start.min(state.source.len());
                let old_end_byte = (start_byte + fuzz_edit.delete_len).min(state.source.len());

                apply_edit_to_ground_truth(&mut expected_source, &fuzz_edit);

                let incremental_edit = Edit {
                    start_byte,
                    old_end_byte,
                    new_end_byte: start_byte + fuzz_edit.insert_text.len(),
                    new_text: fuzz_edit.insert_text,
                };

                let result = apply_edits(&mut state, &[incremental_edit]);
                prop_assert!(result.is_ok());
                prop_assert_eq!(&state.source, &expected_source);
            }

            let reparsed = IncrementalState::new(expected_source);
            prop_assert_eq!(&state.source, &reparsed.source);
        }
    }
}

/// Full document reparse fallback
fn full_reparse(state: &mut IncrementalState) -> Result<ReparseResult> {
    let mut parser = Parser::new(&state.source);
    state.ast = match parser.parse() {
        Ok(ast) => ast,
        Err(e) => Node::new(
            NodeKind::Error {
                message: e.to_string(),
                expected: vec![],
                found: None,
                partial: None,
            },
            SourceLocation { start: 0, end: state.source.len() },
        ),
    };

    // Re-lex to get tokens
    let mut lexer = PerlLexer::new(&state.source);
    let mut tokens = Vec::new();
    loop {
        match lexer.next_token() {
            Some(token) => {
                if token.token_type == TokenType::EOF {
                    break;
                }
                tokens.push(token);
            }
            None => break,
        }
    }
    state.tokens = tokens;

    state.rope = Rope::from_str(&state.source);
    state.line_index = LineIndex::new(&state.source);

    // Rebuild checkpoints
    state.lex_checkpoints =
        IncrementalState::create_lex_checkpoints(&state.tokens, &state.line_index);
    state.parse_checkpoints = IncrementalState::create_parse_checkpoints(&state.ast);

    // No diagnostics for now; they will be produced by the LSP server
    let diagnostics = vec![];

    Ok(ReparseResult {
        changed_ranges: vec![0..state.source.len()],
        diagnostics,
        reparsed_bytes: state.source.len(),
    })
}
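
// A minimal end-to-end sketch (illustrative test, not part of the original
// suite): `full_reparse` is the safety net for the incremental path, and it
// always reports the entire document as changed so callers can invalidate
// everything downstream.
#[cfg(test)]
mod full_reparse_sketch {
    use super::*;

    #[test]
    fn full_reparse_reports_whole_document() -> Result<()> {
        let mut state = IncrementalState::new("my $x = 1;\n".to_string());
        let result = full_reparse(&mut state)?;
        assert_eq!(result.changed_ranges, vec![0..state.source.len()]);
        assert_eq!(result.reparsed_bytes, state.source.len());
        Ok(())
    }
}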