//! Incremental lexing and parsing state for the Perl language server:
//! checkpoint-based relexing with a full-reparse fallback.

#![allow(missing_docs)]

use anyhow::Result;
use lsp_types::{Diagnostic, TextDocumentContentChangeEvent};
pub use perl_line_index::LineIndex;
use ropey::Rope;
use std::ops::Range;

use perl_lexer::{LexerMode, PerlLexer, Token, TokenType};
use perl_parser_core::ast::{Node, NodeKind, SourceLocation};
use perl_parser_core::parser::Parser;

pub mod incremental_advanced_reuse;
#[cfg(test)]
mod incremental_boundary_regressions;
pub mod incremental_checkpoint;
pub mod incremental_document;
pub mod incremental_edit;
pub mod incremental_handler_v2;
pub mod incremental_integration;
pub mod incremental_simple;
pub mod incremental_v2;

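/// A saved lexer position from which relexing can safely resume.
/// Checkpoints are recorded at statement boundaries (`;`, `{`, `}`) and
/// before `sub`/`package` keywords, where the lexer mode is known.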
#[derive(Clone, Copy, Debug)]
pub struct LexCheckpoint {
    pub byte: usize,
    pub mode: LexerMode,
    pub line: usize,
    pub column: usize,
}

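/// A snapshot of the lexical scope at a parse checkpoint: the enclosing
/// package, `my` locals, `our` variables, and the package's parent
/// classes (`@ISA`).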
#[derive(Clone, Debug, Default)]
pub struct ScopeSnapshot {
    pub package_name: String,
    pub locals: Vec<String>,
    pub our_vars: Vec<String>,
    pub parent_isa: Vec<String>,
}

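/// A saved parser position: a byte offset plus the scope visible there,
/// recorded at package, subroutine, and block boundaries.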
#[derive(Clone, Debug)]
pub struct ParseCheckpoint {
    pub byte: usize,
    pub scope_snapshot: ScopeSnapshot,
    pub node_id: usize,
}

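/// All incrementally maintained state for one document: the rope and
/// line index over the current text, the lex/parse checkpoint tables,
/// and the most recent AST and token stream.
///
/// A minimal usage sketch (marked `ignore` because the enclosing crate
/// name is not fixed here):
///
/// ```rust,ignore
/// let mut state = IncrementalState::new("my $x = 1;\n".to_string());
/// let edit = Edit {
///     start_byte: 8,  // replace the literal `1`...
///     old_end_byte: 9,
///     new_end_byte: 9,
///     new_text: "2".to_string(), // ...with `2`
/// };
/// apply_edits(&mut state, &[edit]).unwrap();
/// assert_eq!(state.source, "my $x = 2;\n");
/// ```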
#[derive(Clone)]
pub struct IncrementalState {
    pub rope: Rope,
    pub line_index: LineIndex,
    pub lex_checkpoints: Vec<LexCheckpoint>,
    pub parse_checkpoints: Vec<ParseCheckpoint>,
    pub ast: Node,
    pub tokens: Vec<Token>,
    pub source: String,
}

impl IncrementalState {
    /// Builds the initial state by lexing and parsing `source` from scratch.
    pub fn new(source: String) -> Self {
        let rope = Rope::from_str(&source);
        let line_index = LineIndex::new(&source);

        // Parse errors are folded into the AST as an `Error` node covering
        // the whole document, so downstream consumers always get a tree.
        let mut parser = Parser::new(&source);
        let ast = match parser.parse() {
            Ok(ast) => ast,
            Err(e) => Node::new(
                NodeKind::Error {
                    message: e.to_string(),
                    expected: vec![],
                    found: None,
                    partial: None,
                },
                SourceLocation { start: 0, end: source.len() },
            ),
        };

        // Lex the whole document once, dropping the trailing EOF token.
        let mut lexer = PerlLexer::new(&source);
        let mut tokens = Vec::new();
        while let Some(token) = lexer.next_token() {
            if token.token_type == TokenType::EOF {
                break;
            }
            tokens.push(token);
        }

        let lex_checkpoints = Self::create_lex_checkpoints(&tokens, &line_index);
        let parse_checkpoints = Self::create_parse_checkpoints(&ast);

        Self { rope, line_index, lex_checkpoints, parse_checkpoints, ast, tokens, source }
    }

    /// Scans the token stream and records a lex checkpoint at every
    /// statement-ish boundary, tracking an approximation of the lexer
    /// mode along the way.
    fn create_lex_checkpoints(tokens: &[Token], line_index: &LineIndex) -> Vec<LexCheckpoint> {
        let mut checkpoints =
            vec![LexCheckpoint { byte: 0, mode: LexerMode::ExpectTerm, line: 0, column: 0 }];

        let mut mode = LexerMode::ExpectTerm;

        for token in tokens {
            mode = match token.token_type {
                TokenType::Semicolon | TokenType::LeftBrace | TokenType::RightBrace => {
                    let (line, column) = line_index.byte_to_position(token.end);
                    checkpoints.push(LexCheckpoint {
                        byte: token.end,
                        mode: LexerMode::ExpectTerm,
                        line,
                        column,
                    });
                    LexerMode::ExpectTerm
                }
                TokenType::Keyword(ref kw) if kw.as_ref() == "sub" || kw.as_ref() == "package" => {
                    let (line, column) = line_index.byte_to_position(token.start);
                    checkpoints.push(LexCheckpoint {
                        byte: token.start,
                        mode: LexerMode::ExpectTerm,
                        line,
                        column,
                    });
                    LexerMode::ExpectTerm
                }
                TokenType::Identifier(_) | TokenType::Number(_) | TokenType::StringLiteral => {
                    LexerMode::ExpectOperator
                }
                TokenType::Operator(_) => LexerMode::ExpectTerm,
                _ => mode,
            };
        }

        checkpoints
    }

    /// Walks the AST and records a parse checkpoint (with the scope
    /// visible at that point) at every package, subroutine, and block.
    fn create_parse_checkpoints(ast: &Node) -> Vec<ParseCheckpoint> {
        let mut checkpoints = vec![];
        let mut scope = ScopeSnapshot::default();

        Self::walk_ast_for_checkpoints(ast, &mut checkpoints, &mut scope, 0);
        checkpoints
    }

    /// Recursive worker for [`Self::create_parse_checkpoints`]. Child node
    /// ids are derived deterministically from the parent id via wrapping
    /// arithmetic.
    fn walk_ast_for_checkpoints(
        node: &Node,
        checkpoints: &mut Vec<ParseCheckpoint>,
        scope: &mut ScopeSnapshot,
        node_id: usize,
    ) {
        // First, record checkpoints and scope effects for this node itself.
        match &node.kind {
            NodeKind::Package { name, .. } => {
                scope.package_name = name.clone();
                checkpoints.push(ParseCheckpoint {
                    byte: node.location.start,
                    scope_snapshot: scope.clone(),
                    node_id,
                });
            }
            NodeKind::Subroutine { .. } | NodeKind::Block { .. } => {
                checkpoints.push(ParseCheckpoint {
                    byte: node.location.start,
                    scope_snapshot: scope.clone(),
                    node_id,
                });
            }
            NodeKind::VariableDeclaration { variable, .. } => {
                if let NodeKind::Variable { name, sigil, .. } = &variable.kind {
                    scope.locals.push(format!("{}{}", sigil, name));
                }
            }
            NodeKind::VariableListDeclaration { variables, .. } => {
                for var in variables {
                    if let NodeKind::Variable { name, sigil, .. } = &var.kind {
                        scope.locals.push(format!("{}{}", sigil, name));
                    }
                }
            }
            _ => {}
        }

        // Then recurse into children, giving blocks and subroutines their
        // own cloned scope so inner declarations don't leak outward.
        match &node.kind {
            NodeKind::Program { statements } => {
                for (i, stmt) in statements.iter().enumerate() {
                    let child_id = node_id.wrapping_mul(101).wrapping_add(i);
                    Self::walk_ast_for_checkpoints(stmt, checkpoints, scope, child_id);
                }
            }
            NodeKind::Block { statements } => {
                let mut local_scope = scope.clone();
                for (i, stmt) in statements.iter().enumerate() {
                    let child_id = node_id.wrapping_mul(101).wrapping_add(i);
                    Self::walk_ast_for_checkpoints(stmt, checkpoints, &mut local_scope, child_id);
                }
            }
            NodeKind::Subroutine { body, .. } => {
                let mut local_scope = scope.clone();
                let child_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(body, checkpoints, &mut local_scope, child_id);
            }
            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
                let base_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(condition, checkpoints, scope, base_id);

                Self::walk_ast_for_checkpoints(
                    then_branch,
                    checkpoints,
                    scope,
                    base_id.wrapping_add(1),
                );

                for (i, (elsif_cond, elsif_block)) in elsif_branches.iter().enumerate() {
                    let elsif_base = base_id.wrapping_add((i + 2) * 2);
                    Self::walk_ast_for_checkpoints(elsif_cond, checkpoints, scope, elsif_base);
                    Self::walk_ast_for_checkpoints(
                        elsif_block,
                        checkpoints,
                        scope,
                        elsif_base.wrapping_add(1),
                    );
                }
                if let Some(else_br) = else_branch {
                    let else_id = base_id.wrapping_add((elsif_branches.len() + 2) * 2);
                    Self::walk_ast_for_checkpoints(else_br, checkpoints, scope, else_id);
                }
            }
            NodeKind::While { condition, body, .. } => {
                let base_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(condition, checkpoints, scope, base_id);
                Self::walk_ast_for_checkpoints(body, checkpoints, scope, base_id.wrapping_add(1));
            }
            NodeKind::For { init, condition, update, body, .. } => {
                let base_id = node_id.wrapping_mul(101);
                let mut offset = 0;
                if let Some(init) = init {
                    Self::walk_ast_for_checkpoints(
                        init,
                        checkpoints,
                        scope,
                        base_id.wrapping_add(offset),
                    );
                    offset += 1;
                }
                if let Some(cond) = condition {
                    Self::walk_ast_for_checkpoints(
                        cond,
                        checkpoints,
                        scope,
                        base_id.wrapping_add(offset),
                    );
                    offset += 1;
                }
                if let Some(upd) = update {
                    Self::walk_ast_for_checkpoints(
                        upd,
                        checkpoints,
                        scope,
                        base_id.wrapping_add(offset),
                    );
                    offset += 1;
                }
                Self::walk_ast_for_checkpoints(
                    body,
                    checkpoints,
                    scope,
                    base_id.wrapping_add(offset),
                );
            }
            NodeKind::Binary { left, right, .. } => {
                let base_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(left, checkpoints, scope, base_id);
                Self::walk_ast_for_checkpoints(right, checkpoints, scope, base_id.wrapping_add(1));
            }
            NodeKind::Assignment { lhs, rhs, .. } => {
                let base_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(lhs, checkpoints, scope, base_id);
                Self::walk_ast_for_checkpoints(rhs, checkpoints, scope, base_id.wrapping_add(1));
            }
            NodeKind::VariableDeclaration { initializer, .. } => {
                if let Some(init) = initializer {
                    let child_id = node_id.wrapping_mul(101);
                    Self::walk_ast_for_checkpoints(init, checkpoints, scope, child_id);
                }
            }
            _ => {}
        }
    }

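    /// Returns the last lex checkpoint at or before `byte`: the nearest
    /// safe point from which relexing can resume. Since a checkpoint is
    /// always recorded at byte 0, this only returns `None` when the
    /// checkpoint table is empty.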
    pub fn find_lex_checkpoint(&self, byte: usize) -> Option<&LexCheckpoint> {
        self.lex_checkpoints.iter().rev().find(|cp| cp.byte <= byte)
    }

    /// Returns the last parse checkpoint at or before `byte`.
    pub fn find_parse_checkpoint(&self, byte: usize) -> Option<&ParseCheckpoint> {
        self.parse_checkpoints.iter().rev().find(|cp| cp.byte <= byte)
    }
}

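/// A single text edit in byte offsets: `start_byte..old_end_byte` in the
/// old document is replaced by `new_text`, which occupies
/// `start_byte..new_end_byte` in the new document, so `new_end_byte`
/// should equal `start_byte + new_text.len()`.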
#[derive(Clone, Debug)]
pub struct Edit {
    pub start_byte: usize,
    pub old_end_byte: usize,
    pub new_end_byte: usize,
    pub new_text: String,
}

impl Edit {
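    /// Converts an LSP change event into an [`Edit`]. A ranged change is
    /// mapped through `line_index`; a change without a range replaces the
    /// whole document. Returns `None` if the range cannot be resolved to
    /// byte offsets.
    ///
    /// A hedged sketch of a ranged change (`line_index` and `old_text`
    /// are assumed to be in scope; field names follow `lsp_types`):
    ///
    /// ```rust,ignore
    /// use lsp_types::{Position, Range, TextDocumentContentChangeEvent};
    ///
    /// let change = TextDocumentContentChangeEvent {
    ///     range: Some(Range {
    ///         start: Position { line: 0, character: 8 },
    ///         end: Position { line: 0, character: 9 },
    ///     }),
    ///     range_length: None,
    ///     text: "2".to_string(),
    /// };
    /// let edit = Edit::from_lsp_change(&change, &line_index, old_text).unwrap();
    /// ```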
    pub fn from_lsp_change(
        change: &TextDocumentContentChangeEvent,
        line_index: &LineIndex,
        old_text: &str,
    ) -> Option<Self> {
        if let Some(range) = change.range {
            let start_byte = line_index
                .position_to_byte(range.start.line as usize, range.start.character as usize)?;
            let old_end_byte = line_index
                .position_to_byte(range.end.line as usize, range.end.character as usize)?;
            let new_end_byte = start_byte + change.text.len();

            Some(Edit { start_byte, old_end_byte, new_end_byte, new_text: change.text.clone() })
        } else {
            // No range: the change replaces the entire document.
            Some(Edit {
                start_byte: 0,
                old_end_byte: old_text.len(),
                new_end_byte: change.text.len(),
                new_text: change.text.clone(),
            })
        }
    }
}

impl Edit {
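    /// An upper bound on how many bytes this edit touches: the larger of
    /// the replaced span and the inserted text. For example, replacing a
    /// 5-byte span with 3 bytes touches 5 bytes; replacing 3 bytes with 5
    /// also touches 5.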
    fn touched_bytes(&self) -> usize {
        let replaced_len = self.old_end_byte.saturating_sub(self.start_byte);
        replaced_len.max(self.new_text.len())
    }
}

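/// The outcome of an [`apply_edits`] call: the byte ranges that changed,
/// any diagnostics produced, and how many bytes were actually reparsed
/// (equal to the document length when a full reparse was taken).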
#[derive(Debug)]
pub struct ReparseResult {
    pub changed_ranges: Vec<Range<usize>>,
    pub diagnostics: Vec<Diagnostic>,
    pub reparsed_bytes: usize,
}

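/// Applies a batch of edits to `state`. A single small edit is relexed
/// incrementally from the nearest checkpoint (see the sketch on
/// [`IncrementalState`]); batches that touch more than 64 KiB, insert
/// many lines, or contain multiple edits fall back to a full reparse.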
pub fn apply_edits(state: &mut IncrementalState, edits: &[Edit]) -> Result<ReparseResult> {
    // Apply edits back-to-front so earlier edits don't shift the byte
    // offsets of later ones.
    let mut sorted_edits = edits.to_vec();
    sorted_edits.sort_by_key(|e| e.start_byte);
    sorted_edits.reverse();

    let total_changed = sorted_edits.iter().map(Edit::touched_bytes).sum::<usize>();

    // Batches that touch too much text aren't worth relexing incrementally:
    // apply the text changes first, then reparse from scratch.
    const MAX_EDIT_SIZE: usize = 64 * 1024;
    if total_changed > MAX_EDIT_SIZE {
        for edit in &sorted_edits {
            apply_single_edit(state, edit)?;
        }
        return full_reparse(state);
    }

    if sorted_edits.len() == 1 {
        let edit = &sorted_edits[0];

        // A big or many-line single edit is applied, then fully reparsed.
        if edit.touched_bytes() > 1024 || edit.new_text.matches('\n').count() > 10 {
            apply_single_edit(state, edit)?;
            return full_reparse(state);
        }

        let reparsed_range = apply_single_edit(state, edit)?;
        let reparsed_bytes = reparsed_range.end - reparsed_range.start;

        Ok(ReparseResult {
            changed_ranges: vec![reparsed_range],
            diagnostics: vec![],
            reparsed_bytes,
        })
    } else {
        for edit in sorted_edits {
            apply_single_edit(state, &edit)?;
        }
        full_reparse(state)
    }
}

/// Applies one edit to the source text, then relexes from the nearest
/// checkpoint before the edit, re-syncing with the old token stream once
/// tokens line up again past the edited region. Returns the byte range
/// that was relexed.
fn apply_single_edit(state: &mut IncrementalState, edit: &Edit) -> Result<Range<usize>> {
    let checkpoint = state
        .find_lex_checkpoint(edit.start_byte)
        .copied()
        .ok_or_else(|| anyhow::anyhow!("No checkpoint found"))?;

    let old_end_byte = edit.old_end_byte.min(state.source.len());
    let start_byte = edit.start_byte.min(state.source.len());

    // How far every byte after the edit shifts in the new document.
    let byte_shift: isize = edit.new_text.len() as isize - (old_end_byte - start_byte) as isize;

    let mut new_source = String::with_capacity(
        state.source.len() - (old_end_byte - start_byte) + edit.new_text.len(),
    );
    new_source.push_str(&state.source[..start_byte]);
    new_source.push_str(&edit.new_text);
    new_source.push_str(&state.source[old_end_byte..]);
    state.source = new_source;
    state.rope = Rope::from_str(&state.source);
    state.line_index = LineIndex::new(&state.source);

    // Restore the lexer to the checkpoint state and relex from there.
    use perl_lexer::{Checkpointable, LexerCheckpoint, Position};
    let mut lexer = PerlLexer::new(&state.source);
    let mut lex_cp = LexerCheckpoint::new();
    lex_cp.position = checkpoint.byte;
    lex_cp.mode = checkpoint.mode;
    lex_cp.current_pos = Position {
        byte: checkpoint.byte,
        line: (checkpoint.line + 1) as u32,
        column: (checkpoint.column + 1) as u32,
    };
    lexer.restore(&lex_cp);

    // Index of the first old token at or after the checkpoint; everything
    // from here on is replaced by the freshly lexed tokens.
    let start_idx =
        state.tokens.iter().position(|t| t.start >= checkpoint.byte).unwrap_or(state.tokens.len());

    let edit_end_in_new = start_byte + edit.new_text.len();

    // First old token past the edited region: the candidates for re-sync.
    let old_sync_start =
        state.tokens.iter().position(|t| t.start >= old_end_byte).unwrap_or(state.tokens.len());

    let mut new_tokens = Vec::new();
    let mut last_token_end = checkpoint.byte;
    let mut synced = false;
    let mut sync_old_idx = state.tokens.len();
    while let Some(token) = lexer.next_token() {
        if token.token_type == TokenType::EOF {
            break;
        }
        last_token_end = token.end;

        if token.start >= edit_end_in_new {
            // Past the edit: if this token matches a shifted old token,
            // the streams have converged and old tokens can be reused.
            let mut found_sync = false;
            for (idx_offset, old_tok) in state.tokens[old_sync_start..].iter().enumerate() {
                let shifted_start = (old_tok.start as isize + byte_shift) as usize;
                let shifted_end = (old_tok.end as isize + byte_shift) as usize;
                if token.start == shifted_start
                    && token.end == shifted_end
                    && token.token_type == old_tok.token_type
                {
                    found_sync = true;
                    sync_old_idx = old_sync_start + idx_offset + 1;
                    break;
                }
            }
            new_tokens.push(token);
            if found_sync {
                synced = true;
                break;
            }
        } else {
            new_tokens.push(token);
        }
    }

    if synced {
        // Reuse the remaining old tokens, shifted into new coordinates.
        for old_tok in &state.tokens[sync_old_idx..] {
            let mut adjusted = old_tok.clone();
            adjusted.start = (adjusted.start as isize + byte_shift) as usize;
            adjusted.end = (adjusted.end as isize + byte_shift) as usize;
            last_token_end = adjusted.end;
            new_tokens.push(adjusted);
        }
    }

    state.tokens.splice(start_idx.., new_tokens);

    state.lex_checkpoints =
        IncrementalState::create_lex_checkpoints(&state.tokens, &state.line_index);

    Ok(checkpoint.byte..last_token_end)
}

#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    #[derive(Clone, Debug)]
    struct FuzzEdit {
        start: usize,
        delete_len: usize,
        insert_text: String,
    }

    /// Applies a fuzz edit to a plain `String`, clamping out-of-range
    /// offsets the same way `apply_single_edit` does.
    fn apply_edit_to_ground_truth(source: &mut String, edit: &FuzzEdit) {
        let start = edit.start.min(source.len());
        let old_end = (start + edit.delete_len).min(source.len());
        source.replace_range(start..old_end, &edit.insert_text);
    }

    #[test]
    fn test_incremental_state_small_edit_uses_checkpoint() -> Result<()> {
        let mut lines = Vec::with_capacity(30);
        for i in 0..30usize {
            lines.push(format!("my $var_{i} = {i};"));
        }
        let source = lines.join("\n");
        let doc_len = source.len();

        let mut state = IncrementalState::new(source.clone());

        assert!(
            state.lex_checkpoints.len() > 1,
            "expected multiple lex checkpoints, got {}",
            state.lex_checkpoints.len()
        );

        // Replace the final "29" (the value on the last line) with "999".
        let edit_text = "999";
        let edit_start = source.rfind("29;").unwrap_or(source.len() - 3);
        let edit_end = edit_start + 2;
        let edit = Edit {
            start_byte: edit_start,
            old_end_byte: edit_end,
            new_end_byte: edit_start + edit_text.len(),
            new_text: edit_text.to_string(),
        };

        let result = apply_edits(&mut state, &[edit])?;

        assert!(
            result.reparsed_bytes < doc_len,
            "incremental reparse should cover less than the full document ({} bytes reparsed, {} doc len)",
            result.reparsed_bytes,
            doc_len
        );

        assert!(state.source.contains("999"), "source must contain the new value after edit");
        assert!(
            !state.source.contains("= 29;"),
            "source must not contain the old value after edit"
        );

        Ok(())
    }
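
    // A hedged addition: with more than one edit in a batch, `apply_edits`
    // falls back to a full reparse, so `reparsed_bytes` should equal the
    // final document length.
    #[test]
    fn test_incremental_state_multiple_edits_fall_back_to_full_reparse() -> Result<()> {
        let source = "my $a = 1;\nmy $b = 2;\n".to_string();
        let mut state = IncrementalState::new(source);

        // Two one-byte replacements: `1` -> `7` and `2` -> `8`.
        let first = Edit {
            start_byte: 8,
            old_end_byte: 9,
            new_end_byte: 9,
            new_text: "7".to_string(),
        };
        let second = Edit {
            start_byte: 19,
            old_end_byte: 20,
            new_end_byte: 20,
            new_text: "8".to_string(),
        };

        let result = apply_edits(&mut state, &[first, second])?;

        assert_eq!(state.source, "my $a = 7;\nmy $b = 8;\n");
        assert_eq!(
            result.reparsed_bytes,
            state.source.len(),
            "multi-edit batches should fall back to a full reparse"
        );

        Ok(())
    }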

    #[test]
    fn test_incremental_state_large_edit_falls_back_to_full_reparse() -> Result<()> {
        let source = "my $x = 1;\n".repeat(10);
        let mut state = IncrementalState::new(source.clone());

        let big_text = "x".repeat(65 * 1024);
        let edit = Edit {
            start_byte: 0,
            old_end_byte: 0,
            new_end_byte: big_text.len(),
            new_text: big_text.clone(),
        };

        let result = apply_edits(&mut state, &[edit])?;

        assert_eq!(
            result.reparsed_bytes,
            state.source.len(),
            "large edit must trigger full reparse, reparsed_bytes should equal doc length"
        );

        Ok(())
    }

    #[test]
    fn test_incremental_state_large_deletion_falls_back_to_full_reparse() -> Result<()> {
        let source = "my $x = 1;\n".repeat(10_000);
        let mut state = IncrementalState::new(source);

        // Delete everything except the final newline.
        let old_end_byte = state.source.len().saturating_sub(1);
        let edit = Edit { start_byte: 0, old_end_byte, new_end_byte: 0, new_text: String::new() };

        let result = apply_edits(&mut state, &[edit])?;

        assert_eq!(
            result.reparsed_bytes,
            state.source.len(),
            "large deletion must trigger full reparse, reparsed_bytes should equal doc length"
        );

        Ok(())
    }

    proptest! {
        #[test]
        fn prop_incremental_apply_edits_matches_ground_truth(
            edits in prop::collection::vec(
                (
                    0usize..900usize,
                    0usize..80usize,
                    "[a-zA-Z0-9_ \\n;\\$\\(\\)\\{\\}\\[\\],]{0,40}"
                ),
                1..60
            )
        ) {
            let mut state = IncrementalState::new("my $seed = 0;\n".repeat(80));
            let mut expected_source = state.source.clone();

            for (start, delete_len, insert_text) in edits {
                let fuzz_edit = FuzzEdit { start, delete_len, insert_text };
                let start_byte = fuzz_edit.start.min(state.source.len());
                let old_end_byte = (start_byte + fuzz_edit.delete_len).min(state.source.len());

                apply_edit_to_ground_truth(&mut expected_source, &fuzz_edit);

                let incremental_edit = Edit {
                    start_byte,
                    old_end_byte,
                    new_end_byte: start_byte + fuzz_edit.insert_text.len(),
                    new_text: fuzz_edit.insert_text,
                };

                let result = apply_edits(&mut state, &[incremental_edit]);
                prop_assert!(result.is_ok());
                prop_assert_eq!(&state.source, &expected_source);
            }

            // The incrementally maintained source must match a from-scratch
            // state built from the ground-truth text.
            let reparsed = IncrementalState::new(expected_source);
            prop_assert_eq!(&state.source, &reparsed.source);
        }
    }
}

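/// Rebuilds everything (AST, token stream, rope, line index, and both
/// checkpoint tables) from `state.source`. This is the fallback path for
/// edit batches that are too large or too complex to handle incrementally.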
fn full_reparse(state: &mut IncrementalState) -> Result<ReparseResult> {
    let mut parser = Parser::new(&state.source);
    state.ast = match parser.parse() {
        Ok(ast) => ast,
        Err(e) => Node::new(
            NodeKind::Error {
                message: e.to_string(),
                expected: vec![],
                found: None,
                partial: None,
            },
            SourceLocation { start: 0, end: state.source.len() },
        ),
    };

    let mut lexer = PerlLexer::new(&state.source);
    let mut tokens = Vec::new();
    while let Some(token) = lexer.next_token() {
        if token.token_type == TokenType::EOF {
            break;
        }
        tokens.push(token);
    }
    state.tokens = tokens;

    state.rope = Rope::from_str(&state.source);
    state.line_index = LineIndex::new(&state.source);

    state.lex_checkpoints =
        IncrementalState::create_lex_checkpoints(&state.tokens, &state.line_index);
    state.parse_checkpoints = IncrementalState::create_parse_checkpoints(&state.ast);

    // No diagnostics are computed here; they are left empty for the caller.
    let diagnostics = vec![];

    Ok(ReparseResult {
        changed_ranges: vec![0..state.source.len()],
        diagnostics,
        reparsed_bytes: state.source.len(),
    })
}