1#![allow(missing_docs)]
2
3use anyhow::Result;
4use lsp_types::Diagnostic;
5use lsp_types::TextDocumentContentChangeEvent;
6pub use perl_line_index::LineIndex;
7use ropey::Rope;
8use std::ops::Range;
9
10use crate::ast::{Node, NodeKind, SourceLocation};
11use crate::parser::Parser;
12use perl_lexer::{LexerMode, PerlLexer, Token, TokenType};
13
// Incremental-parsing submodules; each implements a different strategy or
// iteration of the checkpoint/reuse machinery defined in this file.
pub mod incremental_advanced_reuse;
pub mod incremental_checkpoint;
pub mod incremental_document;
pub mod incremental_edit;
pub mod incremental_handler_v2;
pub mod incremental_integration;
pub mod incremental_simple;
pub mod incremental_v2;
22
/// A point in the source at which the lexer state is fully known, so lexing
/// can be resumed from here instead of from the start of the document.
#[derive(Clone, Copy, Debug)]
pub struct LexCheckpoint {
    /// Byte offset into the source where the checkpoint was taken.
    pub byte: usize,
    /// Lexer mode to restore when resuming from this checkpoint.
    pub mode: LexerMode,
    /// Zero-based line of the checkpoint position.
    pub line: usize,
    /// Zero-based column of the checkpoint position.
    pub column: usize,
}
31
/// Snapshot of scope-relevant parser state at a checkpoint: the enclosing
/// package plus the variable names visible at that point.
#[derive(Clone, Debug, Default)]
pub struct ScopeSnapshot {
    /// Name of the enclosing `package`.
    pub package_name: String,
    /// Lexical (`my`) variables declared so far, stored as "<sigil><name>".
    pub locals: Vec<String>,
    /// `our` variables in scope.
    /// NOTE(review): never populated by `walk_ast_for_checkpoints` in this
    /// file — confirm it is filled elsewhere.
    pub our_vars: Vec<String>,
    /// Parent classes (@ISA).
    /// NOTE(review): also not populated in this file — verify against callers.
    pub parent_isa: Vec<String>,
}
40
/// A point in the AST where parsing could be resumed with a known scope.
#[derive(Clone, Debug)]
pub struct ParseCheckpoint {
    /// Byte offset of the node this checkpoint anchors to.
    pub byte: usize,
    /// Scope state (package, lexicals) captured at this point.
    pub scope_snapshot: ScopeSnapshot,
    /// Id derived from the traversal path to the node; stable while the tree
    /// shape is unchanged.
    pub node_id: usize,
}
48
/// Full cached state for one document: the text (string, rope, line index),
/// the token stream, the AST, and the checkpoints used to resume lexing and
/// parsing after an edit.
#[derive(Clone)]
pub struct IncrementalState {
    /// Rope copy of the text for efficient edit splicing.
    pub rope: Rope,
    /// Line/column <-> byte-offset mapping for the current text.
    pub line_index: LineIndex,
    /// Resume points for the lexer, in ascending byte order.
    pub lex_checkpoints: Vec<LexCheckpoint>,
    /// Resume points for the parser, in AST traversal order.
    pub parse_checkpoints: Vec<ParseCheckpoint>,
    /// Last parsed AST (an `Error` node spanning the document on parse failure).
    pub ast: Node,
    /// Token stream, excluding the terminating EOF token.
    pub tokens: Vec<Token>,
    /// Canonical source text; `rope` and `line_index` are derived from it.
    pub source: String,
}
60
61impl IncrementalState {
62 pub fn new(source: String) -> Self {
63 let rope = Rope::from_str(&source);
64 let line_index = LineIndex::new(&source);
65
66 let mut parser = Parser::new(&source);
68 let ast = match parser.parse() {
69 Ok(ast) => ast,
70 Err(e) => Node::new(
71 NodeKind::Error {
72 message: e.to_string(),
73 expected: vec![],
74 found: None,
75 partial: None,
76 },
77 SourceLocation { start: 0, end: source.len() },
78 ),
79 };
80
81 let mut lexer = PerlLexer::new(&source);
83 let mut tokens = Vec::new();
84 loop {
85 match lexer.next_token() {
86 Some(token) => {
87 if token.token_type == TokenType::EOF {
88 break;
89 }
90 tokens.push(token);
91 }
92 None => break,
93 }
94 }
95
96 let lex_checkpoints = Self::create_lex_checkpoints(&tokens, &line_index);
98 let parse_checkpoints = Self::create_parse_checkpoints(&ast);
99
100 Self { rope, line_index, lex_checkpoints, parse_checkpoints, ast, tokens, source }
101 }
102
    /// Derives lexer checkpoints from a token stream. A checkpoint is pushed
    /// after statement/block boundaries (`;`, `{`, `}`) and before `sub` /
    /// `package` keywords — positions where the lexer can safely restart in
    /// `ExpectTerm` mode.
    fn create_lex_checkpoints(tokens: &[Token], line_index: &LineIndex) -> Vec<LexCheckpoint> {
        // Always keep a checkpoint at byte 0 so a fallback exists for any edit.
        let mut checkpoints =
            vec![LexCheckpoint { byte: 0, mode: LexerMode::ExpectTerm, line: 0, column: 0 }];

        // NOTE(review): `mode` is tracked across tokens but every stored
        // checkpoint hard-codes `ExpectTerm`; the tracked value only feeds the
        // catch-all arm below. Confirm whether checkpoints should record the
        // tracked mode instead.
        let mut mode = LexerMode::ExpectTerm;

        for token in tokens {
            mode = match token.token_type {
                // End of statement or block: checkpoint *after* the token.
                TokenType::Semicolon | TokenType::LeftBrace | TokenType::RightBrace => {
                    let (line, column) = line_index.byte_to_position(token.end);
                    checkpoints.push(LexCheckpoint {
                        byte: token.end,
                        mode: LexerMode::ExpectTerm,
                        line,
                        column,
                    });
                    LexerMode::ExpectTerm
                }
                // `sub` / `package` start a declaration: checkpoint *at* the
                // keyword (token.start, not token.end).
                TokenType::Keyword(ref kw) if kw.as_ref() == "sub" || kw.as_ref() == "package" => {
                    let (line, column) = line_index.byte_to_position(token.start);
                    checkpoints.push(LexCheckpoint {
                        byte: token.start,
                        mode: LexerMode::ExpectTerm,
                        line,
                        column,
                    });
                    LexerMode::ExpectTerm
                }
                // After a value-producing token an operator is expected.
                TokenType::Identifier(_) | TokenType::Number(_) | TokenType::StringLiteral => {
                    LexerMode::ExpectOperator
                }
                // After an operator a term is expected.
                TokenType::Operator(_) => LexerMode::ExpectTerm,
                // Anything else leaves the tracked mode unchanged.
                _ => mode,
            };
        }

        checkpoints
    }
144
145 fn create_parse_checkpoints(ast: &Node) -> Vec<ParseCheckpoint> {
147 let mut checkpoints = vec![];
148 let mut scope = ScopeSnapshot::default();
149
150 Self::walk_ast_for_checkpoints(ast, &mut checkpoints, &mut scope, 0);
151 checkpoints
152 }
153
    /// Recursively walks `node`, recording parse checkpoints and updating the
    /// scope snapshot.
    ///
    /// The first `match` handles the current node (scope updates, checkpoint
    /// pushes); the second recurses into children, deriving child ids from
    /// `node_id` with wrapping arithmetic so ids are stable for an unchanged
    /// tree shape (collisions are tolerated).
    fn walk_ast_for_checkpoints(
        node: &Node,
        checkpoints: &mut Vec<ParseCheckpoint>,
        scope: &mut ScopeSnapshot,
        node_id: usize,
    ) {
        match &node.kind {
            // A package declaration changes the current package and is a
            // natural resume point.
            NodeKind::Package { name, .. } => {
                scope.package_name = name.clone();
                checkpoints.push(ParseCheckpoint {
                    byte: node.location.start,
                    scope_snapshot: scope.clone(),
                    node_id,
                });
            }
            // Subroutines and blocks are resume points but do not mutate the
            // scope here; their bodies get a cloned scope below.
            NodeKind::Subroutine { .. } | NodeKind::Block { .. } => {
                checkpoints.push(ParseCheckpoint {
                    byte: node.location.start,
                    scope_snapshot: scope.clone(),
                    node_id,
                });
            }
            // Record declared variables as "<sigil><name>" in the current scope.
            NodeKind::VariableDeclaration { variable, .. } => {
                if let NodeKind::Variable { name, sigil, .. } = &variable.kind {
                    scope.locals.push(format!("{}{}", sigil, name));
                }
            }
            // Same, for `my ($a, $b, ...)` style list declarations.
            NodeKind::VariableListDeclaration { variables, .. } => {
                for var in variables {
                    if let NodeKind::Variable { name, sigil, .. } = &var.kind {
                        scope.locals.push(format!("{}{}", sigil, name));
                    }
                }
            }
            _ => {}
        }

        // Recursion into children, per node kind.
        match &node.kind {
            NodeKind::Program { statements } => {
                for (i, stmt) in statements.iter().enumerate() {
                    let child_id = node_id.wrapping_mul(101).wrapping_add(i);
                    Self::walk_ast_for_checkpoints(stmt, checkpoints, scope, child_id);
                }
            }
            // Blocks introduce a lexical scope: clone so inner declarations do
            // not leak into the parent snapshot.
            NodeKind::Block { statements } => {
                let mut local_scope = scope.clone();
                for (i, stmt) in statements.iter().enumerate() {
                    let child_id = node_id.wrapping_mul(101).wrapping_add(i);
                    Self::walk_ast_for_checkpoints(stmt, checkpoints, &mut local_scope, child_id);
                }
            }
            // Subroutine bodies likewise get their own scope clone.
            NodeKind::Subroutine { body, .. } => {
                let mut local_scope = scope.clone();
                let child_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(body, checkpoints, &mut local_scope, child_id);
            }
            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
                let base_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(condition, checkpoints, scope, base_id);

                Self::walk_ast_for_checkpoints(
                    then_branch,
                    checkpoints,
                    scope,
                    base_id.wrapping_add(1),
                );

                // Each elsif arm consumes a pair of ids (condition, block).
                for (i, (elsif_cond, elsif_block)) in elsif_branches.iter().enumerate() {
                    let elsif_base = base_id.wrapping_add((i + 2) * 2);
                    Self::walk_ast_for_checkpoints(elsif_cond, checkpoints, scope, elsif_base);
                    Self::walk_ast_for_checkpoints(
                        elsif_block,
                        checkpoints,
                        scope,
                        elsif_base.wrapping_add(1),
                    );
                }
                if let Some(else_br) = else_branch {
                    let else_id = base_id.wrapping_add((elsif_branches.len() + 2) * 2);
                    Self::walk_ast_for_checkpoints(else_br, checkpoints, scope, else_id);
                }
            }
            NodeKind::While { condition, body, .. } => {
                let base_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(condition, checkpoints, scope, base_id);
                Self::walk_ast_for_checkpoints(body, checkpoints, scope, base_id.wrapping_add(1));
            }
            // C-style for: init/condition/update are optional, so child ids
            // are assigned with a running offset over the parts present.
            NodeKind::For { init, condition, update, body, .. } => {
                let base_id = node_id.wrapping_mul(101);
                let mut offset = 0;
                if let Some(init) = init {
                    Self::walk_ast_for_checkpoints(
                        init,
                        checkpoints,
                        scope,
                        base_id.wrapping_add(offset),
                    );
                    offset += 1;
                }
                if let Some(cond) = condition {
                    Self::walk_ast_for_checkpoints(
                        cond,
                        checkpoints,
                        scope,
                        base_id.wrapping_add(offset),
                    );
                    offset += 1;
                }
                if let Some(upd) = update {
                    Self::walk_ast_for_checkpoints(
                        upd,
                        checkpoints,
                        scope,
                        base_id.wrapping_add(offset),
                    );
                    offset += 1;
                }
                Self::walk_ast_for_checkpoints(
                    body,
                    checkpoints,
                    scope,
                    base_id.wrapping_add(offset),
                );
            }
            NodeKind::Binary { left, right, .. } => {
                let base_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(left, checkpoints, scope, base_id);
                Self::walk_ast_for_checkpoints(right, checkpoints, scope, base_id.wrapping_add(1));
            }
            NodeKind::Assignment { lhs, rhs, .. } => {
                let base_id = node_id.wrapping_mul(101);
                Self::walk_ast_for_checkpoints(lhs, checkpoints, scope, base_id);
                Self::walk_ast_for_checkpoints(rhs, checkpoints, scope, base_id.wrapping_add(1));
            }
            // Declaration initializers are visited; other kinds are leaves
            // for checkpointing purposes.
            NodeKind::VariableDeclaration { initializer, .. } => {
                if let Some(init) = initializer {
                    let child_id = node_id.wrapping_mul(101);
                    Self::walk_ast_for_checkpoints(init, checkpoints, scope, child_id);
                }
            }
            _ => {}
        }
    }
309
310 pub fn find_lex_checkpoint(&self, byte: usize) -> Option<&LexCheckpoint> {
312 self.lex_checkpoints.iter().rev().find(|cp| cp.byte <= byte)
313 }
314
315 pub fn find_parse_checkpoint(&self, byte: usize) -> Option<&ParseCheckpoint> {
317 self.parse_checkpoints.iter().rev().find(|cp| cp.byte <= byte)
318 }
319}
320
/// A single text edit in byte offsets: bytes `start_byte..old_end_byte` of
/// the old text are replaced by `new_text`, which occupies
/// `start_byte..new_end_byte` in the new text.
#[derive(Clone, Debug)]
pub struct Edit {
    /// Start of the replaced region (same in old and new text).
    pub start_byte: usize,
    /// End of the replaced region in the old text.
    pub old_end_byte: usize,
    /// End of the replacement in the new text (start_byte + new_text.len()).
    pub new_end_byte: usize,
    /// Replacement text.
    pub new_text: String,
}
329
330impl Edit {
331 pub fn from_lsp_change(
333 change: &TextDocumentContentChangeEvent,
334 line_index: &LineIndex,
335 old_text: &str,
336 ) -> Option<Self> {
337 if let Some(range) = change.range {
338 let start_byte = line_index
339 .position_to_byte(range.start.line as usize, range.start.character as usize)?;
340 let old_end_byte = line_index
341 .position_to_byte(range.end.line as usize, range.end.character as usize)?;
342 let new_end_byte = start_byte + change.text.len();
343
344 Some(Edit { start_byte, old_end_byte, new_end_byte, new_text: change.text.clone() })
345 } else {
346 Some(Edit {
348 start_byte: 0,
349 old_end_byte: old_text.len(),
350 new_end_byte: change.text.len(),
351 new_text: change.text.clone(),
352 })
353 }
354 }
355}
356
/// Outcome of applying edits: which byte ranges of the new text changed, any
/// diagnostics produced, and how many bytes were actually re-processed.
#[derive(Debug)]
pub struct ReparseResult {
    /// Byte ranges of the new text considered changed.
    pub changed_ranges: Vec<Range<usize>>,
    /// Diagnostics from the reparse (currently always empty in this module).
    pub diagnostics: Vec<Diagnostic>,
    /// Bytes covered by the relex/reparse (document length on a full reparse).
    pub reparsed_bytes: usize,
}
364
365pub fn apply_edits(state: &mut IncrementalState, edits: &[Edit]) -> Result<ReparseResult> {
367 let mut sorted_edits = edits.to_vec();
369 sorted_edits.sort_by_key(|e| e.start_byte);
370 sorted_edits.reverse(); let total_changed = sorted_edits.iter().map(|e| e.new_text.len()).sum::<usize>();
374
375 const MAX_EDIT_SIZE: usize = 64 * 1024; if total_changed > MAX_EDIT_SIZE {
379 return full_reparse(state);
380 }
381
382 if sorted_edits.len() == 1 {
384 let edit = &sorted_edits[0];
385
386 if edit.new_text.len() > 1024 || edit.new_text.matches('\n').count() > 10 {
388 apply_single_edit(state, edit)?;
389 return full_reparse(state);
390 }
391
392 let reparsed_range = apply_single_edit(state, edit)?;
394 let reparsed_bytes = reparsed_range.end - reparsed_range.start;
395
396 Ok(ReparseResult {
400 changed_ranges: vec![reparsed_range],
401 diagnostics: vec![],
402 reparsed_bytes,
403 })
404 } else {
405 for edit in sorted_edits {
407 apply_single_edit(state, &edit)?;
408 }
409 full_reparse(state)
410 }
411}
412
/// Applies one `edit` to `state`: splices the new text into the source,
/// rebuilds the rope/line index, relexes from the nearest lex checkpoint, and
/// tries to resynchronize with (and reuse) the old token stream past the
/// edit. Returns the byte range the relex covered.
fn apply_single_edit(state: &mut IncrementalState, edit: &Edit) -> Result<Range<usize>> {
    // Resume point: the last checkpoint at or before the edit start.
    let checkpoint = state
        .find_lex_checkpoint(edit.start_byte)
        .copied()
        .ok_or_else(|| anyhow::anyhow!("No checkpoint found"))?;

    // Clamp offsets defensively to the current source length.
    // NOTE(review): these are byte offsets; the slicing below panics if an
    // offset falls inside a multi-byte UTF-8 sequence — confirm callers only
    // pass char-boundary offsets.
    let old_end_byte = edit.old_end_byte.min(state.source.len());
    let start_byte = edit.start_byte.min(state.source.len());

    // How far every byte after the edit moves (may be negative on deletion).
    let byte_shift: isize = edit.new_text.len() as isize - (old_end_byte - start_byte) as isize;

    // Splice the replacement text into a fresh source string, then refresh
    // the derived text structures.
    let mut new_source = String::with_capacity(
        state.source.len() - (old_end_byte - start_byte) + edit.new_text.len(),
    );
    new_source.push_str(&state.source[..start_byte]);
    new_source.push_str(&edit.new_text);
    new_source.push_str(&state.source[old_end_byte..]);
    state.source = new_source;
    state.rope = Rope::from_str(&state.source);
    state.line_index = LineIndex::new(&state.source);

    // Restore the lexer to the checkpoint; the lexer's Position is 1-based
    // for line/column, our checkpoint is 0-based.
    use perl_lexer::{Checkpointable, LexerCheckpoint, Position};
    let mut lexer = PerlLexer::new(&state.source);
    let mut lex_cp = LexerCheckpoint::new();
    lex_cp.position = checkpoint.byte;
    lex_cp.mode = checkpoint.mode;
    lex_cp.current_pos = Position {
        byte: checkpoint.byte,
        line: (checkpoint.line + 1) as u32,
        column: (checkpoint.column + 1) as u32,
    };
    lexer.restore(&lex_cp);

    // First old token at/after the checkpoint: everything from here on is
    // replaced by the relexed stream (plus any reused tail).
    let start_idx =
        state.tokens.iter().position(|t| t.start >= checkpoint.byte).unwrap_or(state.tokens.len());

    // End of the inserted text in the *new* source.
    let edit_end_in_new = start_byte + edit.new_text.len();

    // First old token wholly past the edited region (old coordinates); these
    // are the candidates for reuse once shifted by `byte_shift`.
    let old_sync_start =
        state.tokens.iter().position(|t| t.start >= old_end_byte).unwrap_or(state.tokens.len());

    // Relex until EOF or until a relexed token matches a shifted old token,
    // at which point the remaining old tokens can be reused.
    let mut new_tokens = Vec::new();
    let mut last_token_end = checkpoint.byte;
    let mut synced = false;
    let mut sync_old_idx = state.tokens.len();
    loop {
        match lexer.next_token() {
            Some(token) => {
                if token.token_type == TokenType::EOF {
                    break;
                }
                last_token_end = token.end;

                if token.start >= edit_end_in_new {
                    // Past the edit: look for a shifted old token matching
                    // exactly (start, end, and type).
                    let mut found_sync = false;
                    for (idx_offset, old_tok) in state.tokens[old_sync_start..].iter().enumerate() {
                        let shifted_start = (old_tok.start as isize + byte_shift) as usize;
                        let shifted_end = (old_tok.end as isize + byte_shift) as usize;
                        if token.start == shifted_start
                            && token.end == shifted_end
                            && token.token_type == old_tok.token_type
                        {
                            found_sync = true;
                            // Reuse starts with the token *after* the match.
                            sync_old_idx = old_sync_start + idx_offset + 1;
                            break;
                        }
                    }
                    new_tokens.push(token);
                    if found_sync {
                        synced = true;
                        break;
                    }
                } else {
                    new_tokens.push(token);
                }
            }
            None => break,
        }
    }

    // Reuse the remaining old tokens, shifted into new coordinates.
    // NOTE(review): `last_token_end` keeps advancing here, so the returned
    // range also spans reused (not relexed) tokens — confirm that is the
    // intended meaning of the "reparsed" range for callers/metrics.
    if synced {
        for old_tok in &state.tokens[sync_old_idx..] {
            let mut adjusted = old_tok.clone();
            adjusted.start = (adjusted.start as isize + byte_shift) as usize;
            adjusted.end = (adjusted.end as isize + byte_shift) as usize;
            last_token_end = adjusted.end;
            new_tokens.push(adjusted);
        }
    }

    // Replace everything from the checkpoint onward with the new stream, then
    // rebuild lex checkpoints from the full token list.
    state.tokens.splice(start_idx.., new_tokens);

    state.lex_checkpoints =
        IncrementalState::create_lex_checkpoints(&state.tokens, &state.line_index);

    Ok(checkpoint.byte..last_token_end)
}
528
#[cfg(test)]
mod tests {
    use super::*;

    /// A small single-line edit near the end of a multi-statement document
    /// should take the incremental fast path and relex less than the full doc.
    #[test]
    fn test_incremental_state_small_edit_uses_checkpoint() -> Result<()> {
        // 30 one-statement lines so plenty of `;` checkpoints exist.
        let mut lines = Vec::with_capacity(30);
        for i in 0..30usize {
            lines.push(format!("my $var_{i} = {i};"));
        }
        let source = lines.join("\n");
        let doc_len = source.len();

        let mut state = IncrementalState::new(source.clone());

        assert!(
            state.lex_checkpoints.len() > 1,
            "expected multiple lex checkpoints, got {}",
            state.lex_checkpoints.len()
        );

        // Replace the final literal `29` (two bytes) with `999`.
        let edit_text = "999";
        let edit_start = source.rfind("29;").unwrap_or(source.len() - 3);
        let edit_end = edit_start + 2;
        let edit = Edit {
            start_byte: edit_start,
            old_end_byte: edit_end,
            new_end_byte: edit_start + edit_text.len(),
            new_text: edit_text.to_string(),
        };

        let result = apply_edits(&mut state, &[edit])?;

        assert!(
            result.reparsed_bytes < doc_len,
            "incremental reparse should cover less than the full document ({} bytes reparsed, {} doc len)",
            result.reparsed_bytes,
            doc_len
        );

        assert!(state.source.contains("999"), "source must contain the new value after edit");
        assert!(
            !state.source.contains("= 29;"),
            "source must not contain the old value after edit"
        );

        Ok(())
    }

    /// An insertion larger than MAX_EDIT_SIZE must bypass the incremental
    /// path entirely and fully reparse the document.
    #[test]
    fn test_incremental_state_large_edit_falls_back_to_full_reparse() -> Result<()> {
        let source = "my $x = 1;\n".repeat(10);
        let mut state = IncrementalState::new(source.clone());

        // 65 KiB > the 64 KiB incremental threshold.
        let big_text = "x".repeat(65 * 1024);
        let edit = Edit {
            start_byte: 0,
            old_end_byte: 0,
            new_end_byte: big_text.len(),
            new_text: big_text.clone(),
        };

        let result = apply_edits(&mut state, &[edit])?;

        assert_eq!(
            result.reparsed_bytes,
            state.source.len(),
            "large edit must trigger full reparse, reparsed_bytes should equal doc length"
        );

        Ok(())
    }
}
618
619fn full_reparse(state: &mut IncrementalState) -> Result<ReparseResult> {
621 let mut parser = Parser::new(&state.source);
622 state.ast = match parser.parse() {
623 Ok(ast) => ast,
624 Err(e) => Node::new(
625 NodeKind::Error {
626 message: e.to_string(),
627 expected: vec![],
628 found: None,
629 partial: None,
630 },
631 SourceLocation { start: 0, end: state.source.len() },
632 ),
633 };
634
635 let mut lexer = PerlLexer::new(&state.source);
637 let mut tokens = Vec::new();
638 loop {
639 match lexer.next_token() {
640 Some(token) => {
641 if token.token_type == TokenType::EOF {
642 break;
643 }
644 tokens.push(token);
645 }
646 None => break,
647 }
648 }
649 state.tokens = tokens;
650
651 state.rope = Rope::from_str(&state.source);
652 state.line_index = LineIndex::new(&state.source);
653
654 state.lex_checkpoints =
656 IncrementalState::create_lex_checkpoints(&state.tokens, &state.line_index);
657 state.parse_checkpoints = IncrementalState::create_parse_checkpoints(&state.ast);
658
659 let diagnostics = vec![];
661
662 Ok(ReparseResult {
663 changed_ranges: vec![0..state.source.len()],
664 diagnostics,
665 reparsed_bytes: state.source.len(),
666 })
667}