// perl_incremental_parsing/incremental/incremental_checkpoint.rs
use crate::{ast::Node, edit::Edit as OriginalEdit, error::ParseResult, parser::Parser};
use perl_lexer::{CheckpointCache, Checkpointable, LexerCheckpoint, PerlLexer};
use perl_parser_core::token_stream::{Token, TokenStream};

/// Incremental Perl parser that combines lexer checkpoints with a token
/// cache so that, after an edit, only the region around the change needs
/// to be re-lexed.
pub struct CheckpointedIncrementalParser {
    /// Current full source text; kept in sync by `apply_edit`.
    source: String,
    /// Most recently produced parse tree, if any parse has succeeded.
    tree: Option<Node>,
    /// Saved lexer states at various byte offsets (bounded capacity — see `new`).
    checkpoint_cache: CheckpointCache,
    /// Tokens from the last parse, valid over one contiguous byte range.
    token_cache: TokenCache,
    /// Counters measuring how much work incremental parsing saved.
    stats: IncrementalStats,
}
34
/// Cache of parser tokens from the previous parse.
///
/// Tokens are stored in source order; `valid_range` is the byte span
/// `(start, end)` the tokens were produced from, or `None` when the cache
/// is empty or has been invalidated.
struct TokenCache {
    /// Tokens in ascending source-position order.
    tokens: Vec<Token>,
    /// Byte range `(start, end)` covered by `tokens`, if still valid.
    valid_range: Option<(usize, usize)>,
}
45
46impl TokenCache {
47 fn new() -> Self {
48 TokenCache { tokens: Vec::new(), valid_range: None }
49 }
50
51 fn get_tokens_from(&self, position: usize) -> Option<&[Token]> {
56 let (valid_start, valid_end) = self.valid_range?;
57 if position < valid_start || position >= valid_end {
58 return None;
59 }
60 let idx = self.tokens.partition_point(|t| t.start < position);
61 Some(&self.tokens[idx..])
62 }
63
64 fn get_tokens_before(&self, position: usize) -> Option<&[Token]> {
66 let (valid_start, _valid_end) = self.valid_range?;
67 if self.tokens.is_empty() || valid_start >= position {
68 return None;
69 }
70 let idx = self.tokens.partition_point(|t| t.end <= position);
71 if idx == 0 { None } else { Some(&self.tokens[..idx]) }
72 }
73
74 fn cache_tokens(&mut self, start: usize, end: usize, tokens: Vec<Token>) {
76 self.tokens = tokens;
77 self.valid_range = Some((start, end));
78 }
79
80 fn invalidate_range(&mut self, start: usize, end: usize) {
82 if let Some((valid_start, valid_end)) = self.valid_range {
83 if start <= valid_end && end >= valid_start {
84 self.valid_range = None;
85 self.tokens.clear();
86 }
87 }
88 }
89}
90
/// Counters describing parser activity and cache effectiveness.
#[derive(Debug, Default)]
pub struct IncrementalStats {
    /// Every call to `parse` or `apply_edit`.
    pub total_parses: usize,
    /// Calls that went through `apply_edit`.
    pub incremental_parses: usize,
    /// Cached tokens reused without re-lexing.
    pub tokens_reused: usize,
    /// Tokens produced by re-running the lexer.
    pub tokens_relexed: usize,
    /// Reparses that started from a saved lexer checkpoint.
    pub checkpoints_used: usize,
    /// Successful token-cache tail lookups during reparse.
    pub cache_hits: usize,
    /// Failed token-cache tail lookups (forced a full re-lex of the tail).
    pub cache_misses: usize,
}
102
/// A single text edit expressed purely in byte offsets: the bytes in
/// `start..end` are replaced with `new_text`.
#[derive(Debug, Clone)]
pub struct SimpleEdit {
    /// Byte offset where the replaced range begins (inclusive).
    pub start: usize,
    /// Byte offset where the replaced range ends (exclusive).
    pub end: usize,
    /// Replacement text spliced in at `start`.
    pub new_text: String,
}
110
111impl SimpleEdit {
112 pub fn to_original_edit(&self) -> OriginalEdit {
114 OriginalEdit::new(
116 self.start,
117 self.end,
118 self.start + self.new_text.len(),
119 crate::position::Position::new(self.start, 0, 0),
120 crate::position::Position::new(self.end, 0, 0),
121 crate::position::Position::new(self.start + self.new_text.len(), 0, 0),
122 )
123 }
124}
125
impl Default for CheckpointedIncrementalParser {
    /// Equivalent to [`CheckpointedIncrementalParser::new`].
    fn default() -> Self {
        Self::new()
    }
}
131
132impl CheckpointedIncrementalParser {
133 pub fn new() -> Self {
135 CheckpointedIncrementalParser {
136 source: String::new(),
137 tree: None,
138 checkpoint_cache: CheckpointCache::new(50), token_cache: TokenCache::new(),
140 stats: IncrementalStats::default(),
141 }
142 }
143
144 pub fn parse(&mut self, source: String) -> ParseResult<Node> {
146 self.source = source;
147 self.stats.total_parses += 1;
148
149 let tree = self.parse_with_checkpoints()?;
151 self.tree = Some(tree.clone());
152
153 Ok(tree)
154 }
155
156 pub fn apply_edit(&mut self, edit: &SimpleEdit) -> ParseResult<Node> {
158 self.stats.total_parses += 1;
159 self.stats.incremental_parses += 1;
160
161 let new_content = &edit.new_text;
163 self.source.replace_range(edit.start..edit.end, new_content);
164
165 self.token_cache.invalidate_range(edit.start, edit.end);
167
168 let old_len = edit.end - edit.start;
170 let new_len = new_content.len();
171 self.checkpoint_cache.apply_edit(edit.start, old_len, new_len);
172
173 let checkpoint = self.checkpoint_cache.find_before(edit.start);
175
176 if let Some(checkpoint) = checkpoint {
177 self.stats.checkpoints_used += 1;
178 self.reparse_from_checkpoint(checkpoint.clone(), edit)
179 } else {
180 self.parse_with_checkpoints()
182 }
183 }
184
185 fn parse_with_checkpoints(&mut self) -> ParseResult<Node> {
191 let mut lexer = PerlLexer::new(&self.source);
192 let mut raw_tokens = Vec::new();
193 let mut checkpoint_positions = vec![0, 100, 500, 1000, 5000];
194
195 let mut position = 0;
197 while let Some(token) = lexer.next_token() {
198 if checkpoint_positions.first() == Some(&position) {
200 checkpoint_positions.remove(0);
201 let checkpoint = lexer.checkpoint();
202 self.checkpoint_cache.add(checkpoint);
203 }
204
205 position = token.end;
206
207 if matches!(token.token_type, perl_lexer::TokenType::EOF) {
209 break;
210 }
211
212 raw_tokens.push(token);
213 }
214
215 let parser_tokens = TokenStream::lexer_tokens_to_parser_tokens(raw_tokens);
218
219 if let (Some(first), Some(last)) = (parser_tokens.first(), parser_tokens.last()) {
220 let start = first.start;
221 let end = last.end;
222 self.token_cache.cache_tokens(start, end, parser_tokens);
223 }
224
225 let mut parser = Parser::new(&self.source);
229 parser.parse()
230 }
231
232 fn reparse_from_checkpoint(
239 &mut self,
240 checkpoint: LexerCheckpoint,
241 edit: &SimpleEdit,
242 ) -> ParseResult<Node> {
243 let mut lexer = PerlLexer::new(&self.source);
246 lexer.restore(&checkpoint);
247
248 let relex_start = checkpoint.position;
249 let mut parser_tokens: Vec<Token> = Vec::new();
250
251 if let Some(cached) = self.token_cache.get_tokens_before(relex_start) {
253 parser_tokens.extend_from_slice(cached);
254 self.stats.tokens_reused += cached.len();
255 }
256
257 let relex_end = edit.start + edit.new_text.len() + 100; let mut raw_relexed: Vec<perl_lexer::Token> = Vec::new();
260 loop {
261 match lexer.next_token() {
262 Some(token) if matches!(token.token_type, perl_lexer::TokenType::EOF) => break,
263 Some(token) => {
264 let token_end = token.end;
265 raw_relexed.push(token);
266 self.stats.tokens_relexed += 1;
267 if token_end >= relex_end {
268 break;
269 }
270 }
271 None => break,
272 }
273 }
274 let converted = TokenStream::lexer_tokens_to_parser_tokens(raw_relexed);
275 parser_tokens.extend(converted);
276
277 let after_edit_pos = edit.start + edit.new_text.len();
279 let byte_shift: isize = edit.new_text.len() as isize - (edit.end - edit.start) as isize;
280
281 if let Some(cached) = self.token_cache.get_tokens_from(after_edit_pos) {
282 self.stats.cache_hits += 1;
283 for token in cached {
284 let adjusted = Token {
286 kind: token.kind,
287 text: token.text.clone(),
288 start: (token.start as isize + byte_shift) as usize,
289 end: (token.end as isize + byte_shift) as usize,
290 };
291 parser_tokens.push(adjusted);
292 self.stats.tokens_reused += 1;
293 }
294 } else {
295 self.stats.cache_misses += 1;
296 let mut raw_tail: Vec<perl_lexer::Token> = Vec::new();
298 while let Some(token) = lexer.next_token() {
299 if matches!(token.token_type, perl_lexer::TokenType::EOF) {
300 break;
301 }
302 raw_tail.push(token);
303 self.stats.tokens_relexed += 1;
304 }
305 parser_tokens.extend(TokenStream::lexer_tokens_to_parser_tokens(raw_tail));
306 }
307
308 if let (Some(first), Some(last)) = (parser_tokens.first(), parser_tokens.last()) {
310 let start = first.start;
311 let end = last.end;
312 self.token_cache.cache_tokens(start, end, parser_tokens.clone());
313 }
314
315 let mut parser = Parser::from_tokens(parser_tokens, &self.source);
317 let tree = parser.parse()?;
318 self.tree = Some(tree.clone());
319
320 Ok(tree)
321 }
322
323 pub fn stats(&self) -> &IncrementalStats {
325 &self.stats
326 }
327
328 pub fn clear_caches(&mut self) {
330 self.checkpoint_cache.clear();
331 self.token_cache = TokenCache::new();
332 }
333}
334
#[cfg(test)]
mod tests {
    use super::*;
    use crate::NodeKind;
    use perl_tdd_support::must;

    /// A single edit after an initial parse should take the incremental
    /// path and produce a structurally equivalent tree.
    #[test]
    fn test_checkpoint_incremental_parsing() {
        let mut parser = CheckpointedIncrementalParser::new();

        let source = "my $x = 42;\nmy $y = 99;\n".to_string();
        let tree1 = must(parser.parse(source));

        // Replace "42" with "4242" inside the first statement.
        let edit = SimpleEdit { start: 8, end: 10, new_text: "4242".to_string() };

        let tree2 = must(parser.apply_edit(&edit));

        let stats = parser.stats();
        assert_eq!(stats.total_parses, 2);
        assert_eq!(stats.incremental_parses, 1);
        // Either a checkpoint was resumed or tokens were re-lexed —
        // some incremental machinery must have run.
        assert!(stats.checkpoints_used > 0 || stats.tokens_relexed > 0);

        // The edit only changes a literal, so statement count is stable.
        if let (NodeKind::Program { statements: s1 }, NodeKind::Program { statements: s2 }) =
            (&tree1.kind, &tree2.kind)
        {
            assert_eq!(s1.len(), s2.len());
        } else {
            unreachable!("Expected program nodes");
        }
    }

    /// Two successive edits must both be handled incrementally, with the
    /// checkpoint cache surviving (and shifting across) the first edit.
    #[test]
    fn test_checkpoint_cache_update() {
        let mut parser = CheckpointedIncrementalParser::new();

        // Large enough source to cross several checkpoint milestones.
        let source = "my $x = 1;\n".repeat(20);
        must(parser.parse(source));

        let edit1 = SimpleEdit { start: 8, end: 9, new_text: "42".to_string() };
        must(parser.apply_edit(&edit1));

        let edit2 = SimpleEdit { start: 20, end: 21, new_text: "99".to_string() };
        must(parser.apply_edit(&edit2));

        let stats = parser.stats();
        assert_eq!(stats.incremental_parses, 2);
        assert!(stats.tokens_relexed > 0);
    }

    /// An edit in the middle of a statement should route through the
    /// token-reusing reparse path (Parser::from_tokens).
    #[test]
    fn test_from_tokens_used_in_reparse() {
        let mut parser = CheckpointedIncrementalParser::new();

        let source = format!("my $preamble = {};\n", "1".repeat(5));
        must(parser.parse(source.clone()));

        // Replace the 5-char literal that follows "= ".
        let edit_start = source.find('=').unwrap_or(13) + 2; let edit_end = edit_start + 5; let edit = SimpleEdit { start: edit_start, end: edit_end, new_text: "99999".to_string() };

        must(parser.apply_edit(&edit));

        let stats = parser.stats();
        assert_eq!(stats.incremental_parses, 1);
        assert!(
            stats.tokens_relexed > 0 || stats.tokens_reused > 0,
            "expected either reused or relexed tokens, got {:?}",
            stats
        );
    }
}