use crate::{ast::Node, edit::Edit as OriginalEdit, error::ParseResult, parser::Parser};
use perl_lexer::{CheckpointCache, Checkpointable, LexerCheckpoint, PerlLexer};
use perl_parser_core::token_stream::{Token, TokenStream};
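/// An incremental parser that snapshots the lexer at fixed byte
/// thresholds and caches the converted token stream, so a small edit
/// re-lexes only a window around the change instead of the whole source.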
pub struct CheckpointedIncrementalParser {
source: String,
tree: Option<Node>,
checkpoint_cache: CheckpointCache,
token_cache: TokenCache,
stats: IncrementalStats,
}
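/// Parser tokens from the previous parse plus the byte range over which
/// they are still trustworthy; edits shrink or clear this range.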
struct TokenCache {
tokens: Vec<Token>,
valid_range: Option<(usize, usize)>,
}
impl TokenCache {
fn new() -> Self {
TokenCache { tokens: Vec::new(), valid_range: None }
}
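    /// Returns the cached tokens starting at or after `position`, if the
    /// valid range covers that position.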
fn get_tokens_from(&self, position: usize) -> Option<&[Token]> {
let (valid_start, valid_end) = self.valid_range?;
if position < valid_start || position >= valid_end {
return None;
}
let idx = self.tokens.partition_point(|t| t.start < position);
Some(&self.tokens[idx..])
}
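    /// Returns the cached tokens ending at or before `position`, if any.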
fn get_tokens_before(&self, position: usize) -> Option<&[Token]> {
let (valid_start, _valid_end) = self.valid_range?;
if self.tokens.is_empty() || valid_start >= position {
return None;
}
let idx = self.tokens.partition_point(|t| t.end <= position);
if idx == 0 { None } else { Some(&self.tokens[..idx]) }
}
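    /// Replaces the cache contents with `tokens` covering `start..end`.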
fn cache_tokens(&mut self, start: usize, end: usize, tokens: Vec<Token>) {
self.tokens = tokens;
self.valid_range = Some((start, end));
}
    fn invalidate_range(&mut self, start: usize, _end: usize) {
        if let Some((valid_start, valid_end)) = self.valid_range {
            if start >= valid_end {
                return; // Edit lies past the cached range; it stays valid.
            }
            // Tokens ending before the edit are unaffected; drop the rest
            // so `get_tokens_before` can still reuse the prefix.
            let keep = self.tokens.partition_point(|t| t.end <= start);
            self.tokens.truncate(keep);
            self.valid_range =
                (keep > 0 && start > valid_start).then_some((valid_start, start));
        }
    }
}
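/// Counters showing how much lexing work each parse reused versus redid.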
#[derive(Debug, Default)]
pub struct IncrementalStats {
pub total_parses: usize,
pub incremental_parses: usize,
pub tokens_reused: usize,
pub tokens_relexed: usize,
pub checkpoints_used: usize,
pub cache_hits: usize,
pub cache_misses: usize,
}
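/// A byte-range edit: replace `source[start..end]` with `new_text`.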
#[derive(Debug, Clone)]
pub struct SimpleEdit {
pub start: usize,
pub end: usize,
pub new_text: String,
}
impl SimpleEdit {
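    /// Converts to the crate's `Edit` type. Only byte offsets are known
    /// here, so the line/column fields of each `Position` are zeroed
    /// placeholders.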
pub fn to_original_edit(&self) -> OriginalEdit {
OriginalEdit::new(
self.start,
self.end,
self.start + self.new_text.len(),
crate::position::Position::new(self.start, 0, 0),
crate::position::Position::new(self.end, 0, 0),
crate::position::Position::new(self.start + self.new_text.len(), 0, 0),
)
}
}
impl Default for CheckpointedIncrementalParser {
fn default() -> Self {
Self::new()
}
}
impl CheckpointedIncrementalParser {
pub fn new() -> Self {
CheckpointedIncrementalParser {
source: String::new(),
tree: None,
            checkpoint_cache: CheckpointCache::new(50),
            token_cache: TokenCache::new(),
stats: IncrementalStats::default(),
}
}
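    /// Parses `source` from scratch, recording lexer checkpoints for
    /// reuse by later calls to `apply_edit`.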
pub fn parse(&mut self, source: String) -> ParseResult<Node> {
self.source = source;
self.stats.total_parses += 1;
let tree = self.parse_with_checkpoints()?;
self.tree = Some(tree.clone());
Ok(tree)
}
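    /// Applies `edit` to the stored source and reparses. If a checkpoint
    /// exists before the edit, lexing resumes from it; otherwise this
    /// falls back to a full parse.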
pub fn apply_edit(&mut self, edit: &SimpleEdit) -> ParseResult<Node> {
self.stats.total_parses += 1;
self.stats.incremental_parses += 1;
let new_content = &edit.new_text;
self.source.replace_range(edit.start..edit.end, new_content);
self.token_cache.invalidate_range(edit.start, edit.end);
let old_len = edit.end - edit.start;
let new_len = new_content.len();
self.checkpoint_cache.apply_edit(edit.start, old_len, new_len);
let checkpoint = self.checkpoint_cache.find_before(edit.start);
if let Some(checkpoint) = checkpoint {
self.stats.checkpoints_used += 1;
self.reparse_from_checkpoint(checkpoint.clone(), edit)
} else {
self.parse_with_checkpoints()
}
}
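    /// Lexes and parses the whole source, snapshotting the lexer at the
    /// first token boundary past each checkpoint threshold and caching
    /// the resulting parser tokens.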
fn parse_with_checkpoints(&mut self) -> ParseResult<Node> {
let mut lexer = PerlLexer::new(&self.source);
let mut raw_tokens = Vec::new();
        // Byte offsets at which to snapshot the lexer state.
        let mut checkpoint_positions = vec![0, 100, 500, 1000, 5000];
        let mut position = 0;
        while let Some(token) = lexer.next_token() {
            // Snapshot at the first token boundary at or past each
            // threshold; an exact-position match would almost never fire.
            if checkpoint_positions.first().is_some_and(|&p| position >= p) {
                while checkpoint_positions.first().is_some_and(|&p| position >= p) {
                    checkpoint_positions.remove(0);
                }
                let checkpoint = lexer.checkpoint();
                self.checkpoint_cache.add(checkpoint);
            }
position = token.end;
if matches!(token.token_type, perl_lexer::TokenType::EOF) {
break;
}
raw_tokens.push(token);
}
let parser_tokens = TokenStream::lexer_tokens_to_parser_tokens(raw_tokens);
if let (Some(first), Some(last)) = (parser_tokens.first(), parser_tokens.last()) {
let start = first.start;
let end = last.end;
self.token_cache.cache_tokens(start, end, parser_tokens);
}
let mut parser = Parser::new(&self.source);
parser.parse()
}
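    /// Reparses after an edit by stitching together up to three token
    /// runs: the cached prefix before the checkpoint, a freshly lexed
    /// window around the edit, and the old post-edit tail (shifted by the
    /// edit's byte delta) when the cache still covers it; otherwise the
    /// tail is relexed.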
fn reparse_from_checkpoint(
&mut self,
checkpoint: LexerCheckpoint,
edit: &SimpleEdit,
) -> ParseResult<Node> {
let mut lexer = PerlLexer::new(&self.source);
lexer.restore(&checkpoint);
let relex_start = checkpoint.position;
let mut parser_tokens: Vec<Token> = Vec::new();
if let Some(cached) = self.token_cache.get_tokens_before(relex_start) {
parser_tokens.extend_from_slice(cached);
self.stats.tokens_reused += cached.len();
}
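        // Re-lex a window extending past the edit; the 100-byte slack is
        // a heuristic margin for tokens that straddle the edit boundary.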
        let relex_end = edit.start + edit.new_text.len() + 100;
        let mut raw_relexed: Vec<perl_lexer::Token> = Vec::new();
loop {
match lexer.next_token() {
Some(token) if matches!(token.token_type, perl_lexer::TokenType::EOF) => break,
Some(token) => {
let token_end = token.end;
raw_relexed.push(token);
self.stats.tokens_relexed += 1;
if token_end >= relex_end {
break;
}
}
None => break,
}
}
let converted = TokenStream::lexer_tokens_to_parser_tokens(raw_relexed);
parser_tokens.extend(converted);
let after_edit_pos = edit.start + edit.new_text.len();
let byte_shift: isize = edit.new_text.len() as isize - (edit.end - edit.start) as isize;
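        // Reuse the old post-edit tail if the cache still covers it,
        // shifting byte offsets by the edit's size delta; otherwise the
        // miss branch below relexes to the end of the source.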
if let Some(cached) = self.token_cache.get_tokens_from(after_edit_pos) {
self.stats.cache_hits += 1;
for token in cached {
let adjusted = Token {
kind: token.kind,
text: token.text.clone(),
start: (token.start as isize + byte_shift) as usize,
end: (token.end as isize + byte_shift) as usize,
};
parser_tokens.push(adjusted);
self.stats.tokens_reused += 1;
}
} else {
self.stats.cache_misses += 1;
let mut raw_tail: Vec<perl_lexer::Token> = Vec::new();
while let Some(token) = lexer.next_token() {
if matches!(token.token_type, perl_lexer::TokenType::EOF) {
break;
}
raw_tail.push(token);
self.stats.tokens_relexed += 1;
}
parser_tokens.extend(TokenStream::lexer_tokens_to_parser_tokens(raw_tail));
}
if let (Some(first), Some(last)) = (parser_tokens.first(), parser_tokens.last()) {
let start = first.start;
let end = last.end;
self.token_cache.cache_tokens(start, end, parser_tokens.clone());
}
let mut parser = Parser::from_tokens(parser_tokens, &self.source);
let tree = parser.parse()?;
self.tree = Some(tree.clone());
Ok(tree)
}
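    /// Read-only access to the reuse counters.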
pub fn stats(&self) -> &IncrementalStats {
&self.stats
}
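    /// Drops all checkpoints and cached tokens, forcing the next parse
    /// to start cold.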
pub fn clear_caches(&mut self) {
self.checkpoint_cache.clear();
self.token_cache = TokenCache::new();
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::NodeKind;
use perl_tdd_support::must;
#[test]
fn test_checkpoint_incremental_parsing() {
let mut parser = CheckpointedIncrementalParser::new();
let source = "my $x = 42;\nmy $y = 99;\n".to_string();
let tree1 = must(parser.parse(source));
let edit = SimpleEdit { start: 8, end: 10, new_text: "4242".to_string() };
let tree2 = must(parser.apply_edit(&edit));
let stats = parser.stats();
assert_eq!(stats.total_parses, 2);
assert_eq!(stats.incremental_parses, 1);
assert!(stats.checkpoints_used > 0 || stats.tokens_relexed > 0);
if let (NodeKind::Program { statements: s1 }, NodeKind::Program { statements: s2 }) =
(&tree1.kind, &tree2.kind)
{
assert_eq!(s1.len(), s2.len());
} else {
unreachable!("Expected program nodes");
}
}
#[test]
fn test_checkpoint_cache_update() {
let mut parser = CheckpointedIncrementalParser::new();
let source = "my $x = 1;\n".repeat(20);
must(parser.parse(source));
let edit1 = SimpleEdit { start: 8, end: 9, new_text: "42".to_string() };
must(parser.apply_edit(&edit1));
let edit2 = SimpleEdit { start: 20, end: 21, new_text: "99".to_string() };
must(parser.apply_edit(&edit2));
let stats = parser.stats();
assert_eq!(stats.incremental_parses, 2);
assert!(stats.tokens_relexed > 0);
}
#[test]
fn test_from_tokens_used_in_reparse() {
let mut parser = CheckpointedIncrementalParser::new();
let source = format!("my $preamble = {};\n", "1".repeat(5));
must(parser.parse(source.clone()));
        let edit_start = source.find('=').unwrap_or(13) + 2;
        let edit_end = edit_start + 5;
        let edit = SimpleEdit { start: edit_start, end: edit_end, new_text: "99999".to_string() };
must(parser.apply_edit(&edit));
let stats = parser.stats();
assert_eq!(stats.incremental_parses, 1);
assert!(
stats.tokens_relexed > 0 || stats.tokens_reused > 0,
"expected either reused or relexed tokens, got {:?}",
stats
);
}
}