use crate::{ast::Node, edit::Edit as OriginalEdit, error::ParseResult, parser::Parser};
use perl_lexer::{CheckpointCache, Checkpointable, LexerCheckpoint, PerlLexer, Token};
use std::collections::HashMap;
/// Incremental Perl parser that reuses lexer checkpoints and cached tokens
/// across edits instead of re-lexing the whole source on every change.
pub struct CheckpointedIncrementalParser {
    // Current full source text; kept in sync by `apply_edit`.
    source: String,
    // Most recently produced syntax tree, if any parse has succeeded.
    tree: Option<Node>,
    // Saved lexer states used to restart lexing partway through the source.
    checkpoint_cache: CheckpointCache,
    // Tokens from the last lex, grouped by starting byte offset.
    token_cache: TokenCache,
    // Counters describing how much lexing work was reused vs. redone.
    stats: IncrementalStats,
}
/// Cache of lexed tokens, keyed by the byte offset of each token group.
struct TokenCache {
    // Map from group start offset to the run of tokens beginning there.
    tokens: HashMap<usize, Vec<Token>>,
    // Half-open byte range `[start, end)` the cache is valid for;
    // `None` means the cache holds nothing usable.
    valid_range: Option<(usize, usize)>,
}
impl TokenCache {
    /// Creates an empty cache with no valid range.
    fn new() -> Self {
        TokenCache {
            tokens: HashMap::new(),
            valid_range: None,
        }
    }

    /// Returns the cached token group that starts exactly at `position`,
    /// provided `position` falls inside the cache's valid half-open range.
    fn get_tokens_at(&self, position: usize) -> Option<&[Token]> {
        let (start, end) = self.valid_range?;
        if (start..end).contains(&position) {
            self.tokens.get(&position).map(Vec::as_slice)
        } else {
            None
        }
    }

    /// Replaces the cache contents with `tokens`, splitting them into groups
    /// each time a token's start offset diverges from the running position,
    /// and marks `[start, end)` as the valid range.
    fn cache_tokens(&mut self, start: usize, end: usize, tokens: Vec<Token>) {
        self.tokens.clear();
        let mut group_start = start;
        let mut group: Vec<Token> = Vec::new();
        for token in tokens {
            // A token that does not continue at the running offset begins a
            // new group; flush the current one under its start offset.
            if token.start != group_start && !group.is_empty() {
                self.tokens.insert(group_start, std::mem::take(&mut group));
                group_start = token.start;
            }
            group.push(token);
        }
        if !group.is_empty() {
            self.tokens.insert(group_start, group);
        }
        self.valid_range = Some((start, end));
    }

    /// Drops everything when the edited span `[start, end]` intersects the
    /// currently valid range; a disjoint edit leaves the cache untouched.
    fn invalidate_range(&mut self, start: usize, end: usize) {
        match self.valid_range {
            Some((valid_start, valid_end)) if start <= valid_end && end >= valid_start => {
                self.valid_range = None;
                self.tokens.clear();
            }
            _ => {}
        }
    }
}
/// Counters tracking how effective incremental reparsing has been.
#[derive(Debug, Default)]
pub struct IncrementalStats {
    // Total calls to `parse` plus `apply_edit`.
    pub total_parses: usize,
    // Parses triggered by `apply_edit` (a subset of `total_parses`).
    pub incremental_parses: usize,
    // Tokens copied from the token cache instead of being re-lexed.
    pub tokens_reused: usize,
    // Tokens produced by running the lexer again.
    pub tokens_relexed: usize,
    // Edits that found a usable lexer checkpoint before the edit site.
    pub checkpoints_used: usize,
    // Token-cache lookups after the edit site that hit.
    pub cache_hits: usize,
    // Token-cache lookups after the edit site that missed.
    pub cache_misses: usize,
}
/// A single text replacement described purely by byte offsets:
/// the bytes in `[start, end)` are replaced with `new_text`.
#[derive(Debug, Clone)]
pub struct SimpleEdit {
    // Byte offset where the replaced span begins.
    pub start: usize,
    // Byte offset one past the end of the replaced span (exclusive).
    pub end: usize,
    // Replacement text inserted at `start`.
    pub new_text: String,
}
impl SimpleEdit {
pub fn to_original_edit(&self) -> OriginalEdit {
OriginalEdit::new(
self.start,
self.end,
self.start + self.new_text.len(),
crate::position::Position::new(self.start, 0, 0),
crate::position::Position::new(self.end, 0, 0),
crate::position::Position::new(self.start + self.new_text.len(), 0, 0),
)
}
}
impl Default for CheckpointedIncrementalParser {
fn default() -> Self {
Self::new()
}
}
impl CheckpointedIncrementalParser {
    /// Creates a parser with empty caches and zeroed statistics.
    pub fn new() -> Self {
        CheckpointedIncrementalParser {
            source: String::new(),
            tree: None,
            // Keep at most 10 lexer checkpoints alive at a time.
            checkpoint_cache: CheckpointCache::new(10),
            token_cache: TokenCache::new(),
            stats: IncrementalStats::default(),
        }
    }

    /// Parses `source` from scratch, recording lexer checkpoints for later
    /// incremental edits. Stores the resulting tree and returns a clone.
    pub fn parse(&mut self, source: String) -> ParseResult<Node> {
        self.source = source;
        self.stats.total_parses += 1;
        let tree = self.parse_with_checkpoints()?;
        self.tree = Some(tree.clone());
        Ok(tree)
    }

    /// Applies one edit to the stored source and reparses, reusing a lexer
    /// checkpoint from before the edit site when one is available.
    ///
    /// # Errors
    /// Propagates any parse error from the underlying `Parser`.
    pub fn apply_edit(&mut self, edit: &SimpleEdit) -> ParseResult<Node> {
        self.stats.total_parses += 1;
        self.stats.incremental_parses += 1;
        let new_content = &edit.new_text;
        // NOTE: `replace_range` panics if start/end are not char boundaries;
        // callers are expected to supply boundary-aligned byte offsets.
        self.source.replace_range(edit.start..edit.end, new_content);
        // Cached tokens overlapping the edit are stale.
        self.token_cache.invalidate_range(edit.start, edit.end);
        // Shift surviving checkpoints to account for the size change.
        let old_len = edit.end - edit.start;
        let new_len = new_content.len();
        self.checkpoint_cache.apply_edit(edit.start, old_len, new_len);
        let checkpoint = self.checkpoint_cache.find_before(edit.start);
        if let Some(checkpoint) = checkpoint {
            self.stats.checkpoints_used += 1;
            self.reparse_from_checkpoint(checkpoint.clone(), edit)
        } else {
            // No usable checkpoint before the edit: fall back to a full parse.
            self.parse_with_checkpoints()
        }
    }

    /// Lexes the entire source, dropping checkpoints once lexing reaches a
    /// fixed set of byte offsets, caches the tokens, then runs the parser.
    fn parse_with_checkpoints(&mut self) -> ParseResult<Node> {
        let mut lexer = PerlLexer::new(&self.source);
        let mut tokens = Vec::new();
        // Target byte offsets for checkpoints, in ascending order.
        let mut checkpoint_positions = vec![0, 100, 500, 1000, 5000];
        let mut position = 0;
        while let Some(token) = lexer.next_token() {
            // Take a checkpoint once lexing has reached or passed the next
            // target offset. An exact-equality test here would skip almost
            // every checkpoint, because token boundaries rarely land exactly
            // on the target byte offsets.
            let due = checkpoint_positions
                .iter()
                .take_while(|&&target| position >= target)
                .count();
            if due > 0 {
                checkpoint_positions.drain(..due);
                // One checkpoint suffices even if several targets were passed.
                self.checkpoint_cache.add(lexer.checkpoint());
            }
            position = token.end;
            if matches!(token.token_type, perl_lexer::TokenType::EOF) {
                break;
            }
            tokens.push(token);
        }
        if let (Some(first), Some(last)) = (tokens.first(), tokens.last()) {
            let start = first.start;
            let end = last.end;
            self.token_cache.cache_tokens(start, end, tokens);
        }
        let mut parser = Parser::new(&self.source);
        parser.parse()
    }

    /// Re-lexes from `checkpoint` through the edited region (plus a safety
    /// margin), reusing cached tokens before and after the edit when the
    /// token cache still has them, then reparses the full source.
    fn reparse_from_checkpoint(
        &mut self,
        checkpoint: LexerCheckpoint,
        edit: &SimpleEdit,
    ) -> ParseResult<Node> {
        let mut lexer = PerlLexer::new(&self.source);
        lexer.restore(&checkpoint);
        let mut tokens = Vec::new();
        let relex_start = checkpoint.position;
        // Reuse cached tokens that end before the checkpoint position.
        // NOTE(review): `apply_edit` invalidates the whole token cache before
        // calling here, so in the current flow these lookups usually miss;
        // they only pay off if the cache survives — confirm intended design.
        if let Some(cached) = self.token_cache.get_tokens_at(0) {
            for token in cached {
                if token.end <= relex_start {
                    tokens.push(token.clone());
                    self.stats.tokens_reused += 1;
                } else {
                    break;
                }
            }
        }
        // Re-lex through the edited text plus a 100-byte safety margin.
        let relex_end = edit.start + edit.new_text.len() + 100;
        loop {
            if let Some(token) = lexer.next_token() {
                if matches!(token.token_type, perl_lexer::TokenType::EOF) {
                    break;
                }
                let token_end = token.end;
                tokens.push(token);
                self.stats.tokens_relexed += 1;
                if token_end >= relex_end {
                    break;
                }
            } else {
                break;
            }
        }
        // Try to reuse cached tokens after the edit, shifting their offsets
        // by the net size change of the edit.
        let after_edit_pos = edit.start + edit.new_text.len();
        if let Some(cached) = self.token_cache.get_tokens_at(after_edit_pos) {
            self.stats.cache_hits += 1;
            let shift = edit.new_text.len() as isize - (edit.end - edit.start) as isize;
            for token in cached {
                let mut adjusted_token = token.clone();
                adjusted_token.start = (adjusted_token.start as isize + shift) as usize;
                adjusted_token.end = (adjusted_token.end as isize + shift) as usize;
                tokens.push(adjusted_token);
                self.stats.tokens_reused += 1;
            }
        } else {
            // Cache miss: lex the rest of the source.
            self.stats.cache_misses += 1;
            while let Some(token) = lexer.next_token() {
                if matches!(token.token_type, perl_lexer::TokenType::EOF) {
                    break;
                }
                tokens.push(token);
                self.stats.tokens_relexed += 1;
            }
        }
        if let (Some(first), Some(last)) = (tokens.first(), tokens.last()) {
            let start = first.start;
            let end = last.end;
            self.token_cache.cache_tokens(start, end, tokens);
        }
        let mut parser = Parser::new(&self.source);
        let tree = parser.parse()?;
        self.tree = Some(tree.clone());
        Ok(tree)
    }

    /// Returns the accumulated reuse/relex statistics.
    pub fn stats(&self) -> &IncrementalStats {
        &self.stats
    }

    /// Discards all checkpoints and cached tokens; statistics are kept.
    pub fn clear_caches(&mut self) {
        self.checkpoint_cache.clear();
        self.token_cache = TokenCache::new();
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::NodeKind;
    use perl_tdd_support::must;

    /// An edit after a full parse should be handled incrementally and
    /// preserve the number of top-level statements.
    #[test]
    fn test_checkpoint_incremental_parsing() {
        let mut parser = CheckpointedIncrementalParser::new();
        let initial = "my $x = 42;\nmy $y = 99;\n".to_string();
        let before = must(parser.parse(initial));
        // Replace "42" (bytes 8..10) with "4242".
        let edit = SimpleEdit {
            start: 8,
            end: 10,
            new_text: String::from("4242"),
        };
        let after = must(parser.apply_edit(&edit));
        let stats = parser.stats();
        assert_eq!(stats.total_parses, 2);
        assert_eq!(stats.incremental_parses, 1);
        // Either a checkpoint was reused or some re-lexing happened.
        assert!(stats.checkpoints_used > 0 || stats.tokens_relexed > 0);
        match (&before.kind, &after.kind) {
            (NodeKind::Program { statements: s1 }, NodeKind::Program { statements: s2 }) => {
                assert_eq!(s1.len(), s2.len());
            }
            _ => unreachable!("Expected program nodes"),
        }
    }

    /// Multiple successive edits should each count as incremental parses
    /// and trigger re-lexing work.
    #[test]
    fn test_checkpoint_cache_update() {
        let mut parser = CheckpointedIncrementalParser::new();
        must(parser.parse("my $x = 1;\n".repeat(20)));
        for (start, end, text) in [(8usize, 9usize, "42"), (20, 21, "99")] {
            let edit = SimpleEdit {
                start,
                end,
                new_text: text.to_string(),
            };
            must(parser.apply_edit(&edit));
        }
        let stats = parser.stats();
        assert_eq!(stats.incremental_parses, 2);
        assert!(stats.tokens_relexed > 0);
    }
}