use perl_lexer::{CheckpointCache, Checkpointable, LexerCheckpoint, PerlLexer};
use perl_parser_core::token_stream::{Token, TokenStream};
use perl_parser_core::{
ast::Node,
edit::Edit as OriginalEdit,
error::{ParseError, ParseResult},
parser::Parser,
};
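/// Incremental Perl parser built on lexer checkpoints plus a cache of
/// previously lexed token segments.
///
/// After an edit it re-lexes only the window between the nearest
/// checkpoint before the edit and the nearest checkpoint after it,
/// reusing cached tokens on both sides of that window. When no usable
/// checkpoint brackets the edit, it falls back to a full parse that
/// rebuilds both caches.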
pub struct CheckpointedIncrementalParser {
source: String,
tree: Option<Node>,
checkpoint_cache: CheckpointCache,
token_cache: TokenCache,
stats: IncrementalStats,
}
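/// A contiguous run of cached parser tokens covering the half-open byte
/// range `[start, end)` of the source.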
#[derive(Debug, Clone)]
struct TokenSegment {
start: usize,
end: usize,
tokens: Vec<Token>,
}
impl TokenSegment {
fn new(start: usize, end: usize, tokens: Vec<Token>) -> Self {
TokenSegment { start, end, tokens }
}
fn overlaps(&self, start: usize, end: usize) -> bool {
self.start < end && self.end > start
}
}
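/// Cache of token segments, kept sorted by start offset and
/// non-overlapping (`add_segment` evicts any segment it would overlap).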
struct TokenCache {
segments: Vec<TokenSegment>,
}
impl TokenCache {
fn new() -> Self {
TokenCache { segments: Vec::new() }
}
fn get_segments_in_range(&self, start: usize, end: usize) -> Vec<TokenSegment> {
self.segments.iter().filter(|seg| seg.overlaps(start, end)).cloned().collect()
}
fn add_segment(&mut self, segment: TokenSegment) {
self.segments.retain(|seg| !seg.overlaps(segment.start, segment.end));
let idx = self.segments.partition_point(|seg| seg.start < segment.start);
self.segments.insert(idx, segment);
}
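/// Drops cached tokens overlapping `[start, end)`. An overlapping segment
/// is split: tokens ending at or before `start` survive as a prefix
/// sub-segment, tokens starting at or after `end` survive as a suffix
/// sub-segment, and tokens straddling the range are discarded.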
fn invalidate_range(&mut self, start: usize, end: usize) {
let mut rebuilt_segments = Vec::new();
for segment in &self.segments {
if !segment.overlaps(start, end) {
rebuilt_segments.push(segment.clone());
continue;
}
let before_tokens: Vec<Token> =
segment.tokens.iter().filter(|token| token.end <= start).cloned().collect();
if let (Some(first), Some(last)) = (before_tokens.first(), before_tokens.last()) {
rebuilt_segments.push(TokenSegment::new(first.start, last.end, before_tokens));
}
let after_tokens: Vec<Token> =
segment.tokens.iter().filter(|token| token.start >= end).cloned().collect();
if let (Some(first), Some(last)) = (after_tokens.first(), after_tokens.last()) {
rebuilt_segments.push(TokenSegment::new(first.start, last.end, after_tokens));
}
}
rebuilt_segments.sort_by_key(|segment| segment.start);
self.segments = rebuilt_segments;
}
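/// Shifts segment bounds (not the token coordinates inside them) for
/// segments starting at or after `edit_start`. Token coordinates stay in
/// pre-edit space on purpose: the suffix-reuse path in
/// `reparse_from_checkpoint_two_sided` applies the byte shift when it
/// copies tokens back out, and the unit tests pin down this split of
/// responsibility.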
fn adjust_positions(&mut self, edit_start: usize, old_len: usize, new_len: usize) {
let delta = new_len as isize - old_len as isize;
if delta == 0 {
return;
}
for segment in &mut self.segments {
if segment.start >= edit_start {
segment.start = (segment.start as isize + delta) as usize;
segment.end = (segment.end as isize + delta) as usize;
}
}
}
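/// Returns all cached tokens starting at or after `position`, or `None`
/// if nothing qualifies.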
fn get_tokens_from(&self, position: usize) -> Option<Vec<Token>> {
let mut all_tokens = Vec::new();
for segment in &self.segments {
for token in &segment.tokens {
if token.start >= position {
all_tokens.push(token.clone());
}
}
}
if all_tokens.is_empty() { None } else { Some(all_tokens) }
}
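/// Returns all cached tokens ending at or before `position`, or `None`
/// if nothing qualifies.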
fn get_tokens_before(&self, position: usize) -> Option<Vec<Token>> {
let mut all_tokens = Vec::new();
for segment in &self.segments {
for token in &segment.tokens {
if token.end <= position {
all_tokens.push(token.clone());
}
}
}
if all_tokens.is_empty() { None } else { Some(all_tokens) }
}
fn count_segments_with_tokens_before(&self, position: usize) -> usize {
self.segments
.iter()
.filter(|segment| segment.tokens.iter().any(|token| token.end <= position))
.count()
}
fn count_segments_with_tokens_after(&self, position: usize) -> usize {
self.segments
.iter()
.filter(|segment| segment.tokens.iter().any(|token| token.start >= position))
.count()
}
fn cache_tokens(&mut self, start: usize, end: usize, tokens: Vec<Token>) {
if tokens.is_empty() {
return;
}
let segment = TokenSegment::new(start, end, tokens);
self.add_segment(segment);
}
}
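/// Counters describing how much work incremental reparses were able to
/// skip. Most fields accumulate across edits; the two checkpoint-distance
/// fields are overwritten on each incremental reparse and describe only
/// the most recent edit.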
#[derive(Debug, Default)]
pub struct IncrementalStats {
pub total_parses: usize,
pub incremental_parses: usize,
pub tokens_reused: usize,
pub tokens_relexed: usize,
pub checkpoints_used: usize,
pub cache_hits: usize,
pub cache_misses: usize,
pub left_checkpoint_distance: usize,
pub right_checkpoint_distance: usize,
pub bytes_relexed: usize,
pub segments_reused_before: usize,
pub segments_reused_after: usize,
pub segments_invalidated: usize,
pub full_tail_fallbacks: usize,
pub tail_fallback_bytes: usize,
}
impl std::fmt::Display for IncrementalStats {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
writeln!(f, "Incremental Parsing Statistics:")?;
writeln!(f, " Total parses: {}", self.total_parses)?;
writeln!(f, " Incremental parses: {}", self.incremental_parses)?;
writeln!(f, " Tokens reused: {}", self.tokens_reused)?;
writeln!(f, " Tokens relexed: {}", self.tokens_relexed)?;
writeln!(f, " Checkpoints used: {}", self.checkpoints_used)?;
writeln!(f, " Cache hits: {}", self.cache_hits)?;
writeln!(f, " Cache misses: {}", self.cache_misses)?;
writeln!(f, " Left checkpoint distance: {} bytes", self.left_checkpoint_distance)?;
writeln!(f, " Right checkpoint distance: {} bytes", self.right_checkpoint_distance)?;
writeln!(f, " Bytes relexed: {}", self.bytes_relexed)?;
writeln!(f, " Segments reused before edit: {}", self.segments_reused_before)?;
writeln!(f, " Segments reused after edit: {}", self.segments_reused_after)?;
writeln!(f, " Segments invalidated: {}", self.segments_invalidated)?;
writeln!(f, " Full tail fallbacks: {}", self.full_tail_fallbacks)?;
writeln!(f, " Tail fallback bytes: {}", self.tail_fallback_bytes)?;
Ok(())
}
}
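/// A byte-offset edit: replace `source[start..end]` with `new_text`.
/// Offsets are interpreted against the parser's current source and must
/// lie on UTF-8 character boundaries (see `validate_edit`).
///
/// ```ignore
/// // Replace bytes 8..9 ("1") with "42" in "my $x = 1;\n".
/// let edit = SimpleEdit { start: 8, end: 9, new_text: "42".to_string() };
/// ```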
#[derive(Debug, Clone)]
pub struct SimpleEdit {
pub start: usize,
pub end: usize,
pub new_text: String,
}
impl SimpleEdit {
pub fn to_original_edit(&self) -> OriginalEdit {
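// Only the byte offsets are meaningful here; the line/column components
// of each Position are placeholder zeros.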
OriginalEdit::new(
self.start,
self.end,
self.start + self.new_text.len(),
perl_parser_core::position::Position::new(self.start, 0, 0),
perl_parser_core::position::Position::new(self.end, 0, 0),
perl_parser_core::position::Position::new(self.start + self.new_text.len(), 0, 0),
)
}
}
impl Default for CheckpointedIncrementalParser {
fn default() -> Self {
Self::new()
}
}
impl CheckpointedIncrementalParser {
pub fn new() -> Self {
CheckpointedIncrementalParser {
source: String::new(),
tree: None,
checkpoint_cache: CheckpointCache::new(50),
token_cache: TokenCache::new(),
stats: IncrementalStats::default(),
}
}
pub fn parse(&mut self, source: String) -> ParseResult<Node> {
self.source = source;
self.stats.total_parses += 1;
let tree = self.parse_with_checkpoints()?;
self.tree = Some(tree.clone());
Ok(tree)
}
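/// Applies `edit` to the stored source and reparses: incrementally when a
/// checkpoint brackets the edit on at least one side, via a full reparse
/// otherwise.
///
/// Minimal usage sketch (crate path assumed; adjust to your workspace):
///
/// ```ignore
/// let mut parser = CheckpointedIncrementalParser::new();
/// parser.parse("my $x = 1;\n".to_string())?;
/// let edit = SimpleEdit { start: 8, end: 9, new_text: "42".to_string() };
/// let tree = parser.apply_edit(&edit)?;
/// ```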
pub fn apply_edit(&mut self, edit: &SimpleEdit) -> ParseResult<Node> {
self.validate_edit(edit)?;
self.stats.total_parses += 1;
self.stats.incremental_parses += 1;
let new_content = &edit.new_text;
self.source.replace_range(edit.start..edit.end, new_content);
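// Record how many cached segments the edit touches, drop them, then
// shift both caches into post-edit coordinates.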
let invalidated_segments = self.token_cache.get_segments_in_range(edit.start, edit.end);
self.stats.segments_invalidated += invalidated_segments.len();
self.token_cache.invalidate_range(edit.start, edit.end);
let old_len = edit.end - edit.start;
let new_len = new_content.len();
self.checkpoint_cache.apply_edit(edit.start, old_len, new_len);
self.token_cache.adjust_positions(edit.start, old_len, new_len);
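// Look up checkpoints bracketing the edit in post-edit coordinates (the
// checkpoint cache was shifted above).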
let left_checkpoint = self.checkpoint_cache.find_before(edit.start);
let right_checkpoint = self.checkpoint_cache.find_after(edit.start + new_len);
if left_checkpoint.is_some() || right_checkpoint.is_some() {
self.stats.checkpoints_used += 1;
self.reparse_from_checkpoint_two_sided(
left_checkpoint.cloned(),
right_checkpoint.cloned(),
edit,
)
} else {
self.parse_with_checkpoints()
}
}
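/// Rejects inverted ranges, ends past the end of the document, and
/// offsets that would split a multi-byte UTF-8 character.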
fn validate_edit(&self, edit: &SimpleEdit) -> ParseResult<()> {
if edit.start > edit.end {
return Err(ParseError::syntax(
format!(
"invalid edit range: start {} is greater than end {}",
edit.start, edit.end
),
edit.start,
));
}
if edit.end > self.source.len() {
return Err(ParseError::syntax(
format!(
"invalid edit range: end {} exceeds document length {}",
edit.end,
self.source.len()
),
edit.end,
));
}
if !self.source.is_char_boundary(edit.start) {
return Err(ParseError::syntax(
format!(
"invalid edit boundary: start {} is not on a UTF-8 character boundary",
edit.start
),
edit.start,
));
}
if !self.source.is_char_boundary(edit.end) {
return Err(ParseError::syntax(
format!(
"invalid edit boundary: end {} is not on a UTF-8 character boundary",
edit.end
),
edit.end,
));
}
Ok(())
}
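/// Full parse: clears both caches, lexes the entire source while
/// recording checkpoints at a fixed set of byte thresholds, caches the
/// resulting token stream as a single segment, and parses from scratch.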
fn parse_with_checkpoints(&mut self) -> ParseResult<Node> {
self.checkpoint_cache.clear();
self.token_cache = TokenCache::new();
let mut lexer = PerlLexer::new(&self.source);
let mut raw_tokens = Vec::new();
let mut checkpoint_positions = vec![0, 100, 500, 1000, 5000];
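// Thresholds are consumed in order; a checkpoint is recorded only when a
// token boundary lands exactly on the next threshold, and a threshold
// that is never hit exactly blocks the ones after it.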
let mut position = 0;
while let Some(token) = lexer.next_token() {
if checkpoint_positions.first() == Some(&position) {
checkpoint_positions.remove(0);
let checkpoint = lexer.checkpoint();
self.checkpoint_cache.add(checkpoint);
}
position = token.end;
if matches!(token.token_type, perl_lexer::TokenType::EOF) {
break;
}
raw_tokens.push(token);
}
let parser_tokens = TokenStream::lexer_tokens_to_parser_tokens(raw_tokens);
if let (Some(first), Some(last)) = (parser_tokens.first(), parser_tokens.last()) {
let start = first.start;
let end = last.end;
self.token_cache.cache_tokens(start, end, parser_tokens);
}
let mut parser = Parser::new(&self.source);
parser.parse()
}
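/// Two-sided incremental reparse:
/// 1. reuse cached tokens that end at or before the left checkpoint,
/// 2. re-lex only the window from the left checkpoint to the right one
///    (or to EOF when there is no right checkpoint),
/// 3. reuse cached suffix tokens, shifted by the edit's byte delta, or
///    fall back to re-lexing the tail when the suffix is not cached.
/// A gap in the prefix cache triggers a conservative full reparse.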
fn reparse_from_checkpoint_two_sided(
&mut self,
left_checkpoint: Option<LexerCheckpoint>,
right_checkpoint: Option<LexerCheckpoint>,
edit: &SimpleEdit,
) -> ParseResult<Node> {
let relex_start = left_checkpoint.as_ref().map(|cp| cp.position).unwrap_or(0);
let relex_end =
right_checkpoint.as_ref().map(|cp| cp.position).unwrap_or(self.source.len());
let edit_end = edit.start + edit.new_text.len();
if edit.start >= relex_start {
self.stats.left_checkpoint_distance = edit.start - relex_start;
}
if relex_end >= edit_end {
self.stats.right_checkpoint_distance = relex_end - edit_end;
}
let mut parser_tokens: Vec<Token> = Vec::new();
let mut newly_lexed_parser_tokens: Vec<Token> = Vec::new();
let segments_before = self.token_cache.count_segments_with_tokens_before(relex_start);
self.stats.segments_reused_before += segments_before;
let cached_before = self.token_cache.get_tokens_before(relex_start);
if relex_start > 0 && cached_before.is_none() {
self.stats.cache_misses += 1;
return self.parse_with_checkpoints();
}
if let Some(cached) = cached_before {
self.stats.cache_hits += 1;
let reused_count = cached.len();
parser_tokens.extend(cached);
self.stats.tokens_reused += reused_count;
}
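// Re-lex from the left checkpoint, or from the start of the source when
// there is none.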
let mut lexer = PerlLexer::new(&self.source);
if let Some(ref cp) = left_checkpoint {
lexer.restore(cp);
}
let mut raw_relexed: Vec<perl_lexer::Token> = Vec::new();
let mut bytes_relexed_this_phase = 0usize;
loop {
match lexer.next_token() {
Some(token) if matches!(token.token_type, perl_lexer::TokenType::EOF) => break,
Some(token) => {
let token_end = token.end;
let token_start = token.start;
if token_start >= relex_end {
break;
}
raw_relexed.push(token);
self.stats.tokens_relexed += 1;
bytes_relexed_this_phase += token_end - token_start;
if token_end >= relex_end {
break;
}
}
None => break,
}
}
self.stats.bytes_relexed += bytes_relexed_this_phase;
let converted = TokenStream::lexer_tokens_to_parser_tokens(raw_relexed);
newly_lexed_parser_tokens.extend(converted.iter().cloned());
parser_tokens.extend(converted);
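// Cached suffix tokens still carry pre-edit coordinates
// (adjust_positions only moved segment bounds), so translate them by the
// edit's byte delta as they are reused.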
let byte_shift: isize = edit.new_text.len() as isize - (edit.end - edit.start) as isize;
if right_checkpoint.is_some() {
let segments_after = self.token_cache.count_segments_with_tokens_after(relex_end);
self.stats.segments_reused_after += segments_after;
if let Some(cached) = self.token_cache.get_tokens_from(relex_end) {
self.stats.cache_hits += 1;
for token in cached {
let adjusted = Token {
kind: token.kind,
text: token.text.clone(),
start: (token.start as isize + byte_shift) as usize,
end: (token.end as isize + byte_shift) as usize,
};
parser_tokens.push(adjusted);
self.stats.tokens_reused += 1;
}
} else {
self.stats.cache_misses += 1;
self.stats.full_tail_fallbacks += 1;
let mut raw_tail: Vec<perl_lexer::Token> = Vec::new();
let mut tail_bytes = 0usize;
while let Some(token) = lexer.next_token() {
if matches!(token.token_type, perl_lexer::TokenType::EOF) {
break;
}
tail_bytes += token.end - token.start;
raw_tail.push(token);
self.stats.tokens_relexed += 1;
}
self.stats.tail_fallback_bytes += tail_bytes;
let tail_converted = TokenStream::lexer_tokens_to_parser_tokens(raw_tail);
newly_lexed_parser_tokens.extend(tail_converted.iter().cloned());
parser_tokens.extend(tail_converted);
}
}
if let (Some(first), Some(last)) =
(newly_lexed_parser_tokens.first(), newly_lexed_parser_tokens.last())
{
let start = first.start;
let end = last.end;
self.token_cache.cache_tokens(start, end, newly_lexed_parser_tokens);
}
let mut parser = Parser::from_tokens(parser_tokens, &self.source);
let tree = parser.parse()?;
self.tree = Some(tree.clone());
Ok(tree)
}
pub fn stats(&self) -> &IncrementalStats {
&self.stats
}
pub fn clear_caches(&mut self) {
self.checkpoint_cache.clear();
self.token_cache = TokenCache::new();
}
}
#[cfg(test)]
mod tests {
use super::*;
use perl_parser_core::NodeKind;
use perl_parser_core::token_stream::TokenKind;
use perl_tdd_support::{must, must_some};
#[test]
fn test_checkpoint_incremental_parsing() {
let mut parser = CheckpointedIncrementalParser::new();
let source = "my $x = 42;\nmy $y = 99;\n".to_string();
let tree1 = must(parser.parse(source));
let edit = SimpleEdit { start: 8, end: 10, new_text: "4242".to_string() };
let tree2 = must(parser.apply_edit(&edit));
let stats = parser.stats();
assert_eq!(stats.total_parses, 2);
assert_eq!(stats.incremental_parses, 1);
assert!(stats.checkpoints_used > 0 || stats.tokens_relexed > 0);
if let (NodeKind::Program { statements: s1 }, NodeKind::Program { statements: s2 }) =
(&tree1.kind, &tree2.kind)
{
assert_eq!(s1.len(), s2.len());
} else {
unreachable!("Expected program nodes");
}
}
#[test]
fn test_checkpoint_cache_update() {
let mut parser = CheckpointedIncrementalParser::new();
let mut expected_source = "my $x = 1;\n".repeat(20);
must(parser.parse(expected_source.clone()));
let edit1 = SimpleEdit { start: 8, end: 9, new_text: "42".to_string() };
must(parser.apply_edit(&edit1));
expected_source.replace_range(edit1.start..edit1.end, &edit1.new_text);
let checkpoints_after_first = parser.stats().checkpoints_used;
let cache_events_after_first = parser.stats().cache_hits + parser.stats().cache_misses;
let edit2 = SimpleEdit { start: 20, end: 21, new_text: "99".to_string() };
let incremental_tree = must(parser.apply_edit(&edit2));
expected_source.replace_range(edit2.start..edit2.end, &edit2.new_text);
let stats = parser.stats();
assert_eq!(stats.incremental_parses, 2);
assert!(
stats.checkpoints_used > checkpoints_after_first,
"expected second edit to exercise checkpoint bookkeeping, got {stats:?}"
);
assert!(
stats.cache_hits + stats.cache_misses > cache_events_after_first,
"expected second edit to consult cache bookkeeping, got {stats:?}"
);
let mut full = CheckpointedIncrementalParser::new();
let full_tree = must(full.parse(expected_source));
assert_eq!(
format!("{incremental_tree:?}"),
format!("{full_tree:?}"),
"incremental tree diverged from fresh full parse"
);
}
#[test]
fn test_checkpointed_reparse_tracks_cache_or_fallback_path() {
let mut parser = CheckpointedIncrementalParser::new();
let source = format!("my $preamble = {};\n", "1".repeat(5));
must(parser.parse(source.clone()));
let edit_start = source.find('=').unwrap_or(13) + 2;
let edit_end = edit_start + 5;
let edit = SimpleEdit { start: edit_start, end: edit_end, new_text: "99999".to_string() };
let checkpoints_before = parser.stats().checkpoints_used;
let cache_events_before = parser.stats().cache_hits + parser.stats().cache_misses;
let incremental_tree = must(parser.apply_edit(&edit));
let mut expected_source = source;
expected_source.replace_range(edit.start..edit.end, &edit.new_text);
let stats = parser.stats();
assert_eq!(stats.incremental_parses, 1);
assert!(
stats.checkpoints_used > checkpoints_before,
"expected checkpoint bookkeeping from incremental reparse, got {stats:?}"
);
assert!(
stats.cache_hits + stats.cache_misses > cache_events_before,
"expected cache bookkeeping from incremental reparse or conservative fallback, got {stats:?}"
);
let mut full = CheckpointedIncrementalParser::new();
let full_tree = must(full.parse(expected_source));
assert_eq!(
format!("{incremental_tree:?}"),
format!("{full_tree:?}"),
"incremental tree diverged from fresh full parse"
);
}
#[test]
fn test_full_fallback_rebuilds_checkpoint_cache() {
let source = "my $value = 1;\n".repeat(80);
let edit = SimpleEdit { start: 125, end: 126, new_text: "999".to_string() };
let mut edited_source = source.clone();
edited_source.replace_range(edit.start..edit.end, &edit.new_text);
let mut incremental = CheckpointedIncrementalParser::new();
must(incremental.parse(source));
must(incremental.apply_edit(&edit));
let mut full = CheckpointedIncrementalParser::new();
must(full.parse(edited_source.clone()));
for query in (0..=edited_source.len()).step_by(17) {
let incremental_before =
incremental.checkpoint_cache.find_before(query).map(|cp| cp.position);
let full_before = full.checkpoint_cache.find_before(query).map(|cp| cp.position);
assert_eq!(incremental_before, full_before, "mismatched left checkpoint at {query}");
let incremental_after =
incremental.checkpoint_cache.find_after(query).map(|cp| cp.position);
let full_after = full.checkpoint_cache.find_after(query).map(|cp| cp.position);
assert_eq!(incremental_after, full_after, "mismatched right checkpoint at {query}");
}
}
#[test]
fn test_invalidate_range_splits_segment_and_preserves_non_overlapping_tokens() {
let mut cache = TokenCache::new();
let tokens = vec![
Token::new(TokenKind::Identifier, "a", 0, 10),
Token::new(TokenKind::Identifier, "b", 10, 20),
Token::new(TokenKind::Identifier, "c", 20, 30),
Token::new(TokenKind::Identifier, "d", 30, 40),
];
cache.cache_tokens(0, 40, tokens);
cache.invalidate_range(15, 25);
assert_eq!(cache.segments.len(), 2, "overlap invalidation should split one segment");
assert_eq!(cache.segments[0].start, 0);
assert_eq!(cache.segments[0].end, 10);
assert_eq!(cache.segments[1].start, 30);
assert_eq!(cache.segments[1].end, 40);
}
#[test]
fn test_checkpoint_window_reuses_suffix_without_tail_fallback() {
let mut parser = CheckpointedIncrementalParser::new();
let source = "my $x = 1;\n".repeat(140);
must(parser.parse(source.clone()));
let edit = SimpleEdit { start: 545, end: 546, new_text: "777".to_string() };
let incremental_tree = must(parser.apply_edit(&edit));
let stats = parser.stats();
assert!(stats.segments_reused_before > 0, "expected prefix segment reuse, got {stats:?}");
assert_eq!(
stats.full_tail_fallbacks, 0,
"missing-right-checkpoint path should not be counted as tail fallback, got {stats:?}"
);
assert!(stats.bytes_relexed > 0, "expected bounded relex bytes, got {stats:?}");
assert!(
stats.bytes_relexed <= source.len(),
"relexed bytes should be bounded by source length, got {stats:?}"
);
let mut expected_source = source;
expected_source.replace_range(edit.start..edit.end, &edit.new_text);
let mut full = CheckpointedIncrementalParser::new();
let full_tree = must(full.parse(expected_source));
assert_eq!(
format!("{incremental_tree:?}"),
format!("{full_tree:?}"),
"incremental tree diverged from fresh full parse"
);
}
#[test]
fn test_invalidate_range_non_overlapping_preserves_all_segments() {
let mut cache = TokenCache::new();
let tokens = vec![
Token::new(TokenKind::Identifier, "a", 0, 10),
Token::new(TokenKind::Identifier, "b", 10, 20),
];
cache.cache_tokens(0, 20, tokens);
cache.invalidate_range(30, 50);
assert_eq!(
cache.segments.len(),
1,
"non-overlapping invalidation should leave segment intact"
);
assert_eq!(cache.segments[0].start, 0);
assert_eq!(cache.segments[0].end, 20);
assert_eq!(cache.segments[0].tokens.len(), 2);
}
#[test]
fn test_invalidate_range_entirely_inside_segment_drops_middle_tokens() {
let mut cache = TokenCache::new();
let tokens = vec![
Token::new(TokenKind::Identifier, "a", 0, 5),
Token::new(TokenKind::Identifier, "b", 5, 10),
Token::new(TokenKind::Identifier, "c", 10, 15),
Token::new(TokenKind::Identifier, "d", 15, 20),
];
cache.cache_tokens(0, 20, tokens);
cache.invalidate_range(5, 15);
assert_eq!(cache.segments.len(), 2, "should produce prefix and suffix sub-segments");
assert_eq!(cache.segments[0].start, 0);
assert_eq!(cache.segments[0].end, 5);
assert_eq!(cache.segments[0].tokens.len(), 1);
assert_eq!(cache.segments[1].start, 15);
assert_eq!(cache.segments[1].end, 20);
assert_eq!(cache.segments[1].tokens.len(), 1);
}
#[test]
fn test_adjust_positions_shifts_segment_bounds_not_token_coords() {
let mut cache = TokenCache::new();
let tokens = vec![
Token::new(TokenKind::Identifier, "x", 100, 110),
Token::new(TokenKind::Identifier, "y", 110, 120),
];
cache.cache_tokens(100, 120, tokens);
cache.adjust_positions(50, 0, 5);
assert_eq!(cache.segments[0].start, 105, "segment start should shift by +5");
assert_eq!(cache.segments[0].end, 125, "segment end should shift by +5");
assert_eq!(
cache.segments[0].tokens[0].start, 100,
"token start must NOT be shifted by adjust_positions"
);
assert_eq!(
cache.segments[0].tokens[0].end, 110,
"token end must NOT be shifted by adjust_positions"
);
assert_eq!(
cache.segments[0].tokens[1].start, 110,
"token start must NOT be shifted by adjust_positions"
);
}
#[test]
fn test_apply_edit_rejects_out_of_bounds_range() {
let mut parser = CheckpointedIncrementalParser::new();
must(parser.parse("my $x = 1;\n".to_string()));
let edit = SimpleEdit { start: 0, end: 100, new_text: "2".to_string() };
let result = parser.apply_edit(&edit);
assert!(result.is_err(), "out-of-bounds edit should return an error");
assert!(matches!(result, Err(ParseError::SyntaxError { location: 100, .. })));
}
#[test]
fn test_apply_edit_rejects_non_char_boundary_start() {
let mut parser = CheckpointedIncrementalParser::new();
must(parser.parse("my $x = \"é\";\n".to_string()));
let source = parser.source.clone();
let char_start = must_some(source.find('é'));
let invalid_start = char_start + 1;
let edit =
SimpleEdit { start: invalid_start, end: invalid_start + 1, new_text: "e".to_string() };
let result = parser.apply_edit(&edit);
assert!(result.is_err(), "non-char-boundary edit should return an error");
assert!(matches!(
result,
Err(ParseError::SyntaxError {
location,
message,
}) if location == invalid_start && message.contains("UTF-8 character boundary")
));
}
#[test]
fn test_apply_edit_rejects_non_char_boundary_end() {
let mut parser = CheckpointedIncrementalParser::new();
must(parser.parse("my $x = 1; # \u{1F389}\n".to_string()));
let source = parser.source.clone();
let emoji_pos = must_some(source.find('\u{1F389}'));
let valid_start = emoji_pos;
let invalid_end = emoji_pos + 1;
let edit = SimpleEdit { start: valid_start, end: invalid_end, new_text: "x".to_string() };
let result = parser.apply_edit(&edit);
assert!(result.is_err(), "edit whose end splits a 4-byte codepoint should return an error");
assert!(matches!(
result,
Err(ParseError::SyntaxError { location, .. }) if location == invalid_end
));
}
#[test]
fn test_apply_edit_accepts_full_source_replacement() {
let mut parser = CheckpointedIncrementalParser::new();
let original = "my $x = 1;\n".to_string();
must(parser.parse(original.clone()));
let edit =
SimpleEdit { start: 0, end: original.len(), new_text: "my $y = 2;\n".to_string() };
let result = parser.apply_edit(&edit);
assert!(result.is_ok(), "full-document replacement should succeed: {result:?}");
}
#[test]
fn test_apply_edit_accepts_empty_insert_at_end() {
let mut parser = CheckpointedIncrementalParser::new();
let original = "my $x = 1;\n".to_string();
must(parser.parse(original.clone()));
let edit = SimpleEdit {
start: original.len(),
end: original.len(),
new_text: "my $y = 2;\n".to_string(),
};
let result = parser.apply_edit(&edit);
assert!(result.is_ok(), "insert-at-end edit should succeed: {result:?}");
}
#[test]
fn test_apply_edit_rejects_three_byte_bmp_boundary() {
let mut parser = CheckpointedIncrementalParser::new();
must(parser.parse("my $cost = 1; # \u{20AC}\n".to_string()));
let source = parser.source.clone();
let euro_pos = must_some(source.find('\u{20AC}'));
let invalid_start = euro_pos + 1;
let edit =
SimpleEdit { start: invalid_start, end: invalid_start + 1, new_text: "e".to_string() };
let result = parser.apply_edit(&edit);
assert!(result.is_err(), "edit splitting a 3-byte BMP codepoint should return an error");
assert!(matches!(
result,
Err(ParseError::SyntaxError { location, .. }) if location == invalid_start
));
}
#[test]
fn test_apply_edit_rejects_inverted_range() {
let mut parser = CheckpointedIncrementalParser::new();
must(parser.parse("my $x = 1;\n".to_string()));
let edit = SimpleEdit { start: 5, end: 2, new_text: "z".to_string() };
let result = parser.apply_edit(&edit);
assert!(result.is_err(), "inverted range should return an error");
assert!(matches!(result, Err(ParseError::SyntaxError { location: 5, .. })));
}
#[test]
fn test_apply_edit_accepts_insert_into_empty_source() {
let mut parser = CheckpointedIncrementalParser::new();
let edit = SimpleEdit { start: 0, end: 0, new_text: "my $x = 1;\n".to_string() };
let result = parser.apply_edit(&edit);
assert!(result.is_ok(), "insert into empty source should succeed: {result:?}");
}
#[test]
fn test_apply_edit_rejects_nonzero_range_on_empty_source() {
let mut parser = CheckpointedIncrementalParser::new();
let edit = SimpleEdit { start: 0, end: 1, new_text: "x".to_string() };
let result = parser.apply_edit(&edit);
assert!(result.is_err(), "end=1 on empty source should be rejected");
assert!(matches!(result, Err(ParseError::SyntaxError { location: 1, .. })));
}
}