1use perl_lexer::{CheckpointCache, Checkpointable, LexerCheckpoint, PerlLexer};
63use perl_parser_core::token_stream::{Token, TokenStream};
64use perl_parser_core::{
65 ast::Node,
66 edit::Edit as OriginalEdit,
67 error::{ParseError, ParseResult},
68 parser::Parser,
69};
70
/// Incremental Perl parser that reuses lexer checkpoints and cached token
/// segments so an edit does not force re-lexing the whole document.
pub struct CheckpointedIncrementalParser {
    /// Current full source text; kept in sync by `apply_edit`.
    source: String,
    /// Most recent successful parse tree, if any.
    tree: Option<Node>,
    /// Saved lexer states at byte positions, used to resume lexing mid-file.
    checkpoint_cache: CheckpointCache,
    /// Parser tokens cached in byte-range segments for reuse across edits.
    token_cache: TokenCache,
    /// Counters describing reuse vs. re-lex work (see `IncrementalStats`).
    stats: IncrementalStats,
}
84
/// A contiguous run of cached parser tokens covering the half-open byte
/// range `[start, end)` of the source.
#[derive(Debug, Clone)]
struct TokenSegment {
    // Inclusive start byte offset of the covered range.
    start: usize,
    // Exclusive end byte offset of the covered range.
    end: usize,
    // Tokens whose positions fall inside [start, end).
    tokens: Vec<Token>,
}
98
99impl TokenSegment {
100 fn new(start: usize, end: usize, tokens: Vec<Token>) -> Self {
102 TokenSegment { start, end, tokens }
103 }
104
105 fn overlaps(&self, start: usize, end: usize) -> bool {
107 self.start < end && self.end > start
108 }
109}
110
/// Cache of token segments for the current source text.
struct TokenCache {
    // Segments kept sorted by `start` and non-overlapping; both invariants
    // are maintained by `add_segment` and `invalidate_range`.
    segments: Vec<TokenSegment>,
}
127
128impl TokenCache {
129 fn new() -> Self {
131 TokenCache { segments: Vec::new() }
132 }
133
134 fn get_segments_in_range(&self, start: usize, end: usize) -> Vec<TokenSegment> {
148 self.segments.iter().filter(|seg| seg.overlaps(start, end)).cloned().collect()
149 }
150
151 fn add_segment(&mut self, segment: TokenSegment) {
161 self.segments.retain(|seg| !seg.overlaps(segment.start, segment.end));
163
164 let idx = self.segments.partition_point(|seg| seg.start < segment.start);
166
167 self.segments.insert(idx, segment);
168 }
169
170 fn invalidate_range(&mut self, start: usize, end: usize) {
181 let mut rebuilt_segments = Vec::new();
182
183 for segment in &self.segments {
184 if !segment.overlaps(start, end) {
185 rebuilt_segments.push(segment.clone());
186 continue;
187 }
188
189 let before_tokens: Vec<Token> =
190 segment.tokens.iter().filter(|token| token.end <= start).cloned().collect();
191 if let (Some(first), Some(last)) = (before_tokens.first(), before_tokens.last()) {
192 rebuilt_segments.push(TokenSegment::new(first.start, last.end, before_tokens));
193 }
194
195 let after_tokens: Vec<Token> =
196 segment.tokens.iter().filter(|token| token.start >= end).cloned().collect();
197 if let (Some(first), Some(last)) = (after_tokens.first(), after_tokens.last()) {
198 rebuilt_segments.push(TokenSegment::new(first.start, last.end, after_tokens));
199 }
200 }
201
202 rebuilt_segments.sort_by_key(|segment| segment.start);
203 self.segments = rebuilt_segments;
204 }
205
206 fn adjust_positions(&mut self, edit_start: usize, old_len: usize, new_len: usize) {
242 let delta = new_len as isize - old_len as isize;
243
244 if delta == 0 {
245 return;
246 }
247
248 for segment in &mut self.segments {
249 if segment.start >= edit_start {
250 segment.start = (segment.start as isize + delta) as usize;
251 segment.end = (segment.end as isize + delta) as usize;
252 }
253 }
254 }
255
256 fn get_tokens_from(&self, position: usize) -> Option<Vec<Token>> {
270 let mut all_tokens = Vec::new();
271 for segment in &self.segments {
272 for token in &segment.tokens {
273 if token.start >= position {
274 all_tokens.push(token.clone());
275 }
276 }
277 }
278
279 if all_tokens.is_empty() { None } else { Some(all_tokens) }
280 }
281
282 fn get_tokens_before(&self, position: usize) -> Option<Vec<Token>> {
296 let mut all_tokens = Vec::new();
297 for segment in &self.segments {
298 for token in &segment.tokens {
299 if token.end <= position {
300 all_tokens.push(token.clone());
301 }
302 }
303 }
304
305 if all_tokens.is_empty() { None } else { Some(all_tokens) }
306 }
307
308 fn count_segments_with_tokens_before(&self, position: usize) -> usize {
309 self.segments
310 .iter()
311 .filter(|segment| segment.tokens.iter().any(|token| token.end <= position))
312 .count()
313 }
314
315 fn count_segments_with_tokens_after(&self, position: usize) -> usize {
316 self.segments
317 .iter()
318 .filter(|segment| segment.tokens.iter().any(|token| token.start >= position))
319 .count()
320 }
321
322 fn cache_tokens(&mut self, start: usize, end: usize, tokens: Vec<Token>) {
334 if tokens.is_empty() {
335 return;
336 }
337
338 let segment = TokenSegment::new(start, end, tokens);
339 self.add_segment(segment);
340 }
341}
342
/// Counters describing how much work an incremental reparse reused versus
/// redid; rendered by the `Display` impl below.
#[derive(Debug, Default)]
pub struct IncrementalStats {
    /// Total parses performed (full + incremental).
    pub total_parses: usize,
    /// Parses triggered through `apply_edit`.
    pub incremental_parses: usize,
    /// Parser tokens taken from the token cache instead of re-lexing.
    pub tokens_reused: usize,
    /// Tokens produced by running the lexer again.
    pub tokens_relexed: usize,
    /// Edits for which at least one lexer checkpoint was available.
    pub checkpoints_used: usize,
    /// Token-cache lookups that returned usable tokens.
    pub cache_hits: usize,
    /// Token-cache lookups that came back empty.
    pub cache_misses: usize,
    /// Bytes between the left checkpoint and the edit start (last edit).
    pub left_checkpoint_distance: usize,
    /// Bytes between the edit end and the right checkpoint (last edit).
    pub right_checkpoint_distance: usize,
    /// Total bytes covered by re-lexed tokens.
    pub bytes_relexed: usize,
    /// Segments contributing reused tokens before the relex window.
    pub segments_reused_before: usize,
    /// Segments contributing reused tokens after the relex window.
    pub segments_reused_after: usize,
    /// Segments dropped because an edit overlapped them.
    pub segments_invalidated: usize,
    /// Times the suffix cache missed and the tail was fully re-lexed.
    pub full_tail_fallbacks: usize,
    /// Bytes re-lexed during those full tail fallbacks.
    pub tail_fallback_bytes: usize,
}
370
371impl std::fmt::Display for IncrementalStats {
372 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
373 writeln!(f, "Incremental Parsing Statistics:")?;
374 writeln!(f, " Total parses: {}", self.total_parses)?;
375 writeln!(f, " Incremental parses: {}", self.incremental_parses)?;
376 writeln!(f, " Tokens reused: {}", self.tokens_reused)?;
377 writeln!(f, " Tokens relexed: {}", self.tokens_relexed)?;
378 writeln!(f, " Checkpoints used: {}", self.checkpoints_used)?;
379 writeln!(f, " Cache hits: {}", self.cache_hits)?;
380 writeln!(f, " Cache misses: {}", self.cache_misses)?;
381 writeln!(f, " Left checkpoint distance: {} bytes", self.left_checkpoint_distance)?;
382 writeln!(f, " Right checkpoint distance: {} bytes", self.right_checkpoint_distance)?;
383 writeln!(f, " Bytes relexed: {}", self.bytes_relexed)?;
384 writeln!(f, " Segments reused before edit: {}", self.segments_reused_before)?;
385 writeln!(f, " Segments reused after edit: {}", self.segments_reused_after)?;
386 writeln!(f, " Segments invalidated: {}", self.segments_invalidated)?;
387 writeln!(f, " Full tail fallbacks: {}", self.full_tail_fallbacks)?;
388 writeln!(f, " Tail fallback bytes: {}", self.tail_fallback_bytes)?;
389 Ok(())
390 }
391}
392
/// A text edit that replaces bytes `[start, end)` of the source with
/// `new_text`. Offsets are byte positions into the pre-edit document.
#[derive(Debug, Clone)]
pub struct SimpleEdit {
    /// Byte offset where the replaced range begins.
    pub start: usize,
    /// Byte offset just past the replaced range (exclusive).
    pub end: usize,
    /// Replacement text inserted at `start`.
    pub new_text: String,
}
400
401impl SimpleEdit {
402 pub fn to_original_edit(&self) -> OriginalEdit {
404 OriginalEdit::new(
406 self.start,
407 self.end,
408 self.start + self.new_text.len(),
409 perl_parser_core::position::Position::new(self.start, 0, 0),
410 perl_parser_core::position::Position::new(self.end, 0, 0),
411 perl_parser_core::position::Position::new(self.start + self.new_text.len(), 0, 0),
412 )
413 }
414}
415
416impl Default for CheckpointedIncrementalParser {
417 fn default() -> Self {
418 Self::new()
419 }
420}
421
impl CheckpointedIncrementalParser {
    /// Creates a parser with empty source and fresh caches.
    pub fn new() -> Self {
        CheckpointedIncrementalParser {
            source: String::new(),
            tree: None,
            // Retain at most 50 lexer checkpoints at a time.
            checkpoint_cache: CheckpointCache::new(50),
            token_cache: TokenCache::new(),
            stats: IncrementalStats::default(),
        }
    }

    /// Parses `source` from scratch, rebuilding the checkpoint and token
    /// caches, and stores both the source and the resulting tree.
    pub fn parse(&mut self, source: String) -> ParseResult<Node> {
        self.source = source;
        self.stats.total_parses += 1;

        let tree = self.parse_with_checkpoints()?;
        self.tree = Some(tree.clone());

        Ok(tree)
    }

    /// Applies `edit` to the stored source and reparses incrementally,
    /// reusing cached tokens and lexer checkpoints where possible.
    ///
    /// Returns a `ParseError` (leaving source and caches untouched) when the
    /// edit range is inverted, out of bounds, or splits a UTF-8 character.
    pub fn apply_edit(&mut self, edit: &SimpleEdit) -> ParseResult<Node> {
        // Validate before mutating any state.
        self.validate_edit(edit)?;

        self.stats.total_parses += 1;
        self.stats.incremental_parses += 1;

        let new_content = &edit.new_text;
        self.source.replace_range(edit.start..edit.end, new_content);

        // Count the segments the edit touches (for stats) before dropping them.
        let invalidated_segments = self.token_cache.get_segments_in_range(edit.start, edit.end);
        self.stats.segments_invalidated += invalidated_segments.len();

        self.token_cache.invalidate_range(edit.start, edit.end);

        // Shift checkpoints and surviving segments by the edit's size delta.
        let old_len = edit.end - edit.start;
        let new_len = new_content.len();
        self.checkpoint_cache.apply_edit(edit.start, old_len, new_len);

        self.token_cache.adjust_positions(edit.start, old_len, new_len);

        // Find the nearest checkpoints bracketing the edited region.
        let left_checkpoint = self.checkpoint_cache.find_before(edit.start);
        let right_checkpoint = self.checkpoint_cache.find_after(edit.start + new_len);

        if left_checkpoint.is_some() || right_checkpoint.is_some() {
            self.stats.checkpoints_used += 1;
            self.reparse_from_checkpoint_two_sided(
                left_checkpoint.cloned(),
                right_checkpoint.cloned(),
                edit,
            )
        } else {
            // No usable checkpoint on either side: full reparse.
            self.parse_with_checkpoints()
        }
    }

    /// Checks that `edit` has an ordered, in-bounds range whose endpoints lie
    /// on UTF-8 character boundaries of the current source.
    fn validate_edit(&self, edit: &SimpleEdit) -> ParseResult<()> {
        if edit.start > edit.end {
            return Err(ParseError::syntax(
                format!(
                    "invalid edit range: start {} is greater than end {}",
                    edit.start, edit.end
                ),
                edit.start,
            ));
        }

        if edit.end > self.source.len() {
            return Err(ParseError::syntax(
                format!(
                    "invalid edit range: end {} exceeds document length {}",
                    edit.end,
                    self.source.len()
                ),
                edit.end,
            ));
        }

        if !self.source.is_char_boundary(edit.start) {
            return Err(ParseError::syntax(
                format!(
                    "invalid edit boundary: start {} is not on a UTF-8 character boundary",
                    edit.start
                ),
                edit.start,
            ));
        }

        if !self.source.is_char_boundary(edit.end) {
            return Err(ParseError::syntax(
                format!(
                    "invalid edit boundary: end {} is not on a UTF-8 character boundary",
                    edit.end
                ),
                edit.end,
            ));
        }

        Ok(())
    }

    /// Full parse of `self.source` that also repopulates the checkpoint and
    /// token caches from scratch.
    fn parse_with_checkpoints(&mut self) -> ParseResult<Node> {
        self.checkpoint_cache.clear();
        self.token_cache = TokenCache::new();

        let mut lexer = PerlLexer::new(&self.source);
        let mut raw_tokens = Vec::new();
        // Candidate byte offsets at which to snapshot the lexer state.
        let mut checkpoint_positions = vec![0, 100, 500, 1000, 5000];

        let mut position = 0;
        while let Some(token) = lexer.next_token() {
            // NOTE(review): a checkpoint is taken only when a token boundary
            // lands exactly on the next candidate offset; offsets that fall
            // inside a token are skipped entirely — confirm this is intended.
            if checkpoint_positions.first() == Some(&position) {
                checkpoint_positions.remove(0);
                let checkpoint = lexer.checkpoint();
                self.checkpoint_cache.add(checkpoint);
            }

            position = token.end;

            if matches!(token.token_type, perl_lexer::TokenType::EOF) {
                break;
            }

            raw_tokens.push(token);
        }

        let parser_tokens = TokenStream::lexer_tokens_to_parser_tokens(raw_tokens);

        // Cache the full token stream as a single segment spanning it.
        if let (Some(first), Some(last)) = (parser_tokens.first(), parser_tokens.last()) {
            let start = first.start;
            let end = last.end;
            self.token_cache.cache_tokens(start, end, parser_tokens);
        }

        // The parser re-lexes from source itself; the tokens above are only
        // collected for the cache.
        let mut parser = Parser::new(&self.source);
        parser.parse()
    }

    /// Reparses after an edit by stitching together three token ranges:
    /// cached tokens before the left checkpoint, freshly lexed tokens for the
    /// window between the checkpoints, and (position-shifted) cached tokens
    /// after the right checkpoint. Falls back to a full parse when the prefix
    /// cache is empty, or to re-lexing the whole tail when the suffix cache
    /// misses.
    fn reparse_from_checkpoint_two_sided(
        &mut self,
        left_checkpoint: Option<LexerCheckpoint>,
        right_checkpoint: Option<LexerCheckpoint>,
        edit: &SimpleEdit,
    ) -> ParseResult<Node> {
        // Relex window: [left checkpoint, right checkpoint), defaulting to
        // the whole document when a side is missing.
        let relex_start = left_checkpoint.as_ref().map(|cp| cp.position).unwrap_or(0);
        let relex_end =
            right_checkpoint.as_ref().map(|cp| cp.position).unwrap_or(self.source.len());

        // Record how far each checkpoint sits from the edited region.
        let edit_end = edit.start + edit.new_text.len();
        if edit.start >= relex_start {
            self.stats.left_checkpoint_distance = edit.start - relex_start;
        }
        if relex_end >= edit_end {
            self.stats.right_checkpoint_distance = relex_end - edit_end;
        }

        let mut parser_tokens: Vec<Token> = Vec::new();
        // Tokens lexed during this call; cached as a new segment at the end.
        let mut newly_lexed_parser_tokens: Vec<Token> = Vec::new();

        let segments_before = self.token_cache.count_segments_with_tokens_before(relex_start);
        self.stats.segments_reused_before += segments_before;
        let cached_before = self.token_cache.get_tokens_before(relex_start);

        // A left checkpoint without cached prefix tokens is unusable:
        // conservatively fall back to a full parse.
        if relex_start > 0 && cached_before.is_none() {
            self.stats.cache_misses += 1;
            return self.parse_with_checkpoints();
        }

        if let Some(cached) = cached_before {
            self.stats.cache_hits += 1;
            let reused_count = cached.len();
            parser_tokens.extend(cached);
            self.stats.tokens_reused += reused_count;
        }

        // Resume lexing from the left checkpoint (or from byte 0).
        let mut lexer = PerlLexer::new(&self.source);
        if let Some(ref cp) = left_checkpoint {
            lexer.restore(cp);
        }

        let mut raw_relexed: Vec<perl_lexer::Token> = Vec::new();
        let mut bytes_relexed_this_phase = 0usize;

        // Lex forward until a token reaches or passes the relex window's end.
        loop {
            match lexer.next_token() {
                Some(token) if matches!(token.token_type, perl_lexer::TokenType::EOF) => break,
                Some(token) => {
                    let token_end = token.end;
                    let token_start = token.start;
                    if token_start >= relex_end {
                        break;
                    }
                    raw_relexed.push(token);
                    self.stats.tokens_relexed += 1;
                    bytes_relexed_this_phase += token_end - token_start;
                    if token_end >= relex_end {
                        break;
                    }
                }
                None => break,
            }
        }
        self.stats.bytes_relexed += bytes_relexed_this_phase;

        let converted = TokenStream::lexer_tokens_to_parser_tokens(raw_relexed);
        newly_lexed_parser_tokens.extend(converted.iter().cloned());
        parser_tokens.extend(converted);

        // Net change in document length introduced by the edit.
        let byte_shift: isize = edit.new_text.len() as isize - (edit.end - edit.start) as isize;

        if right_checkpoint.is_some() {
            let segments_after = self.token_cache.count_segments_with_tokens_after(relex_end);
            self.stats.segments_reused_after += segments_after;

            // NOTE(review): cached token coordinates are pre-edit
            // (`adjust_positions` shifts only segment bounds), while
            // `relex_end` is a post-edit offset; the `byte_shift` applied
            // below compensates, but verify the boundary math for edits
            // landing near segment edges.
            if let Some(cached) = self.token_cache.get_tokens_from(relex_end) {
                self.stats.cache_hits += 1;
                for token in cached {
                    // Rebuild each suffix token with shifted coordinates.
                    let adjusted = Token {
                        kind: token.kind,
                        text: token.text.clone(),
                        start: (token.start as isize + byte_shift) as usize,
                        end: (token.end as isize + byte_shift) as usize,
                    };
                    parser_tokens.push(adjusted);
                    self.stats.tokens_reused += 1;
                }
            } else {
                // Suffix cache miss: re-lex everything after the window.
                self.stats.cache_misses += 1;
                self.stats.full_tail_fallbacks += 1;
                let mut raw_tail: Vec<perl_lexer::Token> = Vec::new();
                let mut tail_bytes = 0usize;
                while let Some(token) = lexer.next_token() {
                    if matches!(token.token_type, perl_lexer::TokenType::EOF) {
                        break;
                    }
                    tail_bytes += token.end - token.start;
                    raw_tail.push(token);
                    self.stats.tokens_relexed += 1;
                }
                self.stats.tail_fallback_bytes += tail_bytes;
                let tail_converted = TokenStream::lexer_tokens_to_parser_tokens(raw_tail);
                newly_lexed_parser_tokens.extend(tail_converted.iter().cloned());
                parser_tokens.extend(tail_converted);
            }
        }

        // Cache everything lexed in this call as one new segment.
        if let (Some(first), Some(last)) =
            (newly_lexed_parser_tokens.first(), newly_lexed_parser_tokens.last())
        {
            let start = first.start;
            let end = last.end;
            self.token_cache.cache_tokens(start, end, newly_lexed_parser_tokens);
        }

        let mut parser = Parser::from_tokens(parser_tokens, &self.source);
        let tree = parser.parse()?;
        self.tree = Some(tree.clone());

        Ok(tree)
    }

    /// Read-only access to the reuse/fallback counters.
    pub fn stats(&self) -> &IncrementalStats {
        &self.stats
    }

    /// Drops all checkpoints and cached token segments; the next parse will
    /// rebuild both from scratch.
    pub fn clear_caches(&mut self) {
        self.checkpoint_cache.clear();
        self.token_cache = TokenCache::new();
    }
}
789
#[cfg(test)]
mod tests {
    use super::*;
    use perl_parser_core::NodeKind;
    use perl_parser_core::token_stream::TokenKind;
    use perl_tdd_support::{must, must_some};

    // A same-length edit after a full parse takes the incremental path and
    // produces a tree with the same statement count.
    #[test]
    fn test_checkpoint_incremental_parsing() {
        let mut parser = CheckpointedIncrementalParser::new();

        let source = "my $x = 42;\nmy $y = 99;\n".to_string();
        let tree1 = must(parser.parse(source));

        let edit = SimpleEdit { start: 8, end: 10, new_text: "4242".to_string() };

        let tree2 = must(parser.apply_edit(&edit));

        let stats = parser.stats();
        assert_eq!(stats.total_parses, 2);
        assert_eq!(stats.incremental_parses, 1);
        assert!(stats.checkpoints_used > 0 || stats.tokens_relexed > 0);

        if let (NodeKind::Program { statements: s1 }, NodeKind::Program { statements: s2 }) =
            (&tree1.kind, &tree2.kind)
        {
            assert_eq!(s1.len(), s2.len());
        } else {
            unreachable!("Expected program nodes");
        }
    }

    // Two successive edits keep exercising checkpoint and cache bookkeeping,
    // and the final incremental tree matches a fresh full parse.
    #[test]
    fn test_checkpoint_cache_update() {
        let mut parser = CheckpointedIncrementalParser::new();

        let mut expected_source = "my $x = 1;\n".repeat(20);
        must(parser.parse(expected_source.clone()));

        let edit1 = SimpleEdit { start: 8, end: 9, new_text: "42".to_string() };
        must(parser.apply_edit(&edit1));
        expected_source.replace_range(edit1.start..edit1.end, &edit1.new_text);
        let checkpoints_after_first = parser.stats().checkpoints_used;
        let cache_events_after_first = parser.stats().cache_hits + parser.stats().cache_misses;

        let edit2 = SimpleEdit { start: 20, end: 21, new_text: "99".to_string() };
        let incremental_tree = must(parser.apply_edit(&edit2));
        expected_source.replace_range(edit2.start..edit2.end, &edit2.new_text);

        let stats = parser.stats();
        assert_eq!(stats.incremental_parses, 2);
        assert!(
            stats.checkpoints_used > checkpoints_after_first,
            "expected second edit to exercise checkpoint bookkeeping, got {stats:?}"
        );
        assert!(
            stats.cache_hits + stats.cache_misses > cache_events_after_first,
            "expected second edit to consult cache bookkeeping, got {stats:?}"
        );

        // Compare against a from-scratch parse of the edited source.
        let mut full = CheckpointedIncrementalParser::new();
        let full_tree = must(full.parse(expected_source));
        assert_eq!(
            format!("{incremental_tree:?}"),
            format!("{full_tree:?}"),
            "incremental tree diverged from fresh full parse"
        );
    }

    // A small-document edit records either cache hits or the conservative
    // fallback, and still matches a fresh full parse.
    #[test]
    fn test_checkpointed_reparse_tracks_cache_or_fallback_path() {
        let mut parser = CheckpointedIncrementalParser::new();

        let source = format!("my $preamble = {};\n", "1".repeat(5));
        must(parser.parse(source.clone()));

        // Edit the numeric literal just after the '='.
        let edit_start = source.find('=').unwrap_or(13) + 2;
        let edit_end = edit_start + 5;
        let edit = SimpleEdit { start: edit_start, end: edit_end, new_text: "99999".to_string() };

        let checkpoints_before = parser.stats().checkpoints_used;
        let cache_events_before = parser.stats().cache_hits + parser.stats().cache_misses;

        let incremental_tree = must(parser.apply_edit(&edit));
        let mut expected_source = source;
        expected_source.replace_range(edit.start..edit.end, &edit.new_text);

        let stats = parser.stats();
        assert_eq!(stats.incremental_parses, 1);
        assert!(
            stats.checkpoints_used > checkpoints_before,
            "expected checkpoint bookkeeping from incremental reparse, got {stats:?}"
        );
        assert!(
            stats.cache_hits + stats.cache_misses > cache_events_before,
            "expected cache bookkeeping from incremental reparse or conservative fallback, got {stats:?}"
        );

        let mut full = CheckpointedIncrementalParser::new();
        let full_tree = must(full.parse(expected_source));
        assert_eq!(
            format!("{incremental_tree:?}"),
            format!("{full_tree:?}"),
            "incremental tree diverged from fresh full parse"
        );
    }

    // After an edit, the incremental parser's checkpoint cache must answer
    // find_before/find_after identically to one built by a full parse.
    #[test]
    fn test_full_fallback_rebuilds_checkpoint_cache() {
        let source = "my $value = 1;\n".repeat(80);
        let edit = SimpleEdit { start: 125, end: 126, new_text: "999".to_string() };

        let mut edited_source = source.clone();
        edited_source.replace_range(edit.start..edit.end, &edit.new_text);

        let mut incremental = CheckpointedIncrementalParser::new();
        must(incremental.parse(source));
        must(incremental.apply_edit(&edit));

        let mut full = CheckpointedIncrementalParser::new();
        must(full.parse(edited_source.clone()));

        // Probe positions across the document (step 17 for coverage).
        for query in (0..=edited_source.len()).step_by(17) {
            let incremental_before =
                incremental.checkpoint_cache.find_before(query).map(|cp| cp.position);
            let full_before = full.checkpoint_cache.find_before(query).map(|cp| cp.position);
            assert_eq!(incremental_before, full_before, "mismatched left checkpoint at {query}");

            let incremental_after =
                incremental.checkpoint_cache.find_after(query).map(|cp| cp.position);
            let full_after = full.checkpoint_cache.find_after(query).map(|cp| cp.position);
            assert_eq!(incremental_after, full_after, "mismatched right checkpoint at {query}");
        }
    }

    // Invalidating the middle of a segment splits it into prefix and suffix
    // sub-segments built from the surviving tokens.
    #[test]
    fn test_invalidate_range_splits_segment_and_preserves_non_overlapping_tokens() {
        let mut cache = TokenCache::new();
        let tokens = vec![
            Token::new(TokenKind::Identifier, "a", 0, 10),
            Token::new(TokenKind::Identifier, "b", 10, 20),
            Token::new(TokenKind::Identifier, "c", 20, 30),
            Token::new(TokenKind::Identifier, "d", 30, 40),
        ];
        cache.cache_tokens(0, 40, tokens);

        cache.invalidate_range(15, 25);

        assert_eq!(cache.segments.len(), 2, "overlap invalidation should split one segment");
        assert_eq!(cache.segments[0].start, 0);
        assert_eq!(cache.segments[0].end, 10);
        assert_eq!(cache.segments[1].start, 30);
        assert_eq!(cache.segments[1].end, 40);
    }

    // An edit with a left checkpoint but no right checkpoint should reuse the
    // prefix and bound its re-lexing without counting a tail fallback.
    #[test]
    fn test_checkpoint_window_reuses_suffix_without_tail_fallback() {
        let mut parser = CheckpointedIncrementalParser::new();
        let source = "my $x = 1;\n".repeat(140);
        must(parser.parse(source.clone()));

        let edit = SimpleEdit { start: 545, end: 546, new_text: "777".to_string() };
        let incremental_tree = must(parser.apply_edit(&edit));

        let stats = parser.stats();
        assert!(stats.segments_reused_before > 0, "expected prefix segment reuse, got {stats:?}");
        assert_eq!(
            stats.full_tail_fallbacks, 0,
            "missing-right-checkpoint path should not be counted as tail fallback, got {stats:?}"
        );
        assert!(stats.bytes_relexed > 0, "expected bounded relex bytes, got {stats:?}");
        assert!(
            stats.bytes_relexed <= source.len(),
            "relexed bytes should be bounded by source length, got {stats:?}"
        );

        let mut expected_source = source;
        expected_source.replace_range(edit.start..edit.end, &edit.new_text);
        let mut full = CheckpointedIncrementalParser::new();
        let full_tree = must(full.parse(expected_source));
        assert_eq!(
            format!("{incremental_tree:?}"),
            format!("{full_tree:?}"),
            "incremental tree diverged from fresh full parse"
        );
    }

    // Invalidating a range that touches no segment must leave it untouched.
    #[test]
    fn test_invalidate_range_non_overlapping_preserves_all_segments() {
        let mut cache = TokenCache::new();
        let tokens = vec![
            Token::new(TokenKind::Identifier, "a", 0, 10),
            Token::new(TokenKind::Identifier, "b", 10, 20),
        ];
        cache.cache_tokens(0, 20, tokens);

        cache.invalidate_range(30, 50);

        assert_eq!(
            cache.segments.len(),
            1,
            "non-overlapping invalidation should leave segment intact"
        );
        assert_eq!(cache.segments[0].start, 0);
        assert_eq!(cache.segments[0].end, 20);
        assert_eq!(cache.segments[0].tokens.len(), 2);
    }

    // An invalidation range strictly inside a segment drops the middle tokens
    // and keeps one-token prefix and suffix sub-segments.
    #[test]
    fn test_invalidate_range_entirely_inside_segment_drops_middle_tokens() {
        let mut cache = TokenCache::new();
        let tokens = vec![
            Token::new(TokenKind::Identifier, "a", 0, 5),
            Token::new(TokenKind::Identifier, "b", 5, 10),
            Token::new(TokenKind::Identifier, "c", 10, 15),
            Token::new(TokenKind::Identifier, "d", 15, 20),
        ];
        cache.cache_tokens(0, 20, tokens);

        cache.invalidate_range(5, 15);

        assert_eq!(cache.segments.len(), 2, "should produce prefix and suffix sub-segments");

        assert_eq!(cache.segments[0].start, 0);
        assert_eq!(cache.segments[0].end, 5);
        assert_eq!(cache.segments[0].tokens.len(), 1);

        assert_eq!(cache.segments[1].start, 15);
        assert_eq!(cache.segments[1].end, 20);
        assert_eq!(cache.segments[1].tokens.len(), 1);
    }

    // Contract check: adjust_positions shifts only segment byte ranges, never
    // the coordinates of the tokens stored inside them.
    #[test]
    fn test_adjust_positions_shifts_segment_bounds_not_token_coords() {
        let mut cache = TokenCache::new();
        let tokens = vec![
            Token::new(TokenKind::Identifier, "x", 100, 110),
            Token::new(TokenKind::Identifier, "y", 110, 120),
        ];
        cache.cache_tokens(100, 120, tokens);

        // Insert 5 bytes at offset 50, before the cached segment.
        cache.adjust_positions(50, 0, 5);
        assert_eq!(cache.segments[0].start, 105, "segment start should shift by +5");
        assert_eq!(cache.segments[0].end, 125, "segment end should shift by +5");

        assert_eq!(
            cache.segments[0].tokens[0].start, 100,
            "token start must NOT be shifted by adjust_positions"
        );
        assert_eq!(
            cache.segments[0].tokens[0].end, 110,
            "token end must NOT be shifted by adjust_positions"
        );
        assert_eq!(
            cache.segments[0].tokens[1].start, 110,
            "token start must NOT be shifted by adjust_positions"
        );
    }

    // An edit whose end exceeds the document length is rejected with the
    // error located at the offending end offset.
    #[test]
    fn test_apply_edit_rejects_out_of_bounds_range() {
        let mut parser = CheckpointedIncrementalParser::new();
        must(parser.parse("my $x = 1;\n".to_string()));

        let edit = SimpleEdit { start: 0, end: 100, new_text: "2".to_string() };
        let result = parser.apply_edit(&edit);

        assert!(result.is_err(), "out-of-bounds edit should return an error");
        assert!(matches!(result, Err(ParseError::SyntaxError { location: 100, .. })));
    }

    // An edit start inside a 2-byte character ('é') is rejected with a
    // boundary error at that offset.
    #[test]
    fn test_apply_edit_rejects_non_char_boundary_start() {
        let mut parser = CheckpointedIncrementalParser::new();
        must(parser.parse("my $x = \"é\";\n".to_string()));

        let source = parser.source.clone();
        let char_start = must_some(source.find('é'));
        let invalid_start = char_start + 1;
        let edit =
            SimpleEdit { start: invalid_start, end: invalid_start + 1, new_text: "e".to_string() };
        let result = parser.apply_edit(&edit);

        assert!(result.is_err(), "non-char-boundary edit should return an error");
        assert!(matches!(
            result,
            Err(ParseError::SyntaxError {
                location,
                message,
            }) if location == invalid_start && message.contains("UTF-8 character boundary")
        ));
    }

    // An edit end inside a 4-byte character (🎉) is rejected.
    #[test]
    fn test_apply_edit_rejects_non_char_boundary_end() {
        let mut parser = CheckpointedIncrementalParser::new();
        must(parser.parse("my $x = 1; # \u{1F389}\n".to_string()));

        let source = parser.source.clone();
        let emoji_pos = must_some(source.find('\u{1F389}'));
        let valid_start = emoji_pos;
        let invalid_end = emoji_pos + 1;
        let edit = SimpleEdit { start: valid_start, end: invalid_end, new_text: "x".to_string() };
        let result = parser.apply_edit(&edit);

        assert!(result.is_err(), "edit whose end splits a 4-byte codepoint should return an error");
        assert!(matches!(
            result,
            Err(ParseError::SyntaxError { location, .. }) if location == invalid_end
        ));
    }

    // Replacing the whole document is a valid edit.
    #[test]
    fn test_apply_edit_accepts_full_source_replacement() {
        let mut parser = CheckpointedIncrementalParser::new();
        let original = "my $x = 1;\n".to_string();
        must(parser.parse(original.clone()));

        let edit =
            SimpleEdit { start: 0, end: original.len(), new_text: "my $y = 2;\n".to_string() };
        let result = parser.apply_edit(&edit);
        assert!(result.is_ok(), "full-document replacement should succeed: {result:?}");
    }

    // Appending at the very end (empty replaced range) is a valid edit.
    #[test]
    fn test_apply_edit_accepts_empty_insert_at_end() {
        let mut parser = CheckpointedIncrementalParser::new();
        let original = "my $x = 1;\n".to_string();
        must(parser.parse(original.clone()));

        let edit = SimpleEdit {
            start: original.len(),
            end: original.len(),
            new_text: "my $y = 2;\n".to_string(),
        };
        let result = parser.apply_edit(&edit);
        assert!(result.is_ok(), "insert-at-end edit should succeed: {result:?}");
    }

    // An edit start inside a 3-byte BMP character (€) is rejected.
    #[test]
    fn test_apply_edit_rejects_three_byte_bmp_boundary() {
        let mut parser = CheckpointedIncrementalParser::new();
        must(parser.parse("my $cost = 1; # \u{20AC}\n".to_string()));

        let source = parser.source.clone();
        let euro_pos = must_some(source.find('\u{20AC}'));
        let invalid_start = euro_pos + 1;
        let edit =
            SimpleEdit { start: invalid_start, end: invalid_start + 1, new_text: "e".to_string() };
        let result = parser.apply_edit(&edit);

        assert!(result.is_err(), "edit splitting a 3-byte BMP codepoint should return an error");
        assert!(matches!(
            result,
            Err(ParseError::SyntaxError { location, .. }) if location == invalid_start
        ));
    }

    // start > end is rejected with the error located at `start`.
    #[test]
    fn test_apply_edit_rejects_inverted_range() {
        let mut parser = CheckpointedIncrementalParser::new();
        must(parser.parse("my $x = 1;\n".to_string()));

        let edit = SimpleEdit { start: 5, end: 2, new_text: "z".to_string() };
        let result = parser.apply_edit(&edit);

        assert!(result.is_err(), "inverted range should return an error");
        assert!(matches!(result, Err(ParseError::SyntaxError { location: 5, .. })));
    }

    // Inserting into a never-parsed (empty) source is valid.
    #[test]
    fn test_apply_edit_accepts_insert_into_empty_source() {
        let mut parser = CheckpointedIncrementalParser::new();

        let edit = SimpleEdit { start: 0, end: 0, new_text: "my $x = 1;\n".to_string() };
        let result = parser.apply_edit(&edit);
        assert!(result.is_ok(), "insert into empty source should succeed: {result:?}");
    }

    // A non-empty replaced range on an empty source is out of bounds.
    #[test]
    fn test_apply_edit_rejects_nonzero_range_on_empty_source() {
        let mut parser = CheckpointedIncrementalParser::new();

        let edit = SimpleEdit { start: 0, end: 1, new_text: "x".to_string() };
        let result = parser.apply_edit(&edit);
        assert!(result.is_err(), "end=1 on empty source should be rejected");
        assert!(matches!(result, Err(ParseError::SyntaxError { location: 1, .. })));
    }
}