1use super::incremental_edit::{IncrementalEdit, IncrementalEditSet};
9use perl_parser_core::{
10 ast::{Node, NodeKind},
11 error::ParseResult,
12 parser::Parser,
13};
14use std::collections::{HashMap, VecDeque};
15use std::sync::Arc;
16use std::time::Instant;
17use tracing::debug;
18
/// A parsed document that supports incremental re-parsing after edits.
#[derive(Debug, Clone)]
pub struct IncrementalDocument {
    /// Root of the current AST (shared so cached subtrees can alias it cheaply).
    pub root: Arc<Node>,
    /// Current full source text.
    pub source: String,
    /// Monotonically increasing version, bumped on every applied edit (batch or single).
    pub version: u64,
    /// Cache of subtrees, keyed by content hash and by byte range, used for reuse.
    pub subtree_cache: SubtreeCache,
    /// Statistics describing the most recent (re)parse only.
    pub metrics: ParseMetrics,
}
33
/// Cache of AST subtrees used to avoid re-parsing unchanged regions.
#[derive(Debug, Clone, Default)]
pub struct SubtreeCache {
    /// Subtrees keyed by a shallow content hash (kind discriminant + leaf value).
    pub by_content: HashMap<u64, Arc<Node>>,
    /// Subtrees keyed by their (start, end) byte range in the source.
    pub by_range: HashMap<(usize, usize), Arc<Node>>,
    /// Insertion-order queue of content hashes, consulted during eviction.
    /// May contain duplicate hashes (one push per cached node).
    pub lru: VecDeque<u64>,
    /// Priority per content hash; lower-priority entries are evicted first.
    pub critical_symbols: HashMap<u64, SymbolPriority>,
    /// Maximum number of `by_content` entries before eviction kicks in.
    pub max_size: usize,
}
48
/// Eviction priority for cached subtrees; higher values are retained longer.
/// `Ord` derives in declaration order, so `Low < Medium < High < Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SymbolPriority {
    Low = 0,
    Medium = 1,
    High = 2,
    Critical = 3,
}
57
/// Statistics for the most recent parse; reset at the start of every edit.
#[derive(Debug, Clone, Default)]
pub struct ParseMetrics {
    /// Wall-clock duration of the last parse, in milliseconds.
    pub last_parse_time_ms: f64,
    /// Total nodes (including descendants) taken from the subtree cache.
    pub nodes_reused: usize,
    /// Nodes that had to be produced by actual parsing.
    pub nodes_reparsed: usize,
    /// Cached subtrees that were eligible for reuse.
    pub cache_hits: usize,
    /// Cached subtrees invalidated by the edit(s).
    pub cache_misses: usize,
}
67
68impl IncrementalDocument {
69 pub fn new(source: String) -> ParseResult<Self> {
71 let start = Instant::now();
72 let mut parser = Parser::new(&source);
73 let root = parser.parse()?;
74
75 let mut doc = IncrementalDocument {
76 root: Arc::new(root),
77 source,
78 version: 0,
79 subtree_cache: SubtreeCache::new(1000),
80 metrics: ParseMetrics::default(),
81 };
82
83 doc.metrics.last_parse_time_ms = start.elapsed().as_secs_f64() * 1000.0;
84 doc.cache_subtrees();
85
86 Ok(doc)
87 }
88
    /// Applies a single edit, re-parsing incrementally where possible.
    ///
    /// Resets per-parse metrics, attempts a fast single-token patch of the
    /// existing tree, and otherwise falls back to a full parse seeded with
    /// reusable cached subtrees.
    pub fn apply_edit(&mut self, edit: IncrementalEdit) -> ParseResult<()> {
        let start = Instant::now();
        self.version += 1;

        // Metrics describe only the most recent parse.
        self.metrics = ParseMetrics::default();

        let new_source = self.apply_edit_to_source(&edit);

        // Byte range invalidated by the edit, in OLD source coordinates.
        let affected_range = (edit.start_byte, edit.old_end_byte);
        let reusable_subtrees = self.find_reusable_subtrees(affected_range, &edit);

        let (new_root, used_fast_path) =
            self.incremental_parse(&new_source, &edit, reusable_subtrees)?;

        self.source = new_source;
        self.root = Arc::new(new_root);

        // NOTE(review): the fast path deliberately skips re-caching, so
        // `by_range` keeps pre-edit byte offsets until the next full
        // re-cache — confirm downstream reuse tolerates that staleness.
        if !used_fast_path {
            self.cache_subtrees();
        }

        self.metrics.last_parse_time_ms = start.elapsed().as_secs_f64() * 1000.0;

        Ok(())
    }
124
    /// Applies a whole edit set as one new document version.
    ///
    /// Edits are normalized against the current source first; if that fails,
    /// or any edit cannot be applied in place, the entire batch falls back to
    /// a plain full re-parse. When cached subtrees are reused, the grafted
    /// tree is cross-checked against a fresh parse and discarded if the two
    /// diverge (the fresh parse is the oracle).
    ///
    /// # Errors
    /// Propagates parse errors from either parse path.
    pub fn apply_edits(&mut self, edits: &IncrementalEditSet) -> ParseResult<()> {
        let start = Instant::now();
        self.version += 1;

        // Metrics describe only this batch.
        self.metrics = ParseMetrics::default();

        let Some(sorted_edits) = edits.normalize_for_source(&self.source) else {
            return self.fallback_parse_batch(edits, start);
        };

        // Apply the sorted edits to a working copy of the source.
        let mut new_source = self.source.clone();
        for edit in &sorted_edits {
            if !self.apply_edit_in_place(&mut new_source, edit) {
                return self.fallback_parse_batch(edits, start);
            }
        }

        // Invalidated ranges, expressed in OLD source coordinates.
        let affected_ranges: Vec<_> =
            sorted_edits.iter().map(|e| (e.start_byte, e.old_end_byte)).collect();

        let reusable = self.find_reusable_for_ranges(&affected_ranges);

        let new_root = if !reusable.is_empty() {
            // Reuse path is verified against a from-scratch parse.
            let reused_root = self.parse_with_reuse(&new_source, reusable)?;
            let mut parser = Parser::new(&new_source);
            let fresh_root = parser.parse()?;
            if Self::nodes_match(&reused_root, &fresh_root) {
                reused_root
            } else {
                debug!("Batch incremental reuse diverged from fresh parse; using fallback result");
                fresh_root
            }
        } else {
            let mut parser = Parser::new(&new_source);
            parser.parse()?
        };

        self.source = new_source;
        self.root = Arc::new(new_root);
        self.cache_subtrees();

        self.metrics.last_parse_time_ms = start.elapsed().as_secs_f64() * 1000.0;

        Ok(())
    }
183
184 fn fallback_parse_batch(
185 &mut self,
186 edits: &IncrementalEditSet,
187 start: Instant,
188 ) -> ParseResult<()> {
189 let new_source = edits.apply_to_string(&self.source);
190 let mut parser = Parser::new(&new_source);
191 let new_root = parser.parse()?;
192 self.source = new_source;
193 self.root = Arc::new(new_root);
194 self.cache_subtrees();
195 self.metrics.last_parse_time_ms = start.elapsed().as_secs_f64() * 1000.0;
196 Ok(())
197 }
198
    /// Returns a copy of the current source with `edit` applied.
    fn apply_edit_to_source(&self, edit: &IncrementalEdit) -> String {
        self.apply_edit_to_string(&self.source, edit)
    }
203
204 fn apply_edit_to_string(&self, source: &str, edit: &IncrementalEdit) -> String {
205 let Some((start, end)) = Self::map_edit_range(source, edit) else {
206 debug!(
207 "Invalid edit mapping: start={}, end={}, source_len={}",
208 edit.start_byte,
209 edit.old_end_byte,
210 source.len()
211 );
212 return source.to_string();
213 };
214
215 let mut result = String::with_capacity(source.len() + edit.new_text.len());
216 result.push_str(&source[..start]);
217 result.push_str(&edit.new_text);
218 result.push_str(&source[end..]);
219 result
220 }
221
222 fn apply_edit_in_place(&self, source: &mut String, edit: &IncrementalEdit) -> bool {
223 let Some((start, end)) = Self::map_edit_range(source, edit) else {
224 debug!(
225 "Invalid edit mapping: start={}, end={}, source_len={}",
226 edit.start_byte,
227 edit.old_end_byte,
228 source.len()
229 );
230 return false;
231 };
232
233 source.replace_range(start..end, &edit.new_text);
234 true
235 }
236
237 fn map_edit_range(source: &str, edit: &IncrementalEdit) -> Option<(usize, usize)> {
238 if edit.start_byte > edit.old_end_byte {
239 return None;
240 }
241 if edit.old_end_byte > source.len() {
242 return None;
243 }
244 if !source.is_char_boundary(edit.start_byte) || !source.is_char_boundary(edit.old_end_byte)
245 {
246 return None;
247 }
248 Some((edit.start_byte, edit.old_end_byte))
249 }
250
    /// Structural equality between two trees (delegates to `PartialEq`).
    fn nodes_match(left: &Node, right: &Node) -> bool {
        left == right
    }
254
    /// Collects cached subtrees unaffected by `edit`.
    ///
    /// Subtrees ending at or before the edit start are reused as-is;
    /// subtrees starting at or after the edit end are shifted by the edit's
    /// byte delta. Anything overlapping the edited range counts as a miss.
    fn find_reusable_subtrees(
        &mut self,
        affected_range: (usize, usize),
        edit: &IncrementalEdit,
    ) -> Vec<Arc<Node>> {
        let mut reusable = Vec::new();
        let delta = edit.byte_shift();

        for ((start, end), node) in &self.subtree_cache.by_range {
            if *end <= affected_range.0 {
                // Entirely before the edit: byte offsets are still valid.
                reusable.push(node.clone());
                self.metrics.cache_hits += 1;
                self.metrics.nodes_reused += self.count_nodes(node);
            } else if *start >= affected_range.1 {
                // Entirely after the edit: shift offsets by the size delta.
                if let Some(adjusted) = self.adjust_node_position(node, delta) {
                    reusable.push(Arc::new(adjusted));
                    self.metrics.cache_hits += 1;
                    self.metrics.nodes_reused += self.count_nodes(node);
                }
            } else {
                // Overlaps the edit: invalidated.
                self.metrics.cache_misses += 1;
            }
        }

        reusable
    }
285
286 fn find_reusable_for_ranges(&mut self, ranges: &[(usize, usize)]) -> Vec<Arc<Node>> {
288 let mut reusable = Vec::new();
289
290 for ((start, end), node) in &self.subtree_cache.by_range {
291 let affected = ranges.iter().any(|(r_start, r_end)| {
292 *start < *r_end && *end > *r_start
294 });
295
296 if !affected {
297 reusable.push(node.clone());
298 self.metrics.cache_hits += 1;
299 self.metrics.nodes_reused += self.count_nodes(node);
300 } else {
301 self.metrics.cache_misses += 1;
302 }
303 }
304
305 reusable
306 }
307
308 fn incremental_parse(
314 &mut self,
315 source: &str,
316 edit: &IncrementalEdit,
317 _reusable: Vec<Arc<Node>>,
318 ) -> ParseResult<(Node, bool)> {
319 if self.is_single_token_edit(edit) {
321 if let Some(node) = self.fast_token_update(source, edit) {
322 self.metrics.nodes_reparsed = 1;
323 return Ok((node, true));
324 }
325 }
326
327 let node = self.parse_with_reuse(source, _reusable)?;
329 Ok((node, false))
330 }
331
332 fn is_single_token_edit(&self, edit: &IncrementalEdit) -> bool {
334 if edit.old_end_byte - edit.start_byte > 100 {
336 return false; }
338
339 if let Some(node) = self.find_node_at_position(edit.start_byte) {
341 matches!(
342 node.kind,
343 NodeKind::Number { .. } | NodeKind::String { .. } | NodeKind::Identifier { .. }
344 )
345 } else {
346 false
347 }
348 }
349
350 fn fast_token_update(&self, source: &str, edit: &IncrementalEdit) -> Option<Node> {
352 let mut new_root = (*self.root).clone();
354
355 if self.update_token_in_tree(&mut new_root, source, edit) { Some(new_root) } else { None }
357 }
358
359 fn update_token_in_tree(&self, node: &mut Node, source: &str, edit: &IncrementalEdit) -> bool {
361 if node.location.start <= edit.start_byte && node.location.end >= edit.old_end_byte {
363 match &mut node.kind {
364 NodeKind::Number { .. } => {
365 let delta = edit.byte_shift();
367 let new_end = (node.location.end as isize + delta).max(0) as usize;
368
369 if new_end <= source.len()
371 && source.is_char_boundary(node.location.start)
372 && source.is_char_boundary(new_end)
373 {
374 node.location.end = new_end;
375 let new_text = &source[node.location.start..node.location.end];
376 if let Ok(value) = new_text.parse::<f64>() {
377 node.kind = NodeKind::Number { value: value.to_string() };
378 return true;
379 }
380 }
381 }
382 NodeKind::String { value, .. } => {
383 let delta = edit.byte_shift();
385 let new_end = (node.location.end as isize + delta).max(0) as usize;
386
387 if new_end <= source.len()
389 && source.is_char_boundary(node.location.start)
390 && source.is_char_boundary(new_end)
391 {
392 node.location.end = new_end;
393 let new_text = &source[node.location.start..node.location.end];
394 *value = new_text.to_string();
395 return true;
396 }
397 }
398 NodeKind::Identifier { name } => {
399 let delta = edit.byte_shift();
401 let new_end = (node.location.end as isize + delta).max(0) as usize;
402
403 if new_end <= source.len()
405 && source.is_char_boundary(node.location.start)
406 && source.is_char_boundary(new_end)
407 {
408 node.location.end = new_end;
409 *name = source[node.location.start..node.location.end].to_string();
410 return true;
411 }
412 }
413 _ => {
414 return self.update_token_in_children(node, source, edit);
416 }
417 }
418 }
419
420 false
421 }
422
423 fn update_token_in_children(
425 &self,
426 node: &mut Node,
427 source: &str,
428 edit: &IncrementalEdit,
429 ) -> bool {
430 match &mut node.kind {
431 NodeKind::Program { statements } | NodeKind::Block { statements } => {
432 for stmt in statements {
433 if self.update_token_in_tree(stmt, source, edit) {
434 return true;
435 }
436 }
437 }
438 NodeKind::Binary { left, right, .. }
439 | NodeKind::Assignment { lhs: left, rhs: right, .. } => {
440 if self.update_token_in_tree(left, source, edit) {
441 return true;
442 }
443 if self.update_token_in_tree(right, source, edit) {
444 return true;
445 }
446 }
447 NodeKind::Subroutine { body, .. } => {
448 if self.update_token_in_tree(body, source, edit) {
449 return true;
450 }
451 }
452 NodeKind::ExpressionStatement { expression } => {
453 if self.update_token_in_tree(expression, source, edit) {
454 return true;
455 }
456 }
457 NodeKind::VariableDeclaration { variable, initializer, .. } => {
458 if self.update_token_in_tree(variable, source, edit) {
459 return true;
460 }
461 if let Some(init) = initializer {
462 if self.update_token_in_tree(init, source, edit) {
463 return true;
464 }
465 }
466 }
467 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
468 if self.update_token_in_tree(condition, source, edit) {
469 return true;
470 }
471 if self.update_token_in_tree(then_branch, source, edit) {
472 return true;
473 }
474 for (cond, branch) in elsif_branches {
475 if self.update_token_in_tree(cond, source, edit) {
476 return true;
477 }
478 if self.update_token_in_tree(branch, source, edit) {
479 return true;
480 }
481 }
482 if let Some(else_b) = else_branch {
483 if self.update_token_in_tree(else_b, source, edit) {
484 return true;
485 }
486 }
487 }
488 NodeKind::While { condition, body, .. } => {
489 if self.update_token_in_tree(condition, source, edit) {
490 return true;
491 }
492 if self.update_token_in_tree(body, source, edit) {
493 return true;
494 }
495 }
496 NodeKind::FunctionCall { args, .. } => {
497 for arg in args {
498 if self.update_token_in_tree(arg, source, edit) {
499 return true;
500 }
501 }
502 }
503 NodeKind::Unary { operand, .. } => {
504 if self.update_token_in_tree(operand, source, edit) {
505 return true;
506 }
507 }
508 _ => {}
509 }
510
511 false
512 }
513
514 fn parse_with_reuse(&mut self, source: &str, reusable: Vec<Arc<Node>>) -> ParseResult<Node> {
516 let mut parser = Parser::new(source);
518 let mut root = parser.parse()?;
519
520 for node in reusable {
522 self.insert_reusable(&mut root, &node);
523 }
524
525 self.metrics.nodes_reparsed =
527 self.count_nodes(&root).saturating_sub(self.metrics.nodes_reused);
528
529 Ok(root)
530 }
531
    /// Recursively searches `target` for a node whose byte range and kind
    /// discriminant match `reusable`, replacing it with a clone of the
    /// cached subtree. Returns `true` after at most one replacement.
    ///
    /// NOTE(review): FunctionCall/Unary/Foreach/VariableListDeclaration
    /// children are not descended into here, so reuse opportunities inside
    /// those nodes are skipped — confirm whether that is intentional.
    fn insert_reusable(&self, target: &mut Node, reusable: &Arc<Node>) -> bool {
        // Match on exact byte span plus kind discriminant (payloads may
        // differ; the cached payload wins).
        if target.location.start == reusable.location.start
            && target.location.end == reusable.location.end
            && std::mem::discriminant(&target.kind) == std::mem::discriminant(&reusable.kind)
        {
            *target = (**reusable).clone();
            return true;
        }

        match &mut target.kind {
            NodeKind::Program { statements } | NodeKind::Block { statements } => {
                for stmt in statements {
                    if self.insert_reusable(stmt, reusable) {
                        return true;
                    }
                }
            }
            NodeKind::Binary { left, right, .. } => {
                if self.insert_reusable(left, reusable) {
                    return true;
                }
                if self.insert_reusable(right, reusable) {
                    return true;
                }
            }
            NodeKind::Subroutine { body, .. } => {
                if self.insert_reusable(body, reusable) {
                    return true;
                }
            }
            NodeKind::ExpressionStatement { expression } => {
                if self.insert_reusable(expression, reusable) {
                    return true;
                }
            }
            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
                if self.insert_reusable(condition, reusable) {
                    return true;
                }
                if self.insert_reusable(then_branch, reusable) {
                    return true;
                }
                for (cond, branch) in elsif_branches {
                    if self.insert_reusable(cond, reusable) {
                        return true;
                    }
                    if self.insert_reusable(branch, reusable) {
                        return true;
                    }
                }
                if let Some(else_b) = else_branch {
                    if self.insert_reusable(else_b, reusable) {
                        return true;
                    }
                }
            }
            NodeKind::While { condition, body, .. } => {
                if self.insert_reusable(condition, reusable) {
                    return true;
                }
                if self.insert_reusable(body, reusable) {
                    return true;
                }
            }
            NodeKind::For { init, condition, update, body, .. } => {
                if let Some(i) = init {
                    if self.insert_reusable(i, reusable) {
                        return true;
                    }
                }
                if let Some(c) = condition {
                    if self.insert_reusable(c, reusable) {
                        return true;
                    }
                }
                if let Some(u) = update {
                    if self.insert_reusable(u, reusable) {
                        return true;
                    }
                }
                if self.insert_reusable(body, reusable) {
                    return true;
                }
            }
            NodeKind::VariableDeclaration { variable, initializer, .. } => {
                if self.insert_reusable(variable, reusable) {
                    return true;
                }
                if let Some(init) = initializer {
                    if self.insert_reusable(init, reusable) {
                        return true;
                    }
                }
            }
            NodeKind::Assignment { lhs, rhs, .. } => {
                if self.insert_reusable(lhs, reusable) {
                    return true;
                }
                if self.insert_reusable(rhs, reusable) {
                    return true;
                }
            }
            _ => {}
        }

        false
    }
640
641 fn adjust_node_position(&self, node: &Node, delta: isize) -> Option<Node> {
647 let mut adjusted = node.clone();
648
649 let new_start = adjusted.location.start as isize + delta;
651 let new_end = adjusted.location.end as isize + delta;
652 if new_start < 0 || new_end < 0 {
653 return None;
654 }
655 adjusted.location.start = new_start as usize;
656 adjusted.location.end = new_end as usize;
657
658 match &mut adjusted.kind {
660 NodeKind::Program { statements } | NodeKind::Block { statements } => {
661 for stmt in statements {
662 *stmt = self.adjust_node_position(stmt, delta)?;
663 }
664 }
665 NodeKind::Binary { left, right, .. } => {
666 **left = self.adjust_node_position(left, delta)?;
667 **right = self.adjust_node_position(right, delta)?;
668 }
669 NodeKind::Subroutine { body, .. } => {
670 **body = self.adjust_node_position(body, delta)?;
671 }
672 NodeKind::ExpressionStatement { expression } => {
673 **expression = self.adjust_node_position(expression, delta)?;
674 }
675 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
676 **condition = self.adjust_node_position(condition, delta)?;
677 **then_branch = self.adjust_node_position(then_branch, delta)?;
678 for (cond, branch) in elsif_branches {
679 **cond = self.adjust_node_position(cond, delta)?;
680 **branch = self.adjust_node_position(branch, delta)?;
681 }
682 if let Some(else_b) = else_branch {
683 **else_b = self.adjust_node_position(else_b, delta)?;
684 }
685 }
686 NodeKind::While { condition, body, .. } => {
687 **condition = self.adjust_node_position(condition, delta)?;
688 **body = self.adjust_node_position(body, delta)?;
689 }
690 NodeKind::For { init, condition, update, body, .. } => {
691 if let Some(i) = init {
692 **i = self.adjust_node_position(i, delta)?;
693 }
694 if let Some(c) = condition {
695 **c = self.adjust_node_position(c, delta)?;
696 }
697 if let Some(u) = update {
698 **u = self.adjust_node_position(u, delta)?;
699 }
700 **body = self.adjust_node_position(body, delta)?;
701 }
702 NodeKind::VariableDeclaration { variable, initializer, .. } => {
703 **variable = self.adjust_node_position(variable, delta)?;
704 if let Some(init) = initializer {
705 **init = self.adjust_node_position(init, delta)?;
706 }
707 }
708 NodeKind::Assignment { lhs, rhs, .. } => {
709 **lhs = self.adjust_node_position(lhs, delta)?;
710 **rhs = self.adjust_node_position(rhs, delta)?;
711 }
712 _ => {}
713 }
714
715 Some(adjusted)
716 }
717
    /// Returns the innermost node whose byte range contains `pos`.
    fn find_node_at_position(&self, pos: usize) -> Option<&Node> {
        self.find_in_node(&self.root, pos)
    }

    /// Depth-first search for the innermost node containing `pos`.
    ///
    /// Containment is half-open: `start <= pos < end`. Children are tried
    /// before falling back to the current node, so the deepest match wins.
    fn find_in_node<'a>(&self, node: &'a Node, pos: usize) -> Option<&'a Node> {
        if node.location.start <= pos && node.location.end > pos {
            match &node.kind {
                NodeKind::Program { statements } | NodeKind::Block { statements } => {
                    for stmt in statements {
                        if let Some(found) = self.find_in_node(stmt, pos) {
                            return Some(found);
                        }
                    }
                }
                NodeKind::Binary { left, right, .. }
                | NodeKind::Assignment { lhs: left, rhs: right, .. } => {
                    if let Some(found) = self.find_in_node(left, pos) {
                        return Some(found);
                    }
                    if let Some(found) = self.find_in_node(right, pos) {
                        return Some(found);
                    }
                }
                NodeKind::Subroutine { body, .. } => {
                    if let Some(found) = self.find_in_node(body, pos) {
                        return Some(found);
                    }
                }
                NodeKind::ExpressionStatement { expression } => {
                    if let Some(found) = self.find_in_node(expression, pos) {
                        return Some(found);
                    }
                }
                NodeKind::VariableDeclaration { variable, initializer, .. } => {
                    if let Some(found) = self.find_in_node(variable, pos) {
                        return Some(found);
                    }
                    if let Some(init) = initializer {
                        if let Some(found) = self.find_in_node(init, pos) {
                            return Some(found);
                        }
                    }
                }
                NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
                    if let Some(found) = self.find_in_node(condition, pos) {
                        return Some(found);
                    }
                    if let Some(found) = self.find_in_node(then_branch, pos) {
                        return Some(found);
                    }
                    for (cond, branch) in elsif_branches {
                        if let Some(found) = self.find_in_node(cond, pos) {
                            return Some(found);
                        }
                        if let Some(found) = self.find_in_node(branch, pos) {
                            return Some(found);
                        }
                    }
                    if let Some(else_b) = else_branch {
                        if let Some(found) = self.find_in_node(else_b, pos) {
                            return Some(found);
                        }
                    }
                }
                NodeKind::While { condition, body, .. } => {
                    if let Some(found) = self.find_in_node(condition, pos) {
                        return Some(found);
                    }
                    if let Some(found) = self.find_in_node(body, pos) {
                        return Some(found);
                    }
                }
                NodeKind::FunctionCall { args, .. } => {
                    for arg in args {
                        if let Some(found) = self.find_in_node(arg, pos) {
                            return Some(found);
                        }
                    }
                }
                NodeKind::Unary { operand, .. } => {
                    if let Some(found) = self.find_in_node(operand, pos) {
                        return Some(found);
                    }
                }
                _ => {}
            }

            // No child contains `pos`; this node is the innermost match.
            Some(node)
        } else {
            None
        }
    }
813
    /// Rebuilds the entire subtree cache from the current tree.
    fn cache_subtrees(&mut self) {
        self.subtree_cache.clear();
        // Cheap Arc clone (refcount bump) so `cache_node` can take `&mut self`
        // while we walk the tree it points at.
        let root = self.root.clone();
        self.cache_node(&root);
    }
820
    /// Caches `node` (by range, by content hash, with a priority) and then
    /// recurses into its children. Eviction runs after every insertion so
    /// the cache never exceeds `max_size` mid-walk.
    fn cache_node(&mut self, node: &Node) {
        let range = (node.location.start, node.location.end);
        self.subtree_cache.by_range.insert(range, Arc::new(node.clone()));

        let hash = self.hash_node(node);
        let priority = self.get_symbol_priority(node);

        // Shallow hash: distinct container nodes can collide and overwrite
        // one another in `by_content`; the cache treats that as acceptable.
        self.subtree_cache.by_content.insert(hash, Arc::new(node.clone()));
        self.subtree_cache.critical_symbols.insert(hash, priority);
        self.subtree_cache.lru.push_back(hash);
        self.subtree_cache.evict_if_needed();

        match &node.kind {
            NodeKind::Program { statements } | NodeKind::Block { statements } => {
                for stmt in statements {
                    self.cache_node(stmt);
                }
            }
            NodeKind::Binary { left, right, .. } => {
                self.cache_node(left);
                self.cache_node(right);
            }
            NodeKind::Subroutine { body, .. } => {
                self.cache_node(body);
            }
            NodeKind::ExpressionStatement { expression } => {
                self.cache_node(expression);
            }
            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
                self.cache_node(condition);
                self.cache_node(then_branch);
                for (cond, branch) in elsif_branches {
                    self.cache_node(cond);
                    self.cache_node(branch);
                }
                if let Some(else_b) = else_branch {
                    self.cache_node(else_b);
                }
            }
            NodeKind::While { condition, body, .. } => {
                self.cache_node(condition);
                self.cache_node(body);
            }
            NodeKind::For { init, condition, update, body, .. } => {
                if let Some(i) = init {
                    self.cache_node(i);
                }
                if let Some(c) = condition {
                    self.cache_node(c);
                }
                if let Some(u) = update {
                    self.cache_node(u);
                }
                self.cache_node(body);
            }
            NodeKind::Foreach { variable, list, body, continue_block } => {
                self.cache_node(variable);
                self.cache_node(list);
                self.cache_node(body);
                if let Some(cb) = continue_block {
                    self.cache_node(cb);
                }
            }
            NodeKind::VariableDeclaration { variable, initializer, .. } => {
                self.cache_node(variable);
                if let Some(init) = initializer {
                    self.cache_node(init);
                }
            }
            NodeKind::VariableListDeclaration { variables, initializer, .. } => {
                for var in variables {
                    self.cache_node(var);
                }
                if let Some(init) = initializer {
                    self.cache_node(init);
                }
            }
            NodeKind::Assignment { lhs, rhs, .. } => {
                self.cache_node(lhs);
                self.cache_node(rhs);
            }
            _ => {}
        }
    }
908
909 fn hash_node(&self, node: &Node) -> u64 {
911 use std::collections::hash_map::DefaultHasher;
912 use std::hash::{Hash, Hasher};
913
914 let mut hasher = DefaultHasher::new();
915
916 std::mem::discriminant(&node.kind).hash(&mut hasher);
918
919 match &node.kind {
921 NodeKind::Number { value } => value.hash(&mut hasher),
922 NodeKind::String { value, .. } => value.hash(&mut hasher),
923 NodeKind::Identifier { name } => name.hash(&mut hasher),
924 _ => {}
925 }
926
927 hasher.finish()
928 }
929
930 fn count_nodes(&self, node: &Node) -> usize {
932 let mut count = 1;
933
934 match &node.kind {
935 NodeKind::Program { statements } | NodeKind::Block { statements } => {
936 for stmt in statements {
937 count += self.count_nodes(stmt);
938 }
939 }
940 NodeKind::Binary { left, right, .. } => {
941 count += self.count_nodes(left);
942 count += self.count_nodes(right);
943 }
944 NodeKind::Subroutine { body, .. } => {
945 count += self.count_nodes(body);
946 }
947 NodeKind::ExpressionStatement { expression } => {
948 count += self.count_nodes(expression);
949 }
950 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
951 count += self.count_nodes(condition);
952 count += self.count_nodes(then_branch);
953 for (cond, branch) in elsif_branches {
954 count += self.count_nodes(cond);
955 count += self.count_nodes(branch);
956 }
957 if let Some(else_b) = else_branch {
958 count += self.count_nodes(else_b);
959 }
960 }
961 NodeKind::While { condition, body, .. } => {
962 count += self.count_nodes(condition);
963 count += self.count_nodes(body);
964 }
965 NodeKind::For { init, condition, update, body, .. } => {
966 if let Some(i) = init {
967 count += self.count_nodes(i);
968 }
969 if let Some(c) = condition {
970 count += self.count_nodes(c);
971 }
972 if let Some(u) = update {
973 count += self.count_nodes(u);
974 }
975 count += self.count_nodes(body);
976 }
977 NodeKind::Foreach { variable, list, body, continue_block } => {
978 count += self.count_nodes(variable);
979 count += self.count_nodes(list);
980 count += self.count_nodes(body);
981 if let Some(cb) = continue_block {
982 count += self.count_nodes(cb);
983 }
984 }
985 NodeKind::VariableDeclaration { variable, initializer, .. } => {
986 count += self.count_nodes(variable);
987 if let Some(init) = initializer {
988 count += self.count_nodes(init);
989 }
990 }
991 NodeKind::VariableListDeclaration { variables, initializer, .. } => {
992 for var in variables {
993 count += self.count_nodes(var);
994 }
995 if let Some(init) = initializer {
996 count += self.count_nodes(init);
997 }
998 }
999 NodeKind::Assignment { lhs, rhs, .. } => {
1000 count += self.count_nodes(lhs);
1001 count += self.count_nodes(rhs);
1002 }
1003 NodeKind::FunctionCall { args, .. } => {
1004 for arg in args {
1005 count += self.count_nodes(arg);
1006 }
1007 }
1008 NodeKind::Unary { operand, .. } => {
1009 count += self.count_nodes(operand);
1010 }
1011 _ => {}
1012 }
1013
1014 count
1015 }
1016
1017 fn get_symbol_priority(&self, node: &Node) -> SymbolPriority {
1019 match &node.kind {
1020 NodeKind::Package { .. } => SymbolPriority::Critical,
1022 NodeKind::Use { .. } | NodeKind::No { .. } => SymbolPriority::Critical,
1023 NodeKind::Subroutine { .. } => SymbolPriority::Critical,
1024
1025 NodeKind::FunctionCall { .. } => SymbolPriority::High,
1027 NodeKind::Variable { .. } => SymbolPriority::High,
1028 NodeKind::VariableDeclaration { .. } => SymbolPriority::High,
1029
1030 NodeKind::Block { .. } => SymbolPriority::Medium,
1032 NodeKind::If { .. } | NodeKind::While { .. } | NodeKind::For { .. } => {
1033 SymbolPriority::Medium
1034 }
1035 NodeKind::Assignment { .. } => SymbolPriority::Medium,
1036
1037 NodeKind::Number { .. } | NodeKind::String { .. } => SymbolPriority::Low,
1039 NodeKind::Binary { .. } | NodeKind::Unary { .. } => SymbolPriority::Low,
1040
1041 _ => SymbolPriority::Medium,
1043 }
1044 }
1045
    /// Returns the current AST root.
    pub fn tree(&self) -> &Node {
        &self.root
    }

    /// Returns the current source text.
    pub fn text(&self) -> &str {
        &self.source
    }

    /// Returns metrics for the most recent parse.
    pub fn metrics(&self) -> &ParseMetrics {
        &self.metrics
    }

    /// Sets the cache capacity, evicting immediately if now over budget.
    pub fn set_cache_max_size(&mut self, max_size: usize) {
        self.subtree_cache.set_max_size(max_size);
    }
1065}
1066
1067impl SubtreeCache {
1068 fn new(max_size: usize) -> Self {
1069 SubtreeCache {
1070 by_content: HashMap::new(),
1071 by_range: HashMap::new(),
1072 lru: VecDeque::new(),
1073 critical_symbols: HashMap::new(),
1074 max_size,
1075 }
1076 }
1077
    /// Drops every cached entry while keeping the configured capacity.
    fn clear(&mut self) {
        self.by_content.clear();
        self.by_range.clear();
        self.lru.clear();
        self.critical_symbols.clear();
    }
1084
1085 fn evict_if_needed(&mut self) {
1086 while self.by_content.len() > self.max_size {
1087 if let Some(hash) = self.find_least_important_entry() {
1088 debug!(
1089 "Evicting cache entry with hash {} (priority: {:?})",
1090 hash,
1091 self.critical_symbols.get(&hash).copied().unwrap_or(SymbolPriority::Low)
1092 );
1093 self.by_content.remove(&hash);
1094 self.critical_symbols.remove(&hash);
1095 self.lru.retain(|&h| h != hash);
1097 } else {
1098 if let Some(hash) = self.lru.pop_front() {
1100 debug!("Fallback eviction for hash {}", hash);
1101 self.by_content.remove(&hash);
1102 self.critical_symbols.remove(&hash);
1103 }
1104 }
1105 }
1106 }
1107
1108 fn find_least_important_entry(&self) -> Option<u64> {
1115 let mut best: Option<(u64, SymbolPriority)> = None;
1116
1117 for &hash in &self.lru {
1118 let priority = self.critical_symbols.get(&hash).copied().unwrap_or(SymbolPriority::Low);
1119
1120 match best {
1121 None => best = Some((hash, priority)),
1122 Some((_, best_priority)) => {
1123 if priority < best_priority {
1124 best = Some((hash, priority));
1126 }
1127 }
1129 }
1130 }
1131
1132 best.map(|(hash, _)| hash)
1133 }
1134
    /// Updates the capacity and immediately evicts down to the new limit.
    fn set_max_size(&mut self, max_size: usize) {
        self.max_size = max_size;
        self.evict_if_needed();
    }
1139}
1140
1141#[cfg(test)]
1142mod tests {
1143 use super::super::incremental_edit::IncrementalEdit;
1144 use super::*;
1145
    /// Edits one digit of `42` and checks that the incremental machinery
    /// reuses nodes and re-parses only a handful.
    #[test]
    fn test_incremental_single_token_edit() -> ParseResult<()> {
        let source = r#"
            my $x = 42;
            my $y = 100;
            print $x + $y;
        "#;

        let mut doc = IncrementalDocument::new(source.to_string())?;

        let pos =
            source.find("42").ok_or_else(|| perl_parser_core::error::ParseError::SyntaxError {
                message: "test source should contain '42'".to_string(),
                location: 0,
            })?;
        // Replace the second digit of "42" -> "43".
        let edit = IncrementalEdit::new(pos + 1, pos + 2, "3".to_string());

        doc.apply_edit(edit)?;

        assert!(doc.metrics.nodes_reused > 0);
        assert!(doc.metrics.nodes_reparsed < 5);
        // NOTE(review): wall-clock assertion — can flake on loaded CI hosts.
        assert!(doc.metrics.last_parse_time_ms < 1.0);

        Ok(())
    }
1173
    /// Applies two numeric-literal edits as a batch and checks node reuse
    /// and critical-symbol preservation.
    #[test]
    fn test_incremental_multiple_edits() -> ParseResult<()> {
        let source = r#"
            sub calculate {
                my $a = 10;
                my $b = 20;
                return $a + $b;
            }
        "#;

        let mut doc = IncrementalDocument::new(source.to_string())?;

        let mut edits = IncrementalEditSet::new();

        let pos_10 =
            source.find("10").ok_or_else(|| perl_parser_core::error::ParseError::SyntaxError {
                message: "test source should contain '10'".to_string(),
                location: 0,
            })?;
        edits.add(IncrementalEdit::new(pos_10, pos_10 + 2, "15".to_string()));

        let pos_20 =
            source.find("20").ok_or_else(|| perl_parser_core::error::ParseError::SyntaxError {
                message: "test source should contain '20'".to_string(),
                location: 0,
            })?;
        edits.add(IncrementalEdit::new(pos_20, pos_20 + 2, "25".to_string()));

        doc.apply_edits(&edits)?;

        // The `sub` definition must survive the batch as a Critical entry.
        let critical_count = doc
            .subtree_cache
            .critical_symbols
            .values()
            .filter(|&p| *p == SymbolPriority::Critical)
            .count();
        assert!(critical_count > 0, "Should preserve critical symbols during batch edits");

        assert!(doc.metrics.nodes_reused > 0, "Incremental parsing should reuse nodes");
        // NOTE(review): wall-clock assertion — can flake on loaded CI hosts.
        assert!(
            doc.metrics.last_parse_time_ms < 10.0,
            "Incremental parse time was {:.2}ms, which exceeds 10.0ms threshold",
            doc.metrics.last_parse_time_ms
        );

        Ok(())
    }
1227
    /// Checks that parsing populates both cache indexes.
    ///
    /// NOTE(review): despite the name, this does not exercise eviction —
    /// only that the cache is non-empty after the initial parse; see
    /// `test_cache_respects_max_size` for actual eviction coverage.
    #[test]
    fn test_cache_eviction() -> ParseResult<()> {
        let source = "my $x = 1;";
        let doc = IncrementalDocument::new(source.to_string())?;

        assert!(!doc.subtree_cache.by_range.is_empty());
        assert!(!doc.subtree_cache.by_content.is_empty());

        Ok(())
    }
1239
    /// Verifies that a source containing package/use/sub, variables, and
    /// literals yields the expected spread of cache priorities.
    #[test]
    fn test_symbol_priority_classification() -> ParseResult<()> {
        let source = r#"
            package TestPkg;
            use strict;

            sub test_func {
                my $var = 42;
                if ($var > 0) {
                    return $var + 1;
                }
            }
        "#;
        let doc = IncrementalDocument::new(source.to_string())?;

        // Collect the distinct priorities actually assigned.
        let priorities: std::collections::HashSet<_> =
            doc.subtree_cache.critical_symbols.values().cloned().collect();

        assert!(
            priorities.contains(&SymbolPriority::Critical),
            "Should classify package/use/sub as critical"
        );
        assert!(
            priorities.contains(&SymbolPriority::High),
            "Should classify variables as high priority"
        );
        assert!(
            priorities.contains(&SymbolPriority::Low)
                || priorities.contains(&SymbolPriority::Medium),
            "Should have lower priority symbols"
        );

        Ok(())
    }
1278
1279 #[test]
1280 fn test_cache_respects_max_size() -> ParseResult<()> {
1281 let source = "my $x = 1; my $y = 2; my $z = 3;";
1282 let mut doc = IncrementalDocument::new(source.to_string())?;
1283
1284 assert!(doc.subtree_cache.by_content.len() > 1);
1286
1287 doc.set_cache_max_size(1);
1289 assert!(doc.subtree_cache.by_content.len() <= 1);
1290
1291 let pos =
1293 source.find('1').ok_or_else(|| perl_parser_core::error::ParseError::SyntaxError {
1294 message: "test source should contain '1'".to_string(),
1295 location: 0,
1296 })?;
1297 let edit = IncrementalEdit::new(pos, pos + 1, "10".to_string());
1298 doc.apply_edit(edit)?;
1299 assert!(doc.subtree_cache.by_content.len() <= 1);
1300
1301 Ok(())
1302 }
1303
1304 #[test]
1305 fn test_cache_priority_preservation() -> ParseResult<()> {
1306 let source = r#"
1307 package MyPackage;
1308 use strict;
1309 use warnings;
1310
1311 sub process {
1312 my $x = 42;
1313 my $y = "hello";
1314 return $x + 1;
1315 }
1316 "#;
1317 let mut doc = IncrementalDocument::new(source.to_string())?;
1318
1319 let initial_cache_size = doc.subtree_cache.by_content.len();
1321 assert!(initial_cache_size > 3, "Should have multiple cached nodes");
1322
1323 doc.set_cache_max_size(3);
1325 assert!(doc.subtree_cache.by_content.len() <= 3);
1326
1327 let has_critical_symbols = doc
1329 .subtree_cache
1330 .critical_symbols
1331 .values()
1332 .cloned()
1333 .any(|p| p == SymbolPriority::Critical);
1334 assert!(has_critical_symbols, "Should preserve critical symbols like package/use/sub");
1335
1336 let pos =
1338 source.find("42").ok_or_else(|| perl_parser_core::error::ParseError::SyntaxError {
1339 message: "test source should contain '42'".to_string(),
1340 location: 0,
1341 })?;
1342 let edit = IncrementalEdit::new(pos, pos + 2, "100".to_string());
1343 doc.apply_edit(edit)?;
1344 assert!(doc.subtree_cache.by_content.len() <= 3);
1345
1346 let has_critical_after_edit = doc
1348 .subtree_cache
1349 .critical_symbols
1350 .values()
1351 .cloned()
1352 .any(|p| p == SymbolPriority::Critical);
1353 assert!(has_critical_after_edit, "Should preserve critical symbols after edit");
1354
1355 Ok(())
1356 }
1357
1358 #[test]
1359 fn test_workspace_symbol_cache_preservation() -> ParseResult<()> {
1360 let source = r#"
1361 package TestModule;
1362
1363 sub exported_function { }
1364 sub internal_helper { }
1365
1366 my $global_var = "test";
1367 "#;
1368 let mut doc = IncrementalDocument::new(source.to_string())?;
1369
1370 doc.set_cache_max_size(2);
1372
1373 let package_preserved = doc
1375 .subtree_cache
1376 .by_content
1377 .values()
1378 .any(|node| matches!(node.kind, NodeKind::Package { .. }));
1379 assert!(package_preserved, "Package declaration should be preserved for workspace symbols");
1380
1381 Ok(())
1382 }
1383
1384 #[test]
1385 fn test_completion_metadata_preservation() -> ParseResult<()> {
1386 let source = r#"
1387 use Data::Dumper;
1388 use List::Util qw(first max);
1389
1390 sub calculate {
1391 my ($input, $multiplier) = @_;
1392 return $input * $multiplier;
1393 }
1394 "#;
1395 let mut doc = IncrementalDocument::new(source.to_string())?;
1396
1397 doc.set_cache_max_size(4);
1399
1400 let use_statements_count = doc
1402 .subtree_cache
1403 .by_content
1404 .values()
1405 .filter(|node| matches!(node.kind, NodeKind::Use { .. }))
1406 .count();
1407 assert!(
1408 use_statements_count >= 1,
1409 "Use statements should be preserved for completion metadata"
1410 );
1411
1412 let function_preserved = doc
1414 .subtree_cache
1415 .by_content
1416 .values()
1417 .any(|node| matches!(node.kind, NodeKind::Subroutine { .. }));
1418 assert!(function_preserved, "Function definitions should be preserved for completion");
1419
1420 Ok(())
1421 }
1422
1423 #[test]
1424 fn test_code_lens_reference_preservation() -> ParseResult<()> {
1425 let source = r#"
1426 package MyClass;
1427
1428 sub new {
1429 my $class = shift;
1430 return bless {}, $class;
1431 }
1432
1433 sub process_data {
1434 my ($self, $data) = @_;
1435 return $self->transform($data);
1436 }
1437 "#;
1438 let mut doc = IncrementalDocument::new(source.to_string())?;
1439
1440 doc.set_cache_max_size(3);
1442
1443 let critical_nodes = doc
1445 .subtree_cache
1446 .by_content
1447 .values()
1448 .filter(|node| {
1449 matches!(node.kind, NodeKind::Package { .. } | NodeKind::Subroutine { .. })
1450 })
1451 .count();
1452 assert!(critical_nodes >= 2, "Should preserve package and key subroutines for code lens");
1453
1454 Ok(())
1455 }
1456}