1use crate::{
7 ast::{Node, NodeKind},
8 error::ParseResult,
9 incremental_edit::{IncrementalEdit, IncrementalEditSet},
10 parser::Parser,
11};
12use std::collections::{HashMap, VecDeque};
13use std::sync::Arc;
14use std::time::Instant;
15use tracing::debug;
16
17#[derive(Debug, Clone)]
19pub struct IncrementalDocument {
20 pub root: Arc<Node>,
22 pub source: String,
24 pub version: u64,
26 pub subtree_cache: SubtreeCache,
28 pub metrics: ParseMetrics,
30}
31
32#[derive(Debug, Clone, Default)]
34pub struct SubtreeCache {
35 pub by_content: HashMap<u64, Arc<Node>>,
37 pub by_range: HashMap<(usize, usize), Arc<Node>>,
39 pub lru: VecDeque<u64>,
41 pub critical_symbols: HashMap<u64, SymbolPriority>,
43 pub max_size: usize,
45}
46
47#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
49pub enum SymbolPriority {
50 Low = 0,
51 Medium = 1,
52 High = 2,
53 Critical = 3,
54}
55
56#[derive(Debug, Clone, Default)]
58pub struct ParseMetrics {
59 pub last_parse_time_ms: f64,
60 pub nodes_reused: usize,
61 pub nodes_reparsed: usize,
62 pub cache_hits: usize,
63 pub cache_misses: usize,
64}
65
66impl IncrementalDocument {
67 pub fn new(source: String) -> ParseResult<Self> {
69 let start = Instant::now();
70 let mut parser = Parser::new(&source);
71 let root = parser.parse()?;
72
73 let mut doc = IncrementalDocument {
74 root: Arc::new(root),
75 source,
76 version: 0,
77 subtree_cache: SubtreeCache::new(1000),
78 metrics: ParseMetrics::default(),
79 };
80
81 doc.metrics.last_parse_time_ms = start.elapsed().as_secs_f64() * 1000.0;
82 doc.cache_subtrees();
83
84 Ok(doc)
85 }
86
87 pub fn apply_edit(&mut self, edit: IncrementalEdit) -> ParseResult<()> {
89 let start = Instant::now();
90 self.version += 1;
91
92 self.metrics = ParseMetrics::default();
94
95 let new_source = self.apply_edit_to_source(&edit);
97
98 let affected_range = (edit.start_byte, edit.old_end_byte);
100 let reusable_subtrees = self.find_reusable_subtrees(affected_range, &edit);
101
102 let (new_root, used_fast_path) =
104 self.incremental_parse(&new_source, &edit, reusable_subtrees)?;
105
106 self.source = new_source;
108 self.root = Arc::new(new_root);
109
110 if !used_fast_path {
115 self.cache_subtrees();
116 }
117
118 self.metrics.last_parse_time_ms = start.elapsed().as_secs_f64() * 1000.0;
119
120 Ok(())
121 }
122
123 pub fn apply_edits(&mut self, edits: &IncrementalEditSet) -> ParseResult<()> {
125 let start = Instant::now();
126 self.version += 1;
127
128 self.metrics = ParseMetrics::default();
130
131 let mut sorted_edits = edits.edits.clone();
133 sorted_edits.sort_by(|a, b| b.start_byte.cmp(&a.start_byte));
134
135 let mut new_source = self.source.clone();
137 for edit in &sorted_edits {
138 new_source = self.apply_edit_to_string(&new_source, edit);
139 }
140
141 let affected_ranges: Vec<_> =
143 sorted_edits.iter().map(|e| (e.start_byte, e.old_end_byte)).collect();
144
145 let reusable = self.find_reusable_for_ranges(&affected_ranges);
147
148 let new_root = if !reusable.is_empty() {
150 self.parse_with_reuse(&new_source, reusable)?
151 } else {
152 let mut parser = Parser::new(&new_source);
153 parser.parse()?
154 };
155
156 self.source = new_source;
158 self.root = Arc::new(new_root);
159 self.cache_subtrees();
160
161 self.metrics.last_parse_time_ms = start.elapsed().as_secs_f64() * 1000.0;
162
163 Ok(())
164 }
165
166 fn apply_edit_to_source(&self, edit: &IncrementalEdit) -> String {
168 self.apply_edit_to_string(&self.source, edit)
169 }
170
171 fn apply_edit_to_string(&self, source: &str, edit: &IncrementalEdit) -> String {
172 let mut result = String::with_capacity(source.len() + edit.new_text.len());
173
174 let start = edit.start_byte.min(source.len());
176 let end = edit.old_end_byte.min(source.len());
177
178 if source.is_char_boundary(start) && source.is_char_boundary(end) {
180 result.push_str(&source[..start]);
181 result.push_str(&edit.new_text);
182 result.push_str(&source[end..]);
183 } else {
184 debug!("Invalid UTF-8 boundaries in edit: start={}, end={}", start, end);
186 result.push_str(source);
187 }
188
189 result
190 }
191
192 fn find_reusable_subtrees(
194 &mut self,
195 affected_range: (usize, usize),
196 edit: &IncrementalEdit,
197 ) -> Vec<Arc<Node>> {
198 let mut reusable = Vec::new();
199 let delta = edit.byte_shift();
200
201 for ((start, end), node) in &self.subtree_cache.by_range {
203 if *end <= affected_range.0 {
204 reusable.push(node.clone());
206 self.metrics.cache_hits += 1;
207 self.metrics.nodes_reused += self.count_nodes(node);
208 } else if *start >= affected_range.1 {
209 if let Some(adjusted) = self.adjust_node_position(node, delta) {
211 reusable.push(Arc::new(adjusted));
212 self.metrics.cache_hits += 1;
213 self.metrics.nodes_reused += self.count_nodes(node);
214 }
215 } else {
216 self.metrics.cache_misses += 1;
217 }
218 }
219
220 reusable
221 }
222
223 fn find_reusable_for_ranges(&mut self, ranges: &[(usize, usize)]) -> Vec<Arc<Node>> {
225 let mut reusable = Vec::new();
226
227 for ((start, end), node) in &self.subtree_cache.by_range {
228 let affected = ranges.iter().any(|(r_start, r_end)| {
229 *start < *r_end && *end > *r_start
231 });
232
233 if !affected {
234 reusable.push(node.clone());
235 self.metrics.cache_hits += 1;
236 self.metrics.nodes_reused += self.count_nodes(node);
237 } else {
238 self.metrics.cache_misses += 1;
239 }
240 }
241
242 reusable
243 }
244
245 fn incremental_parse(
251 &mut self,
252 source: &str,
253 edit: &IncrementalEdit,
254 _reusable: Vec<Arc<Node>>,
255 ) -> ParseResult<(Node, bool)> {
256 if self.is_single_token_edit(edit) {
258 if let Some(node) = self.fast_token_update(source, edit) {
259 self.metrics.nodes_reparsed = 1;
260 return Ok((node, true));
261 }
262 }
263
264 let node = self.parse_with_reuse(source, _reusable)?;
266 Ok((node, false))
267 }
268
269 fn is_single_token_edit(&self, edit: &IncrementalEdit) -> bool {
271 if edit.old_end_byte - edit.start_byte > 100 {
273 return false; }
275
276 if let Some(node) = self.find_node_at_position(edit.start_byte) {
278 matches!(
279 node.kind,
280 NodeKind::Number { .. } | NodeKind::String { .. } | NodeKind::Identifier { .. }
281 )
282 } else {
283 false
284 }
285 }
286
287 fn fast_token_update(&self, source: &str, edit: &IncrementalEdit) -> Option<Node> {
289 let mut new_root = (*self.root).clone();
291
292 if self.update_token_in_tree(&mut new_root, source, edit) { Some(new_root) } else { None }
294 }
295
296 fn update_token_in_tree(&self, node: &mut Node, source: &str, edit: &IncrementalEdit) -> bool {
298 if node.location.start <= edit.start_byte && node.location.end >= edit.old_end_byte {
300 match &mut node.kind {
301 NodeKind::Number { .. } => {
302 let delta = edit.byte_shift();
304 let new_end = (node.location.end as isize + delta).max(0) as usize;
305
306 if new_end <= source.len()
308 && source.is_char_boundary(node.location.start)
309 && source.is_char_boundary(new_end)
310 {
311 node.location.end = new_end;
312 let new_text = &source[node.location.start..node.location.end];
313 if let Ok(value) = new_text.parse::<f64>() {
314 node.kind = NodeKind::Number { value: value.to_string() };
315 return true;
316 }
317 }
318 }
319 NodeKind::String { value, .. } => {
320 let delta = edit.byte_shift();
322 let new_end = (node.location.end as isize + delta).max(0) as usize;
323
324 if new_end <= source.len()
326 && source.is_char_boundary(node.location.start)
327 && source.is_char_boundary(new_end)
328 {
329 node.location.end = new_end;
330 let new_text = &source[node.location.start..node.location.end];
331 *value = new_text.to_string();
332 return true;
333 }
334 }
335 NodeKind::Identifier { name } => {
336 let delta = edit.byte_shift();
338 let new_end = (node.location.end as isize + delta).max(0) as usize;
339
340 if new_end <= source.len()
342 && source.is_char_boundary(node.location.start)
343 && source.is_char_boundary(new_end)
344 {
345 node.location.end = new_end;
346 *name = source[node.location.start..node.location.end].to_string();
347 return true;
348 }
349 }
350 _ => {
351 return self.update_token_in_children(node, source, edit);
353 }
354 }
355 }
356
357 false
358 }
359
360 fn update_token_in_children(
362 &self,
363 node: &mut Node,
364 source: &str,
365 edit: &IncrementalEdit,
366 ) -> bool {
367 match &mut node.kind {
368 NodeKind::Program { statements } | NodeKind::Block { statements } => {
369 for stmt in statements {
370 if self.update_token_in_tree(stmt, source, edit) {
371 return true;
372 }
373 }
374 }
375 NodeKind::Binary { left, right, .. }
376 | NodeKind::Assignment { lhs: left, rhs: right, .. } => {
377 if self.update_token_in_tree(left, source, edit) {
378 return true;
379 }
380 if self.update_token_in_tree(right, source, edit) {
381 return true;
382 }
383 }
384 NodeKind::Subroutine { body, .. } => {
385 if self.update_token_in_tree(body, source, edit) {
386 return true;
387 }
388 }
389 NodeKind::ExpressionStatement { expression } => {
390 if self.update_token_in_tree(expression, source, edit) {
391 return true;
392 }
393 }
394 NodeKind::VariableDeclaration { variable, initializer, .. } => {
395 if self.update_token_in_tree(variable, source, edit) {
396 return true;
397 }
398 if let Some(init) = initializer {
399 if self.update_token_in_tree(init, source, edit) {
400 return true;
401 }
402 }
403 }
404 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
405 if self.update_token_in_tree(condition, source, edit) {
406 return true;
407 }
408 if self.update_token_in_tree(then_branch, source, edit) {
409 return true;
410 }
411 for (cond, branch) in elsif_branches {
412 if self.update_token_in_tree(cond, source, edit) {
413 return true;
414 }
415 if self.update_token_in_tree(branch, source, edit) {
416 return true;
417 }
418 }
419 if let Some(else_b) = else_branch {
420 if self.update_token_in_tree(else_b, source, edit) {
421 return true;
422 }
423 }
424 }
425 NodeKind::While { condition, body, .. } => {
426 if self.update_token_in_tree(condition, source, edit) {
427 return true;
428 }
429 if self.update_token_in_tree(body, source, edit) {
430 return true;
431 }
432 }
433 NodeKind::FunctionCall { args, .. } => {
434 for arg in args {
435 if self.update_token_in_tree(arg, source, edit) {
436 return true;
437 }
438 }
439 }
440 NodeKind::Unary { operand, .. } => {
441 if self.update_token_in_tree(operand, source, edit) {
442 return true;
443 }
444 }
445 _ => {}
446 }
447
448 false
449 }
450
451 fn parse_with_reuse(&mut self, source: &str, reusable: Vec<Arc<Node>>) -> ParseResult<Node> {
453 let mut parser = Parser::new(source);
455 let mut root = parser.parse()?;
456
457 for node in reusable {
459 self.insert_reusable(&mut root, &node);
460 }
461
462 self.metrics.nodes_reparsed =
464 self.count_nodes(&root).saturating_sub(self.metrics.nodes_reused);
465
466 Ok(root)
467 }
468
469 fn insert_reusable(&self, target: &mut Node, reusable: &Arc<Node>) -> bool {
471 if target.location.start == reusable.location.start
472 && target.location.end == reusable.location.end
473 && std::mem::discriminant(&target.kind) == std::mem::discriminant(&reusable.kind)
474 {
475 *target = (**reusable).clone();
476 return true;
477 }
478
479 match &mut target.kind {
480 NodeKind::Program { statements } | NodeKind::Block { statements } => {
481 for stmt in statements {
482 if self.insert_reusable(stmt, reusable) {
483 return true;
484 }
485 }
486 }
487 NodeKind::Binary { left, right, .. } => {
488 if self.insert_reusable(left, reusable) {
489 return true;
490 }
491 if self.insert_reusable(right, reusable) {
492 return true;
493 }
494 }
495 NodeKind::Subroutine { body, .. } => {
496 if self.insert_reusable(body, reusable) {
497 return true;
498 }
499 }
500 NodeKind::ExpressionStatement { expression } => {
501 if self.insert_reusable(expression, reusable) {
502 return true;
503 }
504 }
505 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
506 if self.insert_reusable(condition, reusable) {
507 return true;
508 }
509 if self.insert_reusable(then_branch, reusable) {
510 return true;
511 }
512 for (cond, branch) in elsif_branches {
513 if self.insert_reusable(cond, reusable) {
514 return true;
515 }
516 if self.insert_reusable(branch, reusable) {
517 return true;
518 }
519 }
520 if let Some(else_b) = else_branch {
521 if self.insert_reusable(else_b, reusable) {
522 return true;
523 }
524 }
525 }
526 NodeKind::While { condition, body, .. } => {
527 if self.insert_reusable(condition, reusable) {
528 return true;
529 }
530 if self.insert_reusable(body, reusable) {
531 return true;
532 }
533 }
534 NodeKind::For { init, condition, update, body, .. } => {
535 if let Some(i) = init {
536 if self.insert_reusable(i, reusable) {
537 return true;
538 }
539 }
540 if let Some(c) = condition {
541 if self.insert_reusable(c, reusable) {
542 return true;
543 }
544 }
545 if let Some(u) = update {
546 if self.insert_reusable(u, reusable) {
547 return true;
548 }
549 }
550 if self.insert_reusable(body, reusable) {
551 return true;
552 }
553 }
554 NodeKind::VariableDeclaration { variable, initializer, .. } => {
555 if self.insert_reusable(variable, reusable) {
556 return true;
557 }
558 if let Some(init) = initializer {
559 if self.insert_reusable(init, reusable) {
560 return true;
561 }
562 }
563 }
564 NodeKind::Assignment { lhs, rhs, .. } => {
565 if self.insert_reusable(lhs, reusable) {
566 return true;
567 }
568 if self.insert_reusable(rhs, reusable) {
569 return true;
570 }
571 }
572 _ => {}
573 }
574
575 false
576 }
577
578 fn adjust_node_position(&self, node: &Node, delta: isize) -> Option<Node> {
584 let mut adjusted = node.clone();
585
586 let new_start = adjusted.location.start as isize + delta;
588 let new_end = adjusted.location.end as isize + delta;
589 if new_start < 0 || new_end < 0 {
590 return None;
591 }
592 adjusted.location.start = new_start as usize;
593 adjusted.location.end = new_end as usize;
594
595 match &mut adjusted.kind {
597 NodeKind::Program { statements } | NodeKind::Block { statements } => {
598 for stmt in statements {
599 *stmt = self.adjust_node_position(stmt, delta)?;
600 }
601 }
602 NodeKind::Binary { left, right, .. } => {
603 **left = self.adjust_node_position(left, delta)?;
604 **right = self.adjust_node_position(right, delta)?;
605 }
606 NodeKind::Subroutine { body, .. } => {
607 **body = self.adjust_node_position(body, delta)?;
608 }
609 NodeKind::ExpressionStatement { expression } => {
610 **expression = self.adjust_node_position(expression, delta)?;
611 }
612 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
613 **condition = self.adjust_node_position(condition, delta)?;
614 **then_branch = self.adjust_node_position(then_branch, delta)?;
615 for (cond, branch) in elsif_branches {
616 **cond = self.adjust_node_position(cond, delta)?;
617 **branch = self.adjust_node_position(branch, delta)?;
618 }
619 if let Some(else_b) = else_branch {
620 **else_b = self.adjust_node_position(else_b, delta)?;
621 }
622 }
623 NodeKind::While { condition, body, .. } => {
624 **condition = self.adjust_node_position(condition, delta)?;
625 **body = self.adjust_node_position(body, delta)?;
626 }
627 NodeKind::For { init, condition, update, body, .. } => {
628 if let Some(i) = init {
629 **i = self.adjust_node_position(i, delta)?;
630 }
631 if let Some(c) = condition {
632 **c = self.adjust_node_position(c, delta)?;
633 }
634 if let Some(u) = update {
635 **u = self.adjust_node_position(u, delta)?;
636 }
637 **body = self.adjust_node_position(body, delta)?;
638 }
639 NodeKind::VariableDeclaration { variable, initializer, .. } => {
640 **variable = self.adjust_node_position(variable, delta)?;
641 if let Some(init) = initializer {
642 **init = self.adjust_node_position(init, delta)?;
643 }
644 }
645 NodeKind::Assignment { lhs, rhs, .. } => {
646 **lhs = self.adjust_node_position(lhs, delta)?;
647 **rhs = self.adjust_node_position(rhs, delta)?;
648 }
649 _ => {}
650 }
651
652 Some(adjusted)
653 }
654
655 fn find_node_at_position(&self, pos: usize) -> Option<&Node> {
657 self.find_in_node(&self.root, pos)
658 }
659
660 fn find_in_node<'a>(&self, node: &'a Node, pos: usize) -> Option<&'a Node> {
661 if node.location.start <= pos && node.location.end > pos {
662 match &node.kind {
664 NodeKind::Program { statements } | NodeKind::Block { statements } => {
665 for stmt in statements {
666 if let Some(found) = self.find_in_node(stmt, pos) {
667 return Some(found);
668 }
669 }
670 }
671 NodeKind::Binary { left, right, .. }
672 | NodeKind::Assignment { lhs: left, rhs: right, .. } => {
673 if let Some(found) = self.find_in_node(left, pos) {
674 return Some(found);
675 }
676 if let Some(found) = self.find_in_node(right, pos) {
677 return Some(found);
678 }
679 }
680 NodeKind::Subroutine { body, .. } => {
681 if let Some(found) = self.find_in_node(body, pos) {
682 return Some(found);
683 }
684 }
685 NodeKind::ExpressionStatement { expression } => {
686 if let Some(found) = self.find_in_node(expression, pos) {
687 return Some(found);
688 }
689 }
690 NodeKind::VariableDeclaration { variable, initializer, .. } => {
691 if let Some(found) = self.find_in_node(variable, pos) {
692 return Some(found);
693 }
694 if let Some(init) = initializer {
695 if let Some(found) = self.find_in_node(init, pos) {
696 return Some(found);
697 }
698 }
699 }
700 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
701 if let Some(found) = self.find_in_node(condition, pos) {
702 return Some(found);
703 }
704 if let Some(found) = self.find_in_node(then_branch, pos) {
705 return Some(found);
706 }
707 for (cond, branch) in elsif_branches {
708 if let Some(found) = self.find_in_node(cond, pos) {
709 return Some(found);
710 }
711 if let Some(found) = self.find_in_node(branch, pos) {
712 return Some(found);
713 }
714 }
715 if let Some(else_b) = else_branch {
716 if let Some(found) = self.find_in_node(else_b, pos) {
717 return Some(found);
718 }
719 }
720 }
721 NodeKind::While { condition, body, .. } => {
722 if let Some(found) = self.find_in_node(condition, pos) {
723 return Some(found);
724 }
725 if let Some(found) = self.find_in_node(body, pos) {
726 return Some(found);
727 }
728 }
729 NodeKind::FunctionCall { args, .. } => {
730 for arg in args {
731 if let Some(found) = self.find_in_node(arg, pos) {
732 return Some(found);
733 }
734 }
735 }
736 NodeKind::Unary { operand, .. } => {
737 if let Some(found) = self.find_in_node(operand, pos) {
738 return Some(found);
739 }
740 }
741 _ => {}
742 }
743
744 Some(node)
746 } else {
747 None
748 }
749 }
750
751 fn cache_subtrees(&mut self) {
753 self.subtree_cache.clear();
754 let root = self.root.clone();
755 self.cache_node(&root);
756 }
757
758 fn cache_node(&mut self, node: &Node) {
759 let range = (node.location.start, node.location.end);
761 self.subtree_cache.by_range.insert(range, Arc::new(node.clone()));
762
763 let hash = self.hash_node(node);
765 let priority = self.get_symbol_priority(node);
766
767 self.subtree_cache.by_content.insert(hash, Arc::new(node.clone()));
768 self.subtree_cache.critical_symbols.insert(hash, priority);
769 self.subtree_cache.lru.push_back(hash);
770 self.subtree_cache.evict_if_needed();
771
772 match &node.kind {
774 NodeKind::Program { statements } | NodeKind::Block { statements } => {
775 for stmt in statements {
776 self.cache_node(stmt);
777 }
778 }
779 NodeKind::Binary { left, right, .. } => {
780 self.cache_node(left);
781 self.cache_node(right);
782 }
783 NodeKind::Subroutine { body, .. } => {
784 self.cache_node(body);
785 }
786 NodeKind::ExpressionStatement { expression } => {
787 self.cache_node(expression);
788 }
789 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
790 self.cache_node(condition);
791 self.cache_node(then_branch);
792 for (cond, branch) in elsif_branches {
793 self.cache_node(cond);
794 self.cache_node(branch);
795 }
796 if let Some(else_b) = else_branch {
797 self.cache_node(else_b);
798 }
799 }
800 NodeKind::While { condition, body, .. } => {
801 self.cache_node(condition);
802 self.cache_node(body);
803 }
804 NodeKind::For { init, condition, update, body, .. } => {
805 if let Some(i) = init {
806 self.cache_node(i);
807 }
808 if let Some(c) = condition {
809 self.cache_node(c);
810 }
811 if let Some(u) = update {
812 self.cache_node(u);
813 }
814 self.cache_node(body);
815 }
816 NodeKind::Foreach { variable, list, body, continue_block } => {
817 self.cache_node(variable);
818 self.cache_node(list);
819 self.cache_node(body);
820 if let Some(cb) = continue_block {
821 self.cache_node(cb);
822 }
823 }
824 NodeKind::VariableDeclaration { variable, initializer, .. } => {
825 self.cache_node(variable);
826 if let Some(init) = initializer {
827 self.cache_node(init);
828 }
829 }
830 NodeKind::VariableListDeclaration { variables, initializer, .. } => {
831 for var in variables {
832 self.cache_node(var);
833 }
834 if let Some(init) = initializer {
835 self.cache_node(init);
836 }
837 }
838 NodeKind::Assignment { lhs, rhs, .. } => {
839 self.cache_node(lhs);
840 self.cache_node(rhs);
841 }
842 _ => {}
843 }
844 }
845
846 fn hash_node(&self, node: &Node) -> u64 {
848 use std::collections::hash_map::DefaultHasher;
849 use std::hash::{Hash, Hasher};
850
851 let mut hasher = DefaultHasher::new();
852
853 std::mem::discriminant(&node.kind).hash(&mut hasher);
855
856 match &node.kind {
858 NodeKind::Number { value } => value.hash(&mut hasher),
859 NodeKind::String { value, .. } => value.hash(&mut hasher),
860 NodeKind::Identifier { name } => name.hash(&mut hasher),
861 _ => {}
862 }
863
864 hasher.finish()
865 }
866
867 fn count_nodes(&self, node: &Node) -> usize {
869 let mut count = 1;
870
871 match &node.kind {
872 NodeKind::Program { statements } | NodeKind::Block { statements } => {
873 for stmt in statements {
874 count += self.count_nodes(stmt);
875 }
876 }
877 NodeKind::Binary { left, right, .. } => {
878 count += self.count_nodes(left);
879 count += self.count_nodes(right);
880 }
881 NodeKind::Subroutine { body, .. } => {
882 count += self.count_nodes(body);
883 }
884 NodeKind::ExpressionStatement { expression } => {
885 count += self.count_nodes(expression);
886 }
887 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
888 count += self.count_nodes(condition);
889 count += self.count_nodes(then_branch);
890 for (cond, branch) in elsif_branches {
891 count += self.count_nodes(cond);
892 count += self.count_nodes(branch);
893 }
894 if let Some(else_b) = else_branch {
895 count += self.count_nodes(else_b);
896 }
897 }
898 NodeKind::While { condition, body, .. } => {
899 count += self.count_nodes(condition);
900 count += self.count_nodes(body);
901 }
902 NodeKind::For { init, condition, update, body, .. } => {
903 if let Some(i) = init {
904 count += self.count_nodes(i);
905 }
906 if let Some(c) = condition {
907 count += self.count_nodes(c);
908 }
909 if let Some(u) = update {
910 count += self.count_nodes(u);
911 }
912 count += self.count_nodes(body);
913 }
914 NodeKind::Foreach { variable, list, body, continue_block } => {
915 count += self.count_nodes(variable);
916 count += self.count_nodes(list);
917 count += self.count_nodes(body);
918 if let Some(cb) = continue_block {
919 count += self.count_nodes(cb);
920 }
921 }
922 NodeKind::VariableDeclaration { variable, initializer, .. } => {
923 count += self.count_nodes(variable);
924 if let Some(init) = initializer {
925 count += self.count_nodes(init);
926 }
927 }
928 NodeKind::VariableListDeclaration { variables, initializer, .. } => {
929 for var in variables {
930 count += self.count_nodes(var);
931 }
932 if let Some(init) = initializer {
933 count += self.count_nodes(init);
934 }
935 }
936 NodeKind::Assignment { lhs, rhs, .. } => {
937 count += self.count_nodes(lhs);
938 count += self.count_nodes(rhs);
939 }
940 NodeKind::FunctionCall { args, .. } => {
941 for arg in args {
942 count += self.count_nodes(arg);
943 }
944 }
945 NodeKind::Unary { operand, .. } => {
946 count += self.count_nodes(operand);
947 }
948 _ => {}
949 }
950
951 count
952 }
953
954 fn get_symbol_priority(&self, node: &Node) -> SymbolPriority {
956 match &node.kind {
957 NodeKind::Package { .. } => SymbolPriority::Critical,
959 NodeKind::Use { .. } | NodeKind::No { .. } => SymbolPriority::Critical,
960 NodeKind::Subroutine { .. } => SymbolPriority::Critical,
961
962 NodeKind::FunctionCall { .. } => SymbolPriority::High,
964 NodeKind::Variable { .. } => SymbolPriority::High,
965 NodeKind::VariableDeclaration { .. } => SymbolPriority::High,
966
967 NodeKind::Block { .. } => SymbolPriority::Medium,
969 NodeKind::If { .. } | NodeKind::While { .. } | NodeKind::For { .. } => {
970 SymbolPriority::Medium
971 }
972 NodeKind::Assignment { .. } => SymbolPriority::Medium,
973
974 NodeKind::Number { .. } | NodeKind::String { .. } => SymbolPriority::Low,
976 NodeKind::Binary { .. } | NodeKind::Unary { .. } => SymbolPriority::Low,
977
978 _ => SymbolPriority::Medium,
980 }
981 }
982
983 pub fn tree(&self) -> &Node {
985 &self.root
986 }
987
988 pub fn text(&self) -> &str {
990 &self.source
991 }
992
993 pub fn metrics(&self) -> &ParseMetrics {
995 &self.metrics
996 }
997
998 pub fn set_cache_max_size(&mut self, max_size: usize) {
1000 self.subtree_cache.set_max_size(max_size);
1001 }
1002}
1003
1004impl SubtreeCache {
1005 fn new(max_size: usize) -> Self {
1006 SubtreeCache {
1007 by_content: HashMap::new(),
1008 by_range: HashMap::new(),
1009 lru: VecDeque::new(),
1010 critical_symbols: HashMap::new(),
1011 max_size,
1012 }
1013 }
1014
1015 fn clear(&mut self) {
1016 self.by_content.clear();
1017 self.by_range.clear();
1018 self.lru.clear();
1019 self.critical_symbols.clear();
1020 }
1021
1022 fn evict_if_needed(&mut self) {
1023 while self.by_content.len() > self.max_size {
1024 if let Some(hash) = self.find_least_important_entry() {
1025 debug!(
1026 "Evicting cache entry with hash {} (priority: {:?})",
1027 hash,
1028 self.critical_symbols.get(&hash).copied().unwrap_or(SymbolPriority::Low)
1029 );
1030 self.by_content.remove(&hash);
1031 self.critical_symbols.remove(&hash);
1032 self.lru.retain(|&h| h != hash);
1034 } else {
1035 if let Some(hash) = self.lru.pop_front() {
1037 debug!("Fallback eviction for hash {}", hash);
1038 self.by_content.remove(&hash);
1039 self.critical_symbols.remove(&hash);
1040 }
1041 }
1042 }
1043 }
1044
1045 fn find_least_important_entry(&self) -> Option<u64> {
1052 let mut best: Option<(u64, SymbolPriority)> = None;
1053
1054 for &hash in &self.lru {
1055 let priority = self.critical_symbols.get(&hash).copied().unwrap_or(SymbolPriority::Low);
1056
1057 match best {
1058 None => best = Some((hash, priority)),
1059 Some((_, best_priority)) => {
1060 if priority < best_priority {
1061 best = Some((hash, priority));
1063 }
1064 }
1066 }
1067 }
1068
1069 best.map(|(hash, _)| hash)
1070 }
1071
1072 fn set_max_size(&mut self, max_size: usize) {
1073 self.max_size = max_size;
1074 self.evict_if_needed();
1075 }
1076}
1077
1078#[cfg(test)]
1079mod tests {
1080 use super::*;
1081 use crate::incremental_edit::IncrementalEdit;
1082
1083 #[test]
1084 fn test_incremental_single_token_edit() -> ParseResult<()> {
1085 let source = r#"
1086 my $x = 42;
1087 my $y = 100;
1088 print $x + $y;
1089 "#;
1090
1091 let mut doc = IncrementalDocument::new(source.to_string())?;
1092
1093 let pos = source.find("42").ok_or_else(|| crate::error::ParseError::SyntaxError {
1095 message: "test source should contain '42'".to_string(),
1096 location: 0,
1097 })?;
1098 let edit = IncrementalEdit::new(pos + 1, pos + 2, "3".to_string());
1099
1100 doc.apply_edit(edit)?;
1101
1102 assert!(doc.metrics.nodes_reused > 0);
1104 assert!(doc.metrics.nodes_reparsed < 5);
1105 assert!(doc.metrics.last_parse_time_ms < 1.0);
1106
1107 Ok(())
1108 }
1109
1110 #[test]
1111 fn test_incremental_multiple_edits() -> ParseResult<()> {
1112 let source = r#"
1113 sub calculate {
1114 my $a = 10;
1115 my $b = 20;
1116 return $a + $b;
1117 }
1118 "#;
1119
1120 let mut doc = IncrementalDocument::new(source.to_string())?;
1121
1122 let mut edits = IncrementalEditSet::new();
1123
1124 let pos_10 = source.find("10").ok_or_else(|| crate::error::ParseError::SyntaxError {
1126 message: "test source should contain '10'".to_string(),
1127 location: 0,
1128 })?;
1129 edits.add(IncrementalEdit::new(pos_10, pos_10 + 2, "15".to_string()));
1130
1131 let pos_20 = source.find("20").ok_or_else(|| crate::error::ParseError::SyntaxError {
1133 message: "test source should contain '20'".to_string(),
1134 location: 0,
1135 })?;
1136 edits.add(IncrementalEdit::new(pos_20, pos_20 + 2, "25".to_string()));
1137
1138 doc.apply_edits(&edits)?;
1139
1140 let critical_count = doc
1142 .subtree_cache
1143 .critical_symbols
1144 .values()
1145 .filter(|&p| *p == SymbolPriority::Critical)
1146 .count();
1147 assert!(critical_count > 0, "Should preserve critical symbols during batch edits");
1148
1149 assert!(doc.metrics.nodes_reused > 0, "Incremental parsing should reuse nodes");
1153 assert!(
1154 doc.metrics.last_parse_time_ms < 10.0,
1155 "Incremental parse time was {:.2}ms, which exceeds 10.0ms threshold",
1156 doc.metrics.last_parse_time_ms
1157 );
1158
1159 Ok(())
1160 }
1161
1162 #[test]
1163 fn test_cache_eviction() -> ParseResult<()> {
1164 let source = "my $x = 1;";
1165 let doc = IncrementalDocument::new(source.to_string())?;
1166
1167 assert!(!doc.subtree_cache.by_range.is_empty());
1169 assert!(!doc.subtree_cache.by_content.is_empty());
1170
1171 Ok(())
1172 }
1173
1174 #[test]
1175 fn test_symbol_priority_classification() -> ParseResult<()> {
1176 let source = r#"
1177 package TestPkg;
1178 use strict;
1179
1180 sub test_func {
1181 my $var = 42;
1182 if ($var > 0) {
1183 return $var + 1;
1184 }
1185 }
1186 "#;
1187 let doc = IncrementalDocument::new(source.to_string())?;
1188
1189 let priorities: std::collections::HashSet<_> =
1191 doc.subtree_cache.critical_symbols.values().cloned().collect();
1192
1193 assert!(
1195 priorities.contains(&SymbolPriority::Critical),
1196 "Should classify package/use/sub as critical"
1197 );
1198 assert!(
1200 priorities.contains(&SymbolPriority::High),
1201 "Should classify variables as high priority"
1202 );
1203 assert!(
1205 priorities.contains(&SymbolPriority::Low)
1206 || priorities.contains(&SymbolPriority::Medium),
1207 "Should have lower priority symbols"
1208 );
1209
1210 Ok(())
1211 }
1212
1213 #[test]
1214 fn test_cache_respects_max_size() -> ParseResult<()> {
1215 let source = "my $x = 1; my $y = 2; my $z = 3;";
1216 let mut doc = IncrementalDocument::new(source.to_string())?;
1217
1218 assert!(doc.subtree_cache.by_content.len() > 1);
1220
1221 doc.set_cache_max_size(1);
1223 assert!(doc.subtree_cache.by_content.len() <= 1);
1224
1225 let pos = source.find('1').ok_or_else(|| crate::error::ParseError::SyntaxError {
1227 message: "test source should contain '1'".to_string(),
1228 location: 0,
1229 })?;
1230 let edit = IncrementalEdit::new(pos, pos + 1, "10".to_string());
1231 doc.apply_edit(edit)?;
1232 assert!(doc.subtree_cache.by_content.len() <= 1);
1233
1234 Ok(())
1235 }
1236
1237 #[test]
1238 fn test_cache_priority_preservation() -> ParseResult<()> {
1239 let source = r#"
1240 package MyPackage;
1241 use strict;
1242 use warnings;
1243
1244 sub process {
1245 my $x = 42;
1246 my $y = "hello";
1247 return $x + 1;
1248 }
1249 "#;
1250 let mut doc = IncrementalDocument::new(source.to_string())?;
1251
1252 let initial_cache_size = doc.subtree_cache.by_content.len();
1254 assert!(initial_cache_size > 3, "Should have multiple cached nodes");
1255
1256 doc.set_cache_max_size(3);
1258 assert!(doc.subtree_cache.by_content.len() <= 3);
1259
1260 let has_critical_symbols = doc
1262 .subtree_cache
1263 .critical_symbols
1264 .values()
1265 .cloned()
1266 .any(|p| p == SymbolPriority::Critical);
1267 assert!(has_critical_symbols, "Should preserve critical symbols like package/use/sub");
1268
1269 let pos = source.find("42").ok_or_else(|| crate::error::ParseError::SyntaxError {
1271 message: "test source should contain '42'".to_string(),
1272 location: 0,
1273 })?;
1274 let edit = IncrementalEdit::new(pos, pos + 2, "100".to_string());
1275 doc.apply_edit(edit)?;
1276 assert!(doc.subtree_cache.by_content.len() <= 3);
1277
1278 let has_critical_after_edit = doc
1280 .subtree_cache
1281 .critical_symbols
1282 .values()
1283 .cloned()
1284 .any(|p| p == SymbolPriority::Critical);
1285 assert!(has_critical_after_edit, "Should preserve critical symbols after edit");
1286
1287 Ok(())
1288 }
1289
1290 #[test]
1291 fn test_workspace_symbol_cache_preservation() -> ParseResult<()> {
1292 let source = r#"
1293 package TestModule;
1294
1295 sub exported_function { }
1296 sub internal_helper { }
1297
1298 my $global_var = "test";
1299 "#;
1300 let mut doc = IncrementalDocument::new(source.to_string())?;
1301
1302 doc.set_cache_max_size(2);
1304
1305 let package_preserved = doc
1307 .subtree_cache
1308 .by_content
1309 .values()
1310 .any(|node| matches!(node.kind, NodeKind::Package { .. }));
1311 assert!(package_preserved, "Package declaration should be preserved for workspace symbols");
1312
1313 Ok(())
1314 }
1315
1316 #[test]
1317 fn test_completion_metadata_preservation() -> ParseResult<()> {
1318 let source = r#"
1319 use Data::Dumper;
1320 use List::Util qw(first max);
1321
1322 sub calculate {
1323 my ($input, $multiplier) = @_;
1324 return $input * $multiplier;
1325 }
1326 "#;
1327 let mut doc = IncrementalDocument::new(source.to_string())?;
1328
1329 doc.set_cache_max_size(4);
1331
1332 let use_statements_count = doc
1334 .subtree_cache
1335 .by_content
1336 .values()
1337 .filter(|node| matches!(node.kind, NodeKind::Use { .. }))
1338 .count();
1339 assert!(
1340 use_statements_count >= 1,
1341 "Use statements should be preserved for completion metadata"
1342 );
1343
1344 let function_preserved = doc
1346 .subtree_cache
1347 .by_content
1348 .values()
1349 .any(|node| matches!(node.kind, NodeKind::Subroutine { .. }));
1350 assert!(function_preserved, "Function definitions should be preserved for completion");
1351
1352 Ok(())
1353 }
1354
1355 #[test]
1356 fn test_code_lens_reference_preservation() -> ParseResult<()> {
1357 let source = r#"
1358 package MyClass;
1359
1360 sub new {
1361 my $class = shift;
1362 return bless {}, $class;
1363 }
1364
1365 sub process_data {
1366 my ($self, $data) = @_;
1367 return $self->transform($data);
1368 }
1369 "#;
1370 let mut doc = IncrementalDocument::new(source.to_string())?;
1371
1372 doc.set_cache_max_size(3);
1374
1375 let critical_nodes = doc
1377 .subtree_cache
1378 .by_content
1379 .values()
1380 .filter(|node| {
1381 matches!(node.kind, NodeKind::Package { .. } | NodeKind::Subroutine { .. })
1382 })
1383 .count();
1384 assert!(critical_nodes >= 2, "Should preserve package and key subroutines for code lens");
1385
1386 Ok(())
1387 }
1388}