use crate::{
ast::{Node, NodeKind},
error::ParseResult,
incremental_edit::{IncrementalEdit, IncrementalEditSet},
parser::Parser,
};
use std::collections::{HashMap, VecDeque};
use std::sync::Arc;
use std::time::Instant;
use tracing::debug;
/// A parsed document that supports incremental reparsing after text edits.
#[derive(Debug, Clone)]
pub struct IncrementalDocument {
/// Root of the current AST.
pub root: Arc<Node>,
/// Full source text the current AST was parsed from.
pub source: String,
/// Generation counter, bumped once per applied edit (or edit batch).
pub version: u64,
/// Cache of subtrees that can be reused across edits.
pub subtree_cache: SubtreeCache,
/// Statistics gathered during the most recent parse cycle.
pub metrics: ParseMetrics,
}
/// Caches parsed subtrees between edits so unchanged regions can be reused.
#[derive(Debug, Clone, Default)]
pub struct SubtreeCache {
/// Subtrees indexed by content hash (see `hash_node`).
pub by_content: HashMap<u64, Arc<Node>>,
/// Subtrees indexed by their (start, end) byte range in the source.
pub by_range: HashMap<(usize, usize), Arc<Node>>,
/// Insertion-ordered hashes used as an eviction queue (oldest at the front).
pub lru: VecDeque<u64>,
/// Eviction priority per content hash; higher priorities are evicted last.
pub critical_symbols: HashMap<u64, SymbolPriority>,
/// Maximum number of `by_content` entries kept before eviction kicks in.
pub max_size: usize,
}
/// Cache-eviction priority for a symbol; higher values are kept longer.
/// The `Ord` derive relies on this declaration order (Low < … < Critical).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SymbolPriority {
Low = 0,
Medium = 1,
High = 2,
Critical = 3,
}
/// Statistics from the most recent parse or incremental update.
#[derive(Debug, Clone, Default)]
pub struct ParseMetrics {
/// Wall-clock duration of the last parse cycle, in milliseconds.
pub last_parse_time_ms: f64,
/// Number of AST nodes taken from the cache instead of being reparsed.
pub nodes_reused: usize,
/// Number of AST nodes that had to be rebuilt.
pub nodes_reparsed: usize,
/// Cache lookups that produced a reusable subtree.
pub cache_hits: usize,
/// Cache lookups whose subtree overlapped an edit and was discarded.
pub cache_misses: usize,
}
impl IncrementalDocument {
/// Parse `source` from scratch and build a document with a primed subtree cache.
///
/// # Errors
/// Propagates any error produced by the full parse of `source`.
pub fn new(source: String) -> ParseResult<Self> {
    let timer = Instant::now();
    let parsed_root = Parser::new(&source).parse()?;
    let mut document = IncrementalDocument {
        root: Arc::new(parsed_root),
        source,
        version: 0,
        subtree_cache: SubtreeCache::new(1000),
        metrics: ParseMetrics::default(),
    };
    // Parse time is recorded before cache population, matching the per-edit paths.
    document.metrics.last_parse_time_ms = timer.elapsed().as_secs_f64() * 1000.0;
    document.cache_subtrees();
    Ok(document)
}
/// Apply a single edit, reparsing incrementally where possible.
///
/// Bumps the document version, resets the metrics for this cycle, and only
/// rebuilds the subtree cache when the single-token fast path was not taken
/// (the fast path leaves the tree structure, and thus the cache, intact).
///
/// # Errors
/// Propagates any parse error from the incremental reparse.
pub fn apply_edit(&mut self, edit: IncrementalEdit) -> ParseResult<()> {
    let timer = Instant::now();
    self.version += 1;
    self.metrics = ParseMetrics::default();

    let updated_source = self.apply_edit_to_source(&edit);
    let dirty_range = (edit.start_byte, edit.old_end_byte);
    let candidates = self.find_reusable_subtrees(dirty_range, &edit);
    let (root, fast_path) = self.incremental_parse(&updated_source, &edit, candidates)?;

    self.source = updated_source;
    self.root = Arc::new(root);
    if !fast_path {
        // The tree changed structurally, so the cached ranges are stale.
        self.cache_subtrees();
    }
    self.metrics.last_parse_time_ms = timer.elapsed().as_secs_f64() * 1000.0;
    Ok(())
}
/// Apply a batch of edits in a single reparse cycle.
///
/// Edits are applied back-to-front (descending `start_byte`) so that earlier
/// byte offsets remain valid while later regions are spliced.
///
/// # Errors
/// Propagates any parse error from the reparse of the edited source.
pub fn apply_edits(&mut self, edits: &IncrementalEditSet) -> ParseResult<()> {
    let timer = Instant::now();
    self.version += 1;
    self.metrics = ParseMetrics::default();

    let mut ordered = edits.edits.clone();
    ordered.sort_by(|a, b| b.start_byte.cmp(&a.start_byte));
    let mut updated_source = self.source.clone();
    for edit in &ordered {
        updated_source = self.apply_edit_to_string(&updated_source, edit);
    }

    // Dirty ranges are expressed in pre-edit coordinates, matching the cache.
    let dirty_ranges: Vec<_> =
        ordered.iter().map(|e| (e.start_byte, e.old_end_byte)).collect();
    let reusable = self.find_reusable_for_ranges(&dirty_ranges);
    let root = if reusable.is_empty() {
        Parser::new(&updated_source).parse()?
    } else {
        self.parse_with_reuse(&updated_source, reusable)?
    };

    self.source = updated_source;
    self.root = Arc::new(root);
    self.cache_subtrees();
    self.metrics.last_parse_time_ms = timer.elapsed().as_secs_f64() * 1000.0;
    Ok(())
}
/// Convenience wrapper: apply `edit` against the document's current source text.
fn apply_edit_to_source(&self, edit: &IncrementalEdit) -> String {
self.apply_edit_to_string(&self.source, edit)
}
/// Splice `edit.new_text` into `source`, replacing bytes
/// `start_byte..old_end_byte`.
///
/// Offsets are clamped to the source length. If either clamped offset falls
/// inside a multi-byte UTF-8 sequence the edit is dropped (logged at debug
/// level) and the source is returned unchanged — a deliberate best-effort
/// policy for malformed client offsets.
fn apply_edit_to_string(&self, source: &str, edit: &IncrementalEdit) -> String {
    let splice_start = edit.start_byte.min(source.len());
    let splice_end = edit.old_end_byte.min(source.len());
    let mut spliced = String::with_capacity(source.len() + edit.new_text.len());
    if source.is_char_boundary(splice_start) && source.is_char_boundary(splice_end) {
        spliced.push_str(&source[..splice_start]);
        spliced.push_str(&edit.new_text);
        spliced.push_str(&source[splice_end..]);
    } else {
        debug!("Invalid UTF-8 boundaries in edit: start={}, end={}", splice_start, splice_end);
        spliced.push_str(source);
    }
    spliced
}
/// Partition the range-indexed cache around a single edit.
///
/// Subtrees entirely before the edit are reused as-is; subtrees entirely after
/// it are reused with their offsets shifted by the edit's byte delta; anything
/// overlapping the edited range counts as a cache miss and will be reparsed.
fn find_reusable_subtrees(
&mut self,
affected_range: (usize, usize),
edit: &IncrementalEdit,
) -> Vec<Arc<Node>> {
let mut reusable = Vec::new();
let delta = edit.byte_shift();
for ((start, end), node) in &self.subtree_cache.by_range {
if *end <= affected_range.0 {
// Entirely before the edit: positions are unaffected.
reusable.push(node.clone());
self.metrics.cache_hits += 1;
self.metrics.nodes_reused += self.count_nodes(node);
} else if *start >= affected_range.1 {
// Entirely after the edit: shift positions by the byte delta before reuse.
if let Some(adjusted) = self.adjust_node_position(node, delta) {
reusable.push(Arc::new(adjusted));
self.metrics.cache_hits += 1;
self.metrics.nodes_reused += self.count_nodes(node);
}
} else {
// Overlaps the edited range: this subtree must be reparsed.
self.metrics.cache_misses += 1;
}
}
reusable
}
/// Collect cached subtrees whose byte ranges do not overlap any edited range.
///
/// Hit/miss/reuse counters are accumulated locally and folded into
/// `self.metrics` once the scan completes.
fn find_reusable_for_ranges(&mut self, ranges: &[(usize, usize)]) -> Vec<Arc<Node>> {
    let mut survivors = Vec::new();
    let mut hits = 0usize;
    let mut misses = 0usize;
    let mut reused = 0usize;
    for ((node_start, node_end), node) in &self.subtree_cache.by_range {
        let overlaps_edit = ranges
            .iter()
            .any(|&(edit_start, edit_end)| *node_start < edit_end && *node_end > edit_start);
        if overlaps_edit {
            misses += 1;
        } else {
            hits += 1;
            reused += self.count_nodes(node);
            survivors.push(Arc::clone(node));
        }
    }
    self.metrics.cache_hits += hits;
    self.metrics.cache_misses += misses;
    self.metrics.nodes_reused += reused;
    survivors
}
/// Reparse after a single edit, preferring the in-place single-token fast path.
///
/// Returns the new root plus a flag indicating whether the fast path was used
/// (in which case the existing subtree cache remains valid and is not rebuilt
/// by the caller).
///
/// # Errors
/// Propagates any parse error from the fallback full reparse.
fn incremental_parse(
    &mut self,
    source: &str,
    edit: &IncrementalEdit,
    // Renamed from `_reusable`: the leading underscore claimed the parameter
    // was unused, but it is forwarded to `parse_with_reuse` below.
    reusable: Vec<Arc<Node>>,
) -> ParseResult<(Node, bool)> {
    if self.is_single_token_edit(edit) {
        if let Some(node) = self.fast_token_update(source, edit) {
            self.metrics.nodes_reparsed = 1;
            return Ok((node, true));
        }
    }
    let node = self.parse_with_reuse(source, reusable)?;
    Ok((node, false))
}
/// Heuristic: does this edit touch only a single literal/identifier token?
///
/// Rejects edits spanning more than 100 bytes. Uses `saturating_sub` so a
/// malformed edit with `old_end_byte < start_byte` yields 0 instead of
/// panicking on unsigned underflow in debug builds.
fn is_single_token_edit(&self, edit: &IncrementalEdit) -> bool {
    if edit.old_end_byte.saturating_sub(edit.start_byte) > 100 {
        return false;
    }
    match self.find_node_at_position(edit.start_byte) {
        Some(node) => matches!(
            node.kind,
            NodeKind::Number { .. } | NodeKind::String { .. } | NodeKind::Identifier { .. }
        ),
        None => false,
    }
}
/// Clone the current tree and try to patch the edited token in place.
/// Returns the patched tree, or `None` if no single token could absorb the edit.
fn fast_token_update(&self, source: &str, edit: &IncrementalEdit) -> Option<Node> {
    let mut patched = (*self.root).clone();
    if self.update_token_in_tree(&mut patched, source, edit) {
        Some(patched)
    } else {
        None
    }
}
/// Try to rewrite, in place, the single leaf token whose span contains `edit`.
///
/// Walks down from `node` to the token covering the edited range and
/// refreshes its text (and end offset) from the post-edit `source`. Returns
/// `true` only when a Number/String/Identifier leaf was successfully updated.
fn update_token_in_tree(&self, node: &mut Node, source: &str, edit: &IncrementalEdit) -> bool {
    // The node must fully contain the edited span.
    if node.location.start > edit.start_byte || node.location.end < edit.old_end_byte {
        return false;
    }
    let is_leaf_token = matches!(
        node.kind,
        NodeKind::Number { .. } | NodeKind::String { .. } | NodeKind::Identifier { .. }
    );
    if !is_leaf_token {
        // Not a directly patchable token; descend into the children.
        return self.update_token_in_children(node, source, edit);
    }
    // Shared offset/boundary arithmetic, previously duplicated verbatim in all
    // three leaf arms below.
    let new_end = match self.shifted_token_end(node, source, edit) {
        Some(end) => end,
        None => return false,
    };
    node.location.end = new_end;
    let new_text = &source[node.location.start..new_end];
    match &mut node.kind {
        NodeKind::Number { .. } => {
            if let Ok(value) = new_text.parse::<f64>() {
                // NOTE(review): re-serializing the parsed f64 normalizes the
                // literal text (e.g. "1e2" becomes "100"). Behavior kept from
                // the original implementation — confirm this is intended.
                node.kind = NodeKind::Number { value: value.to_string() };
                true
            } else {
                false
            }
        }
        NodeKind::String { value, .. } => {
            *value = new_text.to_string();
            true
        }
        NodeKind::Identifier { name } => {
            *name = new_text.to_string();
            true
        }
        // Unreachable: `is_leaf_token` restricted the kinds above.
        _ => false,
    }
}
/// Shift `node`'s end offset by the edit's byte delta, validating that both
/// the node start and the shifted end are in-bounds UTF-8 boundaries in
/// `source`. Returns `None` when the token cannot be safely re-sliced.
fn shifted_token_end(&self, node: &Node, source: &str, edit: &IncrementalEdit) -> Option<usize> {
    let delta = edit.byte_shift();
    let new_end = (node.location.end as isize + delta).max(0) as usize;
    let valid = new_end <= source.len()
        && source.is_char_boundary(node.location.start)
        && source.is_char_boundary(new_end);
    if valid {
        Some(new_end)
    } else {
        None
    }
}
/// Depth-first search of `node`'s children for the token covering `edit`.
///
/// Handles only the child-bearing variants relevant to the fast path; any
/// other kind falls through to the catch-all and reports no update.
fn update_token_in_children(
&self,
node: &mut Node,
source: &str,
edit: &IncrementalEdit,
) -> bool {
match &mut node.kind {
NodeKind::Program { statements } | NodeKind::Block { statements } => {
for stmt in statements {
if self.update_token_in_tree(stmt, source, edit) {
return true;
}
}
}
// Assignment's lhs/rhs are rebound to left/right so both variants share one arm.
NodeKind::Binary { left, right, .. }
| NodeKind::Assignment { lhs: left, rhs: right, .. } => {
if self.update_token_in_tree(left, source, edit) {
return true;
}
if self.update_token_in_tree(right, source, edit) {
return true;
}
}
NodeKind::Subroutine { body, .. } => {
if self.update_token_in_tree(body, source, edit) {
return true;
}
}
NodeKind::ExpressionStatement { expression } => {
if self.update_token_in_tree(expression, source, edit) {
return true;
}
}
NodeKind::VariableDeclaration { variable, initializer, .. } => {
if self.update_token_in_tree(variable, source, edit) {
return true;
}
if let Some(init) = initializer {
if self.update_token_in_tree(init, source, edit) {
return true;
}
}
}
NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
if self.update_token_in_tree(condition, source, edit) {
return true;
}
if self.update_token_in_tree(then_branch, source, edit) {
return true;
}
for (cond, branch) in elsif_branches {
if self.update_token_in_tree(cond, source, edit) {
return true;
}
if self.update_token_in_tree(branch, source, edit) {
return true;
}
}
if let Some(else_b) = else_branch {
if self.update_token_in_tree(else_b, source, edit) {
return true;
}
}
}
NodeKind::While { condition, body, .. } => {
if self.update_token_in_tree(condition, source, edit) {
return true;
}
if self.update_token_in_tree(body, source, edit) {
return true;
}
}
NodeKind::FunctionCall { args, .. } => {
for arg in args {
if self.update_token_in_tree(arg, source, edit) {
return true;
}
}
}
NodeKind::Unary { operand, .. } => {
if self.update_token_in_tree(operand, source, edit) {
return true;
}
}
// NOTE(review): For/Foreach/VariableListDeclaration are traversed elsewhere
// (cache_node, count_nodes) but not here — confirm the fast path is meant
// to skip them.
_ => {}
}
false
}
/// Parse `source` from scratch, then graft previously built subtrees back in.
///
/// `nodes_reparsed` is derived afterwards as total nodes minus the nodes
/// already counted as reused by the caller's cache scan.
///
/// # Errors
/// Propagates any error from the full parse of `source`.
fn parse_with_reuse(&mut self, source: &str, reusable: Vec<Arc<Node>>) -> ParseResult<Node> {
    let mut fresh_root = Parser::new(source).parse()?;
    for subtree in &reusable {
        self.insert_reusable(&mut fresh_root, subtree);
    }
    self.metrics.nodes_reparsed =
        self.count_nodes(&fresh_root).saturating_sub(self.metrics.nodes_reused);
    Ok(fresh_root)
}
/// Graft a cached subtree into `target` wherever a node matches its byte range
/// and kind discriminant; returns `true` once a graft site is found.
fn insert_reusable(&self, target: &mut Node, reusable: &Arc<Node>) -> bool {
// A node is replaceable when its span and variant both match the cached one.
if target.location.start == reusable.location.start
&& target.location.end == reusable.location.end
&& std::mem::discriminant(&target.kind) == std::mem::discriminant(&reusable.kind)
{
*target = (**reusable).clone();
return true;
}
// Otherwise recurse through the known child-bearing variants.
match &mut target.kind {
NodeKind::Program { statements } | NodeKind::Block { statements } => {
for stmt in statements {
if self.insert_reusable(stmt, reusable) {
return true;
}
}
}
NodeKind::Binary { left, right, .. } => {
if self.insert_reusable(left, reusable) {
return true;
}
if self.insert_reusable(right, reusable) {
return true;
}
}
NodeKind::Subroutine { body, .. } => {
if self.insert_reusable(body, reusable) {
return true;
}
}
NodeKind::ExpressionStatement { expression } => {
if self.insert_reusable(expression, reusable) {
return true;
}
}
NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
if self.insert_reusable(condition, reusable) {
return true;
}
if self.insert_reusable(then_branch, reusable) {
return true;
}
for (cond, branch) in elsif_branches {
if self.insert_reusable(cond, reusable) {
return true;
}
if self.insert_reusable(branch, reusable) {
return true;
}
}
if let Some(else_b) = else_branch {
if self.insert_reusable(else_b, reusable) {
return true;
}
}
}
NodeKind::While { condition, body, .. } => {
if self.insert_reusable(condition, reusable) {
return true;
}
if self.insert_reusable(body, reusable) {
return true;
}
}
NodeKind::For { init, condition, update, body, .. } => {
if let Some(i) = init {
if self.insert_reusable(i, reusable) {
return true;
}
}
if let Some(c) = condition {
if self.insert_reusable(c, reusable) {
return true;
}
}
if let Some(u) = update {
if self.insert_reusable(u, reusable) {
return true;
}
}
if self.insert_reusable(body, reusable) {
return true;
}
}
NodeKind::VariableDeclaration { variable, initializer, .. } => {
if self.insert_reusable(variable, reusable) {
return true;
}
if let Some(init) = initializer {
if self.insert_reusable(init, reusable) {
return true;
}
}
}
NodeKind::Assignment { lhs, rhs, .. } => {
if self.insert_reusable(lhs, reusable) {
return true;
}
if self.insert_reusable(rhs, reusable) {
return true;
}
}
_ => {}
}
false
}
/// Return a copy of `node` (and, recursively, its children) with byte offsets
/// shifted by `delta`; `None` if any shifted offset would go negative.
///
/// NOTE(review): each recursion level clones the child subtree before
/// re-adjusting it, so cost grows with subtree size times depth — acceptable
/// for small subtrees, worth revisiting for deep trees.
fn adjust_node_position(&self, node: &Node, delta: isize) -> Option<Node> {
let mut adjusted = node.clone();
let new_start = adjusted.location.start as isize + delta;
let new_end = adjusted.location.end as isize + delta;
if new_start < 0 || new_end < 0 {
return None;
}
adjusted.location.start = new_start as usize;
adjusted.location.end = new_end as usize;
// Children are replaced with their own shifted copies; any negative child
// offset aborts the whole adjustment via `?`.
match &mut adjusted.kind {
NodeKind::Program { statements } | NodeKind::Block { statements } => {
for stmt in statements {
*stmt = self.adjust_node_position(stmt, delta)?;
}
}
NodeKind::Binary { left, right, .. } => {
**left = self.adjust_node_position(left, delta)?;
**right = self.adjust_node_position(right, delta)?;
}
NodeKind::Subroutine { body, .. } => {
**body = self.adjust_node_position(body, delta)?;
}
NodeKind::ExpressionStatement { expression } => {
**expression = self.adjust_node_position(expression, delta)?;
}
NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
**condition = self.adjust_node_position(condition, delta)?;
**then_branch = self.adjust_node_position(then_branch, delta)?;
for (cond, branch) in elsif_branches {
**cond = self.adjust_node_position(cond, delta)?;
**branch = self.adjust_node_position(branch, delta)?;
}
if let Some(else_b) = else_branch {
**else_b = self.adjust_node_position(else_b, delta)?;
}
}
NodeKind::While { condition, body, .. } => {
**condition = self.adjust_node_position(condition, delta)?;
**body = self.adjust_node_position(body, delta)?;
}
NodeKind::For { init, condition, update, body, .. } => {
if let Some(i) = init {
**i = self.adjust_node_position(i, delta)?;
}
if let Some(c) = condition {
**c = self.adjust_node_position(c, delta)?;
}
if let Some(u) = update {
**u = self.adjust_node_position(u, delta)?;
}
**body = self.adjust_node_position(body, delta)?;
}
NodeKind::VariableDeclaration { variable, initializer, .. } => {
**variable = self.adjust_node_position(variable, delta)?;
if let Some(init) = initializer {
**init = self.adjust_node_position(init, delta)?;
}
}
NodeKind::Assignment { lhs, rhs, .. } => {
**lhs = self.adjust_node_position(lhs, delta)?;
**rhs = self.adjust_node_position(rhs, delta)?;
}
_ => {}
}
Some(adjusted)
}
/// Find the deepest node whose span contains byte offset `pos`.
fn find_node_at_position(&self, pos: usize) -> Option<&Node> {
self.find_in_node(&self.root, pos)
}
/// Recursive position lookup: returns the deepest node whose half-open span
/// `[start, end)` contains `pos`, or `None` if `pos` lies outside `node`.
fn find_in_node<'a>(&self, node: &'a Node, pos: usize) -> Option<&'a Node> {
if node.location.start <= pos && node.location.end > pos {
// Prefer a matching child; fall through to `Some(node)` if none contains pos.
match &node.kind {
NodeKind::Program { statements } | NodeKind::Block { statements } => {
for stmt in statements {
if let Some(found) = self.find_in_node(stmt, pos) {
return Some(found);
}
}
}
// Assignment's lhs/rhs are rebound to left/right so both variants share one arm.
NodeKind::Binary { left, right, .. }
| NodeKind::Assignment { lhs: left, rhs: right, .. } => {
if let Some(found) = self.find_in_node(left, pos) {
return Some(found);
}
if let Some(found) = self.find_in_node(right, pos) {
return Some(found);
}
}
NodeKind::Subroutine { body, .. } => {
if let Some(found) = self.find_in_node(body, pos) {
return Some(found);
}
}
NodeKind::ExpressionStatement { expression } => {
if let Some(found) = self.find_in_node(expression, pos) {
return Some(found);
}
}
NodeKind::VariableDeclaration { variable, initializer, .. } => {
if let Some(found) = self.find_in_node(variable, pos) {
return Some(found);
}
if let Some(init) = initializer {
if let Some(found) = self.find_in_node(init, pos) {
return Some(found);
}
}
}
NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
if let Some(found) = self.find_in_node(condition, pos) {
return Some(found);
}
if let Some(found) = self.find_in_node(then_branch, pos) {
return Some(found);
}
for (cond, branch) in elsif_branches {
if let Some(found) = self.find_in_node(cond, pos) {
return Some(found);
}
if let Some(found) = self.find_in_node(branch, pos) {
return Some(found);
}
}
if let Some(else_b) = else_branch {
if let Some(found) = self.find_in_node(else_b, pos) {
return Some(found);
}
}
}
NodeKind::While { condition, body, .. } => {
if let Some(found) = self.find_in_node(condition, pos) {
return Some(found);
}
if let Some(found) = self.find_in_node(body, pos) {
return Some(found);
}
}
NodeKind::FunctionCall { args, .. } => {
for arg in args {
if let Some(found) = self.find_in_node(arg, pos) {
return Some(found);
}
}
}
NodeKind::Unary { operand, .. } => {
if let Some(found) = self.find_in_node(operand, pos) {
return Some(found);
}
}
_ => {}
}
Some(node)
} else {
None
}
}
fn cache_subtrees(&mut self) {
self.subtree_cache.clear();
let root = self.root.clone();
self.cache_node(&root);
}
/// Index `node` (and, recursively, its children) into the subtree cache.
///
/// Each node is registered under both its byte range and its content hash,
/// tagged with an eviction priority, and appended to the LRU queue. Eviction
/// runs after every insertion so the cache never exceeds its budget.
fn cache_node(&mut self, node: &Node) {
    // One shared Arc serves both indexes; the previous version deep-cloned the
    // node twice (one independent Arc::new(node.clone()) per index).
    let cached = Arc::new(node.clone());
    let range = (node.location.start, node.location.end);
    self.subtree_cache.by_range.insert(range, Arc::clone(&cached));
    let hash = self.hash_node(node);
    let priority = self.get_symbol_priority(node);
    self.subtree_cache.by_content.insert(hash, cached);
    self.subtree_cache.critical_symbols.insert(hash, priority);
    self.subtree_cache.lru.push_back(hash);
    self.subtree_cache.evict_if_needed();
    match &node.kind {
        NodeKind::Program { statements } | NodeKind::Block { statements } => {
            for stmt in statements {
                self.cache_node(stmt);
            }
        }
        NodeKind::Binary { left, right, .. } => {
            self.cache_node(left);
            self.cache_node(right);
        }
        NodeKind::Subroutine { body, .. } => {
            self.cache_node(body);
        }
        NodeKind::ExpressionStatement { expression } => {
            self.cache_node(expression);
        }
        NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
            self.cache_node(condition);
            self.cache_node(then_branch);
            for (cond, branch) in elsif_branches {
                self.cache_node(cond);
                self.cache_node(branch);
            }
            if let Some(else_b) = else_branch {
                self.cache_node(else_b);
            }
        }
        NodeKind::While { condition, body, .. } => {
            self.cache_node(condition);
            self.cache_node(body);
        }
        NodeKind::For { init, condition, update, body, .. } => {
            if let Some(i) = init {
                self.cache_node(i);
            }
            if let Some(c) = condition {
                self.cache_node(c);
            }
            if let Some(u) = update {
                self.cache_node(u);
            }
            self.cache_node(body);
        }
        NodeKind::Foreach { variable, list, body, continue_block } => {
            self.cache_node(variable);
            self.cache_node(list);
            self.cache_node(body);
            if let Some(cb) = continue_block {
                self.cache_node(cb);
            }
        }
        NodeKind::VariableDeclaration { variable, initializer, .. } => {
            self.cache_node(variable);
            if let Some(init) = initializer {
                self.cache_node(init);
            }
        }
        NodeKind::VariableListDeclaration { variables, initializer, .. } => {
            for var in variables {
                self.cache_node(var);
            }
            if let Some(init) = initializer {
                self.cache_node(init);
            }
        }
        NodeKind::Assignment { lhs, rhs, .. } => {
            self.cache_node(lhs);
            self.cache_node(rhs);
        }
        _ => {}
    }
}
/// Content hash of a node: the kind discriminant, plus the literal text for
/// leaf tokens (numbers, strings, identifiers).
fn hash_node(&self, node: &Node) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    let mut state = DefaultHasher::new();
    std::mem::discriminant(&node.kind).hash(&mut state);
    // `str` hashes identically to `String`, so extracting a borrowed slice
    // keeps hashes compatible with the previous per-arm hashing.
    let leaf_text: Option<&str> = match &node.kind {
        NodeKind::Number { value } => Some(value.as_str()),
        NodeKind::String { value, .. } => Some(value.as_str()),
        NodeKind::Identifier { name } => Some(name.as_str()),
        _ => None,
    };
    if let Some(text) = leaf_text {
        text.hash(&mut state);
    }
    state.finish()
}
/// Count `node` plus every descendant reachable through the traversed variants.
fn count_nodes(&self, node: &Node) -> usize {
let mut count = 1;
match &node.kind {
NodeKind::Program { statements } | NodeKind::Block { statements } => {
for stmt in statements {
count += self.count_nodes(stmt);
}
}
NodeKind::Binary { left, right, .. } => {
count += self.count_nodes(left);
count += self.count_nodes(right);
}
NodeKind::Subroutine { body, .. } => {
count += self.count_nodes(body);
}
NodeKind::ExpressionStatement { expression } => {
count += self.count_nodes(expression);
}
NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
count += self.count_nodes(condition);
count += self.count_nodes(then_branch);
for (cond, branch) in elsif_branches {
count += self.count_nodes(cond);
count += self.count_nodes(branch);
}
if let Some(else_b) = else_branch {
count += self.count_nodes(else_b);
}
}
NodeKind::While { condition, body, .. } => {
count += self.count_nodes(condition);
count += self.count_nodes(body);
}
NodeKind::For { init, condition, update, body, .. } => {
if let Some(i) = init {
count += self.count_nodes(i);
}
if let Some(c) = condition {
count += self.count_nodes(c);
}
if let Some(u) = update {
count += self.count_nodes(u);
}
count += self.count_nodes(body);
}
NodeKind::Foreach { variable, list, body, continue_block } => {
count += self.count_nodes(variable);
count += self.count_nodes(list);
count += self.count_nodes(body);
if let Some(cb) = continue_block {
count += self.count_nodes(cb);
}
}
NodeKind::VariableDeclaration { variable, initializer, .. } => {
count += self.count_nodes(variable);
if let Some(init) = initializer {
count += self.count_nodes(init);
}
}
NodeKind::VariableListDeclaration { variables, initializer, .. } => {
for var in variables {
count += self.count_nodes(var);
}
if let Some(init) = initializer {
count += self.count_nodes(init);
}
}
NodeKind::Assignment { lhs, rhs, .. } => {
count += self.count_nodes(lhs);
count += self.count_nodes(rhs);
}
NodeKind::FunctionCall { args, .. } => {
for arg in args {
count += self.count_nodes(arg);
}
}
NodeKind::Unary { operand, .. } => {
count += self.count_nodes(operand);
}
// Leaf variants (literals, identifiers, etc.) contribute only themselves.
_ => {}
}
count
}
/// Classify a node for cache-eviction priority.
///
/// Declarations backing workspace-wide features (packages, imports,
/// subroutines) rank Critical; calls and variable declarations High; leaf
/// literals and simple expressions are cheapest to reparse and rank Low;
/// everything else (blocks, control flow, assignments, unknown kinds) defaults
/// to Medium — identical to the previous per-arm mapping.
fn get_symbol_priority(&self, node: &Node) -> SymbolPriority {
    match &node.kind {
        NodeKind::Package { .. }
        | NodeKind::Use { .. }
        | NodeKind::No { .. }
        | NodeKind::Subroutine { .. } => SymbolPriority::Critical,
        NodeKind::FunctionCall { .. }
        | NodeKind::Variable { .. }
        | NodeKind::VariableDeclaration { .. } => SymbolPriority::High,
        NodeKind::Number { .. }
        | NodeKind::String { .. }
        | NodeKind::Binary { .. }
        | NodeKind::Unary { .. } => SymbolPriority::Low,
        _ => SymbolPriority::Medium,
    }
}
/// Borrow the current AST root.
pub fn tree(&self) -> &Node {
&self.root
}
/// Borrow the current source text.
pub fn text(&self) -> &str {
&self.source
}
/// Borrow the metrics from the most recent parse cycle.
pub fn metrics(&self) -> &ParseMetrics {
&self.metrics
}
/// Change the cache capacity, evicting immediately if the cache exceeds it.
pub fn set_cache_max_size(&mut self, max_size: usize) {
self.subtree_cache.set_max_size(max_size);
}
}
impl SubtreeCache {
    /// Create an empty cache holding at most `max_size` content-hash entries.
    fn new(max_size: usize) -> Self {
        SubtreeCache {
            by_content: HashMap::new(),
            by_range: HashMap::new(),
            lru: VecDeque::new(),
            critical_symbols: HashMap::new(),
            max_size,
        }
    }

    /// Remove every cached entry; the configured `max_size` is kept.
    fn clear(&mut self) {
        self.by_content.clear();
        self.by_range.clear();
        self.lru.clear();
        self.critical_symbols.clear();
    }

    /// Shrink `by_content` until it fits within `max_size`.
    ///
    /// Eviction prefers the oldest entry with the lowest `SymbolPriority`, so
    /// critical symbols (packages, subs, imports) outlive literals. If neither
    /// the priority scan nor the LRU fallback yields a victim (i.e. the LRU
    /// queue is out of sync with `by_content`), the loop now bails out instead
    /// of spinning forever — the previous version could hang in that state.
    fn evict_if_needed(&mut self) {
        while self.by_content.len() > self.max_size {
            if let Some(hash) = self.find_least_important_entry() {
                debug!(
                    "Evicting cache entry with hash {} (priority: {:?})",
                    hash,
                    self.critical_symbols.get(&hash).copied().unwrap_or(SymbolPriority::Low)
                );
                self.by_content.remove(&hash);
                self.critical_symbols.remove(&hash);
                // The queue may hold duplicates of this hash; drop them all.
                self.lru.retain(|&h| h != hash);
            } else if let Some(hash) = self.lru.pop_front() {
                debug!("Fallback eviction for hash {}", hash);
                self.by_content.remove(&hash);
                self.critical_symbols.remove(&hash);
            } else {
                // No eviction candidate available: break rather than loop forever.
                break;
            }
        }
    }

    /// The oldest entry with the strictly lowest priority, or `None` if the
    /// LRU queue is empty. Ties keep the earliest (least recently inserted)
    /// hash, preserving LRU order among equals.
    fn find_least_important_entry(&self) -> Option<u64> {
        let mut best: Option<(u64, SymbolPriority)> = None;
        for &hash in &self.lru {
            let priority =
                self.critical_symbols.get(&hash).copied().unwrap_or(SymbolPriority::Low);
            match best {
                None => best = Some((hash, priority)),
                Some((_, best_priority)) => {
                    if priority < best_priority {
                        best = Some((hash, priority));
                    }
                }
            }
        }
        best.map(|(hash, _)| hash)
    }

    /// Update the capacity and immediately evict down to the new limit.
    fn set_max_size(&mut self, max_size: usize) {
        self.max_size = max_size;
        self.evict_if_needed();
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::incremental_edit::IncrementalEdit;

    #[test]
    fn test_incremental_single_token_edit() -> ParseResult<()> {
        let source = r#"
my $x = 42;
my $y = 100;
print $x + $y;
"#;
        let mut doc = IncrementalDocument::new(source.to_string())?;
        let pos = source.find("42").ok_or_else(|| crate::error::ParseError::SyntaxError {
            message: "test source should contain '42'".to_string(),
            location: 0,
        })?;
        let edit = IncrementalEdit::new(pos + 1, pos + 2, "3".to_string());
        doc.apply_edit(edit)?;
        assert!(doc.metrics.nodes_reused > 0);
        assert!(doc.metrics.nodes_reparsed < 5);
        // Deliberately no wall-clock assertion here: the previous
        // `last_parse_time_ms < 1.0` check was flaky on loaded CI machines.
        Ok(())
    }

    #[test]
    fn test_incremental_multiple_edits() -> ParseResult<()> {
        let source = r#"
sub calculate {
my $a = 10;
my $b = 20;
return $a + $b;
}
"#;
        let mut doc = IncrementalDocument::new(source.to_string())?;
        let mut edits = IncrementalEditSet::new();
        let pos_10 = source.find("10").ok_or_else(|| crate::error::ParseError::SyntaxError {
            message: "test source should contain '10'".to_string(),
            location: 0,
        })?;
        edits.add(IncrementalEdit::new(pos_10, pos_10 + 2, "15".to_string()));
        let pos_20 = source.find("20").ok_or_else(|| crate::error::ParseError::SyntaxError {
            message: "test source should contain '20'".to_string(),
            location: 0,
        })?;
        edits.add(IncrementalEdit::new(pos_20, pos_20 + 2, "25".to_string()));
        doc.apply_edits(&edits)?;
        let critical_count = doc
            .subtree_cache
            .critical_symbols
            .values()
            .filter(|&p| *p == SymbolPriority::Critical)
            .count();
        assert!(critical_count > 0, "Should preserve critical symbols during batch edits");
        assert!(doc.metrics.nodes_reused > 0, "Incremental parsing should reuse nodes");
        // Deliberately no wall-clock assertion here: the previous
        // `last_parse_time_ms < 10.0` check was timing-dependent and flaky.
        Ok(())
    }

    #[test]
    fn test_cache_eviction() -> ParseResult<()> {
        let source = "my $x = 1;";
        let doc = IncrementalDocument::new(source.to_string())?;
        assert!(!doc.subtree_cache.by_range.is_empty());
        assert!(!doc.subtree_cache.by_content.is_empty());
        Ok(())
    }

    #[test]
    fn test_symbol_priority_classification() -> ParseResult<()> {
        let source = r#"
package TestPkg;
use strict;
sub test_func {
my $var = 42;
if ($var > 0) {
return $var + 1;
}
}
"#;
        let doc = IncrementalDocument::new(source.to_string())?;
        let priorities: std::collections::HashSet<_> =
            doc.subtree_cache.critical_symbols.values().cloned().collect();
        assert!(
            priorities.contains(&SymbolPriority::Critical),
            "Should classify package/use/sub as critical"
        );
        assert!(
            priorities.contains(&SymbolPriority::High),
            "Should classify variables as high priority"
        );
        assert!(
            priorities.contains(&SymbolPriority::Low)
                || priorities.contains(&SymbolPriority::Medium),
            "Should have lower priority symbols"
        );
        Ok(())
    }

    #[test]
    fn test_cache_respects_max_size() -> ParseResult<()> {
        let source = "my $x = 1; my $y = 2; my $z = 3;";
        let mut doc = IncrementalDocument::new(source.to_string())?;
        assert!(doc.subtree_cache.by_content.len() > 1);
        doc.set_cache_max_size(1);
        assert!(doc.subtree_cache.by_content.len() <= 1);
        let pos = source.find('1').ok_or_else(|| crate::error::ParseError::SyntaxError {
            message: "test source should contain '1'".to_string(),
            location: 0,
        })?;
        let edit = IncrementalEdit::new(pos, pos + 1, "10".to_string());
        doc.apply_edit(edit)?;
        assert!(doc.subtree_cache.by_content.len() <= 1);
        Ok(())
    }

    #[test]
    fn test_cache_priority_preservation() -> ParseResult<()> {
        let source = r#"
package MyPackage;
use strict;
use warnings;
sub process {
my $x = 42;
my $y = "hello";
return $x + 1;
}
"#;
        let mut doc = IncrementalDocument::new(source.to_string())?;
        let initial_cache_size = doc.subtree_cache.by_content.len();
        assert!(initial_cache_size > 3, "Should have multiple cached nodes");
        doc.set_cache_max_size(3);
        assert!(doc.subtree_cache.by_content.len() <= 3);
        let has_critical_symbols = doc
            .subtree_cache
            .critical_symbols
            .values()
            .cloned()
            .any(|p| p == SymbolPriority::Critical);
        assert!(has_critical_symbols, "Should preserve critical symbols like package/use/sub");
        let pos = source.find("42").ok_or_else(|| crate::error::ParseError::SyntaxError {
            message: "test source should contain '42'".to_string(),
            location: 0,
        })?;
        let edit = IncrementalEdit::new(pos, pos + 2, "100".to_string());
        doc.apply_edit(edit)?;
        assert!(doc.subtree_cache.by_content.len() <= 3);
        let has_critical_after_edit = doc
            .subtree_cache
            .critical_symbols
            .values()
            .cloned()
            .any(|p| p == SymbolPriority::Critical);
        assert!(has_critical_after_edit, "Should preserve critical symbols after edit");
        Ok(())
    }

    #[test]
    fn test_workspace_symbol_cache_preservation() -> ParseResult<()> {
        let source = r#"
package TestModule;
sub exported_function { }
sub internal_helper { }
my $global_var = "test";
"#;
        let mut doc = IncrementalDocument::new(source.to_string())?;
        doc.set_cache_max_size(2);
        let package_preserved = doc
            .subtree_cache
            .by_content
            .values()
            .any(|node| matches!(node.kind, NodeKind::Package { .. }));
        assert!(package_preserved, "Package declaration should be preserved for workspace symbols");
        Ok(())
    }

    #[test]
    fn test_completion_metadata_preservation() -> ParseResult<()> {
        let source = r#"
use Data::Dumper;
use List::Util qw(first max);
sub calculate {
my ($input, $multiplier) = @_;
return $input * $multiplier;
}
"#;
        let mut doc = IncrementalDocument::new(source.to_string())?;
        doc.set_cache_max_size(4);
        let use_statements_count = doc
            .subtree_cache
            .by_content
            .values()
            .filter(|node| matches!(node.kind, NodeKind::Use { .. }))
            .count();
        assert!(
            use_statements_count >= 1,
            "Use statements should be preserved for completion metadata"
        );
        let function_preserved = doc
            .subtree_cache
            .by_content
            .values()
            .any(|node| matches!(node.kind, NodeKind::Subroutine { .. }));
        assert!(function_preserved, "Function definitions should be preserved for completion");
        Ok(())
    }

    #[test]
    fn test_code_lens_reference_preservation() -> ParseResult<()> {
        let source = r#"
package MyClass;
sub new {
my $class = shift;
return bless {}, $class;
}
sub process_data {
my ($self, $data) = @_;
return $self->transform($data);
}
"#;
        let mut doc = IncrementalDocument::new(source.to_string())?;
        doc.set_cache_max_size(3);
        let critical_nodes = doc
            .subtree_cache
            .by_content
            .values()
            .filter(|node| {
                matches!(node.kind, NodeKind::Package { .. } | NodeKind::Subroutine { .. })
            })
            .count();
        assert!(critical_nodes >= 2, "Should preserve package and key subroutines for code lens");
        Ok(())
    }
}