use crate::{
ast::{Node, NodeKind, SourceLocation},
edit::{Edit, EditSet},
error::ParseResult,
incremental_advanced_reuse::{AdvancedReuseAnalyzer, ReuseAnalysisResult, ReuseConfig},
parser::Parser,
position::Range,
};
use std::collections::HashMap;
/// Aggregated measurements for a single incremental parse run.
#[derive(Debug, Clone, Default)]
pub struct IncrementalMetrics {
    pub parse_time_micros: u128,
    pub nodes_reused: usize,
    pub nodes_reparsed: usize,
    pub cache_hit_ratio: f64,
    pub edit_count: usize,
}

impl IncrementalMetrics {
    /// Creates a zeroed metrics record.
    pub fn new() -> Self {
        Self::default()
    }

    /// Share of touched nodes that were reused, as a percentage.
    /// Returns 0.0 when no nodes were touched at all.
    pub fn efficiency_percentage(&self) -> f64 {
        let total = self.nodes_reused + self.nodes_reparsed;
        if total == 0 {
            0.0
        } else {
            self.nodes_reused as f64 / total as f64 * 100.0
        }
    }

    /// True when the recorded parse finished in under one millisecond.
    pub fn is_sub_millisecond(&self) -> bool {
        self.parse_time_micros < 1000
    }

    /// Human-readable bucket for the recorded parse time.
    pub fn performance_category(&self) -> &'static str {
        let micros = self.parse_time_micros;
        if micros <= 100 {
            "Excellent (<100µs)"
        } else if micros <= 500 {
            "Very Good (<500µs)"
        } else if micros <= 1000 {
            "Good (<1ms)"
        } else if micros <= 5000 {
            "Acceptable (<5ms)"
        } else {
            "Needs Optimization (>5ms)"
        }
    }
}
/// A parsed tree together with the source text it was parsed from, plus an
/// index from byte offset to the nodes starting there (used to locate the
/// node a given edit falls into).
#[derive(Debug, Clone)]
pub struct IncrementalTree {
    pub root: Node,
    pub source: String,
    // Maps a start byte offset to clones of every node that begins there.
    node_map: HashMap<usize, Vec<Node>>,
}
impl IncrementalTree {
    /// Builds a tree wrapper and indexes every node by its start offset.
    pub fn new(root: Node, source: String) -> Self {
        let mut tree = IncrementalTree { root, source, node_map: HashMap::new() };
        tree.build_node_map();
        tree
    }

    /// Rebuilds the start-offset index from scratch.
    ///
    /// Indexing goes through an associated function so `node_map` (mutable)
    /// and `root` (immutable) are borrowed disjointly — the previous version
    /// deep-cloned the entire root just to satisfy the borrow checker.
    fn build_node_map(&mut self) {
        self.node_map.clear();
        Self::index_node(&mut self.node_map, &self.root);
    }

    /// Recursively records `node` and its children in `map`, keyed by start
    /// offset. Only the structural kinds below are descended into; all other
    /// kinds are treated as leaves.
    fn index_node(map: &mut HashMap<usize, Vec<Node>>, node: &Node) {
        map.entry(node.location.start).or_default().push(node.clone());
        match &node.kind {
            NodeKind::Program { statements } | NodeKind::Block { statements } => {
                for stmt in statements {
                    Self::index_node(map, stmt);
                }
            }
            NodeKind::VariableDeclaration { variable, initializer, .. } => {
                Self::index_node(map, variable);
                if let Some(init) = initializer {
                    Self::index_node(map, init);
                }
            }
            NodeKind::Binary { left, right, .. } => {
                Self::index_node(map, left);
                Self::index_node(map, right);
            }
            NodeKind::Unary { operand, .. } => {
                Self::index_node(map, operand);
            }
            NodeKind::FunctionCall { args, .. } => {
                for arg in args {
                    Self::index_node(map, arg);
                }
            }
            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
                Self::index_node(map, condition);
                Self::index_node(map, then_branch);
                for (cond, branch) in elsif_branches {
                    Self::index_node(map, cond);
                    Self::index_node(map, branch);
                }
                if let Some(branch) = else_branch {
                    Self::index_node(map, branch);
                }
            }
            _ => {}
        }
    }

    /// Returns the smallest indexed node whose span contains `[start, end]`,
    /// or `None` when no indexed node covers the range.
    pub fn find_containing_node(&self, start: usize, end: usize) -> Option<&Node> {
        self.node_map
            .values()
            .flatten()
            .filter(|node| node.location.start <= start && node.location.end >= end)
            .min_by_key(|node| node.location.end - node.location.start)
    }
}
/// Incremental parser that reuses AST nodes across edits when it can prove
/// the edits are non-structural, falling back to full reparses otherwise.
pub struct IncrementalParserV2 {
    // Tree produced by the previous successful parse, if any.
    last_tree: Option<IncrementalTree>,
    // Edits registered since the last parse; consumed by `parse`.
    pending_edits: EditSet,
    /// Nodes carried over from the previous tree in the last parse.
    pub reused_nodes: usize,
    /// Nodes that had to be (re)parsed in the last parse.
    pub reparsed_nodes: usize,
    /// Timing/efficiency metrics for the last run.
    pub metrics: IncrementalMetrics,
    // Analyzer driving the advanced (subtree-level) reuse strategy.
    reuse_analyzer: AdvancedReuseAnalyzer,
    reuse_config: ReuseConfig,
    /// Result of the most recent advanced reuse analysis, if one has run.
    pub last_reuse_analysis: Option<ReuseAnalysisResult>,
}
impl IncrementalParserV2 {
pub fn new() -> Self {
IncrementalParserV2 {
last_tree: None,
pending_edits: EditSet::new(),
reused_nodes: 0,
reparsed_nodes: 0,
metrics: IncrementalMetrics::new(),
reuse_analyzer: AdvancedReuseAnalyzer::new(),
reuse_config: ReuseConfig::default(),
last_reuse_analysis: None,
}
}
pub fn with_reuse_config(config: ReuseConfig) -> Self {
IncrementalParserV2 {
last_tree: None,
pending_edits: EditSet::new(),
reused_nodes: 0,
reparsed_nodes: 0,
metrics: IncrementalMetrics::new(),
reuse_analyzer: AdvancedReuseAnalyzer::with_config(config.clone()),
reuse_config: config,
last_reuse_analysis: None,
}
}
/// Queues an edit to be applied on the next call to `parse`.
pub fn edit(&mut self, edit: Edit) {
    self.pending_edits.add(edit);
}
/// Parses `source`, attempting an incremental update against the previous
/// tree when edits are pending; otherwise (or if the incremental path
/// declines) falls back to a full parse. Resets the reuse counters first.
pub fn parse(&mut self, source: &str) -> ParseResult<Node> {
    self.reused_nodes = 0;
    self.reparsed_nodes = 0;
    if let Some(ref last_tree) = self.last_tree {
        if !self.pending_edits.is_empty() {
            // Clone the old tree to release the borrow on `self`, so the
            // `&mut self` call below is allowed.
            let last_tree_clone = last_tree.clone();
            if let Some(new_tree) = self.try_incremental_parse(source, &last_tree_clone) {
                self.last_tree =
                    Some(IncrementalTree::new(new_tree.clone(), source.to_string()));
                self.pending_edits = EditSet::new();
                return Ok(new_tree);
            }
        }
    }
    self.full_parse(source)
}
/// Runs a full reparse of `source` and updates the reuse counters.
///
/// The freshly parsed tree always becomes the new `last_tree`; the counters
/// are only a comparison against the previous tree. The previous version
/// re-checked `last_tree.is_some()` three times and carried an unreachable
/// `else` branch — the condition is now evaluated once.
fn full_parse(&mut self, source: &str) -> ParseResult<Node> {
    let mut parser = Parser::new(source);
    let root = parser.parse()?;
    // Reuse statistics are only meaningful for a small "simple value" edit
    // batch against an existing tree; every other case counts all nodes as
    // reparsed.
    let (reused, reparsed) = match self.last_tree {
        Some(ref tree)
            if !source.is_empty()
                && self.pending_edits.len() <= 10
                && self.is_simple_value_edit(tree) =>
        {
            self.analyze_reuse(&tree.root, &root)
        }
        _ => (0, self.count_nodes(&root)),
    };
    self.reused_nodes = reused;
    self.reparsed_nodes = reparsed;
    self.last_tree = Some(IncrementalTree::new(root.clone(), source.to_string()));
    self.pending_edits = EditSet::new();
    Ok(root)
}
/// Dispatches to the cheapest incremental strategy that applies to the
/// pending edit batch, or returns `None` to force a full reparse.
fn try_incremental_parse(&mut self, source: &str, last_tree: &IncrementalTree) -> Option<Node> {
    if let Some(tree) = self.try_advanced_reuse_parse(source, last_tree) {
        Some(tree)
    } else if self.is_simple_value_edit(last_tree) {
        self.incremental_parse_simple(source, last_tree)
    } else if self.is_whitespace_or_comment_edit(last_tree) {
        self.incremental_parse_whitespace(source, last_tree)
    } else {
        None
    }
}
fn try_advanced_reuse_parse(
&mut self,
source: &str,
last_tree: &IncrementalTree,
) -> Option<Node> {
let mut parser = Parser::new(source);
let new_tree = match parser.parse() {
Ok(tree) => tree,
Err(_) => {
return None;
}
};
let analysis_result = self.reuse_analyzer.analyze_reuse_opportunities(
&last_tree.root,
&new_tree,
&self.pending_edits,
&self.reuse_config,
);
self.last_reuse_analysis = Some(analysis_result);
if let Some(ref analysis) = self.last_reuse_analysis {
if analysis.meets_efficiency_target(self.reuse_config.min_confidence * 100.0) {
self.reused_nodes = analysis.reused_nodes;
self.reparsed_nodes = analysis.total_new_nodes - analysis.reused_nodes;
return Some(new_tree);
}
}
None
}
/// Returns true when every pending edit falls entirely inside a single
/// value-like leaf (number, string, variable, or identifier), i.e. the
/// batch cannot change the tree's structure.
///
/// Edit offsets arrive in post-edit coordinates; `cumulative_shift`
/// translates each one back into the previous tree's coordinate space
/// (assumes edits are ordered by position). The previous version had three
/// byte-identical match arms for these leaf kinds; they are folded into one
/// or-pattern here.
fn is_simple_value_edit(&self, tree: &IncrementalTree) -> bool {
    // Large batches are not worth analyzing edit-by-edit.
    if self.pending_edits.len() > 10 {
        return false;
    }
    let mut cumulative_shift: isize = 0;
    for edit in self.pending_edits.edits() {
        let original_start = (edit.start_byte as isize - cumulative_shift) as usize;
        let original_end = (edit.old_end_byte as isize - cumulative_shift) as usize;
        match tree.find_containing_node(original_start, original_end) {
            Some(node) => match &node.kind {
                NodeKind::Number { .. }
                | NodeKind::String { .. }
                | NodeKind::Variable { .. }
                | NodeKind::Identifier { .. } => {
                    // Containment re-check mirrors find_containing_node's
                    // guarantee; kept as a defensive belt.
                    if original_start >= node.location.start
                        && original_end <= node.location.end
                    {
                        cumulative_shift += edit.byte_shift();
                    } else {
                        return false;
                    }
                }
                // Any structural node kind disqualifies the whole batch.
                _ => return false,
            },
            None => return false,
        }
    }
    true
}
/// True when every pending edit touches only whitespace or comments in the
/// previous source (so a position shift suffices — no reparse needed).
fn is_whitespace_or_comment_edit(&self, tree: &IncrementalTree) -> bool {
    self.pending_edits
        .edits()
        .iter()
        .all(|edit| self.is_in_non_structural_content(tree, edit.start_byte, edit.old_end_byte))
}
/// Lexes the `[start, end)` span of the *old* source and reports whether it
/// holds nothing but whitespace, newlines, and comments.
fn is_in_non_structural_content(
    &self,
    tree: &IncrementalTree,
    start: usize,
    end: usize,
) -> bool {
    use perl_lexer::{PerlLexer, TokenType};
    // Empty or out-of-bounds ranges are treated as structural (unsafe to skip).
    if start >= end || end > tree.source.len() {
        return false;
    }
    let mut lexer = PerlLexer::new(&tree.source[start..end]);
    while let Some(token) = lexer.next_token() {
        match token.token_type {
            TokenType::Whitespace | TokenType::Newline | TokenType::Comment(_) => continue,
            TokenType::EOF => return true,
            // Any real token means the edit is structural.
            _ => return false,
        }
    }
    // Lexer exhausted without producing a structural token.
    true
}
/// Handles a whitespace/comment-only edit batch by shifting every node
/// position in the old tree by the net byte delta — no reparse needed.
fn incremental_parse_whitespace(
    &mut self,
    _source: &str,
    last_tree: &IncrementalTree,
) -> Option<Node> {
    let shift = self.calculate_total_shift();
    Some(self.clone_with_shifted_positions(&last_tree.root, shift))
}
/// Net byte delta of all pending edits combined.
fn calculate_total_shift(&self) -> isize {
    self.pending_edits.edits().iter().map(|edit| edit.byte_shift()).sum()
}
/// Incremental path for simple value edits: clones the old tree, patching
/// affected leaf values and shifting positions, then validates the result
/// against the new source before accepting it.
fn incremental_parse_simple(
    &mut self,
    source: &str,
    last_tree: &IncrementalTree,
) -> Option<Node> {
    if source.is_empty() {
        return None;
    }
    let new_root = self.clone_and_update_node(&last_tree.root, source, &last_tree.source);
    // Reject the patched tree if it no longer matches the new source.
    if !self.validate_incremental_result(&new_root, source) {
        return None;
    }
    self.count_reuse_potential(&last_tree.root, &new_root);
    Some(new_root)
}
/// Sanity-checks a patched tree against the new source: positions must be
/// in bounds, ordered, and on char boundaries, and value-leaf nodes must
/// agree with the text they claim to span. Returns false to force a full
/// reparse.
fn validate_incremental_result(&self, node: &Node, source: &str) -> bool {
    if source.is_empty() {
        // An empty source can only validly be an empty program.
        return match &node.kind {
            NodeKind::Program { statements } => statements.is_empty(),
            _ => false,
        };
    }
    if node.location.start > source.len() || node.location.end > source.len() {
        return false;
    }
    if node.location.start > node.location.end {
        return false;
    }
    if !source.is_char_boundary(node.location.start)
        || !source.is_char_boundary(node.location.end)
    {
        return false;
    }
    if node.location.start < node.location.end {
        let node_text = &source[node.location.start..node.location.end];
        match &node.kind {
            NodeKind::Number { value } => {
                // The stored value must match the spanned text and still parse
                // as a number.
                if value.trim() != node_text.trim() {
                    return false;
                }
                if value.parse::<f64>().is_err() && value.parse::<i64>().is_err() {
                    return false;
                }
            }
            NodeKind::Variable { name, .. } => {
                if !node_text.contains(name) {
                    return false;
                }
            }
            NodeKind::Identifier { name } => {
                if name.trim() != node_text.trim() {
                    return false;
                }
            }
            // Strings are not text-checked here: the previous String arm
            // contained an `if` with an empty body (a provable no-op) and was
            // removed. Other kinds rely on the structural pass below.
            _ => {}
        }
    }
    // Shallow structural containment pass; depth-bounded to stay cheap.
    self.validate_node_tree_consistency(node, source, 0, 3)
}
/// Recursively verifies parent/child span containment down to `max_depth`.
/// Returns false if any child's span escapes its parent's span.
fn validate_node_tree_consistency(
    &self,
    node: &Node,
    source: &str,
    depth: usize,
    max_depth: usize,
) -> bool {
    if depth > max_depth {
        // Depth cap: beyond this we assume consistency rather than pay for a
        // full tree walk.
        return true;
    }
    match &node.kind {
        NodeKind::Program { statements } | NodeKind::Block { statements } => {
            for stmt in statements {
                if stmt.location.start < node.location.start
                    || stmt.location.end > node.location.end
                {
                    return false;
                }
                if !self.validate_node_tree_consistency(stmt, source, depth + 1, max_depth) {
                    return false;
                }
            }
        }
        NodeKind::VariableDeclaration { variable, initializer, .. } => {
            if !self.validate_node_tree_consistency(variable, source, depth + 1, max_depth) {
                return false;
            }
            if let Some(init) = initializer {
                if !self.validate_node_tree_consistency(init, source, depth + 1, max_depth) {
                    return false;
                }
            }
        }
        NodeKind::Binary { left, right, .. } => {
            if !self.validate_node_tree_consistency(left, source, depth + 1, max_depth)
                || !self.validate_node_tree_consistency(right, source, depth + 1, max_depth)
            {
                return false;
            }
        }
        // Other kinds are either leaves or not span-checked here.
        _ => {
        }
    }
    true
}
/// Clones `node` for the simple-value incremental path: positions are
/// shifted by the edits' byte deltas and affected leaf values (numbers,
/// strings) are re-read from `new_source`. Everything else gets a
/// position-only shift via `clone_with_shifted_positions`.
fn clone_and_update_node(&self, node: &Node, new_source: &str, old_source: &str) -> Node {
    let shift = self.calculate_shift_at(node.location.start);
    let affected = self.is_node_affected(node);
    match &node.kind {
        // Containers always recurse so edits deep in the tree are reached.
        NodeKind::Program { statements } => {
            let new_statements: Vec<Node> = statements
                .iter()
                .map(|stmt| self.clone_and_update_node(stmt, new_source, old_source))
                .collect();
            let new_start = (node.location.start as isize + shift) as usize;
            // The end also absorbs the size change of edits inside this span.
            let new_end = (node.location.end as isize
                + shift
                + self.calculate_content_delta(node)) as usize;
            return Node::new(
                NodeKind::Program { statements: new_statements },
                SourceLocation { start: new_start, end: new_end },
            );
        }
        NodeKind::VariableDeclaration { declarator, variable, initializer, attributes } => {
            let new_variable = self.clone_and_update_node(variable, new_source, old_source);
            let new_initializer = initializer
                .as_ref()
                .map(|init| self.clone_and_update_node(init, new_source, old_source));
            let new_start = (node.location.start as isize + shift) as usize;
            let new_end = (node.location.end as isize
                + shift
                + self.calculate_content_delta(node)) as usize;
            return Node::new(
                NodeKind::VariableDeclaration {
                    declarator: declarator.clone(),
                    variable: Box::new(new_variable),
                    initializer: new_initializer.map(Box::new),
                    attributes: attributes.clone(),
                },
                SourceLocation { start: new_start, end: new_end },
            );
        }
        _ => {}
    }
    if affected {
        match &node.kind {
            NodeKind::Number { .. } => {
                let new_start = (node.location.start as isize + shift) as usize;
                let new_end =
                    (node.location.end as isize + shift + self.calculate_content_delta(node))
                        as usize;
                // Re-read the literal's text from the edited source; if the
                // computed span is out of bounds, fall through to a plain shift.
                if new_start < new_source.len() && new_end <= new_source.len() {
                    let new_value = &new_source[new_start..new_end];
                    return Node::new(
                        NodeKind::Number { value: new_value.to_string() },
                        SourceLocation { start: new_start, end: new_end },
                    );
                }
            }
            NodeKind::String { interpolated, .. } => {
                let new_start = (node.location.start as isize + shift) as usize;
                let new_end =
                    (node.location.end as isize + shift + self.calculate_content_delta(node))
                        as usize;
                if new_start < new_source.len() && new_end <= new_source.len() {
                    let new_value = &new_source[new_start..new_end];
                    return Node::new(
                        NodeKind::String {
                            value: new_value.to_string(),
                            interpolated: *interpolated,
                        },
                        SourceLocation { start: new_start, end: new_end },
                    );
                }
            }
            // NOTE(review): the Program and VariableDeclaration arms below are
            // unreachable — both kinds already returned from the match above.
            // Kept verbatim; consider removing in a follow-up.
            NodeKind::Program { statements } => {
                let new_statements = statements
                    .iter()
                    .map(|stmt| self.clone_and_update_node(stmt, new_source, old_source))
                    .collect();
                let new_location = SourceLocation {
                    start: (node.location.start as isize + shift) as usize,
                    end: (node.location.end as isize + shift) as usize,
                };
                return Node::new(
                    NodeKind::Program { statements: new_statements },
                    new_location,
                );
            }
            NodeKind::VariableDeclaration { declarator, variable, attributes, initializer } => {
                let new_variable =
                    Box::new(self.clone_and_update_node(variable, new_source, old_source));
                let new_initializer = initializer.as_ref().map(|init| {
                    Box::new(self.clone_and_update_node(init, new_source, old_source))
                });
                let new_location = SourceLocation {
                    start: (node.location.start as isize + shift) as usize,
                    end: (node.location.end as isize + shift) as usize,
                };
                return Node::new(
                    NodeKind::VariableDeclaration {
                        declarator: declarator.clone(),
                        variable: new_variable,
                        attributes: attributes.clone(),
                        initializer: new_initializer,
                    },
                    new_location,
                );
            }
            _ => {}
        }
    }
    // Unaffected (or unhandled) nodes keep their content; only positions move.
    self.clone_with_shifted_positions(node, shift)
}
/// Accumulated byte shift from all pending edits that end at or before
/// `position`. Edit coordinates are translated back through earlier shifts
/// as we go; the early `break` assumes `pending_edits` is ordered by
/// position, so later edits cannot affect `position`.
fn calculate_shift_at(&self, position: usize) -> isize {
    let mut shift = 0;
    for edit in self.pending_edits.edits() {
        let original_old_end = (edit.old_end_byte as isize - shift) as usize;
        if original_old_end <= position {
            let edit_shift = edit.byte_shift();
            shift += edit_shift;
        } else {
            // Edits are position-ordered; nothing further applies.
            break;
        }
    }
    shift
}
/// Clamps `position` to the nearest UTF-8 character boundary at or before
/// it; positions past the end of `source` clamp to `source.len()`.
#[allow(dead_code)]
fn ensure_unicode_boundary(&self, source: &str, position: usize) -> usize {
    if position >= source.len() {
        return source.len();
    }
    // Walk backwards to a boundary. Index 0 is always a char boundary, so
    // this terminates; the previous reverse-range loop carried an
    // unreachable `0` fallback that is gone here.
    let mut pos = position;
    while !source.is_char_boundary(pos) {
        pos -= 1;
    }
    pos
}
/// Applies a signed byte shift to `original_pos` (saturating at the usize
/// bounds) and snaps the result onto a valid character boundary.
#[allow(dead_code)]
fn calculate_unicode_safe_position(
    &self,
    original_pos: usize,
    shift: isize,
    source: &str,
) -> usize {
    let shifted = original_pos.saturating_add_signed(shift);
    self.ensure_unicode_boundary(source, shifted)
}
/// Read-only access to the collected metrics.
pub fn get_metrics(&self) -> &IncrementalMetrics {
    &self.metrics
}
/// Resets all collected metrics to zero.
pub fn reset_metrics(&mut self) {
    self.metrics = IncrementalMetrics::new();
}
/// Result of the most recent advanced reuse analysis, if one has run.
pub fn get_last_reuse_analysis(&self) -> Option<&ReuseAnalysisResult> {
    self.last_reuse_analysis.as_ref()
}
/// Replaces the reuse configuration and rebuilds the analyzer to match.
pub fn set_reuse_config(&mut self, config: ReuseConfig) {
    self.reuse_config = config.clone();
    self.reuse_analyzer = AdvancedReuseAnalyzer::with_config(config);
}
/// True when the last parse benefited from advanced node reuse.
pub fn used_advanced_reuse(&self) -> bool {
    self.last_reuse_analysis.as_ref().is_some_and(|analysis| analysis.reuse_percentage > 0.0)
}
/// Human-readable summary of the most recent reuse behavior.
///
/// Reports the advanced analysis when one is available; otherwise falls
/// back to the basic reused/reparsed counters. The basic path now guards
/// against dividing by zero — previously it printed "NaN%" when no parse
/// had touched any nodes yet.
pub fn get_reuse_efficiency_report(&self) -> String {
    if let Some(analysis) = &self.last_reuse_analysis {
        format!(
            "Advanced Reuse Analysis:\n Efficiency: {:.1}%\n Nodes reused: {}\n Total nodes: {}\n {}",
            analysis.reuse_percentage,
            analysis.reused_nodes,
            analysis.total_old_nodes,
            analysis.performance_summary()
        )
    } else {
        let total = self.reused_nodes + self.reparsed_nodes;
        let efficiency = if total == 0 {
            0.0
        } else {
            self.reused_nodes as f64 / total as f64 * 100.0
        };
        format!(
            "Basic Incremental Analysis:\n Efficiency: {:.1}%\n Nodes reused: {}\n Nodes reparsed: {}",
            efficiency,
            self.reused_nodes,
            self.reparsed_nodes
        )
    }
}
/// Net byte delta contributed by edits that fall entirely inside `node`'s
/// span. `shift` translates each edit's coordinates back to pre-edit space
/// as earlier edits are accounted for.
fn calculate_content_delta(&self, node: &Node) -> isize {
    let mut delta = 0;
    let mut shift = 0;
    for edit in self.pending_edits.edits() {
        let start = (edit.start_byte as isize - shift) as usize;
        let end = (edit.old_end_byte as isize - shift) as usize;
        if start >= node.location.start && end <= node.location.end {
            delta += edit.byte_shift();
        }
        shift += edit.byte_shift();
    }
    delta
}
/// True when any pending edit overlaps `node`'s span.
fn is_node_affected(&self, node: &Node) -> bool {
    let node_range = Range::from(node.location);
    self.pending_edits.affects_range(&node_range)
}
/// Deep-clones `node`, moving every position by `shift` bytes (saturating
/// at the usize bounds). Structure and values are left untouched.
fn clone_with_shifted_positions(&self, node: &Node, shift: isize) -> Node {
    // One place computes the saturating position move for both endpoints.
    let shifted = |pos: usize| -> usize {
        if shift >= 0 {
            pos.saturating_add(shift as usize)
        } else {
            pos.saturating_sub((-shift) as usize)
        }
    };
    let new_location = SourceLocation {
        start: shifted(node.location.start),
        end: shifted(node.location.end),
    };
    let new_kind = match &node.kind {
        NodeKind::Program { statements } => NodeKind::Program {
            statements: statements
                .iter()
                .map(|s| self.clone_with_shifted_positions(s, shift))
                .collect(),
        },
        NodeKind::Block { statements } => NodeKind::Block {
            statements: statements
                .iter()
                .map(|s| self.clone_with_shifted_positions(s, shift))
                .collect(),
        },
        NodeKind::VariableDeclaration { declarator, variable, attributes, initializer } => {
            NodeKind::VariableDeclaration {
                declarator: declarator.clone(),
                variable: Box::new(self.clone_with_shifted_positions(variable, shift)),
                attributes: attributes.clone(),
                initializer: initializer
                    .as_ref()
                    .map(|i| Box::new(self.clone_with_shifted_positions(i, shift))),
            }
        }
        NodeKind::Binary { op, left, right } => NodeKind::Binary {
            op: op.clone(),
            left: Box::new(self.clone_with_shifted_positions(left, shift)),
            right: Box::new(self.clone_with_shifted_positions(right, shift)),
        },
        NodeKind::Unary { op, operand } => NodeKind::Unary {
            op: op.clone(),
            operand: Box::new(self.clone_with_shifted_positions(operand, shift)),
        },
        NodeKind::FunctionCall { name, args } => NodeKind::FunctionCall {
            name: name.clone(),
            args: args.iter().map(|a| self.clone_with_shifted_positions(a, shift)).collect(),
        },
        NodeKind::If { condition, then_branch, elsif_branches, else_branch } => NodeKind::If {
            condition: Box::new(self.clone_with_shifted_positions(condition, shift)),
            then_branch: Box::new(self.clone_with_shifted_positions(then_branch, shift)),
            // Shift condition and body of each elsif pair in a single pass.
            elsif_branches: elsif_branches
                .iter()
                .map(|(c, b)| {
                    (
                        Box::new(self.clone_with_shifted_positions(c, shift)),
                        Box::new(self.clone_with_shifted_positions(b, shift)),
                    )
                })
                .collect(),
            else_branch: else_branch
                .as_ref()
                .map(|b| Box::new(self.clone_with_shifted_positions(b, shift))),
        },
        // Leaves carry no child positions; clone the kind as-is.
        _ => node.kind.clone(),
    };
    Node::new(new_kind, new_location)
}
/// Updates the public reuse counters by diffing the old and new trees.
fn count_reuse_potential(&mut self, old_root: &Node, new_root: &Node) {
    let (reused, reparsed) = self.analyze_reuse(old_root, new_root);
    self.reused_nodes = reused;
    self.reparsed_nodes = reparsed;
}
/// Walks the two trees in parallel and counts `(reused, reparsed)` nodes.
///
/// Container nodes (Program, VariableDeclaration) count themselves as
/// reused and recurse into children; note that `zip` silently ignores any
/// trailing statements when statement counts differ, and an initializer
/// present on only one side is not counted either way. Every other kind is
/// judged by `nodes_match`. (The previous explicit Number and Variable arms
/// duplicated `nodes_match` exactly and were folded into the catch-all.)
fn analyze_reuse(&self, old_node: &Node, new_node: &Node) -> (usize, usize) {
    match (&old_node.kind, &new_node.kind) {
        (
            NodeKind::Program { statements: old_stmts },
            NodeKind::Program { statements: new_stmts },
        ) => {
            let mut reused = 1;
            let mut reparsed = 0;
            for (old_stmt, new_stmt) in old_stmts.iter().zip(new_stmts.iter()) {
                let (r, p) = self.analyze_reuse(old_stmt, new_stmt);
                reused += r;
                reparsed += p;
            }
            (reused, reparsed)
        }
        (
            NodeKind::VariableDeclaration { variable: old_var, initializer: old_init, .. },
            NodeKind::VariableDeclaration { variable: new_var, initializer: new_init, .. },
        ) => {
            let mut reused = 1;
            let mut reparsed = 0;
            let (r, p) = self.analyze_reuse(old_var, new_var);
            reused += r;
            reparsed += p;
            if let (Some(old_i), Some(new_i)) = (old_init, new_init) {
                let (r, p) = self.analyze_reuse(old_i, new_i);
                reused += r;
                reparsed += p;
            }
            (reused, reparsed)
        }
        _ => {
            if self.nodes_match(old_node, new_node) {
                (1, 0)
            } else {
                (0, 1)
            }
        }
    }
}
/// Shallow equivalence check used by `analyze_reuse`: value leaves compare
/// by content, containers compare by kind and child count only (children
/// are handled by the caller's recursion, if at all), and unlisted kinds
/// match when their enum discriminants agree.
fn nodes_match(&self, node1: &Node, node2: &Node) -> bool {
    match (&node1.kind, &node2.kind) {
        (NodeKind::Number { value: v1 }, NodeKind::Number { value: v2 }) => v1 == v2,
        (
            NodeKind::String { value: v1, interpolated: i1 },
            NodeKind::String { value: v2, interpolated: i2 },
        ) => v1 == v2 && i1 == i2,
        (
            NodeKind::Variable { sigil: s1, name: n1 },
            NodeKind::Variable { sigil: s2, name: n2 },
        ) => s1 == s2 && n1 == n2,
        (NodeKind::Identifier { name: n1 }, NodeKind::Identifier { name: n2 }) => n1 == n2,
        // Operator nodes match on operator alone; operands are not compared here.
        (NodeKind::Binary { op: op1, .. }, NodeKind::Binary { op: op2, .. }) => op1 == op2,
        (NodeKind::Unary { op: op1, .. }, NodeKind::Unary { op: op2, .. }) => op1 == op2,
        (
            NodeKind::FunctionCall { name: n1, args: args1 },
            NodeKind::FunctionCall { name: n2, args: args2 },
        ) => n1 == n2 && args1.len() == args2.len(),
        (
            NodeKind::VariableDeclaration { declarator: d1, .. },
            NodeKind::VariableDeclaration { declarator: d2, .. },
        ) => d1 == d2,
        (NodeKind::ArrayLiteral { elements: e1 }, NodeKind::ArrayLiteral { elements: e2 }) => {
            e1.len() == e2.len()
        }
        (NodeKind::HashLiteral { pairs: p1 }, NodeKind::HashLiteral { pairs: p2 }) => {
            p1.len() == p2.len()
        }
        (NodeKind::Block { statements: s1 }, NodeKind::Block { statements: s2 }) => {
            s1.len() == s2.len()
        }
        (NodeKind::Program { statements: s1 }, NodeKind::Program { statements: s2 }) => {
            s1.len() == s2.len()
        }
        // Control-flow nodes match on kind alone.
        (NodeKind::If { .. }, NodeKind::If { .. }) => true,
        (NodeKind::While { .. }, NodeKind::While { .. }) => true,
        (NodeKind::For { .. }, NodeKind::For { .. }) => true,
        (NodeKind::Foreach { .. }, NodeKind::Foreach { .. }) => true,
        (NodeKind::Subroutine { name: n1, .. }, NodeKind::Subroutine { name: n2, .. }) => {
            n1 == n2
        }
        (NodeKind::Package { name: n1, .. }, NodeKind::Package { name: n2, .. }) => n1 == n2,
        (NodeKind::Use { module: m1, .. }, NodeKind::Use { module: m2, .. }) => m1 == m2,
        // Fallback: same variant counts as a match regardless of payload.
        (kind1, kind2) => std::mem::discriminant(kind1) == std::mem::discriminant(kind2),
    }
}
/// Total number of nodes in the subtree rooted at `node` (inclusive).
fn count_nodes(&self, node: &Node) -> usize {
    let children: usize = match &node.kind {
        NodeKind::Program { statements } | NodeKind::Block { statements } => {
            statements.iter().map(|stmt| self.count_nodes(stmt)).sum()
        }
        NodeKind::VariableDeclaration { variable, initializer, .. } => {
            self.count_nodes(variable)
                + initializer.as_ref().map_or(0, |init| self.count_nodes(init))
        }
        NodeKind::Binary { left, right, .. } => {
            self.count_nodes(left) + self.count_nodes(right)
        }
        NodeKind::Unary { operand, .. } => self.count_nodes(operand),
        NodeKind::FunctionCall { args, .. } => {
            args.iter().map(|arg| self.count_nodes(arg)).sum()
        }
        NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
            self.count_nodes(condition)
                + self.count_nodes(then_branch)
                + elsif_branches
                    .iter()
                    .map(|(cond, branch)| self.count_nodes(cond) + self.count_nodes(branch))
                    .sum::<usize>()
                + else_branch.as_ref().map_or(0, |branch| self.count_nodes(branch))
        }
        // Leaves contribute no children.
        _ => 0,
    };
    1 + children
}
}
impl Default for IncrementalParserV2 {
    /// Equivalent to `IncrementalParserV2::new()`.
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::position::Position;
use std::time::Instant;
fn adaptive_perf_budget_micros(base_budget_micros: u128) -> u128 {
let thread_count = std::env::var("RUST_TEST_THREADS")
.ok()
.and_then(|value| value.parse::<usize>().ok())
.unwrap_or_else(|| {
std::thread::available_parallelism().map_or(8, std::num::NonZeroUsize::get)
});
let mut budget = base_budget_micros;
if thread_count <= 2 {
budget = budget.saturating_mul(2);
} else if thread_count <= 4 {
budget = budget.saturating_mul(3) / 2;
}
if std::env::var("CI").is_ok() {
budget = budget.saturating_mul(3) / 2;
}
budget
}
#[test]
fn test_basic_compilation() {
let parser = IncrementalParserV2::new();
assert_eq!(parser.reused_nodes, 0);
assert_eq!(parser.reparsed_nodes, 0);
}
#[test]
fn test_performance_timing_detailed() -> ParseResult<()> {
let mut parser = IncrementalParserV2::new();
let source1 = "my $x = 42;";
let start = Instant::now();
let _tree1 = parser.parse(source1)?;
let initial_parse_time = start.elapsed();
println!("Initial parse time: {:?}", initial_parse_time);
println!("Initial nodes reparsed: {}", parser.reparsed_nodes);
parser.edit(Edit::new(
8,
10,
12, Position::new(8, 1, 9),
Position::new(10, 1, 11),
Position::new(12, 1, 13),
));
let source2 = "my $x = 4242;";
let start = Instant::now();
let _tree2 = parser.parse(source2)?;
let incremental_parse_time = start.elapsed();
println!("Incremental parse time: {:?}", incremental_parse_time);
println!(
"Incremental nodes reused: {}, reparsed: {}",
parser.reused_nodes, parser.reparsed_nodes
);
assert!(
incremental_parse_time.as_micros() < 5000,
"Incremental parse time should be <5ms, got {:?}",
incremental_parse_time
);
assert!(parser.reused_nodes >= 3, "Should reuse at least 3 nodes");
assert_eq!(parser.reparsed_nodes, 1, "Should only reparse the changed Number node");
let speedup =
initial_parse_time.as_nanos() as f64 / incremental_parse_time.as_nanos() as f64;
println!("Performance improvement: {:.2}x faster", speedup);
if speedup >= 1.5 {
println!("✅ Good speedup achieved: {:.2}x", speedup);
} else {
println!("⚠️ Limited speedup for micro-benchmark (expected for tiny examples)");
}
Ok(())
}
#[test]
fn test_incremental_value_change() -> ParseResult<()> {
let mut parser = IncrementalParserV2::new();
let source1 = "my $x = 42;";
let start = Instant::now();
let _tree1 = parser.parse(source1)?;
let initial_time = start.elapsed();
assert_eq!(parser.reparsed_nodes, 4); println!(
"Initial parse: {}µs, {} nodes parsed",
initial_time.as_micros(),
parser.reparsed_nodes
);
parser.edit(Edit::new(
8,
10,
12, Position::new(8, 1, 9),
Position::new(10, 1, 11),
Position::new(12, 1, 13),
));
let source2 = "my $x = 4242;";
let start = Instant::now();
let tree2 = parser.parse(source2)?;
let incremental_time = start.elapsed();
println!(
"Incremental parse: {}µs, reused_nodes = {}, reparsed_nodes = {}",
incremental_time.as_micros(),
parser.reused_nodes,
parser.reparsed_nodes
);
assert_eq!(parser.reused_nodes, 3); assert_eq!(parser.reparsed_nodes, 1);
assert!(incremental_time.as_micros() < 500, "Incremental update should be <500µs");
let efficiency =
parser.reused_nodes as f64 / (parser.reused_nodes + parser.reparsed_nodes) as f64;
assert!(
efficiency >= 0.75,
"Node reuse efficiency should be ≥75%, got {:.1}%",
efficiency * 100.0
);
if let NodeKind::Program { statements } = &tree2.kind {
if let NodeKind::VariableDeclaration { initializer: Some(init), .. } =
&statements[0].kind
{
if let NodeKind::Number { value } = &init.kind {
assert_eq!(value, "4242");
}
}
}
Ok(())
}
#[test]
fn test_multiple_value_changes() -> ParseResult<()> {
let mut parser = IncrementalParserV2::new();
let source1 = "my $x = 10;\nmy $y = 20;";
let start = Instant::now();
parser.parse(source1)?;
let initial_time = start.elapsed();
let initial_nodes = parser.reparsed_nodes;
println!(
"Initial parse (multi-statement): {}µs, {} nodes",
initial_time.as_micros(),
initial_nodes
);
parser.edit(Edit::new(
8,
10,
11, Position::new(8, 1, 9),
Position::new(10, 1, 11),
Position::new(11, 1, 12),
));
parser.edit(Edit::new(
21,
23,
24, Position::new(21, 2, 9),
Position::new(23, 2, 11),
Position::new(24, 2, 12),
));
let source2 = "my $x = 100;\nmy $y = 200;";
let start = Instant::now();
let tree = parser.parse(source2)?;
let incremental_time = start.elapsed();
println!(
"Multiple edits: {}µs, reused_nodes = {}, reparsed_nodes = {}",
incremental_time.as_micros(),
parser.reused_nodes,
parser.reparsed_nodes
);
assert!(
parser.reused_nodes >= 5,
"Should reuse at least 5 nodes, got {}",
parser.reused_nodes
);
assert!(
parser.reparsed_nodes >= 1,
"Should reparse at least 1 node, got {}",
parser.reparsed_nodes
);
assert!(incremental_time.as_micros() < 5000, "Multiple edits should be <5ms");
let total_nodes = parser.reused_nodes + parser.reparsed_nodes;
let reuse_ratio = parser.reused_nodes as f64 / total_nodes as f64;
assert!(
reuse_ratio >= 0.7,
"Multi-edit reuse ratio should be ≥70%, got {:.1}%",
reuse_ratio * 100.0
);
if let NodeKind::Program { statements } = &tree.kind {
if let NodeKind::VariableDeclaration { initializer: Some(init), .. } =
&statements[0].kind
{
if let NodeKind::Number { value } = &init.kind {
assert_eq!(value, "100");
}
}
if let NodeKind::VariableDeclaration { initializer: Some(init), .. } =
&statements[1].kind
{
if let NodeKind::Number { value } = &init.kind {
assert_eq!(value, "200");
}
}
}
Ok(())
}
#[test]
fn test_too_many_edits_fallback() -> ParseResult<()> {
let mut parser = IncrementalParserV2::new();
let source1 = "my $x = 1;";
parser.parse(source1)?;
for i in 0..15 {
parser.edit(Edit::new(
8 + i,
9 + i,
10 + i,
Position::new(8 + i, 1, (9 + i) as u32),
Position::new(9 + i, 1, (10 + i) as u32),
Position::new(10 + i, 1, (11 + i) as u32),
));
}
let source2 = "my $x = 123456789012345;";
let tree = parser.parse(source2)?;
assert!(parser.reparsed_nodes > 0, "Should reparse some nodes");
if let NodeKind::Program { statements } = &tree.kind {
assert_eq!(statements.len(), 1);
}
Ok(())
}
#[test]
fn test_invalid_edit_bounds() -> ParseResult<()> {
let mut parser = IncrementalParserV2::new();
let source1 = "my $x = 42;";
parser.parse(source1)?;
parser.edit(Edit::new(
8,
12, 13,
Position::new(8, 1, 9),
Position::new(12, 1, 13),
Position::new(13, 1, 14),
));
let source2 = "my $x = 123;";
let tree = parser.parse(source2)?;
assert!(parser.reparsed_nodes > 0, "Should reparse some nodes");
if let NodeKind::Program { statements } = &tree.kind {
if let NodeKind::VariableDeclaration { initializer: Some(init), .. } =
&statements[0].kind
{
if let NodeKind::Number { value } = &init.kind {
assert_eq!(value, "123");
}
}
}
Ok(())
}
#[test]
fn test_string_edit() -> ParseResult<()> {
let mut parser = IncrementalParserV2::new();
let source1 = "my $name = \"hello\";";
parser.parse(source1)?;
parser.edit(Edit::new(
12,
17, 17,
Position::new(12, 1, 13),
Position::new(17, 1, 18),
Position::new(17, 1, 18),
));
let source2 = "my $name = \"world\";";
let tree = parser.parse(source2)?;
println!(
"DEBUG test_string_edit: reused_nodes = {}, reparsed_nodes = {}",
parser.reused_nodes, parser.reparsed_nodes
);
assert_eq!(parser.reused_nodes, 3); assert_eq!(parser.reparsed_nodes, 1);
if let NodeKind::Program { statements } = &tree.kind {
if let NodeKind::VariableDeclaration { initializer: Some(init), .. } =
&statements[0].kind
{
if let NodeKind::String { value, .. } = &init.kind {
assert_eq!(value, "\"world\"");
}
}
}
Ok(())
}
#[test]
fn test_empty_source_handling() -> ParseResult<()> {
let mut parser = IncrementalParserV2::new();
let source1 = "my $x = 42;";
let start = Instant::now();
parser.parse(source1)?;
let initial_time = start.elapsed();
println!("Initial parse time: {}µs", initial_time.as_micros());
parser.edit(Edit::new(
8,
10,
11,
Position::new(8, 1, 9),
Position::new(10, 1, 11),
Position::new(11, 1, 12),
));
let source2 = "";
let start = Instant::now();
let result = parser.parse(source2);
let parse_time = start.elapsed();
println!("Empty source parse time: {}µs", parse_time.as_micros());
match result {
Ok(_) => {
assert_eq!(parser.reused_nodes, 0);
println!("Empty source parsing succeeded with fallback");
}
Err(_) => {
assert_eq!(parser.reused_nodes, 0);
println!("Empty source parsing failed gracefully (expected)");
}
}
assert!(parse_time.as_millis() < 100, "Empty source handling should be fast");
Ok(())
}
#[test]
// Edits a number nested three levels deep (if-block -> hash literal -> value)
// and checks the incremental pass finishes within a coarse 10 ms budget.
// Node reuse is reported but not required: a full reparse is accepted for
// this deeply nested shape.
fn test_complex_nested_structure_edits() -> ParseResult<()> {
let mut parser = IncrementalParserV2::new();
let source1 = r#"
if ($condition) {
my $nested = {
key1 => "value1",
key2 => 42,
key3 => [1, 2, 3]
};
process($nested);
}
"#;
let start = Instant::now();
parser.parse(source1)?;
let initial_time = start.elapsed();
let initial_nodes = parser.reparsed_nodes;
println!(
"Complex structure initial parse: {}µs, {} nodes",
initial_time.as_micros(),
initial_nodes
);
// Byte offset of the literal "42" inside the hash; "42" -> "9999" grows the
// span by two bytes, hence old_end = +2 and new_end = +4.
let value_start = source1.find("42").ok_or(crate::error::ParseError::UnexpectedEof)?;
// NOTE(review): the line/column fields below are placeholders (line 1),
// even though "42" sits on line 5 of the snippet — confirm whether Position
// line/column are actually consumed by the reuse analysis.
parser.edit(Edit::new(
value_start,
value_start + 2,
value_start + 4, Position::new(value_start, 1, 1),
Position::new(value_start + 2, 1, 3),
Position::new(value_start + 4, 1, 5),
));
let source2 = source1.replace("42", "9999");
let start = Instant::now();
let _tree = parser.parse(&source2)?;
let incremental_time = start.elapsed();
println!(
"Complex nested edit: {}µs, reused={}, reparsed={}",
incremental_time.as_micros(),
parser.reused_nodes,
parser.reparsed_nodes
);
assert!(incremental_time.as_millis() < 10, "Complex nested edit should be <10ms");
// Reuse is nice-to-have here, not asserted.
if parser.reused_nodes > 0 {
println!("Successfully reused {} nodes in complex structure", parser.reused_nodes);
} else {
println!("Complex structure caused full reparse (acceptable for edge cases)");
}
Ok(())
}
#[test]
// Builds a 100-declaration document, edits a single value in the middle, and
// verifies the incremental pass is fast and (when reuse happens at all)
// reuses more than half of the nodes.
fn test_large_document_performance() -> ParseResult<()> {
    let mut parser = IncrementalParserV2::new();
    let mut large_source = String::new();
    for i in 0..100 {
        large_source.push_str(&format!("my $var{} = {};\n", i, i * 10));
    }
    let start = Instant::now();
    parser.parse(&large_source)?;
    let initial_time = start.elapsed();
    let initial_nodes = parser.reparsed_nodes;
    println!(
        "Large document initial parse: {}ms, {} nodes",
        initial_time.as_millis(),
        initial_nodes
    );
    // Byte offset of the literal "500" inside "my $var50 = 500;".
    // "my $var50 = " is 12 bytes, so the value starts at +12. (The previous
    // +13 pointed one byte past the value start, describing the span "00;"
    // rather than "500".)
    let edit_pos =
        large_source.find("my $var50 = 500").ok_or(crate::error::ParseError::UnexpectedEof)?
            + 12;
    // "500" -> "999" replaces 3 bytes with 3 bytes, so old_end == new_end.
    // NOTE(review): the line/column fields are placeholders (line 1), not
    // the real coordinates of line 51 — confirm Position semantics.
    parser.edit(Edit::new(
        edit_pos,
        edit_pos + 3,
        edit_pos + 3,
        Position::new(edit_pos, 1, 1),
        Position::new(edit_pos + 3, 1, 4),
        Position::new(edit_pos + 3, 1, 4),
    ));
    let source2 = large_source.replace("500", "999");
    let start = Instant::now();
    let _tree = parser.parse(&source2)?;
    let incremental_time = start.elapsed();
    println!(
        "Large document incremental: {}ms, reused={}, reparsed={}",
        incremental_time.as_millis(),
        parser.reused_nodes,
        parser.reparsed_nodes
    );
    assert!(incremental_time.as_millis() < 50, "Large document incremental should be <50ms");
    if parser.reused_nodes > 0 {
        let reuse_percentage = parser.reused_nodes as f64
            / (parser.reused_nodes + parser.reparsed_nodes) as f64
            * 100.0;
        println!("Large document reuse rate: {:.1}%", reuse_percentage);
        assert!(reuse_percentage > 50.0, "Large document should reuse >50% of nodes");
    }
    Ok(())
}
#[test]
// Multi-byte stress test: emoji and CJK text in identifiers, strings, and
// comments. Replaces a 12-byte CJK string with a 6-byte one and checks the
// incremental pass stays within the adaptive budget.
fn test_unicode_heavy_incremental_parsing() -> ParseResult<()> {
    let mut parser = IncrementalParserV2::new();
    let source1 = "my $🌟variable = '你好世界'; # Comment with emoji 🚀\nmy $café = 'résumé';";
    let initial_timer = Instant::now();
    parser.parse(source1)?;
    let initial_time = initial_timer.elapsed();
    println!("Unicode document initial parse: {}µs", initial_time.as_micros());
    // All arithmetic here is in bytes: "你好世界" is 12 bytes, "再见" is 6,
    // so the edit shrinks the span.
    let old_start = source1.find("你好世界").ok_or(crate::error::ParseError::UnexpectedEof)?;
    let old_end = old_start + "你好世界".len();
    let new_end = old_start + "再见".len();
    parser.edit(Edit::new(
        old_start,
        old_end,
        new_end,
        Position::new(old_start, 1, 1),
        Position::new(old_end, 1, 2),
        Position::new(new_end, 1, 2),
    ));
    let source2 = source1.replace("你好世界", "再见");
    let edit_timer = Instant::now();
    let _tree = parser.parse(&source2)?;
    let incremental_time = edit_timer.elapsed();
    println!(
        "Unicode incremental edit: {}µs, reused={}, reparsed={}",
        incremental_time.as_micros(),
        parser.reused_nodes,
        parser.reparsed_nodes
    );
    let unicode_budget_micros = adaptive_perf_budget_micros(5_000);
    assert!(
        incremental_time.as_micros() < unicode_budget_micros,
        "Unicode incremental parsing should be <{}µs (got {}µs)",
        unicode_budget_micros,
        incremental_time.as_micros()
    );
    assert!(parser.reused_nodes > 0 || parser.reparsed_nodes > 0, "Should parse successfully");
    Ok(())
}
#[test]
// Edit that starts on the last byte of a number literal, right at the
// boundary between AST nodes, and grows the literal.
fn test_edit_near_ast_node_boundaries() -> ParseResult<()> {
    let mut parser = IncrementalParserV2::new();
    let source1 = "sub func { my $x = 123; return $x * 2; }";
    parser.parse(source1)?;
    let number_end = source1.find("123").ok_or(crate::error::ParseError::UnexpectedEof)? + 3;
    // "123" -> "12456": the final byte "3" (old span of 1 byte at
    // number_end - 1 .. number_end) becomes "456" (3 bytes), so the new span
    // ends at number_end + 2. (The previous new_end of number_end + 1
    // understated the inserted text by one byte.)
    // NOTE(review): the line/column fields are placeholder values, as in the
    // other tests — confirm Position semantics.
    parser.edit(Edit::new(
        number_end - 1,
        number_end,
        number_end + 2,
        Position::new(number_end - 1, 1, 1),
        Position::new(number_end, 1, 2),
        Position::new(number_end + 2, 1, 4),
    ));
    let source2 = source1.replace("123", "12456");
    let start = Instant::now();
    let _tree = parser.parse(&source2)?;
    let boundary_edit_time = start.elapsed();
    println!(
        "Boundary edit time: {}µs, reused={}, reparsed={}",
        boundary_edit_time.as_micros(),
        parser.reused_nodes,
        parser.reparsed_nodes
    );
    let boundary_budget_micros = adaptive_perf_budget_micros(5_000);
    assert!(
        boundary_edit_time.as_micros() < boundary_budget_micros,
        "AST boundary edit should be <{}µs (got {}µs)",
        boundary_budget_micros,
        boundary_edit_time.as_micros()
    );
    assert!(parser.reparsed_nodes >= 1, "Should reparse at least the modified node");
    Ok(())
}
#[test]
// Toggles the value of $baseline across ten runs and asserts that every
// incremental parse stays under 10 ms and that run-to-run variation stays
// within a 10x band (with a warning above 5x, which may just be system load).
fn test_performance_regression_detection() -> ParseResult<()> {
    let mut parser = IncrementalParserV2::new();
    let source = "my $baseline = 42; my $test = 'hello';";
    let mut parse_times = Vec::new();
    for i in 0..10 {
        let start = Instant::now();
        parser.parse(source)?;
        let time = start.elapsed();
        parse_times.push(time.as_micros());
        // Bytes 15..17 hold the literal "42"; both replacement values
        // ("99" and "42") are also 2 bytes, so the new span ends at 17 too.
        // (The previous new_end of 19 overstated the replacement by two
        // bytes, mis-describing the edit.)
        parser.edit(Edit::new(
            15,
            17,
            17,
            Position::new(15, 1, 16),
            Position::new(17, 1, 18),
            Position::new(17, 1, 18),
        ));
        // Alternate between a changed and an unchanged document.
        let test_source = if i % 2 == 0 {
            "my $baseline = 99; my $test = 'hello';"
        } else {
            "my $baseline = 42; my $test = 'hello';"
        };
        let start = Instant::now();
        parser.parse(test_source)?;
        let incremental_time = start.elapsed();
        println!(
            "Run {}: initial={}µs, incremental={}µs, reused={}, reparsed={}",
            i + 1,
            time.as_micros(),
            incremental_time.as_micros(),
            parser.reused_nodes,
            parser.reparsed_nodes
        );
        assert!(
            incremental_time.as_millis() < 10,
            "Run {} performance regression detected: {}ms",
            i + 1,
            incremental_time.as_millis()
        );
    }
    let avg_time = parse_times.iter().sum::<u128>() / parse_times.len() as u128;
    let max_time = *parse_times.iter().max().ok_or(crate::error::ParseError::UnexpectedEof)?;
    let min_time = *parse_times.iter().min().ok_or(crate::error::ParseError::UnexpectedEof)?;
    println!(
        "Performance statistics: avg={}µs, min={}µs, max={}µs",
        avg_time, min_time, max_time
    );
    let variation_factor = max_time as f64 / avg_time as f64;
    assert!(
        variation_factor <= 10.0,
        "Extreme performance inconsistency detected: max={}µs, avg={}µs ({}x variation)",
        max_time,
        avg_time,
        variation_factor
    );
    if variation_factor > 5.0 {
        println!(
            "⚠️ High performance variation detected: max={}µs, avg={}µs ({}x variation) - may indicate system load",
            max_time, avg_time, variation_factor
        );
    }
    Ok(())
}
}