use perl_parser_core::{
ast::{Node, NodeKind},
edit::EditSet,
position::{Position, Range},
};
use std::collections::{HashMap, HashSet};
use std::hash::{DefaultHasher, Hash, Hasher};
#[derive(Debug)]
pub struct AdvancedReuseAnalyzer {
node_hashes: HashMap<usize, u64>,
position_map: HashMap<usize, Vec<NodeCandidate>>,
affected_nodes: HashSet<usize>,
pub analysis_stats: ReuseAnalysisStats,
}
#[derive(Debug, Default, Clone)]
pub struct ReuseAnalysisStats {
pub nodes_analyzed: usize,
pub structural_matches: usize,
pub content_matches: usize,
pub position_adjustments: usize,
pub reuse_candidates_found: usize,
pub validation_passes: usize,
pub validation_failures: usize,
}
#[derive(Debug, Clone)]
#[allow(dead_code)] struct NodeCandidate {
node: Node,
structural_hash: u64,
confidence_score: f64,
position_delta: isize,
reuse_type: ReuseType,
}
#[derive(Debug, Clone, PartialEq)]
#[allow(dead_code)] pub enum ReuseType {
Direct,
PositionShift,
ContentUpdate,
StructuralEquivalent,
}
#[derive(Debug, Clone)]
pub struct ReuseConfig {
pub min_confidence: f64,
pub max_position_shift: usize,
pub aggressive_structural_matching: bool,
pub enable_content_reuse: bool,
pub max_analysis_depth: usize,
}
impl Default for ReuseConfig {
fn default() -> Self {
ReuseConfig {
min_confidence: 0.75,
max_position_shift: 1000,
aggressive_structural_matching: true,
enable_content_reuse: true,
max_analysis_depth: 10,
}
}
}
impl Default for AdvancedReuseAnalyzer {
fn default() -> Self {
Self::new()
}
}
impl AdvancedReuseAnalyzer {
pub fn new() -> Self {
AdvancedReuseAnalyzer {
node_hashes: HashMap::new(),
position_map: HashMap::new(),
affected_nodes: HashSet::new(),
analysis_stats: ReuseAnalysisStats::default(),
}
}
pub fn with_config(_config: ReuseConfig) -> Self {
Self::new()
}
pub fn analyze_reuse_opportunities(
&mut self,
old_tree: &Node,
new_tree: &Node,
edits: &EditSet,
config: &ReuseConfig,
) -> ReuseAnalysisResult {
self.analysis_stats = ReuseAnalysisStats::default();
self.node_hashes.clear();
self.position_map.clear();
self.affected_nodes.clear();
let old_analysis = self.build_tree_analysis(old_tree, config);
let new_analysis = self.build_tree_analysis(new_tree, config);
self.identify_affected_nodes(old_tree, edits);
let mut reuse_map = HashMap::new();
self.find_direct_structural_matches(&old_analysis, &new_analysis, &mut reuse_map, config);
self.find_position_shifted_matches(&old_analysis, &new_analysis, &mut reuse_map, config);
if config.enable_content_reuse {
self.find_content_updated_matches(&old_analysis, &new_analysis, &mut reuse_map, config);
}
if config.aggressive_structural_matching {
self.find_aggressive_structural_matches(
&old_analysis,
&new_analysis,
&mut reuse_map,
config,
);
}
let validated_reuse_map =
self.validate_reuse_candidates(reuse_map, old_tree, new_tree, config);
let total_old_nodes = self.count_nodes(old_tree);
let total_new_nodes = self.count_nodes(new_tree);
let reused_nodes = validated_reuse_map.len();
let reuse_percentage = if total_old_nodes > 0 {
(reused_nodes as f64 / total_old_nodes as f64) * 100.0
} else {
0.0
};
ReuseAnalysisResult {
reuse_map: validated_reuse_map,
total_old_nodes,
total_new_nodes,
reused_nodes,
reuse_percentage,
analysis_stats: self.analysis_stats.clone(),
}
}
fn build_tree_analysis(&mut self, tree: &Node, config: &ReuseConfig) -> TreeAnalysis {
let mut analysis = TreeAnalysis::new();
self.analyze_node_recursive(tree, &mut analysis, 0, config);
analysis
}
fn analyze_node_recursive(
&mut self,
node: &Node,
analysis: &mut TreeAnalysis,
depth: usize,
config: &ReuseConfig,
) {
if depth > config.max_analysis_depth {
return;
}
self.analysis_stats.nodes_analyzed += 1;
let structural_hash = self.calculate_structural_hash(node);
self.node_hashes.insert(node.location.start, structural_hash);
let node_info = NodeAnalysisInfo {
node: node.clone(),
structural_hash,
depth,
children_count: self.get_children_count(node),
content_hash: self.calculate_content_hash(node),
};
analysis.add_node_info(node.location.start, node_info);
let candidate = NodeCandidate {
node: node.clone(),
structural_hash,
confidence_score: 1.0, position_delta: 0,
reuse_type: ReuseType::Direct,
};
self.position_map.entry(node.location.start).or_default().push(candidate);
match &node.kind {
NodeKind::Program { statements } | NodeKind::Block { statements } => {
for stmt in statements {
self.analyze_node_recursive(stmt, analysis, depth + 1, config);
}
}
NodeKind::VariableDeclaration { variable, initializer, .. } => {
self.analyze_node_recursive(variable, analysis, depth + 1, config);
if let Some(init) = initializer {
self.analyze_node_recursive(init, analysis, depth + 1, config);
}
}
NodeKind::Binary { left, right, .. } => {
self.analyze_node_recursive(left, analysis, depth + 1, config);
self.analyze_node_recursive(right, analysis, depth + 1, config);
}
NodeKind::Unary { operand, .. } => {
self.analyze_node_recursive(operand, analysis, depth + 1, config);
}
NodeKind::FunctionCall { args, .. } => {
for arg in args {
self.analyze_node_recursive(arg, analysis, depth + 1, config);
}
}
NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
self.analyze_node_recursive(condition, analysis, depth + 1, config);
self.analyze_node_recursive(then_branch, analysis, depth + 1, config);
for (cond, branch) in elsif_branches {
self.analyze_node_recursive(cond, analysis, depth + 1, config);
self.analyze_node_recursive(branch, analysis, depth + 1, config);
}
if let Some(branch) = else_branch {
self.analyze_node_recursive(branch, analysis, depth + 1, config);
}
}
_ => {} }
}
fn identify_affected_nodes(&mut self, tree: &Node, edits: &EditSet) {
for range in edits.coalesced_affected_ranges() {
self.mark_affected_nodes_in_range(tree, range.start.byte, range.end.byte);
}
}
fn mark_affected_nodes_in_range(&mut self, node: &Node, start: usize, end: usize) {
let node_range = Range::from(node.location);
let edit_range = Range::new(Position::new(start, 0, 0), Position::new(end, 0, 0));
if !node_range.overlaps(&edit_range) {
return;
}
self.affected_nodes.insert(node.location.start);
match &node.kind {
NodeKind::Program { statements } | NodeKind::Block { statements } => {
for stmt in statements {
self.mark_affected_nodes_in_range(stmt, start, end);
}
}
NodeKind::VariableDeclaration { variable, initializer, .. } => {
self.mark_affected_nodes_in_range(variable, start, end);
if let Some(init) = initializer {
self.mark_affected_nodes_in_range(init, start, end);
}
}
NodeKind::Binary { left, right, .. } => {
self.mark_affected_nodes_in_range(left, start, end);
self.mark_affected_nodes_in_range(right, start, end);
}
_ => {} }
}
fn find_direct_structural_matches(
&mut self,
old_analysis: &TreeAnalysis,
new_analysis: &TreeAnalysis,
reuse_map: &mut HashMap<usize, ReuseStrategy>,
config: &ReuseConfig,
) {
let mut used_target_positions: HashSet<usize> =
reuse_map.values().map(|strategy| strategy.target_position).collect();
for (old_pos, old_info) in &old_analysis.node_info {
if self.affected_nodes.contains(old_pos) {
continue;
}
for (new_pos, new_info) in &new_analysis.node_info {
if used_target_positions.contains(new_pos) {
continue;
}
if old_info.structural_hash == new_info.structural_hash
&& old_info.children_count == new_info.children_count
{
let confidence = self.calculate_match_confidence(old_info, new_info);
if confidence >= config.min_confidence {
reuse_map.insert(
*old_pos,
ReuseStrategy {
target_position: *new_pos,
reuse_type: ReuseType::Direct,
confidence_score: confidence,
position_adjustment: (*new_pos as isize) - (*old_pos as isize),
},
);
used_target_positions.insert(*new_pos);
self.analysis_stats.structural_matches += 1;
break; }
}
}
}
}
fn find_position_shifted_matches(
&mut self,
old_analysis: &TreeAnalysis,
new_analysis: &TreeAnalysis,
reuse_map: &mut HashMap<usize, ReuseStrategy>,
config: &ReuseConfig,
) {
let mut used_target_positions: HashSet<usize> =
reuse_map.values().map(|strategy| strategy.target_position).collect();
for (old_pos, old_info) in &old_analysis.node_info {
if reuse_map.contains_key(old_pos) {
continue;
}
let mut best_match: Option<(usize, f64)> = None;
for (new_pos, new_info) in &new_analysis.node_info {
if used_target_positions.contains(new_pos) {
continue;
}
if old_info.content_hash == new_info.content_hash
&& old_info.structural_hash == new_info.structural_hash
{
let position_shift = (*new_pos as isize - *old_pos as isize).unsigned_abs();
if !self.is_position_shift_candidate_safe(
old_info,
new_info,
position_shift,
config,
) {
continue;
}
let confidence = self.calculate_shifted_match_confidence(
old_info,
new_info,
position_shift,
config,
);
if confidence >= config.min_confidence
&& best_match
.as_ref()
.is_none_or(|&(_, best_score)| confidence > best_score)
{
best_match = Some((*new_pos, confidence));
}
}
}
if let Some((best_pos, confidence)) = best_match {
reuse_map.insert(
*old_pos,
ReuseStrategy {
target_position: best_pos,
reuse_type: ReuseType::PositionShift,
confidence_score: confidence,
position_adjustment: (best_pos as isize) - (*old_pos as isize),
},
);
used_target_positions.insert(best_pos);
self.analysis_stats.position_adjustments += 1;
}
}
}
fn find_content_updated_matches(
&mut self,
old_analysis: &TreeAnalysis,
new_analysis: &TreeAnalysis,
reuse_map: &mut HashMap<usize, ReuseStrategy>,
config: &ReuseConfig,
) {
for (old_pos, old_info) in &old_analysis.node_info {
if reuse_map.contains_key(old_pos) {
continue;
}
if old_info.children_count == 0 {
for (new_pos, new_info) in &new_analysis.node_info {
if old_info.structural_hash == new_info.structural_hash
&& old_info.content_hash != new_info.content_hash
&& self.are_compatible_for_content_update(&old_info.node, &new_info.node)
{
let confidence = 0.8; if confidence >= config.min_confidence {
reuse_map.insert(
*old_pos,
ReuseStrategy {
target_position: *new_pos,
reuse_type: ReuseType::ContentUpdate,
confidence_score: confidence,
position_adjustment: (*new_pos as isize) - (*old_pos as isize),
},
);
self.analysis_stats.content_matches += 1;
break;
}
}
}
}
}
}
fn find_aggressive_structural_matches(
&mut self,
old_analysis: &TreeAnalysis,
new_analysis: &TreeAnalysis,
reuse_map: &mut HashMap<usize, ReuseStrategy>,
config: &ReuseConfig,
) {
for (old_pos, old_info) in &old_analysis.node_info {
if reuse_map.contains_key(old_pos) {
continue;
}
let mut best_match: Option<(usize, f64)> = None;
for (new_pos, new_info) in &new_analysis.node_info {
if old_info.children_count > 0
&& self.get_children_count(&old_info.node)
!= self.get_children_count(&new_info.node)
{
continue;
}
let similarity =
self.calculate_structural_similarity(&old_info.node, &new_info.node);
if similarity >= config.min_confidence * 0.8 {
if best_match.as_ref().is_none_or(|&(_, s)| similarity > s) {
best_match = Some((*new_pos, similarity));
}
}
}
if let Some((best_pos, confidence)) = best_match {
if confidence >= config.min_confidence * 0.7 {
reuse_map.insert(
*old_pos,
ReuseStrategy {
target_position: best_pos,
reuse_type: ReuseType::StructuralEquivalent,
confidence_score: confidence,
position_adjustment: (best_pos as isize) - (*old_pos as isize),
},
);
self.analysis_stats.reuse_candidates_found += 1;
}
}
}
}
fn validate_reuse_candidates(
&mut self,
candidates: HashMap<usize, ReuseStrategy>,
old_tree: &Node,
new_tree: &Node,
config: &ReuseConfig,
) -> HashMap<usize, ReuseStrategy> {
let mut validated = HashMap::new();
for (old_pos, strategy) in candidates {
if self.validate_reuse_strategy(&strategy, old_tree, new_tree, config) {
validated.insert(old_pos, strategy);
self.analysis_stats.validation_passes += 1;
} else {
self.analysis_stats.validation_failures += 1;
}
}
validated
}
fn calculate_structural_hash(&self, node: &Node) -> u64 {
let mut hasher = DefaultHasher::new();
std::mem::discriminant(&node.kind).hash(&mut hasher);
match &node.kind {
NodeKind::Program { statements } => {
statements.len().hash(&mut hasher);
"program".hash(&mut hasher);
}
NodeKind::Block { statements } => {
statements.len().hash(&mut hasher);
"block".hash(&mut hasher);
}
NodeKind::VariableDeclaration { declarator, .. } => {
declarator.hash(&mut hasher);
"vardecl".hash(&mut hasher);
}
NodeKind::Binary { op, .. } => {
op.hash(&mut hasher);
"binary".hash(&mut hasher);
}
NodeKind::Unary { op, .. } => {
op.hash(&mut hasher);
"unary".hash(&mut hasher);
}
NodeKind::FunctionCall { name, args } => {
name.hash(&mut hasher);
args.len().hash(&mut hasher);
"funccall".hash(&mut hasher);
}
NodeKind::Number { .. } => "number".hash(&mut hasher),
NodeKind::String { interpolated, .. } => {
interpolated.hash(&mut hasher);
"string".hash(&mut hasher);
}
NodeKind::Variable { sigil, .. } => {
sigil.hash(&mut hasher);
"variable".hash(&mut hasher);
}
NodeKind::Identifier { .. } => "identifier".hash(&mut hasher),
_ => "other".hash(&mut hasher),
}
hasher.finish()
}
fn calculate_content_hash(&self, node: &Node) -> u64 {
let mut hasher = DefaultHasher::new();
match &node.kind {
NodeKind::Number { value } => value.hash(&mut hasher),
NodeKind::String { value, .. } => value.hash(&mut hasher),
NodeKind::Variable { name, .. } => name.hash(&mut hasher),
NodeKind::Identifier { name } => name.hash(&mut hasher),
_ => {
self.calculate_structural_hash(node).hash(&mut hasher);
}
}
hasher.finish()
}
fn get_children_count(&self, node: &Node) -> usize {
match &node.kind {
NodeKind::Program { statements } | NodeKind::Block { statements } => statements.len(),
NodeKind::VariableDeclaration { initializer, .. } => {
if initializer.is_some() { 2 } else { 1 } }
NodeKind::Binary { .. } => 2, NodeKind::Unary { .. } => 1, NodeKind::FunctionCall { args, .. } => args.len(),
NodeKind::If { elsif_branches, else_branch, .. } => {
2 + elsif_branches.len() * 2 + if else_branch.is_some() { 1 } else { 0 }
}
_ => 0, }
}
fn calculate_match_confidence(
&self,
old_info: &NodeAnalysisInfo,
new_info: &NodeAnalysisInfo,
) -> f64 {
let mut confidence = 0.0f64;
if old_info.structural_hash == new_info.structural_hash {
confidence += 0.4;
}
if old_info.content_hash == new_info.content_hash {
confidence += 0.3;
}
if old_info.children_count == new_info.children_count {
confidence += 0.2;
}
let depth_diff = (old_info.depth as isize - new_info.depth as isize).abs();
if depth_diff == 0 {
confidence += 0.1;
} else if depth_diff <= 2 {
confidence += 0.05;
}
confidence.min(1.0)
}
fn calculate_structural_similarity(&self, old_node: &Node, new_node: &Node) -> f64 {
let mut similarity = 0.0;
if std::mem::discriminant(&old_node.kind) == std::mem::discriminant(&new_node.kind) {
similarity += 0.5;
match (&old_node.kind, &new_node.kind) {
(NodeKind::Program { statements: s1 }, NodeKind::Program { statements: s2 }) => {
let len_similarity = 1.0
- ((s1.len() as f64 - s2.len() as f64).abs()
/ (s1.len().max(s2.len()) as f64));
similarity += 0.3 * len_similarity;
}
(NodeKind::Binary { op: op1, .. }, NodeKind::Binary { op: op2, .. }) => {
if op1 == op2 {
similarity += 0.4;
}
}
(
NodeKind::FunctionCall { name: n1, args: a1 },
NodeKind::FunctionCall { name: n2, args: a2 },
) => {
if n1 == n2 {
similarity += 0.3;
}
let arg_similarity = 1.0
- ((a1.len() as f64 - a2.len() as f64).abs()
/ (a1.len().max(a2.len()) as f64));
similarity += 0.2 * arg_similarity;
}
_ => {
similarity += 0.2; }
}
}
similarity.min(1.0)
}
fn are_compatible_for_content_update(&self, old_node: &Node, new_node: &Node) -> bool {
match (&old_node.kind, &new_node.kind) {
(NodeKind::Number { .. }, NodeKind::Number { .. }) => true,
(
NodeKind::String { interpolated: i1, .. },
NodeKind::String { interpolated: i2, .. },
) => i1 == i2,
(NodeKind::Variable { sigil: s1, .. }, NodeKind::Variable { sigil: s2, .. }) => {
s1 == s2
}
(NodeKind::Identifier { .. }, NodeKind::Identifier { .. }) => true,
_ => false,
}
}
fn is_position_shift_candidate_safe(
&self,
old_info: &NodeAnalysisInfo,
new_info: &NodeAnalysisInfo,
position_shift: usize,
config: &ReuseConfig,
) -> bool {
if position_shift > config.max_position_shift {
return false;
}
let old_is_container = self.is_container_node(&old_info.node);
let new_is_container = self.is_container_node(&new_info.node);
if old_is_container != new_is_container {
return false;
}
if old_is_container {
let max_container_shift = config.max_position_shift / 4;
if position_shift > max_container_shift {
return false;
}
let depth_diff = (old_info.depth as isize - new_info.depth as isize).unsigned_abs();
if depth_diff > 1 || old_info.children_count != new_info.children_count {
return false;
}
}
true
}
fn calculate_shifted_match_confidence(
&self,
old_info: &NodeAnalysisInfo,
new_info: &NodeAnalysisInfo,
position_shift: usize,
config: &ReuseConfig,
) -> f64 {
let base_confidence = self.calculate_match_confidence(old_info, new_info);
let shift_ratio = position_shift as f64 / config.max_position_shift.max(1) as f64;
let shift_penalty = if self.is_container_node(&old_info.node) {
0.45 * shift_ratio.min(1.0)
} else if self.is_content_stable_leaf(&old_info.node) {
0.12 * shift_ratio.min(1.0)
} else {
0.30 * shift_ratio.min(1.0)
};
(base_confidence - shift_penalty).clamp(0.0, 1.0)
}
fn is_content_stable_leaf(&self, node: &Node) -> bool {
matches!(
node.kind,
NodeKind::Number { .. }
| NodeKind::String { .. }
| NodeKind::Identifier { .. }
| NodeKind::Variable { .. }
)
}
fn is_container_node(&self, node: &Node) -> bool {
matches!(node.kind, NodeKind::Program { .. } | NodeKind::Block { .. } | NodeKind::If { .. })
}
fn validate_reuse_strategy(
&self,
_strategy: &ReuseStrategy,
_old_tree: &Node,
_new_tree: &Node,
_config: &ReuseConfig,
) -> bool {
true
}
fn count_nodes(&self, node: &Node) -> usize {
let mut count = 1;
match &node.kind {
NodeKind::Program { statements } | NodeKind::Block { statements } => {
for stmt in statements {
count += self.count_nodes(stmt);
}
}
NodeKind::VariableDeclaration { variable, initializer, .. } => {
count += self.count_nodes(variable);
if let Some(init) = initializer {
count += self.count_nodes(init);
}
}
NodeKind::Binary { left, right, .. } => {
count += self.count_nodes(left);
count += self.count_nodes(right);
}
NodeKind::Unary { operand, .. } => {
count += self.count_nodes(operand);
}
NodeKind::FunctionCall { args, .. } => {
for arg in args {
count += self.count_nodes(arg);
}
}
NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
count += self.count_nodes(condition);
count += self.count_nodes(then_branch);
for (cond, branch) in elsif_branches {
count += self.count_nodes(cond);
count += self.count_nodes(branch);
}
if let Some(branch) = else_branch {
count += self.count_nodes(branch);
}
}
_ => {} }
count
}
pub fn map_old_position_to_new(&self, old_pos: usize, edits: &EditSet) -> usize {
let mut shift: isize = 0;
for edit in edits.edits() {
if old_pos < edit.start_byte {
break;
}
if old_pos < edit.old_end_byte {
return edit.new_end_byte;
}
shift += edit.byte_shift();
}
let signed = old_pos as isize + shift;
if signed < 0 { 0 } else { signed as usize }
}
pub fn try_register_match(
&self,
reuse_map: &mut HashMap<usize, ReuseStrategy>,
old_pos: usize,
new_pos: usize,
reuse_type: ReuseType,
confidence: f64,
) -> bool {
if reuse_map.values().any(|s| s.target_position == new_pos) {
return false;
}
reuse_map.insert(
old_pos,
ReuseStrategy {
target_position: new_pos,
reuse_type,
confidence_score: confidence,
position_adjustment: (new_pos as isize) - (old_pos as isize),
},
);
true
}
}
#[derive(Debug)]
struct TreeAnalysis {
node_info: HashMap<usize, NodeAnalysisInfo>,
}
impl TreeAnalysis {
fn new() -> Self {
TreeAnalysis { node_info: HashMap::new() }
}
fn add_node_info(&mut self, position: usize, info: NodeAnalysisInfo) {
self.node_info.insert(position, info);
}
}
#[derive(Debug, Clone)]
struct NodeAnalysisInfo {
node: Node,
structural_hash: u64,
content_hash: u64,
depth: usize,
children_count: usize,
}
#[derive(Debug, Clone)]
pub struct ReuseStrategy {
pub target_position: usize,
pub reuse_type: ReuseType,
pub confidence_score: f64,
pub position_adjustment: isize,
}
#[derive(Debug)]
pub struct ReuseAnalysisResult {
pub reuse_map: HashMap<usize, ReuseStrategy>,
pub total_old_nodes: usize,
pub total_new_nodes: usize,
pub reused_nodes: usize,
pub reuse_percentage: f64,
pub analysis_stats: ReuseAnalysisStats,
}
impl ReuseAnalysisResult {
pub fn meets_efficiency_target(&self, target_percentage: f64) -> bool {
self.reuse_percentage >= target_percentage
}
pub fn performance_summary(&self) -> String {
format!(
"Reuse Analysis: {:.1}% efficiency ({}/{} nodes), {} structural matches, {} position adjustments",
self.reuse_percentage,
self.reused_nodes,
self.total_old_nodes,
self.analysis_stats.structural_matches,
self.analysis_stats.position_adjustments
)
}
}
#[cfg(test)]
mod tests {
use super::*;
use perl_parser_core::{SourceLocation, ast::Node};
#[test]
fn test_advanced_reuse_analyzer_creation() {
let analyzer = AdvancedReuseAnalyzer::new();
assert_eq!(analyzer.analysis_stats.nodes_analyzed, 0);
}
#[test]
fn test_structural_hash_calculation() {
let analyzer = AdvancedReuseAnalyzer::new();
let node1 = Node::new(
NodeKind::Number { value: "42".to_string() },
SourceLocation { start: 0, end: 2 },
);
let node2 = Node::new(
NodeKind::Number { value: "99".to_string() },
SourceLocation { start: 0, end: 2 },
);
let hash1 = analyzer.calculate_structural_hash(&node1);
let hash2 = analyzer.calculate_structural_hash(&node2);
assert_eq!(hash1, hash2, "Numbers should have same structural hash regardless of value");
}
#[test]
fn test_content_hash_differs_for_different_values() {
let analyzer = AdvancedReuseAnalyzer::new();
let node1 = Node::new(
NodeKind::Number { value: "42".to_string() },
SourceLocation { start: 0, end: 2 },
);
let node2 = Node::new(
NodeKind::Number { value: "99".to_string() },
SourceLocation { start: 0, end: 2 },
);
let hash1 = analyzer.calculate_content_hash(&node1);
let hash2 = analyzer.calculate_content_hash(&node2);
assert_ne!(hash1, hash2, "Different values should have different content hashes");
}
#[test]
fn test_children_count_calculation() {
let analyzer = AdvancedReuseAnalyzer::new();
let leaf = Node::new(
NodeKind::Number { value: "42".to_string() },
SourceLocation { start: 0, end: 2 },
);
assert_eq!(analyzer.get_children_count(&leaf), 0);
let binary = Node::new(
NodeKind::Binary {
op: "+".to_string(),
left: Box::new(leaf.clone()),
right: Box::new(leaf.clone()),
},
SourceLocation { start: 0, end: 5 },
);
assert_eq!(analyzer.get_children_count(&binary), 2);
let program = Node::new(
NodeKind::Program { statements: vec![binary] },
SourceLocation { start: 0, end: 5 },
);
assert_eq!(analyzer.get_children_count(&program), 1);
}
#[test]
fn test_reuse_config_defaults() {
let config = ReuseConfig::default();
assert_eq!(config.min_confidence, 0.75);
assert_eq!(config.max_position_shift, 1000);
assert!(config.aggressive_structural_matching);
assert!(config.enable_content_reuse);
assert_eq!(config.max_analysis_depth, 10);
}
#[test]
fn test_node_compatibility_for_content_update() {
let analyzer = AdvancedReuseAnalyzer::new();
let num1 = Node::new(
NodeKind::Number { value: "42".to_string() },
SourceLocation { start: 0, end: 2 },
);
let num2 = Node::new(
NodeKind::Number { value: "99".to_string() },
SourceLocation { start: 0, end: 2 },
);
let str1 = Node::new(
NodeKind::String { value: "hello".to_string(), interpolated: false },
SourceLocation { start: 0, end: 7 },
);
assert!(analyzer.are_compatible_for_content_update(&num1, &num2));
assert!(!analyzer.are_compatible_for_content_update(&num1, &str1));
}
#[test]
fn test_identify_affected_nodes_marks_only_overlapping_regions() {
let mut analyzer = AdvancedReuseAnalyzer::new();
let tree = Node::new(
NodeKind::Program {
statements: vec![
Node::new(
NodeKind::Number { value: "1".to_string() },
SourceLocation { start: 0, end: 1 },
),
Node::new(
NodeKind::Number { value: "2".to_string() },
SourceLocation { start: 20, end: 21 },
),
],
},
SourceLocation { start: 0, end: 21 },
);
let mut edits = EditSet::new();
edits.add(perl_parser_core::edit::Edit::new(
0,
2,
2,
Position::new(0, 0, 0),
Position::new(2, 0, 2),
Position::new(2, 0, 2),
));
analyzer.identify_affected_nodes(&tree, &edits);
assert!(analyzer.affected_nodes.contains(&0));
assert!(!analyzer.affected_nodes.contains(&20));
}
fn make_edit(start: usize, old_end: usize, new_end: usize) -> perl_parser_core::edit::Edit {
perl_parser_core::edit::Edit::new(
start,
old_end,
new_end,
Position::new(start, 0, start as u32),
Position::new(old_end, 0, old_end as u32),
Position::new(new_end, 0, new_end as u32),
)
}
#[test]
fn test_map_position_before_edit_unchanged() {
let analyzer = AdvancedReuseAnalyzer::new();
let mut edits = EditSet::new();
edits.add(make_edit(10, 12, 14));
assert_eq!(analyzer.map_old_position_to_new(5, &edits), 5);
}
#[test]
fn test_map_position_after_single_edit_shifted() {
let analyzer = AdvancedReuseAnalyzer::new();
let mut edits = EditSet::new();
edits.add(make_edit(8, 10, 11)); assert_eq!(analyzer.map_old_position_to_new(13, &edits), 14);
}
#[test]
fn test_map_position_accumulates_two_shifts() {
let analyzer = AdvancedReuseAnalyzer::new();
let mut edits = EditSet::new();
edits.add(make_edit(8, 10, 11)); edits.add(make_edit(24, 26, 27)); assert_eq!(analyzer.map_old_position_to_new(30, &edits), 32);
}
#[test]
fn test_map_position_at_edit_start_returns_new_end() {
let analyzer = AdvancedReuseAnalyzer::new();
let mut edits = EditSet::new();
edits.add(make_edit(10, 20, 15));
assert_eq!(analyzer.map_old_position_to_new(10, &edits), 15);
}
#[test]
fn test_map_position_at_edit_old_end_is_shifted() {
let analyzer = AdvancedReuseAnalyzer::new();
let mut edits = EditSet::new();
edits.add(make_edit(10, 20, 25)); assert_eq!(analyzer.map_old_position_to_new(20, &edits), 25);
}
#[test]
fn test_map_position_inside_edit_returns_new_end() {
let analyzer = AdvancedReuseAnalyzer::new();
let mut edits = EditSet::new();
edits.add(make_edit(10, 30, 20)); assert_eq!(analyzer.map_old_position_to_new(15, &edits), 20);
}
#[test]
fn test_map_position_between_two_edits_shifted_by_first_only() {
let analyzer = AdvancedReuseAnalyzer::new();
let mut edits = EditSet::new();
edits.add(make_edit(5, 7, 8)); edits.add(make_edit(20, 22, 23)); assert_eq!(analyzer.map_old_position_to_new(10, &edits), 11);
}
#[test]
fn test_try_register_match_rejects_duplicate_new_pos() {
let analyzer = AdvancedReuseAnalyzer::new();
let mut reuse_map = HashMap::new();
let first = analyzer.try_register_match(&mut reuse_map, 0, 10, ReuseType::Direct, 0.9);
assert!(first, "first registration should succeed");
let dup = analyzer.try_register_match(&mut reuse_map, 5, 10, ReuseType::Direct, 0.95);
assert!(!dup, "second registration for same new_pos must be rejected");
assert_eq!(reuse_map.len(), 1);
assert_eq!(reuse_map[&0].target_position, 10);
}
#[test]
fn test_try_register_match_distinct_new_positions_both_succeed() {
let analyzer = AdvancedReuseAnalyzer::new();
let mut reuse_map = HashMap::new();
assert!(analyzer.try_register_match(&mut reuse_map, 0, 10, ReuseType::Direct, 0.9));
assert!(
analyzer.try_register_match(&mut reuse_map, 5, 20, ReuseType::PositionShift, 0.85,)
);
assert_eq!(reuse_map.len(), 2);
}
}