Skip to main content

perl_incremental_parsing/incremental/
incremental_advanced_reuse.rs

1//! Advanced tree reuse algorithms for incremental parsing
2//!
3//! This module implements sophisticated AST node reuse strategies that go beyond
4//! simple value matching to achieve high node reuse rates even for complex edits.
5//!
6//! ## Key Features
7//!
8//! - **Structural similarity analysis** - Compare AST subtree patterns
9//! - **Position-aware reuse** - Understand which nodes can be safely repositioned
10//! - **Content-based hashing** - Fast comparison of subtree equivalence
11//! - **Incremental node mapping** - Efficient lookup of reusable nodes
12//! - **Advanced validation** - Ensure reused nodes maintain correctness
13//!
14//! ## Performance Targets
15//!
16//! - **≥85% node reuse** for simple value edits
17//! - **≥70% node reuse** for structural modifications
18//! - **≥50% node reuse** for complex multi-edit scenarios
19//! - **<500µs processing** for reuse analysis on typical documents
20
21use crate::{
22    ast::{Node, NodeKind},
23    edit::EditSet,
24    position::{Position, Range},
25};
26use std::collections::{HashMap, HashSet};
27use std::hash::{DefaultHasher, Hash, Hasher};
28
29/// Advanced node reuse analyzer with sophisticated matching algorithms
30#[derive(Debug)]
31pub struct AdvancedReuseAnalyzer {
32    /// Cache of node structural hashes for fast comparison
33    node_hashes: HashMap<usize, u64>,
34    /// Mapping of positions to potentially reusable nodes
35    position_map: HashMap<usize, Vec<NodeCandidate>>,
36    /// Set of nodes that are known to be affected by edits
37    affected_nodes: HashSet<usize>,
38    /// Statistics for reuse analysis
39    pub analysis_stats: ReuseAnalysisStats,
40}
41
42/// Statistics tracking reuse analysis performance and effectiveness
43#[derive(Debug, Default, Clone)]
44pub struct ReuseAnalysisStats {
45    pub nodes_analyzed: usize,
46    pub structural_matches: usize,
47    pub content_matches: usize,
48    pub position_adjustments: usize,
49    pub reuse_candidates_found: usize,
50    pub validation_passes: usize,
51    pub validation_failures: usize,
52}
53
54/// A candidate node for reuse with metadata about its reusability
55#[derive(Debug, Clone)]
56#[allow(dead_code)] // Fields used by future advanced matching strategies
57struct NodeCandidate {
58    node: Node,
59    structural_hash: u64,
60    confidence_score: f64,
61    position_delta: isize,
62    reuse_type: ReuseType,
63}
64
65/// Types of reuse strategies available for nodes
66#[derive(Debug, Clone, PartialEq)]
67#[allow(dead_code)] // Used by future advanced matching strategies
68pub enum ReuseType {
69    /// Direct reuse - node unchanged
70    Direct,
71    /// Position shift - same content, different position
72    PositionShift,
73    /// Content update - same structure, updated values
74    ContentUpdate,
75    /// Structural equivalent - same pattern with different details
76    StructuralEquivalent,
77}
78
79/// Configuration for reuse analysis behavior
80#[derive(Debug, Clone)]
81pub struct ReuseConfig {
82    /// Minimum confidence score for reuse (0.0-1.0)
83    pub min_confidence: f64,
84    /// Maximum position shift allowed for reuse
85    pub max_position_shift: usize,
86    /// Enable aggressive structural matching
87    pub aggressive_structural_matching: bool,
88    /// Enable content-based reuse for literals
89    pub enable_content_reuse: bool,
90    /// Maximum depth for recursive analysis
91    pub max_analysis_depth: usize,
92}
93
94impl Default for ReuseConfig {
95    fn default() -> Self {
96        ReuseConfig {
97            min_confidence: 0.75,
98            max_position_shift: 1000,
99            aggressive_structural_matching: true,
100            enable_content_reuse: true,
101            max_analysis_depth: 10,
102        }
103    }
104}
105
106impl Default for AdvancedReuseAnalyzer {
107    fn default() -> Self {
108        Self::new()
109    }
110}
111
112impl AdvancedReuseAnalyzer {
113    /// Create a new reuse analyzer with default configuration
114    pub fn new() -> Self {
115        AdvancedReuseAnalyzer {
116            node_hashes: HashMap::new(),
117            position_map: HashMap::new(),
118            affected_nodes: HashSet::new(),
119            analysis_stats: ReuseAnalysisStats::default(),
120        }
121    }
122
123    /// Create analyzer with custom configuration
124    pub fn with_config(_config: ReuseConfig) -> Self {
125        // Store config for future use if needed
126        Self::new()
127    }
128
129    /// Analyze potential node reuse between old and new trees
130    ///
131    /// Returns a mapping of old node positions to reuse strategies,
132    /// enabling intelligent tree reconstruction with maximum node reuse.
133    pub fn analyze_reuse_opportunities(
134        &mut self,
135        old_tree: &Node,
136        new_tree: &Node,
137        edits: &EditSet,
138        config: &ReuseConfig,
139    ) -> ReuseAnalysisResult {
140        self.analysis_stats = ReuseAnalysisStats::default();
141
142        // Reset internal state
143        self.node_hashes.clear();
144        self.position_map.clear();
145        self.affected_nodes.clear();
146
147        // Build structural analysis of both trees
148        let old_analysis = self.build_tree_analysis(old_tree, config);
149        let new_analysis = self.build_tree_analysis(new_tree, config);
150
151        // Identify affected regions from edits
152        self.identify_affected_nodes(old_tree, edits);
153
154        // Find reuse candidates using multiple strategies
155        let mut reuse_map = HashMap::new();
156
157        // Strategy 1: Direct structural matching
158        self.find_direct_structural_matches(&old_analysis, &new_analysis, &mut reuse_map, config);
159
160        // Strategy 2: Position-shifted matching
161        self.find_position_shifted_matches(&old_analysis, &new_analysis, &mut reuse_map, config);
162
163        // Strategy 3: Content-updated matching
164        if config.enable_content_reuse {
165            self.find_content_updated_matches(&old_analysis, &new_analysis, &mut reuse_map, config);
166        }
167
168        // Strategy 4: Aggressive structural matching
169        if config.aggressive_structural_matching {
170            self.find_aggressive_structural_matches(
171                &old_analysis,
172                &new_analysis,
173                &mut reuse_map,
174                config,
175            );
176        }
177
178        // Validate reuse candidates and calculate confidence scores
179        let validated_reuse_map =
180            self.validate_reuse_candidates(reuse_map, old_tree, new_tree, config);
181
182        // Calculate final statistics
183        let total_old_nodes = self.count_nodes(old_tree);
184        let total_new_nodes = self.count_nodes(new_tree);
185        let reused_nodes = validated_reuse_map.len();
186        let reuse_percentage = if total_old_nodes > 0 {
187            (reused_nodes as f64 / total_old_nodes as f64) * 100.0
188        } else {
189            0.0
190        };
191
192        ReuseAnalysisResult {
193            reuse_map: validated_reuse_map,
194            total_old_nodes,
195            total_new_nodes,
196            reused_nodes,
197            reuse_percentage,
198            analysis_stats: self.analysis_stats.clone(),
199        }
200    }
201
202    /// Build comprehensive analysis of tree structure
203    fn build_tree_analysis(&mut self, tree: &Node, config: &ReuseConfig) -> TreeAnalysis {
204        let mut analysis = TreeAnalysis::new();
205        self.analyze_node_recursive(tree, &mut analysis, 0, config);
206        analysis
207    }
208
209    /// Recursively analyze nodes to build structural understanding
210    fn analyze_node_recursive(
211        &mut self,
212        node: &Node,
213        analysis: &mut TreeAnalysis,
214        depth: usize,
215        config: &ReuseConfig,
216    ) {
217        if depth > config.max_analysis_depth {
218            return;
219        }
220
221        self.analysis_stats.nodes_analyzed += 1;
222
223        // Calculate structural hash
224        let structural_hash = self.calculate_structural_hash(node);
225        self.node_hashes.insert(node.location.start, structural_hash);
226
227        // Create node info for analysis
228        let node_info = NodeAnalysisInfo {
229            node: node.clone(),
230            structural_hash,
231            depth,
232            children_count: self.get_children_count(node),
233            content_hash: self.calculate_content_hash(node),
234        };
235
236        analysis.add_node_info(node.location.start, node_info);
237
238        // Add to position map
239        let candidate = NodeCandidate {
240            node: node.clone(),
241            structural_hash,
242            confidence_score: 1.0, // Will be refined during analysis
243            position_delta: 0,
244            reuse_type: ReuseType::Direct,
245        };
246
247        self.position_map.entry(node.location.start).or_default().push(candidate);
248
249        // Recurse into children
250        match &node.kind {
251            NodeKind::Program { statements } | NodeKind::Block { statements } => {
252                for stmt in statements {
253                    self.analyze_node_recursive(stmt, analysis, depth + 1, config);
254                }
255            }
256            NodeKind::VariableDeclaration { variable, initializer, .. } => {
257                self.analyze_node_recursive(variable, analysis, depth + 1, config);
258                if let Some(init) = initializer {
259                    self.analyze_node_recursive(init, analysis, depth + 1, config);
260                }
261            }
262            NodeKind::Binary { left, right, .. } => {
263                self.analyze_node_recursive(left, analysis, depth + 1, config);
264                self.analyze_node_recursive(right, analysis, depth + 1, config);
265            }
266            NodeKind::Unary { operand, .. } => {
267                self.analyze_node_recursive(operand, analysis, depth + 1, config);
268            }
269            NodeKind::FunctionCall { args, .. } => {
270                for arg in args {
271                    self.analyze_node_recursive(arg, analysis, depth + 1, config);
272                }
273            }
274            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
275                self.analyze_node_recursive(condition, analysis, depth + 1, config);
276                self.analyze_node_recursive(then_branch, analysis, depth + 1, config);
277                for (cond, branch) in elsif_branches {
278                    self.analyze_node_recursive(cond, analysis, depth + 1, config);
279                    self.analyze_node_recursive(branch, analysis, depth + 1, config);
280                }
281                if let Some(branch) = else_branch {
282                    self.analyze_node_recursive(branch, analysis, depth + 1, config);
283                }
284            }
285            _ => {} // Leaf nodes
286        }
287    }
288
289    /// Identify nodes affected by edits
290    fn identify_affected_nodes(&mut self, tree: &Node, edits: &EditSet) {
291        for edit in edits.edits() {
292            self.mark_affected_nodes_in_range(tree, edit.start_byte, edit.old_end_byte);
293        }
294    }
295
296    /// Mark nodes as affected if they overlap with edit ranges
297    fn mark_affected_nodes_in_range(&mut self, node: &Node, start: usize, end: usize) {
298        let node_range = Range::from(node.location);
299        let edit_range = Range::new(Position::new(start, 0, 0), Position::new(end, 0, 0));
300
301        if node_range.overlaps(&edit_range) {
302            self.affected_nodes.insert(node.location.start);
303        }
304
305        // Recurse into children
306        match &node.kind {
307            NodeKind::Program { statements } | NodeKind::Block { statements } => {
308                for stmt in statements {
309                    self.mark_affected_nodes_in_range(stmt, start, end);
310                }
311            }
312            NodeKind::VariableDeclaration { variable, initializer, .. } => {
313                self.mark_affected_nodes_in_range(variable, start, end);
314                if let Some(init) = initializer {
315                    self.mark_affected_nodes_in_range(init, start, end);
316                }
317            }
318            NodeKind::Binary { left, right, .. } => {
319                self.mark_affected_nodes_in_range(left, start, end);
320                self.mark_affected_nodes_in_range(right, start, end);
321            }
322            _ => {} // Handle other node types as needed
323        }
324    }
325
326    /// Find direct structural matches between trees
327    fn find_direct_structural_matches(
328        &mut self,
329        old_analysis: &TreeAnalysis,
330        new_analysis: &TreeAnalysis,
331        reuse_map: &mut HashMap<usize, ReuseStrategy>,
332        config: &ReuseConfig,
333    ) {
334        for (old_pos, old_info) in &old_analysis.node_info {
335            // Skip affected nodes for direct matching
336            if self.affected_nodes.contains(old_pos) {
337                continue;
338            }
339
340            // Look for exact structural matches in new tree
341            for (new_pos, new_info) in &new_analysis.node_info {
342                if old_info.structural_hash == new_info.structural_hash
343                    && old_info.children_count == new_info.children_count
344                {
345                    let confidence = self.calculate_match_confidence(old_info, new_info);
346                    if confidence >= config.min_confidence {
347                        reuse_map.insert(
348                            *old_pos,
349                            ReuseStrategy {
350                                target_position: *new_pos,
351                                reuse_type: ReuseType::Direct,
352                                confidence_score: confidence,
353                                position_adjustment: (*new_pos as isize) - (*old_pos as isize),
354                            },
355                        );
356                        self.analysis_stats.structural_matches += 1;
357                        break; // Use first good match
358                    }
359                }
360            }
361        }
362    }
363
364    /// Find position-shifted matches (same content, different location)
365    fn find_position_shifted_matches(
366        &mut self,
367        old_analysis: &TreeAnalysis,
368        new_analysis: &TreeAnalysis,
369        reuse_map: &mut HashMap<usize, ReuseStrategy>,
370        config: &ReuseConfig,
371    ) {
372        for (old_pos, old_info) in &old_analysis.node_info {
373            // Skip if already matched or is a leaf node
374            if reuse_map.contains_key(old_pos) || old_info.children_count == 0 {
375                continue;
376            }
377
378            // Look for content matches that may have shifted position
379            for (new_pos, new_info) in &new_analysis.node_info {
380                if old_info.content_hash == new_info.content_hash
381                    && old_info.structural_hash == new_info.structural_hash
382                {
383                    let position_shift = (*new_pos as isize - *old_pos as isize).unsigned_abs();
384                    if position_shift <= config.max_position_shift {
385                        let confidence = self.calculate_match_confidence(old_info, new_info) * 0.9; // Slight penalty for position shift
386                        if confidence >= config.min_confidence {
387                            reuse_map.insert(
388                                *old_pos,
389                                ReuseStrategy {
390                                    target_position: *new_pos,
391                                    reuse_type: ReuseType::PositionShift,
392                                    confidence_score: confidence,
393                                    position_adjustment: (*new_pos as isize) - (*old_pos as isize),
394                                },
395                            );
396                            self.analysis_stats.position_adjustments += 1;
397                            break;
398                        }
399                    }
400                }
401            }
402        }
403    }
404
405    /// Find content-updated matches (structure same, values changed)
406    fn find_content_updated_matches(
407        &mut self,
408        old_analysis: &TreeAnalysis,
409        new_analysis: &TreeAnalysis,
410        reuse_map: &mut HashMap<usize, ReuseStrategy>,
411        config: &ReuseConfig,
412    ) {
413        for (old_pos, old_info) in &old_analysis.node_info {
414            if reuse_map.contains_key(old_pos) {
415                continue;
416            }
417
418            // For leaf nodes, check if structure matches but content differs
419            if old_info.children_count == 0 {
420                for (new_pos, new_info) in &new_analysis.node_info {
421                    if old_info.structural_hash == new_info.structural_hash
422                        && old_info.content_hash != new_info.content_hash
423                        && self.are_compatible_for_content_update(&old_info.node, &new_info.node)
424                    {
425                        let confidence = 0.8; // Content updates get medium confidence
426                        if confidence >= config.min_confidence {
427                            reuse_map.insert(
428                                *old_pos,
429                                ReuseStrategy {
430                                    target_position: *new_pos,
431                                    reuse_type: ReuseType::ContentUpdate,
432                                    confidence_score: confidence,
433                                    position_adjustment: (*new_pos as isize) - (*old_pos as isize),
434                                },
435                            );
436                            self.analysis_stats.content_matches += 1;
437                            break;
438                        }
439                    }
440                }
441            }
442        }
443    }
444
445    /// Find aggressive structural matches using pattern analysis
446    fn find_aggressive_structural_matches(
447        &mut self,
448        old_analysis: &TreeAnalysis,
449        new_analysis: &TreeAnalysis,
450        reuse_map: &mut HashMap<usize, ReuseStrategy>,
451        config: &ReuseConfig,
452    ) {
453        // This is the most sophisticated matching - look for structural patterns
454        // even when exact hashes don't match
455        for (old_pos, old_info) in &old_analysis.node_info {
456            if reuse_map.contains_key(old_pos) {
457                continue;
458            }
459
460            let mut best_match: Option<(usize, f64)> = None;
461
462            for (new_pos, new_info) in &new_analysis.node_info {
463                // Compare structural similarity
464                let similarity =
465                    self.calculate_structural_similarity(&old_info.node, &new_info.node);
466                if similarity >= config.min_confidence * 0.8 {
467                    // Slightly lower threshold for aggressive matching
468                    if best_match.as_ref().is_none_or(|&(_, s)| similarity > s) {
469                        best_match = Some((*new_pos, similarity));
470                    }
471                }
472            }
473
474            if let Some((best_pos, confidence)) = best_match {
475                if confidence >= config.min_confidence * 0.7 {
476                    // Final threshold check
477                    reuse_map.insert(
478                        *old_pos,
479                        ReuseStrategy {
480                            target_position: best_pos,
481                            reuse_type: ReuseType::StructuralEquivalent,
482                            confidence_score: confidence,
483                            position_adjustment: (best_pos as isize) - (*old_pos as isize),
484                        },
485                    );
486                    self.analysis_stats.reuse_candidates_found += 1;
487                }
488            }
489        }
490    }
491
492    /// Validate reuse candidates to ensure correctness
493    fn validate_reuse_candidates(
494        &mut self,
495        candidates: HashMap<usize, ReuseStrategy>,
496        old_tree: &Node,
497        new_tree: &Node,
498        config: &ReuseConfig,
499    ) -> HashMap<usize, ReuseStrategy> {
500        let mut validated = HashMap::new();
501
502        for (old_pos, strategy) in candidates {
503            if self.validate_reuse_strategy(&strategy, old_tree, new_tree, config) {
504                validated.insert(old_pos, strategy);
505                self.analysis_stats.validation_passes += 1;
506            } else {
507                self.analysis_stats.validation_failures += 1;
508            }
509        }
510
511        validated
512    }
513
514    /// Calculate structural hash for fast comparison
515    fn calculate_structural_hash(&self, node: &Node) -> u64 {
516        let mut hasher = DefaultHasher::new();
517
518        // Hash node kind discriminant
519        std::mem::discriminant(&node.kind).hash(&mut hasher);
520
521        // Hash structural properties based on node type
522        match &node.kind {
523            NodeKind::Program { statements } => {
524                statements.len().hash(&mut hasher);
525                "program".hash(&mut hasher);
526            }
527            NodeKind::Block { statements } => {
528                statements.len().hash(&mut hasher);
529                "block".hash(&mut hasher);
530            }
531            NodeKind::VariableDeclaration { declarator, .. } => {
532                declarator.hash(&mut hasher);
533                "vardecl".hash(&mut hasher);
534            }
535            NodeKind::Binary { op, .. } => {
536                op.hash(&mut hasher);
537                "binary".hash(&mut hasher);
538            }
539            NodeKind::Unary { op, .. } => {
540                op.hash(&mut hasher);
541                "unary".hash(&mut hasher);
542            }
543            NodeKind::FunctionCall { name, args } => {
544                name.hash(&mut hasher);
545                args.len().hash(&mut hasher);
546                "funccall".hash(&mut hasher);
547            }
548            NodeKind::Number { .. } => "number".hash(&mut hasher),
549            NodeKind::String { interpolated, .. } => {
550                interpolated.hash(&mut hasher);
551                "string".hash(&mut hasher);
552            }
553            NodeKind::Variable { sigil, .. } => {
554                sigil.hash(&mut hasher);
555                "variable".hash(&mut hasher);
556            }
557            NodeKind::Identifier { .. } => "identifier".hash(&mut hasher),
558            _ => "other".hash(&mut hasher),
559        }
560
561        hasher.finish()
562    }
563
564    /// Calculate content-based hash for value comparison
565    fn calculate_content_hash(&self, node: &Node) -> u64 {
566        let mut hasher = DefaultHasher::new();
567
568        match &node.kind {
569            NodeKind::Number { value } => value.hash(&mut hasher),
570            NodeKind::String { value, .. } => value.hash(&mut hasher),
571            NodeKind::Variable { name, .. } => name.hash(&mut hasher),
572            NodeKind::Identifier { name } => name.hash(&mut hasher),
573            _ => {
574                // For non-leaf nodes, hash is based on structure
575                self.calculate_structural_hash(node).hash(&mut hasher);
576            }
577        }
578
579        hasher.finish()
580    }
581
582    /// Get count of direct children for a node
583    fn get_children_count(&self, node: &Node) -> usize {
584        match &node.kind {
585            NodeKind::Program { statements } | NodeKind::Block { statements } => statements.len(),
586            NodeKind::VariableDeclaration { initializer, .. } => {
587                if initializer.is_some() { 2 } else { 1 } // variable + optional initializer
588            }
589            NodeKind::Binary { .. } => 2, // left + right
590            NodeKind::Unary { .. } => 1,  // operand
591            NodeKind::FunctionCall { args, .. } => args.len(),
592            NodeKind::If { elsif_branches, else_branch, .. } => {
593                2 + elsif_branches.len() * 2 + if else_branch.is_some() { 1 } else { 0 }
594            }
595            _ => 0, // Leaf nodes
596        }
597    }
598
599    /// Calculate confidence score for a potential match
600    fn calculate_match_confidence(
601        &self,
602        old_info: &NodeAnalysisInfo,
603        new_info: &NodeAnalysisInfo,
604    ) -> f64 {
605        let mut confidence = 0.0f64;
606
607        // Structural match bonus
608        if old_info.structural_hash == new_info.structural_hash {
609            confidence += 0.4;
610        }
611
612        // Content match bonus
613        if old_info.content_hash == new_info.content_hash {
614            confidence += 0.3;
615        }
616
617        // Children count match bonus
618        if old_info.children_count == new_info.children_count {
619            confidence += 0.2;
620        }
621
622        // Depth similarity bonus
623        let depth_diff = (old_info.depth as isize - new_info.depth as isize).abs();
624        if depth_diff == 0 {
625            confidence += 0.1;
626        } else if depth_diff <= 2 {
627            confidence += 0.05;
628        }
629
630        confidence.min(1.0)
631    }
632
633    /// Calculate structural similarity between two nodes
634    fn calculate_structural_similarity(&self, old_node: &Node, new_node: &Node) -> f64 {
635        // This is a more sophisticated comparison than hash equality
636        let mut similarity = 0.0;
637
638        // Base similarity from node type
639        if std::mem::discriminant(&old_node.kind) == std::mem::discriminant(&new_node.kind) {
640            similarity += 0.5;
641
642            // Additional similarity based on node-specific properties
643            match (&old_node.kind, &new_node.kind) {
644                (NodeKind::Program { statements: s1 }, NodeKind::Program { statements: s2 }) => {
645                    let len_similarity = 1.0
646                        - ((s1.len() as f64 - s2.len() as f64).abs()
647                            / (s1.len().max(s2.len()) as f64));
648                    similarity += 0.3 * len_similarity;
649                }
650                (NodeKind::Binary { op: op1, .. }, NodeKind::Binary { op: op2, .. }) => {
651                    if op1 == op2 {
652                        similarity += 0.4;
653                    }
654                }
655                (
656                    NodeKind::FunctionCall { name: n1, args: a1 },
657                    NodeKind::FunctionCall { name: n2, args: a2 },
658                ) => {
659                    if n1 == n2 {
660                        similarity += 0.3;
661                    }
662                    let arg_similarity = 1.0
663                        - ((a1.len() as f64 - a2.len() as f64).abs()
664                            / (a1.len().max(a2.len()) as f64));
665                    similarity += 0.2 * arg_similarity;
666                }
667                _ => {
668                    similarity += 0.2; // Generic bonus for same type
669                }
670            }
671        }
672
673        similarity.min(1.0)
674    }
675
676    /// Check if two nodes are compatible for content updates
677    fn are_compatible_for_content_update(&self, old_node: &Node, new_node: &Node) -> bool {
678        match (&old_node.kind, &new_node.kind) {
679            (NodeKind::Number { .. }, NodeKind::Number { .. }) => true,
680            (
681                NodeKind::String { interpolated: i1, .. },
682                NodeKind::String { interpolated: i2, .. },
683            ) => i1 == i2,
684            (NodeKind::Variable { sigil: s1, .. }, NodeKind::Variable { sigil: s2, .. }) => {
685                s1 == s2
686            }
687            (NodeKind::Identifier { .. }, NodeKind::Identifier { .. }) => true,
688            _ => false,
689        }
690    }
691
692    /// Validate a reuse strategy for correctness
693    fn validate_reuse_strategy(
694        &self,
695        _strategy: &ReuseStrategy,
696        _old_tree: &Node,
697        _new_tree: &Node,
698        _config: &ReuseConfig,
699    ) -> bool {
700        // Implement validation logic:
701        // - Check that reused nodes maintain parent-child relationships
702        // - Verify position adjustments are reasonable
703        // - Ensure content updates are semantically valid
704        // For now, accept all strategies (can be enhanced with specific validation)
705        true
706    }
707
708    /// Count total nodes in a tree
709    fn count_nodes(&self, node: &Node) -> usize {
710        let mut count = 1;
711
712        match &node.kind {
713            NodeKind::Program { statements } | NodeKind::Block { statements } => {
714                for stmt in statements {
715                    count += self.count_nodes(stmt);
716                }
717            }
718            NodeKind::VariableDeclaration { variable, initializer, .. } => {
719                count += self.count_nodes(variable);
720                if let Some(init) = initializer {
721                    count += self.count_nodes(init);
722                }
723            }
724            NodeKind::Binary { left, right, .. } => {
725                count += self.count_nodes(left);
726                count += self.count_nodes(right);
727            }
728            NodeKind::Unary { operand, .. } => {
729                count += self.count_nodes(operand);
730            }
731            NodeKind::FunctionCall { args, .. } => {
732                for arg in args {
733                    count += self.count_nodes(arg);
734                }
735            }
736            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
737                count += self.count_nodes(condition);
738                count += self.count_nodes(then_branch);
739                for (cond, branch) in elsif_branches {
740                    count += self.count_nodes(cond);
741                    count += self.count_nodes(branch);
742                }
743                if let Some(branch) = else_branch {
744                    count += self.count_nodes(branch);
745                }
746            }
747            _ => {} // Leaf nodes
748        }
749
750        count
751    }
752}
753
754/// Comprehensive analysis of a tree structure
755#[derive(Debug)]
756struct TreeAnalysis {
757    node_info: HashMap<usize, NodeAnalysisInfo>,
758}
759
760impl TreeAnalysis {
761    fn new() -> Self {
762        TreeAnalysis { node_info: HashMap::new() }
763    }
764
765    fn add_node_info(&mut self, position: usize, info: NodeAnalysisInfo) {
766        self.node_info.insert(position, info);
767    }
768}
769
770/// Detailed information about a node for reuse analysis
771#[derive(Debug, Clone)]
772struct NodeAnalysisInfo {
773    node: Node,
774    structural_hash: u64,
775    content_hash: u64,
776    depth: usize,
777    children_count: usize,
778}
779
780/// Strategy for reusing a node from old tree to new tree
781#[derive(Debug, Clone)]
782pub struct ReuseStrategy {
783    pub target_position: usize,
784    pub reuse_type: ReuseType,
785    pub confidence_score: f64,
786    pub position_adjustment: isize,
787}
788
789/// Result of reuse analysis with comprehensive metrics
790#[derive(Debug)]
791pub struct ReuseAnalysisResult {
792    pub reuse_map: HashMap<usize, ReuseStrategy>,
793    pub total_old_nodes: usize,
794    pub total_new_nodes: usize,
795    pub reused_nodes: usize,
796    pub reuse_percentage: f64,
797    pub analysis_stats: ReuseAnalysisStats,
798}
799
800impl ReuseAnalysisResult {
801    /// Check if reuse analysis achieved target efficiency
802    pub fn meets_efficiency_target(&self, target_percentage: f64) -> bool {
803        self.reuse_percentage >= target_percentage
804    }
805
806    /// Get a summary of the analysis performance
807    pub fn performance_summary(&self) -> String {
808        format!(
809            "Reuse Analysis: {:.1}% efficiency ({}/{} nodes), {} structural matches, {} position adjustments",
810            self.reuse_percentage,
811            self.reused_nodes,
812            self.total_old_nodes,
813            self.analysis_stats.structural_matches,
814            self.analysis_stats.position_adjustments
815        )
816    }
817}
818
819#[cfg(test)]
820mod tests {
821    use super::*;
822    use crate::{SourceLocation, ast::Node};
823
824    #[test]
825    fn test_advanced_reuse_analyzer_creation() {
826        let analyzer = AdvancedReuseAnalyzer::new();
827        assert_eq!(analyzer.analysis_stats.nodes_analyzed, 0);
828    }
829
830    #[test]
831    fn test_structural_hash_calculation() {
832        let analyzer = AdvancedReuseAnalyzer::new();
833
834        // Create sample nodes
835        let node1 = Node::new(
836            NodeKind::Number { value: "42".to_string() },
837            SourceLocation { start: 0, end: 2 },
838        );
839
840        let node2 = Node::new(
841            NodeKind::Number { value: "99".to_string() },
842            SourceLocation { start: 0, end: 2 },
843        );
844
845        let hash1 = analyzer.calculate_structural_hash(&node1);
846        let hash2 = analyzer.calculate_structural_hash(&node2);
847
848        // Same structure should have same hash
849        assert_eq!(hash1, hash2, "Numbers should have same structural hash regardless of value");
850    }
851
852    #[test]
853    fn test_content_hash_differs_for_different_values() {
854        let analyzer = AdvancedReuseAnalyzer::new();
855
856        let node1 = Node::new(
857            NodeKind::Number { value: "42".to_string() },
858            SourceLocation { start: 0, end: 2 },
859        );
860
861        let node2 = Node::new(
862            NodeKind::Number { value: "99".to_string() },
863            SourceLocation { start: 0, end: 2 },
864        );
865
866        let hash1 = analyzer.calculate_content_hash(&node1);
867        let hash2 = analyzer.calculate_content_hash(&node2);
868
869        assert_ne!(hash1, hash2, "Different values should have different content hashes");
870    }
871
872    #[test]
873    fn test_children_count_calculation() {
874        let analyzer = AdvancedReuseAnalyzer::new();
875
876        // Leaf node
877        let leaf = Node::new(
878            NodeKind::Number { value: "42".to_string() },
879            SourceLocation { start: 0, end: 2 },
880        );
881        assert_eq!(analyzer.get_children_count(&leaf), 0);
882
883        // Binary node
884        let binary = Node::new(
885            NodeKind::Binary {
886                op: "+".to_string(),
887                left: Box::new(leaf.clone()),
888                right: Box::new(leaf.clone()),
889            },
890            SourceLocation { start: 0, end: 5 },
891        );
892        assert_eq!(analyzer.get_children_count(&binary), 2);
893
894        // Program node
895        let program = Node::new(
896            NodeKind::Program { statements: vec![binary] },
897            SourceLocation { start: 0, end: 5 },
898        );
899        assert_eq!(analyzer.get_children_count(&program), 1);
900    }
901
902    #[test]
903    fn test_reuse_config_defaults() {
904        let config = ReuseConfig::default();
905        assert_eq!(config.min_confidence, 0.75);
906        assert_eq!(config.max_position_shift, 1000);
907        assert!(config.aggressive_structural_matching);
908        assert!(config.enable_content_reuse);
909        assert_eq!(config.max_analysis_depth, 10);
910    }
911
912    #[test]
913    fn test_node_compatibility_for_content_update() {
914        let analyzer = AdvancedReuseAnalyzer::new();
915
916        let num1 = Node::new(
917            NodeKind::Number { value: "42".to_string() },
918            SourceLocation { start: 0, end: 2 },
919        );
920
921        let num2 = Node::new(
922            NodeKind::Number { value: "99".to_string() },
923            SourceLocation { start: 0, end: 2 },
924        );
925
926        let str1 = Node::new(
927            NodeKind::String { value: "hello".to_string(), interpolated: false },
928            SourceLocation { start: 0, end: 7 },
929        );
930
931        // Same type nodes should be compatible
932        assert!(analyzer.are_compatible_for_content_update(&num1, &num2));
933
934        // Different type nodes should not be compatible
935        assert!(!analyzer.are_compatible_for_content_update(&num1, &str1));
936    }
937}