1use perl_parser_core::{
22 ast::{Node, NodeKind},
23 edit::EditSet,
24 position::{Position, Range},
25};
26use std::collections::{HashMap, HashSet};
27use std::hash::{DefaultHasher, Hash, Hasher};
28
29#[derive(Debug)]
31pub struct AdvancedReuseAnalyzer {
32 node_hashes: HashMap<usize, u64>,
34 position_map: HashMap<usize, Vec<NodeCandidate>>,
36 affected_nodes: HashSet<usize>,
38 pub analysis_stats: ReuseAnalysisStats,
40}
41
42#[derive(Debug, Default, Clone)]
44pub struct ReuseAnalysisStats {
45 pub nodes_analyzed: usize,
46 pub structural_matches: usize,
47 pub content_matches: usize,
48 pub position_adjustments: usize,
49 pub reuse_candidates_found: usize,
50 pub validation_passes: usize,
51 pub validation_failures: usize,
52}
53
54#[derive(Debug, Clone)]
56#[allow(dead_code)] struct NodeCandidate {
58 node: Node,
59 structural_hash: u64,
60 confidence_score: f64,
61 position_delta: isize,
62 reuse_type: ReuseType,
63}
64
65#[derive(Debug, Clone, PartialEq)]
67#[allow(dead_code)] pub enum ReuseType {
69 Direct,
71 PositionShift,
73 ContentUpdate,
75 StructuralEquivalent,
77}
78
79#[derive(Debug, Clone)]
81pub struct ReuseConfig {
82 pub min_confidence: f64,
84 pub max_position_shift: usize,
86 pub aggressive_structural_matching: bool,
88 pub enable_content_reuse: bool,
90 pub max_analysis_depth: usize,
92}
93
94impl Default for ReuseConfig {
95 fn default() -> Self {
96 ReuseConfig {
97 min_confidence: 0.75,
98 max_position_shift: 1000,
99 aggressive_structural_matching: true,
100 enable_content_reuse: true,
101 max_analysis_depth: 10,
102 }
103 }
104}
105
106impl Default for AdvancedReuseAnalyzer {
107 fn default() -> Self {
108 Self::new()
109 }
110}
111
112impl AdvancedReuseAnalyzer {
113 pub fn new() -> Self {
115 AdvancedReuseAnalyzer {
116 node_hashes: HashMap::new(),
117 position_map: HashMap::new(),
118 affected_nodes: HashSet::new(),
119 analysis_stats: ReuseAnalysisStats::default(),
120 }
121 }
122
123 pub fn with_config(_config: ReuseConfig) -> Self {
125 Self::new()
127 }
128
129 pub fn analyze_reuse_opportunities(
134 &mut self,
135 old_tree: &Node,
136 new_tree: &Node,
137 edits: &EditSet,
138 config: &ReuseConfig,
139 ) -> ReuseAnalysisResult {
140 self.analysis_stats = ReuseAnalysisStats::default();
141
142 self.node_hashes.clear();
144 self.position_map.clear();
145 self.affected_nodes.clear();
146
147 let old_analysis = self.build_tree_analysis(old_tree, config);
149 let new_analysis = self.build_tree_analysis(new_tree, config);
150
151 self.identify_affected_nodes(old_tree, edits);
153
154 let mut reuse_map = HashMap::new();
156
157 self.find_direct_structural_matches(&old_analysis, &new_analysis, &mut reuse_map, config);
159
160 self.find_position_shifted_matches(&old_analysis, &new_analysis, &mut reuse_map, config);
162
163 if config.enable_content_reuse {
165 self.find_content_updated_matches(&old_analysis, &new_analysis, &mut reuse_map, config);
166 }
167
168 if config.aggressive_structural_matching {
170 self.find_aggressive_structural_matches(
171 &old_analysis,
172 &new_analysis,
173 &mut reuse_map,
174 config,
175 );
176 }
177
178 let validated_reuse_map =
180 self.validate_reuse_candidates(reuse_map, old_tree, new_tree, config);
181
182 let total_old_nodes = self.count_nodes(old_tree);
184 let total_new_nodes = self.count_nodes(new_tree);
185 let reused_nodes = validated_reuse_map.len();
186 let reuse_percentage = if total_old_nodes > 0 {
187 (reused_nodes as f64 / total_old_nodes as f64) * 100.0
188 } else {
189 0.0
190 };
191
192 ReuseAnalysisResult {
193 reuse_map: validated_reuse_map,
194 total_old_nodes,
195 total_new_nodes,
196 reused_nodes,
197 reuse_percentage,
198 analysis_stats: self.analysis_stats.clone(),
199 }
200 }
201
202 fn build_tree_analysis(&mut self, tree: &Node, config: &ReuseConfig) -> TreeAnalysis {
204 let mut analysis = TreeAnalysis::new();
205 self.analyze_node_recursive(tree, &mut analysis, 0, config);
206 analysis
207 }
208
209 fn analyze_node_recursive(
211 &mut self,
212 node: &Node,
213 analysis: &mut TreeAnalysis,
214 depth: usize,
215 config: &ReuseConfig,
216 ) {
217 if depth > config.max_analysis_depth {
218 return;
219 }
220
221 self.analysis_stats.nodes_analyzed += 1;
222
223 let structural_hash = self.calculate_structural_hash(node);
225 self.node_hashes.insert(node.location.start, structural_hash);
226
227 let node_info = NodeAnalysisInfo {
229 node: node.clone(),
230 structural_hash,
231 depth,
232 children_count: self.get_children_count(node),
233 content_hash: self.calculate_content_hash(node),
234 };
235
236 analysis.add_node_info(node.location.start, node_info);
237
238 let candidate = NodeCandidate {
240 node: node.clone(),
241 structural_hash,
242 confidence_score: 1.0, position_delta: 0,
244 reuse_type: ReuseType::Direct,
245 };
246
247 self.position_map.entry(node.location.start).or_default().push(candidate);
248
249 match &node.kind {
251 NodeKind::Program { statements } | NodeKind::Block { statements } => {
252 for stmt in statements {
253 self.analyze_node_recursive(stmt, analysis, depth + 1, config);
254 }
255 }
256 NodeKind::VariableDeclaration { variable, initializer, .. } => {
257 self.analyze_node_recursive(variable, analysis, depth + 1, config);
258 if let Some(init) = initializer {
259 self.analyze_node_recursive(init, analysis, depth + 1, config);
260 }
261 }
262 NodeKind::Binary { left, right, .. } => {
263 self.analyze_node_recursive(left, analysis, depth + 1, config);
264 self.analyze_node_recursive(right, analysis, depth + 1, config);
265 }
266 NodeKind::Unary { operand, .. } => {
267 self.analyze_node_recursive(operand, analysis, depth + 1, config);
268 }
269 NodeKind::FunctionCall { args, .. } => {
270 for arg in args {
271 self.analyze_node_recursive(arg, analysis, depth + 1, config);
272 }
273 }
274 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
275 self.analyze_node_recursive(condition, analysis, depth + 1, config);
276 self.analyze_node_recursive(then_branch, analysis, depth + 1, config);
277 for (cond, branch) in elsif_branches {
278 self.analyze_node_recursive(cond, analysis, depth + 1, config);
279 self.analyze_node_recursive(branch, analysis, depth + 1, config);
280 }
281 if let Some(branch) = else_branch {
282 self.analyze_node_recursive(branch, analysis, depth + 1, config);
283 }
284 }
285 _ => {} }
287 }
288
289 fn identify_affected_nodes(&mut self, tree: &Node, edits: &EditSet) {
291 for range in edits.coalesced_affected_ranges() {
292 self.mark_affected_nodes_in_range(tree, range.start.byte, range.end.byte);
293 }
294 }
295
296 fn mark_affected_nodes_in_range(&mut self, node: &Node, start: usize, end: usize) {
298 let node_range = Range::from(node.location);
299 let edit_range = Range::new(Position::new(start, 0, 0), Position::new(end, 0, 0));
300
301 if !node_range.overlaps(&edit_range) {
302 return;
303 }
304 self.affected_nodes.insert(node.location.start);
305
306 match &node.kind {
308 NodeKind::Program { statements } | NodeKind::Block { statements } => {
309 for stmt in statements {
310 self.mark_affected_nodes_in_range(stmt, start, end);
311 }
312 }
313 NodeKind::VariableDeclaration { variable, initializer, .. } => {
314 self.mark_affected_nodes_in_range(variable, start, end);
315 if let Some(init) = initializer {
316 self.mark_affected_nodes_in_range(init, start, end);
317 }
318 }
319 NodeKind::Binary { left, right, .. } => {
320 self.mark_affected_nodes_in_range(left, start, end);
321 self.mark_affected_nodes_in_range(right, start, end);
322 }
323 _ => {} }
325 }
326
327 fn find_direct_structural_matches(
329 &mut self,
330 old_analysis: &TreeAnalysis,
331 new_analysis: &TreeAnalysis,
332 reuse_map: &mut HashMap<usize, ReuseStrategy>,
333 config: &ReuseConfig,
334 ) {
335 let mut used_target_positions: HashSet<usize> =
336 reuse_map.values().map(|strategy| strategy.target_position).collect();
337
338 for (old_pos, old_info) in &old_analysis.node_info {
339 if self.affected_nodes.contains(old_pos) {
341 continue;
342 }
343
344 for (new_pos, new_info) in &new_analysis.node_info {
346 if used_target_positions.contains(new_pos) {
347 continue;
348 }
349
350 if old_info.structural_hash == new_info.structural_hash
351 && old_info.children_count == new_info.children_count
352 {
353 let confidence = self.calculate_match_confidence(old_info, new_info);
354 if confidence >= config.min_confidence {
355 reuse_map.insert(
356 *old_pos,
357 ReuseStrategy {
358 target_position: *new_pos,
359 reuse_type: ReuseType::Direct,
360 confidence_score: confidence,
361 position_adjustment: (*new_pos as isize) - (*old_pos as isize),
362 },
363 );
364 used_target_positions.insert(*new_pos);
365 self.analysis_stats.structural_matches += 1;
366 break; }
368 }
369 }
370 }
371 }
372
373 fn find_position_shifted_matches(
375 &mut self,
376 old_analysis: &TreeAnalysis,
377 new_analysis: &TreeAnalysis,
378 reuse_map: &mut HashMap<usize, ReuseStrategy>,
379 config: &ReuseConfig,
380 ) {
381 let mut used_target_positions: HashSet<usize> =
382 reuse_map.values().map(|strategy| strategy.target_position).collect();
383
384 for (old_pos, old_info) in &old_analysis.node_info {
385 if reuse_map.contains_key(old_pos) {
386 continue;
387 }
388
389 let mut best_match: Option<(usize, f64)> = None;
390 for (new_pos, new_info) in &new_analysis.node_info {
391 if used_target_positions.contains(new_pos) {
392 continue;
393 }
394
395 if old_info.content_hash == new_info.content_hash
396 && old_info.structural_hash == new_info.structural_hash
397 {
398 let position_shift = (*new_pos as isize - *old_pos as isize).unsigned_abs();
399 if !self.is_position_shift_candidate_safe(
400 old_info,
401 new_info,
402 position_shift,
403 config,
404 ) {
405 continue;
406 }
407
408 let confidence = self.calculate_shifted_match_confidence(
409 old_info,
410 new_info,
411 position_shift,
412 config,
413 );
414 if confidence >= config.min_confidence
415 && best_match
416 .as_ref()
417 .is_none_or(|&(_, best_score)| confidence > best_score)
418 {
419 best_match = Some((*new_pos, confidence));
420 }
421 }
422 }
423
424 if let Some((best_pos, confidence)) = best_match {
425 reuse_map.insert(
426 *old_pos,
427 ReuseStrategy {
428 target_position: best_pos,
429 reuse_type: ReuseType::PositionShift,
430 confidence_score: confidence,
431 position_adjustment: (best_pos as isize) - (*old_pos as isize),
432 },
433 );
434 used_target_positions.insert(best_pos);
435 self.analysis_stats.position_adjustments += 1;
436 }
437 }
438 }
439
440 fn find_content_updated_matches(
442 &mut self,
443 old_analysis: &TreeAnalysis,
444 new_analysis: &TreeAnalysis,
445 reuse_map: &mut HashMap<usize, ReuseStrategy>,
446 config: &ReuseConfig,
447 ) {
448 for (old_pos, old_info) in &old_analysis.node_info {
449 if reuse_map.contains_key(old_pos) {
450 continue;
451 }
452
453 if old_info.children_count == 0 {
455 for (new_pos, new_info) in &new_analysis.node_info {
456 if old_info.structural_hash == new_info.structural_hash
457 && old_info.content_hash != new_info.content_hash
458 && self.are_compatible_for_content_update(&old_info.node, &new_info.node)
459 {
460 let confidence = 0.8; if confidence >= config.min_confidence {
462 reuse_map.insert(
463 *old_pos,
464 ReuseStrategy {
465 target_position: *new_pos,
466 reuse_type: ReuseType::ContentUpdate,
467 confidence_score: confidence,
468 position_adjustment: (*new_pos as isize) - (*old_pos as isize),
469 },
470 );
471 self.analysis_stats.content_matches += 1;
472 break;
473 }
474 }
475 }
476 }
477 }
478 }
479
480 fn find_aggressive_structural_matches(
482 &mut self,
483 old_analysis: &TreeAnalysis,
484 new_analysis: &TreeAnalysis,
485 reuse_map: &mut HashMap<usize, ReuseStrategy>,
486 config: &ReuseConfig,
487 ) {
488 for (old_pos, old_info) in &old_analysis.node_info {
491 if reuse_map.contains_key(old_pos) {
492 continue;
493 }
494
495 let mut best_match: Option<(usize, f64)> = None;
496
497 for (new_pos, new_info) in &new_analysis.node_info {
498 if old_info.children_count > 0
499 && self.get_children_count(&old_info.node)
500 != self.get_children_count(&new_info.node)
501 {
502 continue;
503 }
504
505 let similarity =
507 self.calculate_structural_similarity(&old_info.node, &new_info.node);
508 if similarity >= config.min_confidence * 0.8 {
509 if best_match.as_ref().is_none_or(|&(_, s)| similarity > s) {
511 best_match = Some((*new_pos, similarity));
512 }
513 }
514 }
515
516 if let Some((best_pos, confidence)) = best_match {
517 if confidence >= config.min_confidence * 0.7 {
518 reuse_map.insert(
520 *old_pos,
521 ReuseStrategy {
522 target_position: best_pos,
523 reuse_type: ReuseType::StructuralEquivalent,
524 confidence_score: confidence,
525 position_adjustment: (best_pos as isize) - (*old_pos as isize),
526 },
527 );
528 self.analysis_stats.reuse_candidates_found += 1;
529 }
530 }
531 }
532 }
533
534 fn validate_reuse_candidates(
536 &mut self,
537 candidates: HashMap<usize, ReuseStrategy>,
538 old_tree: &Node,
539 new_tree: &Node,
540 config: &ReuseConfig,
541 ) -> HashMap<usize, ReuseStrategy> {
542 let mut validated = HashMap::new();
543
544 for (old_pos, strategy) in candidates {
545 if self.validate_reuse_strategy(&strategy, old_tree, new_tree, config) {
546 validated.insert(old_pos, strategy);
547 self.analysis_stats.validation_passes += 1;
548 } else {
549 self.analysis_stats.validation_failures += 1;
550 }
551 }
552
553 validated
554 }
555
556 fn calculate_structural_hash(&self, node: &Node) -> u64 {
558 let mut hasher = DefaultHasher::new();
559
560 std::mem::discriminant(&node.kind).hash(&mut hasher);
562
563 match &node.kind {
565 NodeKind::Program { statements } => {
566 statements.len().hash(&mut hasher);
567 "program".hash(&mut hasher);
568 }
569 NodeKind::Block { statements } => {
570 statements.len().hash(&mut hasher);
571 "block".hash(&mut hasher);
572 }
573 NodeKind::VariableDeclaration { declarator, .. } => {
574 declarator.hash(&mut hasher);
575 "vardecl".hash(&mut hasher);
576 }
577 NodeKind::Binary { op, .. } => {
578 op.hash(&mut hasher);
579 "binary".hash(&mut hasher);
580 }
581 NodeKind::Unary { op, .. } => {
582 op.hash(&mut hasher);
583 "unary".hash(&mut hasher);
584 }
585 NodeKind::FunctionCall { name, args } => {
586 name.hash(&mut hasher);
587 args.len().hash(&mut hasher);
588 "funccall".hash(&mut hasher);
589 }
590 NodeKind::Number { .. } => "number".hash(&mut hasher),
591 NodeKind::String { interpolated, .. } => {
592 interpolated.hash(&mut hasher);
593 "string".hash(&mut hasher);
594 }
595 NodeKind::Variable { sigil, .. } => {
596 sigil.hash(&mut hasher);
597 "variable".hash(&mut hasher);
598 }
599 NodeKind::Identifier { .. } => "identifier".hash(&mut hasher),
600 _ => "other".hash(&mut hasher),
601 }
602
603 hasher.finish()
604 }
605
606 fn calculate_content_hash(&self, node: &Node) -> u64 {
608 let mut hasher = DefaultHasher::new();
609
610 match &node.kind {
611 NodeKind::Number { value } => value.hash(&mut hasher),
612 NodeKind::String { value, .. } => value.hash(&mut hasher),
613 NodeKind::Variable { name, .. } => name.hash(&mut hasher),
614 NodeKind::Identifier { name } => name.hash(&mut hasher),
615 _ => {
616 self.calculate_structural_hash(node).hash(&mut hasher);
618 }
619 }
620
621 hasher.finish()
622 }
623
624 fn get_children_count(&self, node: &Node) -> usize {
626 match &node.kind {
627 NodeKind::Program { statements } | NodeKind::Block { statements } => statements.len(),
628 NodeKind::VariableDeclaration { initializer, .. } => {
629 if initializer.is_some() { 2 } else { 1 } }
631 NodeKind::Binary { .. } => 2, NodeKind::Unary { .. } => 1, NodeKind::FunctionCall { args, .. } => args.len(),
634 NodeKind::If { elsif_branches, else_branch, .. } => {
635 2 + elsif_branches.len() * 2 + if else_branch.is_some() { 1 } else { 0 }
636 }
637 _ => 0, }
639 }
640
641 fn calculate_match_confidence(
643 &self,
644 old_info: &NodeAnalysisInfo,
645 new_info: &NodeAnalysisInfo,
646 ) -> f64 {
647 let mut confidence = 0.0f64;
648
649 if old_info.structural_hash == new_info.structural_hash {
651 confidence += 0.4;
652 }
653
654 if old_info.content_hash == new_info.content_hash {
656 confidence += 0.3;
657 }
658
659 if old_info.children_count == new_info.children_count {
661 confidence += 0.2;
662 }
663
664 let depth_diff = (old_info.depth as isize - new_info.depth as isize).abs();
666 if depth_diff == 0 {
667 confidence += 0.1;
668 } else if depth_diff <= 2 {
669 confidence += 0.05;
670 }
671
672 confidence.min(1.0)
673 }
674
675 fn calculate_structural_similarity(&self, old_node: &Node, new_node: &Node) -> f64 {
677 let mut similarity = 0.0;
679
680 if std::mem::discriminant(&old_node.kind) == std::mem::discriminant(&new_node.kind) {
682 similarity += 0.5;
683
684 match (&old_node.kind, &new_node.kind) {
686 (NodeKind::Program { statements: s1 }, NodeKind::Program { statements: s2 }) => {
687 let len_similarity = 1.0
688 - ((s1.len() as f64 - s2.len() as f64).abs()
689 / (s1.len().max(s2.len()) as f64));
690 similarity += 0.3 * len_similarity;
691 }
692 (NodeKind::Binary { op: op1, .. }, NodeKind::Binary { op: op2, .. }) => {
693 if op1 == op2 {
694 similarity += 0.4;
695 }
696 }
697 (
698 NodeKind::FunctionCall { name: n1, args: a1 },
699 NodeKind::FunctionCall { name: n2, args: a2 },
700 ) => {
701 if n1 == n2 {
702 similarity += 0.3;
703 }
704 let arg_similarity = 1.0
705 - ((a1.len() as f64 - a2.len() as f64).abs()
706 / (a1.len().max(a2.len()) as f64));
707 similarity += 0.2 * arg_similarity;
708 }
709 _ => {
710 similarity += 0.2; }
712 }
713 }
714
715 similarity.min(1.0)
716 }
717
718 fn are_compatible_for_content_update(&self, old_node: &Node, new_node: &Node) -> bool {
720 match (&old_node.kind, &new_node.kind) {
721 (NodeKind::Number { .. }, NodeKind::Number { .. }) => true,
722 (
723 NodeKind::String { interpolated: i1, .. },
724 NodeKind::String { interpolated: i2, .. },
725 ) => i1 == i2,
726 (NodeKind::Variable { sigil: s1, .. }, NodeKind::Variable { sigil: s2, .. }) => {
727 s1 == s2
728 }
729 (NodeKind::Identifier { .. }, NodeKind::Identifier { .. }) => true,
730 _ => false,
731 }
732 }
733
734 fn is_position_shift_candidate_safe(
735 &self,
736 old_info: &NodeAnalysisInfo,
737 new_info: &NodeAnalysisInfo,
738 position_shift: usize,
739 config: &ReuseConfig,
740 ) -> bool {
741 if position_shift > config.max_position_shift {
742 return false;
743 }
744
745 let old_is_container = self.is_container_node(&old_info.node);
746 let new_is_container = self.is_container_node(&new_info.node);
747 if old_is_container != new_is_container {
748 return false;
749 }
750
751 if old_is_container {
752 let max_container_shift = config.max_position_shift / 4;
753 if position_shift > max_container_shift {
754 return false;
755 }
756
757 let depth_diff = (old_info.depth as isize - new_info.depth as isize).unsigned_abs();
758 if depth_diff > 1 || old_info.children_count != new_info.children_count {
759 return false;
760 }
761 }
762
763 true
764 }
765
766 fn calculate_shifted_match_confidence(
767 &self,
768 old_info: &NodeAnalysisInfo,
769 new_info: &NodeAnalysisInfo,
770 position_shift: usize,
771 config: &ReuseConfig,
772 ) -> f64 {
773 let base_confidence = self.calculate_match_confidence(old_info, new_info);
774 let shift_ratio = position_shift as f64 / config.max_position_shift.max(1) as f64;
775
776 let shift_penalty = if self.is_container_node(&old_info.node) {
777 0.45 * shift_ratio.min(1.0)
778 } else if self.is_content_stable_leaf(&old_info.node) {
779 0.12 * shift_ratio.min(1.0)
780 } else {
781 0.30 * shift_ratio.min(1.0)
782 };
783
784 (base_confidence - shift_penalty).clamp(0.0, 1.0)
785 }
786
787 fn is_content_stable_leaf(&self, node: &Node) -> bool {
788 matches!(
789 node.kind,
790 NodeKind::Number { .. }
791 | NodeKind::String { .. }
792 | NodeKind::Identifier { .. }
793 | NodeKind::Variable { .. }
794 )
795 }
796
797 fn is_container_node(&self, node: &Node) -> bool {
798 matches!(node.kind, NodeKind::Program { .. } | NodeKind::Block { .. } | NodeKind::If { .. })
799 }
800
801 fn validate_reuse_strategy(
803 &self,
804 _strategy: &ReuseStrategy,
805 _old_tree: &Node,
806 _new_tree: &Node,
807 _config: &ReuseConfig,
808 ) -> bool {
809 true
815 }
816
817 fn count_nodes(&self, node: &Node) -> usize {
819 let mut count = 1;
820
821 match &node.kind {
822 NodeKind::Program { statements } | NodeKind::Block { statements } => {
823 for stmt in statements {
824 count += self.count_nodes(stmt);
825 }
826 }
827 NodeKind::VariableDeclaration { variable, initializer, .. } => {
828 count += self.count_nodes(variable);
829 if let Some(init) = initializer {
830 count += self.count_nodes(init);
831 }
832 }
833 NodeKind::Binary { left, right, .. } => {
834 count += self.count_nodes(left);
835 count += self.count_nodes(right);
836 }
837 NodeKind::Unary { operand, .. } => {
838 count += self.count_nodes(operand);
839 }
840 NodeKind::FunctionCall { args, .. } => {
841 for arg in args {
842 count += self.count_nodes(arg);
843 }
844 }
845 NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
846 count += self.count_nodes(condition);
847 count += self.count_nodes(then_branch);
848 for (cond, branch) in elsif_branches {
849 count += self.count_nodes(cond);
850 count += self.count_nodes(branch);
851 }
852 if let Some(branch) = else_branch {
853 count += self.count_nodes(branch);
854 }
855 }
856 _ => {} }
858
859 count
860 }
861
862 pub fn map_old_position_to_new(&self, old_pos: usize, edits: &EditSet) -> usize {
878 let mut shift: isize = 0;
879 for edit in edits.edits() {
880 if old_pos < edit.start_byte {
881 break;
882 }
883 if old_pos < edit.old_end_byte {
884 return edit.new_end_byte;
886 }
887 shift += edit.byte_shift();
888 }
889 let signed = old_pos as isize + shift;
890 if signed < 0 { 0 } else { signed as usize }
891 }
892
893 pub fn try_register_match(
901 &self,
902 reuse_map: &mut HashMap<usize, ReuseStrategy>,
903 old_pos: usize,
904 new_pos: usize,
905 reuse_type: ReuseType,
906 confidence: f64,
907 ) -> bool {
908 if reuse_map.values().any(|s| s.target_position == new_pos) {
909 return false;
910 }
911 reuse_map.insert(
912 old_pos,
913 ReuseStrategy {
914 target_position: new_pos,
915 reuse_type,
916 confidence_score: confidence,
917 position_adjustment: (new_pos as isize) - (old_pos as isize),
918 },
919 );
920 true
921 }
922}
923
924#[derive(Debug)]
926struct TreeAnalysis {
927 node_info: HashMap<usize, NodeAnalysisInfo>,
928}
929
930impl TreeAnalysis {
931 fn new() -> Self {
932 TreeAnalysis { node_info: HashMap::new() }
933 }
934
935 fn add_node_info(&mut self, position: usize, info: NodeAnalysisInfo) {
936 self.node_info.insert(position, info);
937 }
938}
939
940#[derive(Debug, Clone)]
942struct NodeAnalysisInfo {
943 node: Node,
944 structural_hash: u64,
945 content_hash: u64,
946 depth: usize,
947 children_count: usize,
948}
949
950#[derive(Debug, Clone)]
952pub struct ReuseStrategy {
953 pub target_position: usize,
954 pub reuse_type: ReuseType,
955 pub confidence_score: f64,
956 pub position_adjustment: isize,
957}
958
959#[derive(Debug)]
961pub struct ReuseAnalysisResult {
962 pub reuse_map: HashMap<usize, ReuseStrategy>,
963 pub total_old_nodes: usize,
964 pub total_new_nodes: usize,
965 pub reused_nodes: usize,
966 pub reuse_percentage: f64,
967 pub analysis_stats: ReuseAnalysisStats,
968}
969
970impl ReuseAnalysisResult {
971 pub fn meets_efficiency_target(&self, target_percentage: f64) -> bool {
973 self.reuse_percentage >= target_percentage
974 }
975
976 pub fn performance_summary(&self) -> String {
978 format!(
979 "Reuse Analysis: {:.1}% efficiency ({}/{} nodes), {} structural matches, {} position adjustments",
980 self.reuse_percentage,
981 self.reused_nodes,
982 self.total_old_nodes,
983 self.analysis_stats.structural_matches,
984 self.analysis_stats.position_adjustments
985 )
986 }
987}
988
989#[cfg(test)]
990mod tests {
991 use super::*;
992 use perl_parser_core::{SourceLocation, ast::Node};
993
994 #[test]
995 fn test_advanced_reuse_analyzer_creation() {
996 let analyzer = AdvancedReuseAnalyzer::new();
997 assert_eq!(analyzer.analysis_stats.nodes_analyzed, 0);
998 }
999
1000 #[test]
1001 fn test_structural_hash_calculation() {
1002 let analyzer = AdvancedReuseAnalyzer::new();
1003
1004 let node1 = Node::new(
1006 NodeKind::Number { value: "42".to_string() },
1007 SourceLocation { start: 0, end: 2 },
1008 );
1009
1010 let node2 = Node::new(
1011 NodeKind::Number { value: "99".to_string() },
1012 SourceLocation { start: 0, end: 2 },
1013 );
1014
1015 let hash1 = analyzer.calculate_structural_hash(&node1);
1016 let hash2 = analyzer.calculate_structural_hash(&node2);
1017
1018 assert_eq!(hash1, hash2, "Numbers should have same structural hash regardless of value");
1020 }
1021
1022 #[test]
1023 fn test_content_hash_differs_for_different_values() {
1024 let analyzer = AdvancedReuseAnalyzer::new();
1025
1026 let node1 = Node::new(
1027 NodeKind::Number { value: "42".to_string() },
1028 SourceLocation { start: 0, end: 2 },
1029 );
1030
1031 let node2 = Node::new(
1032 NodeKind::Number { value: "99".to_string() },
1033 SourceLocation { start: 0, end: 2 },
1034 );
1035
1036 let hash1 = analyzer.calculate_content_hash(&node1);
1037 let hash2 = analyzer.calculate_content_hash(&node2);
1038
1039 assert_ne!(hash1, hash2, "Different values should have different content hashes");
1040 }
1041
1042 #[test]
1043 fn test_children_count_calculation() {
1044 let analyzer = AdvancedReuseAnalyzer::new();
1045
1046 let leaf = Node::new(
1048 NodeKind::Number { value: "42".to_string() },
1049 SourceLocation { start: 0, end: 2 },
1050 );
1051 assert_eq!(analyzer.get_children_count(&leaf), 0);
1052
1053 let binary = Node::new(
1055 NodeKind::Binary {
1056 op: "+".to_string(),
1057 left: Box::new(leaf.clone()),
1058 right: Box::new(leaf.clone()),
1059 },
1060 SourceLocation { start: 0, end: 5 },
1061 );
1062 assert_eq!(analyzer.get_children_count(&binary), 2);
1063
1064 let program = Node::new(
1066 NodeKind::Program { statements: vec![binary] },
1067 SourceLocation { start: 0, end: 5 },
1068 );
1069 assert_eq!(analyzer.get_children_count(&program), 1);
1070 }
1071
1072 #[test]
1073 fn test_reuse_config_defaults() {
1074 let config = ReuseConfig::default();
1075 assert_eq!(config.min_confidence, 0.75);
1076 assert_eq!(config.max_position_shift, 1000);
1077 assert!(config.aggressive_structural_matching);
1078 assert!(config.enable_content_reuse);
1079 assert_eq!(config.max_analysis_depth, 10);
1080 }
1081
1082 #[test]
1083 fn test_node_compatibility_for_content_update() {
1084 let analyzer = AdvancedReuseAnalyzer::new();
1085
1086 let num1 = Node::new(
1087 NodeKind::Number { value: "42".to_string() },
1088 SourceLocation { start: 0, end: 2 },
1089 );
1090
1091 let num2 = Node::new(
1092 NodeKind::Number { value: "99".to_string() },
1093 SourceLocation { start: 0, end: 2 },
1094 );
1095
1096 let str1 = Node::new(
1097 NodeKind::String { value: "hello".to_string(), interpolated: false },
1098 SourceLocation { start: 0, end: 7 },
1099 );
1100
1101 assert!(analyzer.are_compatible_for_content_update(&num1, &num2));
1103
1104 assert!(!analyzer.are_compatible_for_content_update(&num1, &str1));
1106 }
1107
1108 #[test]
1109 fn test_identify_affected_nodes_marks_only_overlapping_regions() {
1110 let mut analyzer = AdvancedReuseAnalyzer::new();
1111 let tree = Node::new(
1112 NodeKind::Program {
1113 statements: vec![
1114 Node::new(
1115 NodeKind::Number { value: "1".to_string() },
1116 SourceLocation { start: 0, end: 1 },
1117 ),
1118 Node::new(
1119 NodeKind::Number { value: "2".to_string() },
1120 SourceLocation { start: 20, end: 21 },
1121 ),
1122 ],
1123 },
1124 SourceLocation { start: 0, end: 21 },
1125 );
1126
1127 let mut edits = EditSet::new();
1128 edits.add(perl_parser_core::edit::Edit::new(
1129 0,
1130 2,
1131 2,
1132 Position::new(0, 0, 0),
1133 Position::new(2, 0, 2),
1134 Position::new(2, 0, 2),
1135 ));
1136
1137 analyzer.identify_affected_nodes(&tree, &edits);
1138
1139 assert!(analyzer.affected_nodes.contains(&0));
1140 assert!(!analyzer.affected_nodes.contains(&20));
1141 }
1142
1143 fn make_edit(start: usize, old_end: usize, new_end: usize) -> perl_parser_core::edit::Edit {
1146 perl_parser_core::edit::Edit::new(
1147 start,
1148 old_end,
1149 new_end,
1150 Position::new(start, 0, start as u32),
1151 Position::new(old_end, 0, old_end as u32),
1152 Position::new(new_end, 0, new_end as u32),
1153 )
1154 }
1155
1156 #[test]
1157 fn test_map_position_before_edit_unchanged() {
1158 let analyzer = AdvancedReuseAnalyzer::new();
1159 let mut edits = EditSet::new();
1160 edits.add(make_edit(10, 12, 14));
1161 assert_eq!(analyzer.map_old_position_to_new(5, &edits), 5);
1163 }
1164
1165 #[test]
1166 fn test_map_position_after_single_edit_shifted() {
1167 let analyzer = AdvancedReuseAnalyzer::new();
1168 let mut edits = EditSet::new();
1169 edits.add(make_edit(8, 10, 11)); assert_eq!(analyzer.map_old_position_to_new(13, &edits), 14);
1172 }
1173
1174 #[test]
1175 fn test_map_position_accumulates_two_shifts() {
1176 let analyzer = AdvancedReuseAnalyzer::new();
1177 let mut edits = EditSet::new();
1178 edits.add(make_edit(8, 10, 11)); edits.add(make_edit(24, 26, 27)); assert_eq!(analyzer.map_old_position_to_new(30, &edits), 32);
1182 }
1183
1184 #[test]
1189 fn test_map_position_at_edit_start_returns_new_end() {
1190 let analyzer = AdvancedReuseAnalyzer::new();
1191 let mut edits = EditSet::new();
1192 edits.add(make_edit(10, 20, 15));
1194 assert_eq!(analyzer.map_old_position_to_new(10, &edits), 15);
1196 }
1197
1198 #[test]
1199 fn test_map_position_at_edit_old_end_is_shifted() {
1200 let analyzer = AdvancedReuseAnalyzer::new();
1201 let mut edits = EditSet::new();
1202 edits.add(make_edit(10, 20, 25)); assert_eq!(analyzer.map_old_position_to_new(20, &edits), 25);
1205 }
1206
1207 #[test]
1208 fn test_map_position_inside_edit_returns_new_end() {
1209 let analyzer = AdvancedReuseAnalyzer::new();
1210 let mut edits = EditSet::new();
1211 edits.add(make_edit(10, 30, 20)); assert_eq!(analyzer.map_old_position_to_new(15, &edits), 20);
1213 }
1214
1215 #[test]
1216 fn test_map_position_between_two_edits_shifted_by_first_only() {
1217 let analyzer = AdvancedReuseAnalyzer::new();
1218 let mut edits = EditSet::new();
1219 edits.add(make_edit(5, 7, 8)); edits.add(make_edit(20, 22, 23)); assert_eq!(analyzer.map_old_position_to_new(10, &edits), 11);
1223 }
1224
1225 #[test]
1228 fn test_try_register_match_rejects_duplicate_new_pos() {
1229 let analyzer = AdvancedReuseAnalyzer::new();
1230 let mut reuse_map = HashMap::new();
1231
1232 let first = analyzer.try_register_match(&mut reuse_map, 0, 10, ReuseType::Direct, 0.9);
1233 assert!(first, "first registration should succeed");
1234
1235 let dup = analyzer.try_register_match(&mut reuse_map, 5, 10, ReuseType::Direct, 0.95);
1236 assert!(!dup, "second registration for same new_pos must be rejected");
1237 assert_eq!(reuse_map.len(), 1);
1239 assert_eq!(reuse_map[&0].target_position, 10);
1240 }
1241
1242 #[test]
1243 fn test_try_register_match_distinct_new_positions_both_succeed() {
1244 let analyzer = AdvancedReuseAnalyzer::new();
1245 let mut reuse_map = HashMap::new();
1246
1247 assert!(analyzer.try_register_match(&mut reuse_map, 0, 10, ReuseType::Direct, 0.9));
1248 assert!(
1249 analyzer.try_register_match(&mut reuse_map, 5, 20, ReuseType::PositionShift, 0.85,)
1250 );
1251 assert_eq!(reuse_map.len(), 2);
1252 }
1253}