codeprism_dev_tools/
diff_comparison.rs

1//! AST diff comparison utilities for parser development
2
3use anyhow::Result;
4use codeprism_core::ParseResult;
5use serde::{Deserialize, Serialize};
6use std::collections::{HashMap, HashSet};
7
8/// AST diff analyzer for comparing parse results
9#[derive(Debug, Clone)]
10pub struct AstDiff {
11    config: DiffConfig,
12}
13
14/// Configuration for diff analysis
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct DiffConfig {
17    pub ignore_spans: bool,
18    pub ignore_node_ids: bool,
19    pub compare_edge_order: bool,
20    pub max_differences: usize,
21    pub similarity_threshold: f64,
22}
23
24impl Default for DiffConfig {
25    fn default() -> Self {
26        Self {
27            ignore_spans: false,
28            ignore_node_ids: true,
29            compare_edge_order: false,
30            max_differences: 100,
31            similarity_threshold: 0.8,
32        }
33    }
34}
35
36/// Type of difference found between ASTs
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub enum DiffType {
39    NodeAdded {
40        node_name: String,
41        node_type: String,
42        location: Option<String>,
43    },
44    NodeRemoved {
45        node_name: String,
46        node_type: String,
47        location: Option<String>,
48    },
49    NodeModified {
50        node_name: String,
51        old_type: String,
52        new_type: String,
53        changes: Vec<String>,
54    },
55    EdgeAdded {
56        source: String,
57        target: String,
58        edge_type: String,
59    },
60    EdgeRemoved {
61        source: String,
62        target: String,
63        edge_type: String,
64    },
65    EdgeModified {
66        source: String,
67        target: String,
68        old_type: String,
69        new_type: String,
70    },
71    StructuralChange {
72        description: String,
73        impact: StructuralImpact,
74    },
75}
76
77/// Impact level of structural changes
78#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
79pub enum StructuralImpact {
80    Low,
81    Medium,
82    High,
83    Critical,
84}
85
86/// Comprehensive diff report
87#[derive(Debug, Clone)]
88pub struct DiffReport {
89    pub differences: Vec<DiffType>,
90    pub statistics: DiffStatistics,
91    pub similarity_score: f64,
92    pub is_significant_change: bool,
93    pub summary: String,
94}
95
96/// Statistics about the differences found
97#[derive(Debug, Clone, Default)]
98pub struct DiffStatistics {
99    pub nodes_added: usize,
100    pub nodes_removed: usize,
101    pub nodes_modified: usize,
102    pub edges_added: usize,
103    pub edges_removed: usize,
104    pub edges_modified: usize,
105    pub total_differences: usize,
106    pub similarity_percentage: f64,
107}
108
109impl AstDiff {
110    /// Create a new AST diff analyzer
111    pub fn new() -> Self {
112        Self {
113            config: DiffConfig::default(),
114        }
115    }
116
117    /// Create with custom configuration
118    pub fn with_config(config: DiffConfig) -> Self {
119        Self { config }
120    }
121
122    /// Compare two parse results and generate a diff report
123    pub fn compare(
124        &self,
125        old_result: &ParseResult,
126        new_result: &ParseResult,
127        _source: &str,
128    ) -> Result<DiffReport> {
129        let mut differences = Vec::new();
130        let mut statistics = DiffStatistics::default();
131
132        // Compare nodes
133        self.compare_nodes(
134            &old_result.nodes,
135            &new_result.nodes,
136            &mut differences,
137            &mut statistics,
138        )?;
139
140        // Compare edges
141        self.compare_edges(
142            &old_result.edges,
143            &new_result.edges,
144            &mut differences,
145            &mut statistics,
146        )?;
147
148        // Compare tree structures
149        self.compare_tree_structures(&old_result.tree, &new_result.tree, &mut differences)?;
150
151        // Calculate similarity score
152        let similarity_score = self.calculate_similarity_score(&statistics, old_result, new_result);
153
154        // Determine if this is a significant change
155        let is_significant_change = similarity_score < self.config.similarity_threshold;
156
157        // Generate summary
158        let summary = self.generate_summary(&statistics, similarity_score);
159
160        statistics.total_differences = differences.len();
161        statistics.similarity_percentage = similarity_score * 100.0;
162
163        Ok(DiffReport {
164            differences,
165            statistics,
166            similarity_score,
167            is_significant_change,
168            summary,
169        })
170    }
171
172    /// Compare node lists
173    fn compare_nodes(
174        &self,
175        old_nodes: &[codeprism_core::Node],
176        new_nodes: &[codeprism_core::Node],
177        differences: &mut Vec<DiffType>,
178        statistics: &mut DiffStatistics,
179    ) -> Result<()> {
180        // Create maps for easier comparison
181        let old_node_map: HashMap<String, &codeprism_core::Node> = old_nodes
182            .iter()
183            .map(|n| (self.create_node_key(n), n))
184            .collect();
185
186        let new_node_map: HashMap<String, &codeprism_core::Node> = new_nodes
187            .iter()
188            .map(|n| (self.create_node_key(n), n))
189            .collect();
190
191        let old_keys: HashSet<_> = old_node_map.keys().collect();
192        let new_keys: HashSet<_> = new_node_map.keys().collect();
193
194        // Find added nodes
195        for key in new_keys.difference(&old_keys) {
196            if let Some(node) = new_node_map.get(*key) {
197                differences.push(DiffType::NodeAdded {
198                    node_name: node.name.clone(),
199                    node_type: format!("{:?}", node.kind),
200                    location: Some(format!("{}:{}", node.span.start_byte, node.span.end_byte)),
201                });
202                statistics.nodes_added += 1;
203            }
204        }
205
206        // Find removed nodes
207        for key in old_keys.difference(&new_keys) {
208            if let Some(node) = old_node_map.get(*key) {
209                differences.push(DiffType::NodeRemoved {
210                    node_name: node.name.clone(),
211                    node_type: format!("{:?}", node.kind),
212                    location: Some(format!("{}:{}", node.span.start_byte, node.span.end_byte)),
213                });
214                statistics.nodes_removed += 1;
215            }
216        }
217
218        // Find modified nodes (same key but different properties)
219        for key in old_keys.intersection(&new_keys) {
220            if let (Some(old_node), Some(new_node)) =
221                (old_node_map.get(*key), new_node_map.get(*key))
222            {
223                let changes = self.compare_node_properties(old_node, new_node);
224                if !changes.is_empty() {
225                    differences.push(DiffType::NodeModified {
226                        node_name: old_node.name.clone(),
227                        old_type: format!("{:?}", old_node.kind),
228                        new_type: format!("{:?}", new_node.kind),
229                        changes,
230                    });
231                    statistics.nodes_modified += 1;
232                }
233            }
234        }
235
236        Ok(())
237    }
238
239    /// Compare edge lists
240    fn compare_edges(
241        &self,
242        old_edges: &[codeprism_core::Edge],
243        new_edges: &[codeprism_core::Edge],
244        differences: &mut Vec<DiffType>,
245        statistics: &mut DiffStatistics,
246    ) -> Result<()> {
247        let old_edge_map: HashMap<String, &codeprism_core::Edge> = old_edges
248            .iter()
249            .map(|e| (self.create_edge_key(e), e))
250            .collect();
251
252        let new_edge_map: HashMap<String, &codeprism_core::Edge> = new_edges
253            .iter()
254            .map(|e| (self.create_edge_key(e), e))
255            .collect();
256
257        let old_keys: HashSet<_> = old_edge_map.keys().collect();
258        let new_keys: HashSet<_> = new_edge_map.keys().collect();
259
260        // Find added edges
261        for key in new_keys.difference(&old_keys) {
262            if let Some(edge) = new_edge_map.get(*key) {
263                differences.push(DiffType::EdgeAdded {
264                    source: edge.source.to_hex(),
265                    target: edge.target.to_hex(),
266                    edge_type: format!("{:?}", edge.kind),
267                });
268                statistics.edges_added += 1;
269            }
270        }
271
272        // Find removed edges
273        for key in old_keys.difference(&new_keys) {
274            if let Some(edge) = old_edge_map.get(*key) {
275                differences.push(DiffType::EdgeRemoved {
276                    source: edge.source.to_hex(),
277                    target: edge.target.to_hex(),
278                    edge_type: format!("{:?}", edge.kind),
279                });
280                statistics.edges_removed += 1;
281            }
282        }
283
284        Ok(())
285    }
286
287    /// Compare tree structures
288    fn compare_tree_structures(
289        &self,
290        old_tree: &tree_sitter::Tree,
291        new_tree: &tree_sitter::Tree,
292        differences: &mut Vec<DiffType>,
293    ) -> Result<()> {
294        let old_root = old_tree.root_node();
295        let new_root = new_tree.root_node();
296
297        // Compare root node types
298        if old_root.kind() != new_root.kind() {
299            differences.push(DiffType::StructuralChange {
300                description: format!(
301                    "Root node type changed from '{}' to '{}'",
302                    old_root.kind(),
303                    new_root.kind()
304                ),
305                impact: StructuralImpact::Critical,
306            });
307        }
308
309        // Compare tree depths
310        let old_depth = self.calculate_tree_depth(&old_root);
311        let new_depth = self.calculate_tree_depth(&new_root);
312
313        if (old_depth as i32 - new_depth as i32).abs() > 2 {
314            differences.push(DiffType::StructuralChange {
315                description: format!(
316                    "Significant tree depth change: {} -> {}",
317                    old_depth, new_depth
318                ),
319                impact: if (old_depth as i32 - new_depth as i32).abs() > 5 {
320                    StructuralImpact::High
321                } else {
322                    StructuralImpact::Medium
323                },
324            });
325        }
326
327        Ok(())
328    }
329
330    /// Create a unique key for a node (ignoring IDs if configured)
331    fn create_node_key(&self, node: &codeprism_core::Node) -> String {
332        if self.config.ignore_node_ids {
333            if self.config.ignore_spans {
334                format!("{}:{:?}", node.name, node.kind)
335            } else {
336                format!(
337                    "{}:{:?}:{}:{}",
338                    node.name, node.kind, node.span.start_byte, node.span.end_byte
339                )
340            }
341        } else {
342            node.id.to_hex()
343        }
344    }
345
346    /// Create a unique key for an edge
347    fn create_edge_key(&self, edge: &codeprism_core::Edge) -> String {
348        format!(
349            "{}->{}:{:?}",
350            edge.source.to_hex(),
351            edge.target.to_hex(),
352            edge.kind
353        )
354    }
355
356    /// Compare properties of two nodes
357    fn compare_node_properties(
358        &self,
359        old_node: &codeprism_core::Node,
360        new_node: &codeprism_core::Node,
361    ) -> Vec<String> {
362        let mut changes = Vec::new();
363
364        if old_node.kind != new_node.kind {
365            changes.push(format!(
366                "Type changed: {:?} -> {:?}",
367                old_node.kind, new_node.kind
368            ));
369        }
370
371        if old_node.name != new_node.name {
372            changes.push(format!(
373                "Name changed: '{}' -> '{}'",
374                old_node.name, new_node.name
375            ));
376        }
377
378        if !self.config.ignore_spans
379            && (old_node.span.start_byte != new_node.span.start_byte
380                || old_node.span.end_byte != new_node.span.end_byte)
381        {
382            changes.push(format!(
383                "Span changed: {}..{} -> {}..{}",
384                old_node.span.start_byte,
385                old_node.span.end_byte,
386                new_node.span.start_byte,
387                new_node.span.end_byte
388            ));
389        }
390
391        changes
392    }
393
394    /// Calculate tree depth
395    #[allow(clippy::only_used_in_recursion)] // Method is used recursively by design
396    fn calculate_tree_depth(&self, node: &tree_sitter::Node) -> usize {
397        let mut max_depth = 0;
398        for i in 0..node.child_count() {
399            if let Some(child) = node.child(i) {
400                let child_depth = self.calculate_tree_depth(&child);
401                max_depth = max_depth.max(child_depth);
402            }
403        }
404        max_depth + 1
405    }
406
407    /// Calculate similarity score between two parse results
408    fn calculate_similarity_score(
409        &self,
410        statistics: &DiffStatistics,
411        old_result: &ParseResult,
412        new_result: &ParseResult,
413    ) -> f64 {
414        let total_old_items = old_result.nodes.len() + old_result.edges.len();
415        let total_new_items = new_result.nodes.len() + new_result.edges.len();
416        let max_items = total_old_items.max(total_new_items) as f64;
417
418        if max_items == 0.0 {
419            return 1.0; // Both empty, perfect similarity
420        }
421
422        let total_changes = statistics.nodes_added
423            + statistics.nodes_removed
424            + statistics.nodes_modified
425            + statistics.edges_added
426            + statistics.edges_removed
427            + statistics.edges_modified;
428
429        let similarity = 1.0 - (total_changes as f64 / max_items);
430        similarity.max(0.0)
431    }
432
433    /// Generate a human-readable summary
434    fn generate_summary(&self, statistics: &DiffStatistics, similarity_score: f64) -> String {
435        if statistics.total_differences == 0 {
436            return "No differences found - ASTs are identical".to_string();
437        }
438
439        let mut summary = format!(
440            "Found {} differences (similarity: {:.1}%)",
441            statistics.total_differences,
442            similarity_score * 100.0
443        );
444
445        let mut parts = Vec::new();
446
447        if statistics.nodes_added > 0 {
448            parts.push(format!("{} nodes added", statistics.nodes_added));
449        }
450        if statistics.nodes_removed > 0 {
451            parts.push(format!("{} nodes removed", statistics.nodes_removed));
452        }
453        if statistics.nodes_modified > 0 {
454            parts.push(format!("{} nodes modified", statistics.nodes_modified));
455        }
456        if statistics.edges_added > 0 {
457            parts.push(format!("{} edges added", statistics.edges_added));
458        }
459        if statistics.edges_removed > 0 {
460            parts.push(format!("{} edges removed", statistics.edges_removed));
461        }
462        if statistics.edges_modified > 0 {
463            parts.push(format!("{} edges modified", statistics.edges_modified));
464        }
465
466        if !parts.is_empty() {
467            summary.push_str(": ");
468            summary.push_str(&parts.join(", "));
469        }
470
471        summary
472    }
473}
474
475impl Default for AstDiff {
476    fn default() -> Self {
477        Self::new()
478    }
479}
480
481impl DiffReport {
482    /// Format the diff report for display
483    pub fn format_report(&self) -> String {
484        let mut output = String::new();
485
486        output.push_str("=== AST Diff Report ===\n\n");
487        output.push_str(&format!("Summary: {}\n", self.summary));
488        output.push_str(&format!(
489            "Similarity Score: {:.1}%\n",
490            self.similarity_score * 100.0
491        ));
492
493        if self.is_significant_change {
494            output.push_str("⚠️  Significant changes detected!\n");
495        } else {
496            output.push_str("✅ Minor changes only\n");
497        }
498
499        output.push('\n');
500
501        if !self.differences.is_empty() {
502            output.push_str("## Detailed Changes:\n");
503            for (i, diff) in self.differences.iter().enumerate() {
504                output.push_str(&format!("{}. {}\n", i + 1, self.format_diff(diff)));
505            }
506        }
507
508        output.push_str("\n## Statistics:\n");
509        output.push_str(&format!("- Nodes added: {}\n", self.statistics.nodes_added));
510        output.push_str(&format!(
511            "- Nodes removed: {}\n",
512            self.statistics.nodes_removed
513        ));
514        output.push_str(&format!(
515            "- Nodes modified: {}\n",
516            self.statistics.nodes_modified
517        ));
518        output.push_str(&format!("- Edges added: {}\n", self.statistics.edges_added));
519        output.push_str(&format!(
520            "- Edges removed: {}\n",
521            self.statistics.edges_removed
522        ));
523        output.push_str(&format!(
524            "- Edges modified: {}\n",
525            self.statistics.edges_modified
526        ));
527        output.push_str(&format!(
528            "- Total differences: {}\n",
529            self.statistics.total_differences
530        ));
531
532        output
533    }
534
535    /// Format a single difference
536    fn format_diff(&self, diff: &DiffType) -> String {
537        match diff {
538            DiffType::NodeAdded {
539                node_name,
540                node_type,
541                location,
542            } => {
543                format!(
544                    "Added node '{}' ({}) at {}",
545                    node_name,
546                    node_type,
547                    location.as_deref().unwrap_or("unknown")
548                )
549            }
550            DiffType::NodeRemoved {
551                node_name,
552                node_type,
553                location,
554            } => {
555                format!(
556                    "Removed node '{}' ({}) from {}",
557                    node_name,
558                    node_type,
559                    location.as_deref().unwrap_or("unknown")
560                )
561            }
562            DiffType::NodeModified {
563                node_name,
564                old_type,
565                new_type,
566                changes,
567            } => {
568                format!(
569                    "Modified node '{}' ({} -> {}): {}",
570                    node_name,
571                    old_type,
572                    new_type,
573                    changes.join(", ")
574                )
575            }
576            DiffType::EdgeAdded {
577                source,
578                target,
579                edge_type,
580            } => {
581                format!("Added edge {} -> {} ({})", source, target, edge_type)
582            }
583            DiffType::EdgeRemoved {
584                source,
585                target,
586                edge_type,
587            } => {
588                format!("Removed edge {} -> {} ({})", source, target, edge_type)
589            }
590            DiffType::EdgeModified {
591                source,
592                target,
593                old_type,
594                new_type,
595            } => {
596                format!(
597                    "Modified edge {} -> {} ({} -> {})",
598                    source, target, old_type, new_type
599                )
600            }
601            DiffType::StructuralChange {
602                description,
603                impact,
604            } => {
605                format!("Structural change ({:?}): {}", impact, description)
606            }
607        }
608    }
609}
610
611#[cfg(test)]
612mod tests {
613    use super::*;
614    use codeprism_core::{NodeKind, Span};
615    use std::path::PathBuf;
616
617    fn create_test_node(_id: u64, name: &str, kind: NodeKind) -> codeprism_core::Node {
618        let path = PathBuf::from("test.rs");
619        let span = Span::new(0, 10, 1, 1, 1, 10);
620        let repo_id = "test_repo";
621
622        codeprism_core::Node {
623            id: codeprism_core::NodeId::new(repo_id, &path, &span, &kind),
624            kind,
625            name: name.to_string(),
626            file: path,
627            span,
628            lang: codeprism_core::Language::Rust,
629            metadata: Default::default(),
630            signature: Default::default(),
631        }
632    }
633
634    #[test]
635    fn test_ast_diff_creation() {
636        let diff = AstDiff::new();
637        assert!(diff.config.ignore_node_ids);
638        assert!(!diff.config.ignore_spans);
639    }
640
641    #[test]
642    fn test_create_node_key() {
643        let diff = AstDiff::new();
644        let node = create_test_node(1, "test", NodeKind::Function);
645        let key = diff.create_node_key(&node);
646        assert!(key.contains("test"));
647        assert!(key.contains("Function"));
648    }
649
650    #[test]
651    fn test_node_key_ignoring_ids() {
652        let config = DiffConfig {
653            ignore_node_ids: true,
654            ..Default::default()
655        };
656        let diff = AstDiff::with_config(config);
657
658        let node1 = create_test_node(1, "test", NodeKind::Function);
659        let node2 = create_test_node(2, "test", NodeKind::Function);
660
661        let key1 = diff.create_node_key(&node1);
662        let key2 = diff.create_node_key(&node2);
663
664        assert_eq!(key1, key2); // Should be same since IDs are ignored
665    }
666}