codeprism_dev_tools/
diff_comparison.rs

1//! AST diff comparison utilities for parser development
2
3use anyhow::Result;
4use codeprism_core::ParseResult;
5use serde::{Deserialize, Serialize};
6use std::collections::{HashMap, HashSet};
7
8/// AST diff analyzer for comparing parse results
9#[derive(Debug, Clone)]
10pub struct AstDiff {
11    config: DiffConfig,
12}
13
14/// Configuration for diff analysis
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct DiffConfig {
17    pub ignore_spans: bool,
18    pub ignore_node_ids: bool,
19    pub compare_edge_order: bool,
20    pub max_differences: usize,
21    pub similarity_threshold: f64,
22}
23
24impl Default for DiffConfig {
25    fn default() -> Self {
26        Self {
27            ignore_spans: false,
28            ignore_node_ids: true,
29            compare_edge_order: false,
30            max_differences: 100,
31            similarity_threshold: 0.8,
32        }
33    }
34}
35
36/// Type of difference found between ASTs
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub enum DiffType {
39    NodeAdded {
40        node_name: String,
41        node_type: String,
42        location: Option<String>,
43    },
44    NodeRemoved {
45        node_name: String,
46        node_type: String,
47        location: Option<String>,
48    },
49    NodeModified {
50        node_name: String,
51        old_type: String,
52        new_type: String,
53        changes: Vec<String>,
54    },
55    EdgeAdded {
56        source: String,
57        target: String,
58        edge_type: String,
59    },
60    EdgeRemoved {
61        source: String,
62        target: String,
63        edge_type: String,
64    },
65    EdgeModified {
66        source: String,
67        target: String,
68        old_type: String,
69        new_type: String,
70    },
71    StructuralChange {
72        description: String,
73        impact: StructuralImpact,
74    },
75}
76
77/// Impact level of structural changes
78#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
79pub enum StructuralImpact {
80    Low,
81    Medium,
82    High,
83    Critical,
84}
85
86/// Comprehensive diff report
87#[derive(Debug, Clone)]
88pub struct DiffReport {
89    pub differences: Vec<DiffType>,
90    pub statistics: DiffStatistics,
91    pub similarity_score: f64,
92    pub is_significant_change: bool,
93    pub summary: String,
94}
95
96/// Statistics about the differences found
97#[derive(Debug, Clone, Default)]
98pub struct DiffStatistics {
99    pub nodes_added: usize,
100    pub nodes_removed: usize,
101    pub nodes_modified: usize,
102    pub edges_added: usize,
103    pub edges_removed: usize,
104    pub edges_modified: usize,
105    pub total_differences: usize,
106    pub similarity_percentage: f64,
107}
108
109impl AstDiff {
110    /// Create a new AST diff analyzer
111    pub fn new() -> Self {
112        Self {
113            config: DiffConfig::default(),
114        }
115    }
116
117    /// Create with custom configuration
118    pub fn with_config(config: DiffConfig) -> Self {
119        Self { config }
120    }
121
122    /// Compare two parse results and generate a diff report
123    pub fn compare(
124        &self,
125        old_result: &ParseResult,
126        new_result: &ParseResult,
127        _source: &str,
128    ) -> Result<DiffReport> {
129        let mut differences = Vec::new();
130        let mut statistics = DiffStatistics::default();
131
132        // Compare nodes
133        self.compare_nodes(
134            &old_result.nodes,
135            &new_result.nodes,
136            &mut differences,
137            &mut statistics,
138        )?;
139
140        // Compare edges
141        self.compare_edges(
142            &old_result.edges,
143            &new_result.edges,
144            &mut differences,
145            &mut statistics,
146        )?;
147
148        // Compare tree structures
149        self.compare_tree_structures(&old_result.tree, &new_result.tree, &mut differences)?;
150
151        // Calculate similarity score
152        let similarity_score = self.calculate_similarity_score(&statistics, old_result, new_result);
153
154        // Determine if this is a significant change
155        let is_significant_change = similarity_score < self.config.similarity_threshold;
156
157        // Generate summary
158        let summary = self.generate_summary(&statistics, similarity_score);
159
160        statistics.total_differences = differences.len();
161        statistics.similarity_percentage = similarity_score * 100.0;
162
163        Ok(DiffReport {
164            differences,
165            statistics,
166            similarity_score,
167            is_significant_change,
168            summary,
169        })
170    }
171
172    /// Compare node lists
173    fn compare_nodes(
174        &self,
175        old_nodes: &[codeprism_core::Node],
176        new_nodes: &[codeprism_core::Node],
177        differences: &mut Vec<DiffType>,
178        statistics: &mut DiffStatistics,
179    ) -> Result<()> {
180        // Create maps for easier comparison
181        let old_node_map: HashMap<String, &codeprism_core::Node> = old_nodes
182            .iter()
183            .map(|n| (self.create_node_key(n), n))
184            .collect();
185
186        let new_node_map: HashMap<String, &codeprism_core::Node> = new_nodes
187            .iter()
188            .map(|n| (self.create_node_key(n), n))
189            .collect();
190
191        let old_keys: HashSet<_> = old_node_map.keys().collect();
192        let new_keys: HashSet<_> = new_node_map.keys().collect();
193
194        // Find added nodes
195        for key in new_keys.difference(&old_keys) {
196            if let Some(node) = new_node_map.get(*key) {
197                differences.push(DiffType::NodeAdded {
198                    node_name: node.name.clone(),
199                    node_type: format!("{:?}", node.kind),
200                    location: Some(format!("{}:{}", node.span.start_byte, node.span.end_byte)),
201                });
202                statistics.nodes_added += 1;
203            }
204        }
205
206        // Find removed nodes
207        for key in old_keys.difference(&new_keys) {
208            if let Some(node) = old_node_map.get(*key) {
209                differences.push(DiffType::NodeRemoved {
210                    node_name: node.name.clone(),
211                    node_type: format!("{:?}", node.kind),
212                    location: Some(format!("{}:{}", node.span.start_byte, node.span.end_byte)),
213                });
214                statistics.nodes_removed += 1;
215            }
216        }
217
218        // Find modified nodes (same key but different properties)
219        for key in old_keys.intersection(&new_keys) {
220            if let (Some(old_node), Some(new_node)) =
221                (old_node_map.get(*key), new_node_map.get(*key))
222            {
223                let changes = self.compare_node_properties(old_node, new_node);
224                if !changes.is_empty() {
225                    differences.push(DiffType::NodeModified {
226                        node_name: old_node.name.clone(),
227                        old_type: format!("{:?}", old_node.kind),
228                        new_type: format!("{:?}", new_node.kind),
229                        changes,
230                    });
231                    statistics.nodes_modified += 1;
232                }
233            }
234        }
235
236        Ok(())
237    }
238
239    /// Compare edge lists
240    fn compare_edges(
241        &self,
242        old_edges: &[codeprism_core::Edge],
243        new_edges: &[codeprism_core::Edge],
244        differences: &mut Vec<DiffType>,
245        statistics: &mut DiffStatistics,
246    ) -> Result<()> {
247        let old_edge_map: HashMap<String, &codeprism_core::Edge> = old_edges
248            .iter()
249            .map(|e| (self.create_edge_key(e), e))
250            .collect();
251
252        let new_edge_map: HashMap<String, &codeprism_core::Edge> = new_edges
253            .iter()
254            .map(|e| (self.create_edge_key(e), e))
255            .collect();
256
257        let old_keys: HashSet<_> = old_edge_map.keys().collect();
258        let new_keys: HashSet<_> = new_edge_map.keys().collect();
259
260        // Find added edges
261        for key in new_keys.difference(&old_keys) {
262            if let Some(edge) = new_edge_map.get(*key) {
263                differences.push(DiffType::EdgeAdded {
264                    source: edge.source.to_hex(),
265                    target: edge.target.to_hex(),
266                    edge_type: format!("{:?}", edge.kind),
267                });
268                statistics.edges_added += 1;
269            }
270        }
271
272        // Find removed edges
273        for key in old_keys.difference(&new_keys) {
274            if let Some(edge) = old_edge_map.get(*key) {
275                differences.push(DiffType::EdgeRemoved {
276                    source: edge.source.to_hex(),
277                    target: edge.target.to_hex(),
278                    edge_type: format!("{:?}", edge.kind),
279                });
280                statistics.edges_removed += 1;
281            }
282        }
283
284        Ok(())
285    }
286
287    /// Compare tree structures
288    fn compare_tree_structures(
289        &self,
290        old_tree: &tree_sitter::Tree,
291        new_tree: &tree_sitter::Tree,
292        differences: &mut Vec<DiffType>,
293    ) -> Result<()> {
294        let old_root = old_tree.root_node();
295        let new_root = new_tree.root_node();
296
297        // Compare root node types
298        if old_root.kind() != new_root.kind() {
299            differences.push(DiffType::StructuralChange {
300                description: format!(
301                    "Root node type changed from '{}' to '{}'",
302                    old_root.kind(),
303                    new_root.kind()
304                ),
305                impact: StructuralImpact::Critical,
306            });
307        }
308
309        // Compare tree depths
310        let old_depth = self.calculate_tree_depth(&old_root);
311        let new_depth = self.calculate_tree_depth(&new_root);
312
313        if (old_depth as i32 - new_depth as i32).abs() > 2 {
314            differences.push(DiffType::StructuralChange {
315                description: format!("Significant tree depth change: {old_depth} -> {new_depth}"),
316                impact: if (old_depth as i32 - new_depth as i32).abs() > 5 {
317                    StructuralImpact::High
318                } else {
319                    StructuralImpact::Medium
320                },
321            });
322        }
323
324        Ok(())
325    }
326
327    /// Create a unique key for a node (ignoring IDs if configured)
328    fn create_node_key(&self, node: &codeprism_core::Node) -> String {
329        if self.config.ignore_node_ids {
330            if self.config.ignore_spans {
331                format!("{}:{:?}", node.name, node.kind)
332            } else {
333                format!(
334                    "{}:{:?}:{}:{}",
335                    node.name, node.kind, node.span.start_byte, node.span.end_byte
336                )
337            }
338        } else {
339            node.id.to_hex()
340        }
341    }
342
343    /// Create a unique key for an edge
344    fn create_edge_key(&self, edge: &codeprism_core::Edge) -> String {
345        format!(
346            "{}->{}:{:?}",
347            edge.source.to_hex(),
348            edge.target.to_hex(),
349            edge.kind
350        )
351    }
352
353    /// Compare properties of two nodes
354    fn compare_node_properties(
355        &self,
356        old_node: &codeprism_core::Node,
357        new_node: &codeprism_core::Node,
358    ) -> Vec<String> {
359        let mut changes = Vec::new();
360
361        if old_node.kind != new_node.kind {
362            changes.push(format!(
363                "Type changed: {:?} -> {:?}",
364                old_node.kind, new_node.kind
365            ));
366        }
367
368        if old_node.name != new_node.name {
369            changes.push(format!(
370                "Name changed: '{}' -> '{}'",
371                old_node.name, new_node.name
372            ));
373        }
374
375        if !self.config.ignore_spans
376            && (old_node.span.start_byte != new_node.span.start_byte
377                || old_node.span.end_byte != new_node.span.end_byte)
378        {
379            changes.push(format!(
380                "Span changed: {}..{} -> {}..{}",
381                old_node.span.start_byte,
382                old_node.span.end_byte,
383                new_node.span.start_byte,
384                new_node.span.end_byte
385            ));
386        }
387
388        changes
389    }
390
391    /// Calculate tree depth
392    #[allow(clippy::only_used_in_recursion)] // Method is used recursively by design
393    fn calculate_tree_depth(&self, node: &tree_sitter::Node) -> usize {
394        let mut max_depth = 0;
395        for i in 0..node.child_count() {
396            if let Some(child) = node.child(i) {
397                let child_depth = self.calculate_tree_depth(&child);
398                max_depth = max_depth.max(child_depth);
399            }
400        }
401        max_depth + 1
402    }
403
404    /// Calculate similarity score between two parse results
405    fn calculate_similarity_score(
406        &self,
407        statistics: &DiffStatistics,
408        old_result: &ParseResult,
409        new_result: &ParseResult,
410    ) -> f64 {
411        let total_old_items = old_result.nodes.len() + old_result.edges.len();
412        let total_new_items = new_result.nodes.len() + new_result.edges.len();
413        let max_items = total_old_items.max(total_new_items) as f64;
414
415        if max_items == 0.0 {
416            return 1.0; // Both empty, perfect similarity
417        }
418
419        let total_changes = statistics.nodes_added
420            + statistics.nodes_removed
421            + statistics.nodes_modified
422            + statistics.edges_added
423            + statistics.edges_removed
424            + statistics.edges_modified;
425
426        let similarity = 1.0 - (total_changes as f64 / max_items);
427        similarity.max(0.0)
428    }
429
430    /// Generate a human-readable summary
431    fn generate_summary(&self, statistics: &DiffStatistics, similarity_score: f64) -> String {
432        if statistics.total_differences == 0 {
433            return "No differences found - ASTs are identical".to_string();
434        }
435
436        let mut summary = format!(
437            "Found {} differences (similarity: {:.1}%)",
438            statistics.total_differences,
439            similarity_score * 100.0
440        );
441
442        let mut parts = Vec::new();
443
444        if statistics.nodes_added > 0 {
445            parts.push(format!("{} nodes added", statistics.nodes_added));
446        }
447        if statistics.nodes_removed > 0 {
448            parts.push(format!("{} nodes removed", statistics.nodes_removed));
449        }
450        if statistics.nodes_modified > 0 {
451            parts.push(format!("{} nodes modified", statistics.nodes_modified));
452        }
453        if statistics.edges_added > 0 {
454            parts.push(format!("{} edges added", statistics.edges_added));
455        }
456        if statistics.edges_removed > 0 {
457            parts.push(format!("{} edges removed", statistics.edges_removed));
458        }
459        if statistics.edges_modified > 0 {
460            parts.push(format!("{} edges modified", statistics.edges_modified));
461        }
462
463        if !parts.is_empty() {
464            summary.push_str(": ");
465            summary.push_str(&parts.join(", "));
466        }
467
468        summary
469    }
470}
471
472impl Default for AstDiff {
473    fn default() -> Self {
474        Self::new()
475    }
476}
477
478impl DiffReport {
479    /// Format the diff report for display
480    pub fn format_report(&self) -> String {
481        let mut output = String::new();
482
483        output.push_str("=== AST Diff Report ===\n\n");
484        output.push_str(&format!("Summary: {}\n", self.summary));
485        output.push_str(&format!(
486            "Similarity Score: {:.1}%\n",
487            self.similarity_score * 100.0
488        ));
489
490        if self.is_significant_change {
491            output.push_str("⚠️  Significant changes detected!\n");
492        } else {
493            output.push_str("✅ Minor changes only\n");
494        }
495
496        output.push('\n');
497
498        if !self.differences.is_empty() {
499            output.push_str("## Detailed Changes:\n");
500            for (i, diff) in self.differences.iter().enumerate() {
501                output.push_str(&format!("{}. {}\n", i + 1, self.format_diff(diff)));
502            }
503        }
504
505        output.push_str("\n## Statistics:\n");
506        output.push_str(&format!("- Nodes added: {}\n", self.statistics.nodes_added));
507        output.push_str(&format!(
508            "- Nodes removed: {}\n",
509            self.statistics.nodes_removed
510        ));
511        output.push_str(&format!(
512            "- Nodes modified: {}\n",
513            self.statistics.nodes_modified
514        ));
515        output.push_str(&format!("- Edges added: {}\n", self.statistics.edges_added));
516        output.push_str(&format!(
517            "- Edges removed: {}\n",
518            self.statistics.edges_removed
519        ));
520        output.push_str(&format!(
521            "- Edges modified: {}\n",
522            self.statistics.edges_modified
523        ));
524        output.push_str(&format!(
525            "- Total differences: {}\n",
526            self.statistics.total_differences
527        ));
528
529        output
530    }
531
532    /// Format a single difference
533    fn format_diff(&self, diff: &DiffType) -> String {
534        match diff {
535            DiffType::NodeAdded {
536                node_name,
537                node_type,
538                location,
539            } => {
540                format!(
541                    "Added node '{}' ({}) at {}",
542                    node_name,
543                    node_type,
544                    location.as_deref().unwrap_or("unknown")
545                )
546            }
547            DiffType::NodeRemoved {
548                node_name,
549                node_type,
550                location,
551            } => {
552                format!(
553                    "Removed node '{}' ({}) from {}",
554                    node_name,
555                    node_type,
556                    location.as_deref().unwrap_or("unknown")
557                )
558            }
559            DiffType::NodeModified {
560                node_name,
561                old_type,
562                new_type,
563                changes,
564            } => {
565                format!(
566                    "Modified node '{}' ({} -> {}): {}",
567                    node_name,
568                    old_type,
569                    new_type,
570                    changes.join(", ")
571                )
572            }
573            DiffType::EdgeAdded {
574                source,
575                target,
576                edge_type,
577            } => {
578                format!("Added edge {source} -> {target} ({edge_type})")
579            }
580            DiffType::EdgeRemoved {
581                source,
582                target,
583                edge_type,
584            } => {
585                format!("Removed edge {source} -> {target} ({edge_type})")
586            }
587            DiffType::EdgeModified {
588                source,
589                target,
590                old_type,
591                new_type,
592            } => {
593                format!("Modified edge {source} -> {target} ({old_type} -> {new_type})")
594            }
595            DiffType::StructuralChange {
596                description,
597                impact,
598            } => {
599                format!("Structural change ({impact:?}): {description}")
600            }
601        }
602    }
603}
604
605#[cfg(test)]
606mod tests {
607    use super::*;
608    use codeprism_core::{NodeKind, Span};
609    use std::path::PathBuf;
610
611    fn create_test_node(_id: u64, name: &str, kind: NodeKind) -> codeprism_core::Node {
612        let path = PathBuf::from("test.rs");
613        let span = Span::new(0, 10, 1, 1, 1, 10);
614        let repo_id = "test_repo";
615
616        codeprism_core::Node {
617            id: codeprism_core::NodeId::new(repo_id, &path, &span, &kind),
618            kind,
619            name: name.to_string(),
620            file: path,
621            span,
622            lang: codeprism_core::Language::Rust,
623            metadata: Default::default(),
624            signature: Default::default(),
625        }
626    }
627
628    #[test]
629    fn test_ast_diff_creation() {
630        let diff = AstDiff::new();
631        assert!(diff.config.ignore_node_ids);
632        assert!(!diff.config.ignore_spans);
633    }
634
635    #[test]
636    fn test_create_node_key() {
637        let diff = AstDiff::new();
638        let node = create_test_node(1, "test", NodeKind::Function);
639        let key = diff.create_node_key(&node);
640        assert!(key.contains("test"));
641        assert!(key.contains("Function"));
642    }
643
644    #[test]
645    fn test_node_key_ignoring_ids() {
646        let config = DiffConfig {
647            ignore_node_ids: true,
648            ..Default::default()
649        };
650        let diff = AstDiff::with_config(config);
651
652        let node1 = create_test_node(1, "test", NodeKind::Function);
653        let node2 = create_test_node(2, "test", NodeKind::Function);
654
655        let key1 = diff.create_node_key(&node1);
656        let key2 = diff.create_node_key(&node2);
657
658        assert_eq!(key1, key2); // Should be same since IDs are ignored
659    }
660}