1use anyhow::Result;
4use codeprism_core::ParseResult;
5use serde::{Deserialize, Serialize};
6use std::collections::{HashMap, HashSet};
7
8#[derive(Debug, Clone)]
10pub struct AstDiff {
11 config: DiffConfig,
12}
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct DiffConfig {
17 pub ignore_spans: bool,
18 pub ignore_node_ids: bool,
19 pub compare_edge_order: bool,
20 pub max_differences: usize,
21 pub similarity_threshold: f64,
22}
23
24impl Default for DiffConfig {
25 fn default() -> Self {
26 Self {
27 ignore_spans: false,
28 ignore_node_ids: true,
29 compare_edge_order: false,
30 max_differences: 100,
31 similarity_threshold: 0.8,
32 }
33 }
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
38pub enum DiffType {
39 NodeAdded {
40 node_name: String,
41 node_type: String,
42 location: Option<String>,
43 },
44 NodeRemoved {
45 node_name: String,
46 node_type: String,
47 location: Option<String>,
48 },
49 NodeModified {
50 node_name: String,
51 old_type: String,
52 new_type: String,
53 changes: Vec<String>,
54 },
55 EdgeAdded {
56 source: String,
57 target: String,
58 edge_type: String,
59 },
60 EdgeRemoved {
61 source: String,
62 target: String,
63 edge_type: String,
64 },
65 EdgeModified {
66 source: String,
67 target: String,
68 old_type: String,
69 new_type: String,
70 },
71 StructuralChange {
72 description: String,
73 impact: StructuralImpact,
74 },
75}
76
77#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
79pub enum StructuralImpact {
80 Low,
81 Medium,
82 High,
83 Critical,
84}
85
86#[derive(Debug, Clone)]
88pub struct DiffReport {
89 pub differences: Vec<DiffType>,
90 pub statistics: DiffStatistics,
91 pub similarity_score: f64,
92 pub is_significant_change: bool,
93 pub summary: String,
94}
95
96#[derive(Debug, Clone, Default)]
98pub struct DiffStatistics {
99 pub nodes_added: usize,
100 pub nodes_removed: usize,
101 pub nodes_modified: usize,
102 pub edges_added: usize,
103 pub edges_removed: usize,
104 pub edges_modified: usize,
105 pub total_differences: usize,
106 pub similarity_percentage: f64,
107}
108
109impl AstDiff {
110 pub fn new() -> Self {
112 Self {
113 config: DiffConfig::default(),
114 }
115 }
116
117 pub fn with_config(config: DiffConfig) -> Self {
119 Self { config }
120 }
121
122 pub fn compare(
124 &self,
125 old_result: &ParseResult,
126 new_result: &ParseResult,
127 _source: &str,
128 ) -> Result<DiffReport> {
129 let mut differences = Vec::new();
130 let mut statistics = DiffStatistics::default();
131
132 self.compare_nodes(
134 &old_result.nodes,
135 &new_result.nodes,
136 &mut differences,
137 &mut statistics,
138 )?;
139
140 self.compare_edges(
142 &old_result.edges,
143 &new_result.edges,
144 &mut differences,
145 &mut statistics,
146 )?;
147
148 self.compare_tree_structures(&old_result.tree, &new_result.tree, &mut differences)?;
150
151 let similarity_score = self.calculate_similarity_score(&statistics, old_result, new_result);
153
154 let is_significant_change = similarity_score < self.config.similarity_threshold;
156
157 let summary = self.generate_summary(&statistics, similarity_score);
159
160 statistics.total_differences = differences.len();
161 statistics.similarity_percentage = similarity_score * 100.0;
162
163 Ok(DiffReport {
164 differences,
165 statistics,
166 similarity_score,
167 is_significant_change,
168 summary,
169 })
170 }
171
172 fn compare_nodes(
174 &self,
175 old_nodes: &[codeprism_core::Node],
176 new_nodes: &[codeprism_core::Node],
177 differences: &mut Vec<DiffType>,
178 statistics: &mut DiffStatistics,
179 ) -> Result<()> {
180 let old_node_map: HashMap<String, &codeprism_core::Node> = old_nodes
182 .iter()
183 .map(|n| (self.create_node_key(n), n))
184 .collect();
185
186 let new_node_map: HashMap<String, &codeprism_core::Node> = new_nodes
187 .iter()
188 .map(|n| (self.create_node_key(n), n))
189 .collect();
190
191 let old_keys: HashSet<_> = old_node_map.keys().collect();
192 let new_keys: HashSet<_> = new_node_map.keys().collect();
193
194 for key in new_keys.difference(&old_keys) {
196 if let Some(node) = new_node_map.get(*key) {
197 differences.push(DiffType::NodeAdded {
198 node_name: node.name.clone(),
199 node_type: format!("{:?}", node.kind),
200 location: Some(format!("{}:{}", node.span.start_byte, node.span.end_byte)),
201 });
202 statistics.nodes_added += 1;
203 }
204 }
205
206 for key in old_keys.difference(&new_keys) {
208 if let Some(node) = old_node_map.get(*key) {
209 differences.push(DiffType::NodeRemoved {
210 node_name: node.name.clone(),
211 node_type: format!("{:?}", node.kind),
212 location: Some(format!("{}:{}", node.span.start_byte, node.span.end_byte)),
213 });
214 statistics.nodes_removed += 1;
215 }
216 }
217
218 for key in old_keys.intersection(&new_keys) {
220 if let (Some(old_node), Some(new_node)) =
221 (old_node_map.get(*key), new_node_map.get(*key))
222 {
223 let changes = self.compare_node_properties(old_node, new_node);
224 if !changes.is_empty() {
225 differences.push(DiffType::NodeModified {
226 node_name: old_node.name.clone(),
227 old_type: format!("{:?}", old_node.kind),
228 new_type: format!("{:?}", new_node.kind),
229 changes,
230 });
231 statistics.nodes_modified += 1;
232 }
233 }
234 }
235
236 Ok(())
237 }
238
239 fn compare_edges(
241 &self,
242 old_edges: &[codeprism_core::Edge],
243 new_edges: &[codeprism_core::Edge],
244 differences: &mut Vec<DiffType>,
245 statistics: &mut DiffStatistics,
246 ) -> Result<()> {
247 let old_edge_map: HashMap<String, &codeprism_core::Edge> = old_edges
248 .iter()
249 .map(|e| (self.create_edge_key(e), e))
250 .collect();
251
252 let new_edge_map: HashMap<String, &codeprism_core::Edge> = new_edges
253 .iter()
254 .map(|e| (self.create_edge_key(e), e))
255 .collect();
256
257 let old_keys: HashSet<_> = old_edge_map.keys().collect();
258 let new_keys: HashSet<_> = new_edge_map.keys().collect();
259
260 for key in new_keys.difference(&old_keys) {
262 if let Some(edge) = new_edge_map.get(*key) {
263 differences.push(DiffType::EdgeAdded {
264 source: edge.source.to_hex(),
265 target: edge.target.to_hex(),
266 edge_type: format!("{:?}", edge.kind),
267 });
268 statistics.edges_added += 1;
269 }
270 }
271
272 for key in old_keys.difference(&new_keys) {
274 if let Some(edge) = old_edge_map.get(*key) {
275 differences.push(DiffType::EdgeRemoved {
276 source: edge.source.to_hex(),
277 target: edge.target.to_hex(),
278 edge_type: format!("{:?}", edge.kind),
279 });
280 statistics.edges_removed += 1;
281 }
282 }
283
284 Ok(())
285 }
286
287 fn compare_tree_structures(
289 &self,
290 old_tree: &tree_sitter::Tree,
291 new_tree: &tree_sitter::Tree,
292 differences: &mut Vec<DiffType>,
293 ) -> Result<()> {
294 let old_root = old_tree.root_node();
295 let new_root = new_tree.root_node();
296
297 if old_root.kind() != new_root.kind() {
299 differences.push(DiffType::StructuralChange {
300 description: format!(
301 "Root node type changed from '{}' to '{}'",
302 old_root.kind(),
303 new_root.kind()
304 ),
305 impact: StructuralImpact::Critical,
306 });
307 }
308
309 let old_depth = self.calculate_tree_depth(&old_root);
311 let new_depth = self.calculate_tree_depth(&new_root);
312
313 if (old_depth as i32 - new_depth as i32).abs() > 2 {
314 differences.push(DiffType::StructuralChange {
315 description: format!(
316 "Significant tree depth change: {} -> {}",
317 old_depth, new_depth
318 ),
319 impact: if (old_depth as i32 - new_depth as i32).abs() > 5 {
320 StructuralImpact::High
321 } else {
322 StructuralImpact::Medium
323 },
324 });
325 }
326
327 Ok(())
328 }
329
330 fn create_node_key(&self, node: &codeprism_core::Node) -> String {
332 if self.config.ignore_node_ids {
333 if self.config.ignore_spans {
334 format!("{}:{:?}", node.name, node.kind)
335 } else {
336 format!(
337 "{}:{:?}:{}:{}",
338 node.name, node.kind, node.span.start_byte, node.span.end_byte
339 )
340 }
341 } else {
342 node.id.to_hex()
343 }
344 }
345
346 fn create_edge_key(&self, edge: &codeprism_core::Edge) -> String {
348 format!(
349 "{}->{}:{:?}",
350 edge.source.to_hex(),
351 edge.target.to_hex(),
352 edge.kind
353 )
354 }
355
356 fn compare_node_properties(
358 &self,
359 old_node: &codeprism_core::Node,
360 new_node: &codeprism_core::Node,
361 ) -> Vec<String> {
362 let mut changes = Vec::new();
363
364 if old_node.kind != new_node.kind {
365 changes.push(format!(
366 "Type changed: {:?} -> {:?}",
367 old_node.kind, new_node.kind
368 ));
369 }
370
371 if old_node.name != new_node.name {
372 changes.push(format!(
373 "Name changed: '{}' -> '{}'",
374 old_node.name, new_node.name
375 ));
376 }
377
378 if !self.config.ignore_spans
379 && (old_node.span.start_byte != new_node.span.start_byte
380 || old_node.span.end_byte != new_node.span.end_byte)
381 {
382 changes.push(format!(
383 "Span changed: {}..{} -> {}..{}",
384 old_node.span.start_byte,
385 old_node.span.end_byte,
386 new_node.span.start_byte,
387 new_node.span.end_byte
388 ));
389 }
390
391 changes
392 }
393
394 #[allow(clippy::only_used_in_recursion)] fn calculate_tree_depth(&self, node: &tree_sitter::Node) -> usize {
397 let mut max_depth = 0;
398 for i in 0..node.child_count() {
399 if let Some(child) = node.child(i) {
400 let child_depth = self.calculate_tree_depth(&child);
401 max_depth = max_depth.max(child_depth);
402 }
403 }
404 max_depth + 1
405 }
406
407 fn calculate_similarity_score(
409 &self,
410 statistics: &DiffStatistics,
411 old_result: &ParseResult,
412 new_result: &ParseResult,
413 ) -> f64 {
414 let total_old_items = old_result.nodes.len() + old_result.edges.len();
415 let total_new_items = new_result.nodes.len() + new_result.edges.len();
416 let max_items = total_old_items.max(total_new_items) as f64;
417
418 if max_items == 0.0 {
419 return 1.0; }
421
422 let total_changes = statistics.nodes_added
423 + statistics.nodes_removed
424 + statistics.nodes_modified
425 + statistics.edges_added
426 + statistics.edges_removed
427 + statistics.edges_modified;
428
429 let similarity = 1.0 - (total_changes as f64 / max_items);
430 similarity.max(0.0)
431 }
432
433 fn generate_summary(&self, statistics: &DiffStatistics, similarity_score: f64) -> String {
435 if statistics.total_differences == 0 {
436 return "No differences found - ASTs are identical".to_string();
437 }
438
439 let mut summary = format!(
440 "Found {} differences (similarity: {:.1}%)",
441 statistics.total_differences,
442 similarity_score * 100.0
443 );
444
445 let mut parts = Vec::new();
446
447 if statistics.nodes_added > 0 {
448 parts.push(format!("{} nodes added", statistics.nodes_added));
449 }
450 if statistics.nodes_removed > 0 {
451 parts.push(format!("{} nodes removed", statistics.nodes_removed));
452 }
453 if statistics.nodes_modified > 0 {
454 parts.push(format!("{} nodes modified", statistics.nodes_modified));
455 }
456 if statistics.edges_added > 0 {
457 parts.push(format!("{} edges added", statistics.edges_added));
458 }
459 if statistics.edges_removed > 0 {
460 parts.push(format!("{} edges removed", statistics.edges_removed));
461 }
462 if statistics.edges_modified > 0 {
463 parts.push(format!("{} edges modified", statistics.edges_modified));
464 }
465
466 if !parts.is_empty() {
467 summary.push_str(": ");
468 summary.push_str(&parts.join(", "));
469 }
470
471 summary
472 }
473}
474
475impl Default for AstDiff {
476 fn default() -> Self {
477 Self::new()
478 }
479}
480
481impl DiffReport {
482 pub fn format_report(&self) -> String {
484 let mut output = String::new();
485
486 output.push_str("=== AST Diff Report ===\n\n");
487 output.push_str(&format!("Summary: {}\n", self.summary));
488 output.push_str(&format!(
489 "Similarity Score: {:.1}%\n",
490 self.similarity_score * 100.0
491 ));
492
493 if self.is_significant_change {
494 output.push_str("⚠️ Significant changes detected!\n");
495 } else {
496 output.push_str("✅ Minor changes only\n");
497 }
498
499 output.push('\n');
500
501 if !self.differences.is_empty() {
502 output.push_str("## Detailed Changes:\n");
503 for (i, diff) in self.differences.iter().enumerate() {
504 output.push_str(&format!("{}. {}\n", i + 1, self.format_diff(diff)));
505 }
506 }
507
508 output.push_str("\n## Statistics:\n");
509 output.push_str(&format!("- Nodes added: {}\n", self.statistics.nodes_added));
510 output.push_str(&format!(
511 "- Nodes removed: {}\n",
512 self.statistics.nodes_removed
513 ));
514 output.push_str(&format!(
515 "- Nodes modified: {}\n",
516 self.statistics.nodes_modified
517 ));
518 output.push_str(&format!("- Edges added: {}\n", self.statistics.edges_added));
519 output.push_str(&format!(
520 "- Edges removed: {}\n",
521 self.statistics.edges_removed
522 ));
523 output.push_str(&format!(
524 "- Edges modified: {}\n",
525 self.statistics.edges_modified
526 ));
527 output.push_str(&format!(
528 "- Total differences: {}\n",
529 self.statistics.total_differences
530 ));
531
532 output
533 }
534
535 fn format_diff(&self, diff: &DiffType) -> String {
537 match diff {
538 DiffType::NodeAdded {
539 node_name,
540 node_type,
541 location,
542 } => {
543 format!(
544 "Added node '{}' ({}) at {}",
545 node_name,
546 node_type,
547 location.as_deref().unwrap_or("unknown")
548 )
549 }
550 DiffType::NodeRemoved {
551 node_name,
552 node_type,
553 location,
554 } => {
555 format!(
556 "Removed node '{}' ({}) from {}",
557 node_name,
558 node_type,
559 location.as_deref().unwrap_or("unknown")
560 )
561 }
562 DiffType::NodeModified {
563 node_name,
564 old_type,
565 new_type,
566 changes,
567 } => {
568 format!(
569 "Modified node '{}' ({} -> {}): {}",
570 node_name,
571 old_type,
572 new_type,
573 changes.join(", ")
574 )
575 }
576 DiffType::EdgeAdded {
577 source,
578 target,
579 edge_type,
580 } => {
581 format!("Added edge {} -> {} ({})", source, target, edge_type)
582 }
583 DiffType::EdgeRemoved {
584 source,
585 target,
586 edge_type,
587 } => {
588 format!("Removed edge {} -> {} ({})", source, target, edge_type)
589 }
590 DiffType::EdgeModified {
591 source,
592 target,
593 old_type,
594 new_type,
595 } => {
596 format!(
597 "Modified edge {} -> {} ({} -> {})",
598 source, target, old_type, new_type
599 )
600 }
601 DiffType::StructuralChange {
602 description,
603 impact,
604 } => {
605 format!("Structural change ({:?}): {}", impact, description)
606 }
607 }
608 }
609}
610
611#[cfg(test)]
612mod tests {
613 use super::*;
614 use codeprism_core::{NodeKind, Span};
615 use std::path::PathBuf;
616
617 fn create_test_node(_id: u64, name: &str, kind: NodeKind) -> codeprism_core::Node {
618 let path = PathBuf::from("test.rs");
619 let span = Span::new(0, 10, 1, 1, 1, 10);
620 let repo_id = "test_repo";
621
622 codeprism_core::Node {
623 id: codeprism_core::NodeId::new(repo_id, &path, &span, &kind),
624 kind,
625 name: name.to_string(),
626 file: path,
627 span,
628 lang: codeprism_core::Language::Rust,
629 metadata: Default::default(),
630 signature: Default::default(),
631 }
632 }
633
634 #[test]
635 fn test_ast_diff_creation() {
636 let diff = AstDiff::new();
637 assert!(diff.config.ignore_node_ids);
638 assert!(!diff.config.ignore_spans);
639 }
640
641 #[test]
642 fn test_create_node_key() {
643 let diff = AstDiff::new();
644 let node = create_test_node(1, "test", NodeKind::Function);
645 let key = diff.create_node_key(&node);
646 assert!(key.contains("test"));
647 assert!(key.contains("Function"));
648 }
649
650 #[test]
651 fn test_node_key_ignoring_ids() {
652 let config = DiffConfig {
653 ignore_node_ids: true,
654 ..Default::default()
655 };
656 let diff = AstDiff::with_config(config);
657
658 let node1 = create_test_node(1, "test", NodeKind::Function);
659 let node2 = create_test_node(2, "test", NodeKind::Function);
660
661 let key1 = diff.create_node_key(&node1);
662 let key2 = diff.create_node_key(&node2);
663
664 assert_eq!(key1, key2); }
666}