1use anyhow::Result;
4use codeprism_core::ParseResult;
5use serde::{Deserialize, Serialize};
6use std::collections::{HashMap, HashSet};
7
8#[derive(Debug, Clone)]
10pub struct AstDiff {
11 config: DiffConfig,
12}
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct DiffConfig {
17 pub ignore_spans: bool,
18 pub ignore_node_ids: bool,
19 pub compare_edge_order: bool,
20 pub max_differences: usize,
21 pub similarity_threshold: f64,
22}
23
24impl Default for DiffConfig {
25 fn default() -> Self {
26 Self {
27 ignore_spans: false,
28 ignore_node_ids: true,
29 compare_edge_order: false,
30 max_differences: 100,
31 similarity_threshold: 0.8,
32 }
33 }
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
38pub enum DiffType {
39 NodeAdded {
40 node_name: String,
41 node_type: String,
42 location: Option<String>,
43 },
44 NodeRemoved {
45 node_name: String,
46 node_type: String,
47 location: Option<String>,
48 },
49 NodeModified {
50 node_name: String,
51 old_type: String,
52 new_type: String,
53 changes: Vec<String>,
54 },
55 EdgeAdded {
56 source: String,
57 target: String,
58 edge_type: String,
59 },
60 EdgeRemoved {
61 source: String,
62 target: String,
63 edge_type: String,
64 },
65 EdgeModified {
66 source: String,
67 target: String,
68 old_type: String,
69 new_type: String,
70 },
71 StructuralChange {
72 description: String,
73 impact: StructuralImpact,
74 },
75}
76
77#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
79pub enum StructuralImpact {
80 Low,
81 Medium,
82 High,
83 Critical,
84}
85
86#[derive(Debug, Clone)]
88pub struct DiffReport {
89 pub differences: Vec<DiffType>,
90 pub statistics: DiffStatistics,
91 pub similarity_score: f64,
92 pub is_significant_change: bool,
93 pub summary: String,
94}
95
96#[derive(Debug, Clone, Default)]
98pub struct DiffStatistics {
99 pub nodes_added: usize,
100 pub nodes_removed: usize,
101 pub nodes_modified: usize,
102 pub edges_added: usize,
103 pub edges_removed: usize,
104 pub edges_modified: usize,
105 pub total_differences: usize,
106 pub similarity_percentage: f64,
107}
108
109impl AstDiff {
110 pub fn new() -> Self {
112 Self {
113 config: DiffConfig::default(),
114 }
115 }
116
117 pub fn with_config(config: DiffConfig) -> Self {
119 Self { config }
120 }
121
122 pub fn compare(
124 &self,
125 old_result: &ParseResult,
126 new_result: &ParseResult,
127 _source: &str,
128 ) -> Result<DiffReport> {
129 let mut differences = Vec::new();
130 let mut statistics = DiffStatistics::default();
131
132 self.compare_nodes(
134 &old_result.nodes,
135 &new_result.nodes,
136 &mut differences,
137 &mut statistics,
138 )?;
139
140 self.compare_edges(
142 &old_result.edges,
143 &new_result.edges,
144 &mut differences,
145 &mut statistics,
146 )?;
147
148 self.compare_tree_structures(&old_result.tree, &new_result.tree, &mut differences)?;
150
151 let similarity_score = self.calculate_similarity_score(&statistics, old_result, new_result);
153
154 let is_significant_change = similarity_score < self.config.similarity_threshold;
156
157 let summary = self.generate_summary(&statistics, similarity_score);
159
160 statistics.total_differences = differences.len();
161 statistics.similarity_percentage = similarity_score * 100.0;
162
163 Ok(DiffReport {
164 differences,
165 statistics,
166 similarity_score,
167 is_significant_change,
168 summary,
169 })
170 }
171
172 fn compare_nodes(
174 &self,
175 old_nodes: &[codeprism_core::Node],
176 new_nodes: &[codeprism_core::Node],
177 differences: &mut Vec<DiffType>,
178 statistics: &mut DiffStatistics,
179 ) -> Result<()> {
180 let old_node_map: HashMap<String, &codeprism_core::Node> = old_nodes
182 .iter()
183 .map(|n| (self.create_node_key(n), n))
184 .collect();
185
186 let new_node_map: HashMap<String, &codeprism_core::Node> = new_nodes
187 .iter()
188 .map(|n| (self.create_node_key(n), n))
189 .collect();
190
191 let old_keys: HashSet<_> = old_node_map.keys().collect();
192 let new_keys: HashSet<_> = new_node_map.keys().collect();
193
194 for key in new_keys.difference(&old_keys) {
196 if let Some(node) = new_node_map.get(*key) {
197 differences.push(DiffType::NodeAdded {
198 node_name: node.name.clone(),
199 node_type: format!("{:?}", node.kind),
200 location: Some(format!("{}:{}", node.span.start_byte, node.span.end_byte)),
201 });
202 statistics.nodes_added += 1;
203 }
204 }
205
206 for key in old_keys.difference(&new_keys) {
208 if let Some(node) = old_node_map.get(*key) {
209 differences.push(DiffType::NodeRemoved {
210 node_name: node.name.clone(),
211 node_type: format!("{:?}", node.kind),
212 location: Some(format!("{}:{}", node.span.start_byte, node.span.end_byte)),
213 });
214 statistics.nodes_removed += 1;
215 }
216 }
217
218 for key in old_keys.intersection(&new_keys) {
220 if let (Some(old_node), Some(new_node)) =
221 (old_node_map.get(*key), new_node_map.get(*key))
222 {
223 let changes = self.compare_node_properties(old_node, new_node);
224 if !changes.is_empty() {
225 differences.push(DiffType::NodeModified {
226 node_name: old_node.name.clone(),
227 old_type: format!("{:?}", old_node.kind),
228 new_type: format!("{:?}", new_node.kind),
229 changes,
230 });
231 statistics.nodes_modified += 1;
232 }
233 }
234 }
235
236 Ok(())
237 }
238
239 fn compare_edges(
241 &self,
242 old_edges: &[codeprism_core::Edge],
243 new_edges: &[codeprism_core::Edge],
244 differences: &mut Vec<DiffType>,
245 statistics: &mut DiffStatistics,
246 ) -> Result<()> {
247 let old_edge_map: HashMap<String, &codeprism_core::Edge> = old_edges
248 .iter()
249 .map(|e| (self.create_edge_key(e), e))
250 .collect();
251
252 let new_edge_map: HashMap<String, &codeprism_core::Edge> = new_edges
253 .iter()
254 .map(|e| (self.create_edge_key(e), e))
255 .collect();
256
257 let old_keys: HashSet<_> = old_edge_map.keys().collect();
258 let new_keys: HashSet<_> = new_edge_map.keys().collect();
259
260 for key in new_keys.difference(&old_keys) {
262 if let Some(edge) = new_edge_map.get(*key) {
263 differences.push(DiffType::EdgeAdded {
264 source: edge.source.to_hex(),
265 target: edge.target.to_hex(),
266 edge_type: format!("{:?}", edge.kind),
267 });
268 statistics.edges_added += 1;
269 }
270 }
271
272 for key in old_keys.difference(&new_keys) {
274 if let Some(edge) = old_edge_map.get(*key) {
275 differences.push(DiffType::EdgeRemoved {
276 source: edge.source.to_hex(),
277 target: edge.target.to_hex(),
278 edge_type: format!("{:?}", edge.kind),
279 });
280 statistics.edges_removed += 1;
281 }
282 }
283
284 Ok(())
285 }
286
287 fn compare_tree_structures(
289 &self,
290 old_tree: &tree_sitter::Tree,
291 new_tree: &tree_sitter::Tree,
292 differences: &mut Vec<DiffType>,
293 ) -> Result<()> {
294 let old_root = old_tree.root_node();
295 let new_root = new_tree.root_node();
296
297 if old_root.kind() != new_root.kind() {
299 differences.push(DiffType::StructuralChange {
300 description: format!(
301 "Root node type changed from '{}' to '{}'",
302 old_root.kind(),
303 new_root.kind()
304 ),
305 impact: StructuralImpact::Critical,
306 });
307 }
308
309 let old_depth = self.calculate_tree_depth(&old_root);
311 let new_depth = self.calculate_tree_depth(&new_root);
312
313 if (old_depth as i32 - new_depth as i32).abs() > 2 {
314 differences.push(DiffType::StructuralChange {
315 description: format!("Significant tree depth change: {old_depth} -> {new_depth}"),
316 impact: if (old_depth as i32 - new_depth as i32).abs() > 5 {
317 StructuralImpact::High
318 } else {
319 StructuralImpact::Medium
320 },
321 });
322 }
323
324 Ok(())
325 }
326
327 fn create_node_key(&self, node: &codeprism_core::Node) -> String {
329 if self.config.ignore_node_ids {
330 if self.config.ignore_spans {
331 format!("{}:{:?}", node.name, node.kind)
332 } else {
333 format!(
334 "{}:{:?}:{}:{}",
335 node.name, node.kind, node.span.start_byte, node.span.end_byte
336 )
337 }
338 } else {
339 node.id.to_hex()
340 }
341 }
342
343 fn create_edge_key(&self, edge: &codeprism_core::Edge) -> String {
345 format!(
346 "{}->{}:{:?}",
347 edge.source.to_hex(),
348 edge.target.to_hex(),
349 edge.kind
350 )
351 }
352
353 fn compare_node_properties(
355 &self,
356 old_node: &codeprism_core::Node,
357 new_node: &codeprism_core::Node,
358 ) -> Vec<String> {
359 let mut changes = Vec::new();
360
361 if old_node.kind != new_node.kind {
362 changes.push(format!(
363 "Type changed: {:?} -> {:?}",
364 old_node.kind, new_node.kind
365 ));
366 }
367
368 if old_node.name != new_node.name {
369 changes.push(format!(
370 "Name changed: '{}' -> '{}'",
371 old_node.name, new_node.name
372 ));
373 }
374
375 if !self.config.ignore_spans
376 && (old_node.span.start_byte != new_node.span.start_byte
377 || old_node.span.end_byte != new_node.span.end_byte)
378 {
379 changes.push(format!(
380 "Span changed: {}..{} -> {}..{}",
381 old_node.span.start_byte,
382 old_node.span.end_byte,
383 new_node.span.start_byte,
384 new_node.span.end_byte
385 ));
386 }
387
388 changes
389 }
390
391 #[allow(clippy::only_used_in_recursion)] fn calculate_tree_depth(&self, node: &tree_sitter::Node) -> usize {
394 let mut max_depth = 0;
395 for i in 0..node.child_count() {
396 if let Some(child) = node.child(i) {
397 let child_depth = self.calculate_tree_depth(&child);
398 max_depth = max_depth.max(child_depth);
399 }
400 }
401 max_depth + 1
402 }
403
404 fn calculate_similarity_score(
406 &self,
407 statistics: &DiffStatistics,
408 old_result: &ParseResult,
409 new_result: &ParseResult,
410 ) -> f64 {
411 let total_old_items = old_result.nodes.len() + old_result.edges.len();
412 let total_new_items = new_result.nodes.len() + new_result.edges.len();
413 let max_items = total_old_items.max(total_new_items) as f64;
414
415 if max_items == 0.0 {
416 return 1.0; }
418
419 let total_changes = statistics.nodes_added
420 + statistics.nodes_removed
421 + statistics.nodes_modified
422 + statistics.edges_added
423 + statistics.edges_removed
424 + statistics.edges_modified;
425
426 let similarity = 1.0 - (total_changes as f64 / max_items);
427 similarity.max(0.0)
428 }
429
430 fn generate_summary(&self, statistics: &DiffStatistics, similarity_score: f64) -> String {
432 if statistics.total_differences == 0 {
433 return "No differences found - ASTs are identical".to_string();
434 }
435
436 let mut summary = format!(
437 "Found {} differences (similarity: {:.1}%)",
438 statistics.total_differences,
439 similarity_score * 100.0
440 );
441
442 let mut parts = Vec::new();
443
444 if statistics.nodes_added > 0 {
445 parts.push(format!("{} nodes added", statistics.nodes_added));
446 }
447 if statistics.nodes_removed > 0 {
448 parts.push(format!("{} nodes removed", statistics.nodes_removed));
449 }
450 if statistics.nodes_modified > 0 {
451 parts.push(format!("{} nodes modified", statistics.nodes_modified));
452 }
453 if statistics.edges_added > 0 {
454 parts.push(format!("{} edges added", statistics.edges_added));
455 }
456 if statistics.edges_removed > 0 {
457 parts.push(format!("{} edges removed", statistics.edges_removed));
458 }
459 if statistics.edges_modified > 0 {
460 parts.push(format!("{} edges modified", statistics.edges_modified));
461 }
462
463 if !parts.is_empty() {
464 summary.push_str(": ");
465 summary.push_str(&parts.join(", "));
466 }
467
468 summary
469 }
470}
471
472impl Default for AstDiff {
473 fn default() -> Self {
474 Self::new()
475 }
476}
477
478impl DiffReport {
479 pub fn format_report(&self) -> String {
481 let mut output = String::new();
482
483 output.push_str("=== AST Diff Report ===\n\n");
484 output.push_str(&format!("Summary: {}\n", self.summary));
485 output.push_str(&format!(
486 "Similarity Score: {:.1}%\n",
487 self.similarity_score * 100.0
488 ));
489
490 if self.is_significant_change {
491 output.push_str("⚠️ Significant changes detected!\n");
492 } else {
493 output.push_str("✅ Minor changes only\n");
494 }
495
496 output.push('\n');
497
498 if !self.differences.is_empty() {
499 output.push_str("## Detailed Changes:\n");
500 for (i, diff) in self.differences.iter().enumerate() {
501 output.push_str(&format!("{}. {}\n", i + 1, self.format_diff(diff)));
502 }
503 }
504
505 output.push_str("\n## Statistics:\n");
506 output.push_str(&format!("- Nodes added: {}\n", self.statistics.nodes_added));
507 output.push_str(&format!(
508 "- Nodes removed: {}\n",
509 self.statistics.nodes_removed
510 ));
511 output.push_str(&format!(
512 "- Nodes modified: {}\n",
513 self.statistics.nodes_modified
514 ));
515 output.push_str(&format!("- Edges added: {}\n", self.statistics.edges_added));
516 output.push_str(&format!(
517 "- Edges removed: {}\n",
518 self.statistics.edges_removed
519 ));
520 output.push_str(&format!(
521 "- Edges modified: {}\n",
522 self.statistics.edges_modified
523 ));
524 output.push_str(&format!(
525 "- Total differences: {}\n",
526 self.statistics.total_differences
527 ));
528
529 output
530 }
531
532 fn format_diff(&self, diff: &DiffType) -> String {
534 match diff {
535 DiffType::NodeAdded {
536 node_name,
537 node_type,
538 location,
539 } => {
540 format!(
541 "Added node '{}' ({}) at {}",
542 node_name,
543 node_type,
544 location.as_deref().unwrap_or("unknown")
545 )
546 }
547 DiffType::NodeRemoved {
548 node_name,
549 node_type,
550 location,
551 } => {
552 format!(
553 "Removed node '{}' ({}) from {}",
554 node_name,
555 node_type,
556 location.as_deref().unwrap_or("unknown")
557 )
558 }
559 DiffType::NodeModified {
560 node_name,
561 old_type,
562 new_type,
563 changes,
564 } => {
565 format!(
566 "Modified node '{}' ({} -> {}): {}",
567 node_name,
568 old_type,
569 new_type,
570 changes.join(", ")
571 )
572 }
573 DiffType::EdgeAdded {
574 source,
575 target,
576 edge_type,
577 } => {
578 format!("Added edge {source} -> {target} ({edge_type})")
579 }
580 DiffType::EdgeRemoved {
581 source,
582 target,
583 edge_type,
584 } => {
585 format!("Removed edge {source} -> {target} ({edge_type})")
586 }
587 DiffType::EdgeModified {
588 source,
589 target,
590 old_type,
591 new_type,
592 } => {
593 format!("Modified edge {source} -> {target} ({old_type} -> {new_type})")
594 }
595 DiffType::StructuralChange {
596 description,
597 impact,
598 } => {
599 format!("Structural change ({impact:?}): {description}")
600 }
601 }
602 }
603}
604
605#[cfg(test)]
606mod tests {
607 use super::*;
608 use codeprism_core::{NodeKind, Span};
609 use std::path::PathBuf;
610
611 fn create_test_node(_id: u64, name: &str, kind: NodeKind) -> codeprism_core::Node {
612 let path = PathBuf::from("test.rs");
613 let span = Span::new(0, 10, 1, 1, 1, 10);
614 let repo_id = "test_repo";
615
616 codeprism_core::Node {
617 id: codeprism_core::NodeId::new(repo_id, &path, &span, &kind),
618 kind,
619 name: name.to_string(),
620 file: path,
621 span,
622 lang: codeprism_core::Language::Rust,
623 metadata: Default::default(),
624 signature: Default::default(),
625 }
626 }
627
628 #[test]
629 fn test_ast_diff_creation() {
630 let diff = AstDiff::new();
631 assert!(diff.config.ignore_node_ids);
632 assert!(!diff.config.ignore_spans);
633 }
634
635 #[test]
636 fn test_create_node_key() {
637 let diff = AstDiff::new();
638 let node = create_test_node(1, "test", NodeKind::Function);
639 let key = diff.create_node_key(&node);
640 assert!(key.contains("test"));
641 assert!(key.contains("Function"));
642 }
643
644 #[test]
645 fn test_node_key_ignoring_ids() {
646 let config = DiffConfig {
647 ignore_node_ids: true,
648 ..Default::default()
649 };
650 let diff = AstDiff::with_config(config);
651
652 let node1 = create_test_node(1, "test", NodeKind::Function);
653 let node2 = create_test_node(2, "test", NodeKind::Function);
654
655 let key1 = diff.create_node_key(&node1);
656 let key2 = diff.create_node_key(&node2);
657
658 assert_eq!(key1, key2); }
660}