1use std::collections::{BTreeSet, HashMap, HashSet};
19use std::fs;
20use std::hash::{Hash, Hasher};
21use std::path::{Path, PathBuf};
22
23use anyhow::{bail, Result};
24use clap::Args;
25use regex::Regex;
26use tree_sitter::Node;
27
28use tldr_core::ast::function_finder::{get_function_name, get_function_node_kinds};
29use tldr_core::ast::parser::ParserPool;
30use tldr_core::callgraph::languages::LanguageRegistry;
31use tldr_core::types::Language;
32
33use super::error::RemainingError;
34use super::types::{
35 ASTChange, ArchChangeType, ArchDiffSummary, ArchLevelChange, BaseChanges, ChangeType,
36 DiffGranularity, DiffReport, DiffSummary, FileLevelChange, ImportEdge, ImportGraphSummary,
37 Location, ModuleLevelChange, NodeKind,
38};
39use crate::output::OutputFormat;
40
// Minimum body similarity (as returned by `compute_similarity`, in `0.0..=1.0`)
// at which `detect_changes` pairs an unmatched node from file A with an
// unmatched node of the same kind from file B and reports it as a rename.
const RENAME_SIMILARITY_THRESHOLD: f64 = 0.8;
47
// Command-line arguments for the diff command.
//
// NOTE: plain `//` comments are used on purpose. With clap's derive API, `///`
// doc comments become part of the generated help text, which would change the
// program's output.
#[derive(Debug, Args)]
pub struct DiffArgs {
    // First input ("A" / old side). Must exist; a directory is required for the
    // file, module and architecture granularities.
    pub file_a: PathBuf,

    // Second input ("B" / new side). Same requirements as `file_a`.
    pub file_b: PathBuf,

    // Level at which the two inputs are compared (`--granularity` / `-g`);
    // defaults to function level.
    #[arg(long, short = 'g', default_value = "function")]
    pub granularity: DiffGranularity,

    // Forwarded to the function- and class-level diffs; at function level it
    // suppresses pure `Move` changes (same body, different line).
    #[arg(long)]
    pub semantic_only: bool,

    // Optional destination file (`--output` / `-O`). When set, `run` writes the
    // JSON or text report to this path instead of printing it to stdout.
    #[arg(long, short = 'O')]
    pub output: Option<PathBuf>,
}
83
/// A function, method or class-like definition pulled out of a parsed file,
/// carrying everything `detect_changes` needs to compare two versions.
#[derive(Debug, Clone)]
struct ExtractedNode {
    /// Identifier used to match this node against the other file.
    name: String,
    /// Whether the node is a function, method or class.
    kind: NodeKind,
    /// 1-based line on which the node starts (tree-sitter row + 1).
    line: u32,
    /// 1-based line on which the node ends (tree-sitter row + 1).
    end_line: u32,
    /// Start column exactly as reported by tree-sitter (no `+ 1` is applied).
    column: u32,
    /// Full source text of the node.
    body: String,
    /// `body` after `normalize_body`; used for equality and similarity checks.
    normalized_body: String,
    /// Source text of the parameter list, or empty when none was found.
    params: String,
    /// Set by `with_method_kind` for functions found inside a class body.
    is_method: bool,
}
110
111impl ExtractedNode {
112 fn new(
113 name: impl Into<String>,
114 kind: NodeKind,
115 line: u32,
116 end_line: u32,
117 column: u32,
118 body: impl Into<String>,
119 ) -> Self {
120 let body_str: String = body.into();
121 let normalized = normalize_body(&body_str);
122 Self {
123 name: name.into(),
124 kind,
125 line,
126 end_line,
127 column,
128 body: body_str,
129 normalized_body: normalized,
130 params: String::new(),
131 is_method: false,
132 }
133 }
134
135 fn with_params(mut self, params: impl Into<String>) -> Self {
136 self.params = params.into();
137 self
138 }
139
140 fn with_method_kind(mut self) -> Self {
141 self.is_method = true;
142 if self.kind == NodeKind::Function {
143 self.kind = NodeKind::Method;
144 }
145 self
146 }
147}
148
/// Reduces a definition's source text to a form suitable for comparison.
///
/// The first line of `body` is skipped (presumably the signature line — the
/// callers pass the whole definition text). Every remaining line has a trailing
/// `#` comment removed, is trimmed, and is dropped if nothing is left. The
/// surviving lines are joined with `\n`.
///
/// NOTE(review): comment stripping is `#`-based regardless of language, so a
/// `#` outside quotes truncates the line even where `#` is not a comment
/// marker.
fn normalize_body(body: &str) -> String {
    body.lines()
        .skip(1)
        .map(|line| strip_hash_comment(line).trim())
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join("\n")
}

/// Returns `line` cut at the first `#` that is preceded by an even number of
/// single quotes and an even number of double quotes (i.e. is not inside a
/// quoted string by the parity heuristic).
///
/// Unlike a check of only the first `#`, this also strips a real comment that
/// follows a `#` embedded in a string, e.g. `x = "a#b"  # note`.
fn strip_hash_comment(line: &str) -> &str {
    let mut single_quotes = 0usize;
    let mut double_quotes = 0usize;
    for (idx, ch) in line.char_indices() {
        match ch {
            '\'' => single_quotes += 1,
            '"' => double_quotes += 1,
            '#' if single_quotes % 2 == 0 && double_quotes % 2 == 0 => return &line[..idx],
            _ => {}
        }
    }
    line
}
178
179impl DiffArgs {
184 pub fn run_to_report(&self) -> Result<DiffReport> {
190 if !self.file_a.exists() {
192 return Err(RemainingError::file_not_found(&self.file_a).into());
193 }
194 if !self.file_b.exists() {
195 return Err(RemainingError::file_not_found(&self.file_b).into());
196 }
197
198 match self.granularity {
199 DiffGranularity::File => {
200 if !self.file_a.is_dir() || !self.file_b.is_dir() {
202 bail!("File-level (L6) diff requires directories, not individual files");
203 }
204 run_file_level_diff(&self.file_a, &self.file_b)
205 }
206 DiffGranularity::Module => {
207 if !self.file_a.is_dir() || !self.file_b.is_dir() {
209 bail!("Module-level (L7) diff requires directories, not individual files");
210 }
211 run_module_level_diff(&self.file_a, &self.file_b)
212 }
213 DiffGranularity::Architecture => {
214 if !self.file_a.is_dir() || !self.file_b.is_dir() {
216 bail!(
217 "Architecture-level (L8) diff requires directories, not individual files"
218 );
219 }
220 run_arch_level_diff(&self.file_a, &self.file_b)
221 }
222 DiffGranularity::Class => {
223 if self.file_a.is_dir() && self.file_b.is_dir() {
225 run_class_diff_directory(&self.file_a, &self.file_b, self.semantic_only)
226 } else {
227 run_class_diff(&self.file_a, &self.file_b, self.semantic_only)
228 }
229 }
230 DiffGranularity::Statement => {
231 self.run_statement_level_diff()
233 }
234 DiffGranularity::Token => {
235 self.run_token_level_diff()
237 }
238 DiffGranularity::Expression => {
239 self.run_expression_level_diff()
241 }
242 _ => {
243 self.run_function_level_diff()
245 }
246 }
247 }
248
249 pub fn run(&self, format: OutputFormat) -> Result<()> {
251 let report = self.run_to_report()?;
252
253 match format {
255 OutputFormat::Json => {
256 let json = serde_json::to_string_pretty(&report)?;
257 if let Some(ref output_path) = self.output {
258 fs::write(output_path, &json)?;
259 } else {
260 println!("{}", json);
261 }
262 }
263 OutputFormat::Text => {
264 let text = format_diff_text(&report);
265 if let Some(ref output_path) = self.output {
266 fs::write(output_path, &text)?;
267 } else {
268 println!("{}", text);
269 }
270 }
271 OutputFormat::Sarif | OutputFormat::Compact | OutputFormat::Dot => {
272 let json = serde_json::to_string_pretty(&report)?;
274 println!("{}", json);
275 }
276 }
277
278 Ok(())
279 }
280
281 fn run_function_level_diff(&self) -> Result<DiffReport> {
283 let lang = Language::from_path(&self.file_a).ok_or_else(|| {
285 let ext = self
286 .file_a
287 .extension()
288 .map(|e| e.to_string_lossy().to_string())
289 .unwrap_or_else(|| "unknown".to_string());
290 RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
291 })?;
292
293 let source_a = fs::read_to_string(&self.file_a)?;
295 let source_b = fs::read_to_string(&self.file_b)?;
296
297 let pool = ParserPool::new();
299 let tree_a = pool.parse(&source_a, lang).map_err(|e| {
300 RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
301 })?;
302 let tree_b = pool.parse(&source_b, lang).map_err(|e| {
303 RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
304 })?;
305
306 let nodes_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
308 let nodes_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
309
310 let changes = detect_changes(
312 &nodes_a,
313 &nodes_b,
314 &self.file_a,
315 &self.file_b,
316 self.semantic_only,
317 );
318
319 let mut summary = DiffSummary::default();
321 for change in &changes {
322 summary.total_changes += 1;
323 if change.change_type != ChangeType::Format {
324 summary.semantic_changes += 1;
325 }
326 match change.change_type {
327 ChangeType::Insert => summary.inserts += 1,
328 ChangeType::Delete => summary.deletes += 1,
329 ChangeType::Update => summary.updates += 1,
330 ChangeType::Move => summary.moves += 1,
331 ChangeType::Rename => summary.renames += 1,
332 ChangeType::Format => summary.formats += 1,
333 ChangeType::Extract => summary.extracts += 1,
334 ChangeType::Inline => {}
335 }
336 }
337
338 let report = DiffReport {
340 file_a: self.file_a.display().to_string(),
341 file_b: self.file_b.display().to_string(),
342 identical: changes.is_empty(),
343 changes,
344 summary: Some(summary),
345 granularity: self.granularity,
346 file_changes: None,
347 module_changes: None,
348 import_graph_summary: None,
349 arch_changes: None,
350 arch_summary: None,
351 };
352
353 Ok(report)
354 }
355
356 fn run_token_level_diff(&self) -> Result<DiffReport> {
365 use super::difftastic;
366 use typed_arena::Arena;
367
368 let lang = Language::from_path(&self.file_a).ok_or_else(|| {
370 let ext = self
371 .file_a
372 .extension()
373 .map(|e| e.to_string_lossy().to_string())
374 .unwrap_or_else(|| "unknown".to_string());
375 RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
376 })?;
377
378 let lhs_src = fs::read_to_string(&self.file_a)?;
380 let rhs_src = fs::read_to_string(&self.file_b)?;
381
382 let config = difftastic::lang_config::LangConfig::for_language(lang.as_str());
384
385 let pool = ParserPool::new();
387 let lhs_tree = pool.parse(&lhs_src, lang).map_err(|e| {
388 RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
389 })?;
390 let rhs_tree = pool.parse(&rhs_src, lang).map_err(|e| {
391 RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
392 })?;
393
394 let arena = Arena::new();
396 let (lhs_nodes, rhs_nodes) = difftastic::ts_to_syntax::prepare_syntax_trees(
397 &arena, &lhs_src, &rhs_src, &lhs_tree, &rhs_tree, &config,
398 );
399
400 let mut change_map = difftastic::changes::ChangeMap::default();
402
403 let chunks = difftastic::unchanged::mark_unchanged(&lhs_nodes, &rhs_nodes, &mut change_map);
405
406 for (lhs_chunk, rhs_chunk) in &chunks {
408 match (lhs_chunk.first(), rhs_chunk.first()) {
409 (Some(lhs_first), Some(rhs_first)) => {
410 if difftastic::dijkstra::mark_syntax(
411 Some(*lhs_first),
412 Some(*rhs_first),
413 &mut change_map,
414 difftastic::dijkstra::DEFAULT_GRAPH_LIMIT,
415 )
416 .is_err()
417 {
418 for node in lhs_chunk {
420 difftastic::changes::insert_deep_novel(node, &mut change_map);
421 }
422 for node in rhs_chunk {
423 difftastic::changes::insert_deep_novel(node, &mut change_map);
424 }
425 }
426 }
427 (Some(_), None) => {
428 for node in lhs_chunk {
430 difftastic::changes::insert_deep_novel(node, &mut change_map);
431 }
432 }
433 (None, Some(_)) => {
434 for node in rhs_chunk {
436 difftastic::changes::insert_deep_novel(node, &mut change_map);
437 }
438 }
439 (None, None) => {
440 }
442 }
443 }
444
445 difftastic::sliders::fix_all_sliders(&lhs_nodes, &mut change_map);
447 difftastic::sliders::fix_all_sliders(&rhs_nodes, &mut change_map);
448
449 let fa = self.file_a.display().to_string();
451 let fb = self.file_b.display().to_string();
452 Ok(difftastic::changemap_to_report::changemap_to_l1_report(
453 &lhs_nodes,
454 &rhs_nodes,
455 &change_map,
456 &fa,
457 &fb,
458 ))
459 }
460
461 fn run_expression_level_diff(&self) -> Result<DiffReport> {
467 use super::difftastic;
468 use typed_arena::Arena;
469
470 let lang = Language::from_path(&self.file_a).ok_or_else(|| {
472 let ext = self
473 .file_a
474 .extension()
475 .map(|e| e.to_string_lossy().to_string())
476 .unwrap_or_else(|| "unknown".to_string());
477 RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
478 })?;
479
480 let lhs_src = fs::read_to_string(&self.file_a)?;
482 let rhs_src = fs::read_to_string(&self.file_b)?;
483
484 let config = difftastic::lang_config::LangConfig::for_language(lang.as_str());
486
487 let pool = ParserPool::new();
489 let lhs_tree = pool.parse(&lhs_src, lang).map_err(|e| {
490 RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
491 })?;
492 let rhs_tree = pool.parse(&rhs_src, lang).map_err(|e| {
493 RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
494 })?;
495
496 let arena = Arena::new();
498 let (lhs_nodes, rhs_nodes) = difftastic::ts_to_syntax::prepare_syntax_trees(
499 &arena, &lhs_src, &rhs_src, &lhs_tree, &rhs_tree, &config,
500 );
501
502 let mut change_map = difftastic::changes::ChangeMap::default();
504
505 let chunks = difftastic::unchanged::mark_unchanged(&lhs_nodes, &rhs_nodes, &mut change_map);
507
508 for (lhs_chunk, rhs_chunk) in &chunks {
510 match (lhs_chunk.first(), rhs_chunk.first()) {
511 (Some(lhs_first), Some(rhs_first)) => {
512 if difftastic::dijkstra::mark_syntax(
513 Some(*lhs_first),
514 Some(*rhs_first),
515 &mut change_map,
516 difftastic::dijkstra::DEFAULT_GRAPH_LIMIT,
517 )
518 .is_err()
519 {
520 for node in lhs_chunk {
521 difftastic::changes::insert_deep_novel(node, &mut change_map);
522 }
523 for node in rhs_chunk {
524 difftastic::changes::insert_deep_novel(node, &mut change_map);
525 }
526 }
527 }
528 (Some(_), None) => {
529 for node in lhs_chunk {
530 difftastic::changes::insert_deep_novel(node, &mut change_map);
531 }
532 }
533 (None, Some(_)) => {
534 for node in rhs_chunk {
535 difftastic::changes::insert_deep_novel(node, &mut change_map);
536 }
537 }
538 (None, None) => {}
539 }
540 }
541
542 difftastic::sliders::fix_all_sliders(&lhs_nodes, &mut change_map);
544 difftastic::sliders::fix_all_sliders(&rhs_nodes, &mut change_map);
545
546 let fa = self.file_a.display().to_string();
548 let fb = self.file_b.display().to_string();
549 Ok(difftastic::changemap_to_report::changemap_to_l2_report(
550 &lhs_nodes,
551 &rhs_nodes,
552 &change_map,
553 &fa,
554 &fb,
555 ))
556 }
557}
558
559fn node_text<'a>(node: Node, source: &'a [u8]) -> &'a str {
565 node.utf8_text(source).unwrap_or("")
566}
567
568fn get_class_node_kinds(language: Language) -> &'static [&'static str] {
570 match language {
571 Language::Python => &["class_definition"],
572 Language::TypeScript | Language::JavaScript => &["class_declaration", "class"],
573 Language::Go => &["type_declaration"],
574 Language::Rust => &["struct_item", "enum_item", "impl_item"],
575 Language::Java => &[
576 "class_declaration",
577 "interface_declaration",
578 "enum_declaration",
579 ],
580 Language::C => &["struct_specifier", "enum_specifier"],
581 Language::Cpp => &["class_specifier", "struct_specifier", "enum_specifier"],
582 Language::Ruby => &["class", "module"],
583 Language::Php => &["class_declaration", "interface_declaration"],
584 Language::CSharp => &[
585 "class_declaration",
586 "interface_declaration",
587 "struct_declaration",
588 ],
589 Language::Kotlin => &["class_declaration", "object_declaration"],
590 Language::Scala => &["class_definition", "object_definition", "trait_definition"],
591 Language::Swift => &[
592 "class_declaration",
593 "struct_declaration",
594 "protocol_declaration",
595 ],
596 Language::Elixir => &["call"], Language::Lua | Language::Luau => &[], Language::Ocaml => &["module_definition", "type_definition"],
599 }
600}
601
602fn get_class_body_kinds(language: Language) -> &'static [&'static str] {
604 match language {
605 Language::Python => &["block"],
606 Language::TypeScript | Language::JavaScript => &["class_body"],
607 Language::Go => &[], Language::Rust => &["declaration_list"], Language::Java => &["class_body"],
610 Language::C | Language::Cpp => &["field_declaration_list"],
611 Language::Ruby => &["body_statement"],
612 Language::Php => &["declaration_list"],
613 Language::CSharp => &["declaration_list"],
614 Language::Kotlin => &["class_body"],
615 Language::Scala => &["template_body"],
616 Language::Swift => &["class_body"],
617 Language::Elixir => &["do_block"],
618 Language::Lua | Language::Luau => &[],
619 Language::Ocaml => &[],
620 }
621}
622
623fn extract_nodes(root: Node, source: &[u8], lang: Language) -> Vec<ExtractedNode> {
629 let mut nodes = Vec::new();
630 let kinds = NodeKindSets {
631 func: get_function_node_kinds(lang),
632 class: get_class_node_kinds(lang),
633 body: get_class_body_kinds(lang),
634 };
635 extract_nodes_recursive(
636 root,
637 source,
638 &mut nodes,
639 false,
640 lang,
641 &kinds,
642 );
643 nodes
644}
645
/// Language-specific node-kind tables consulted while walking the syntax tree
/// in `extract_nodes_recursive`.
struct NodeKindSets<'a> {
    /// Kinds that denote a function definition.
    func: &'a [&'a str],
    /// Kinds that denote a class-like definition.
    class: &'a [&'a str],
    /// Kinds of the child that holds a class's members.
    body: &'a [&'a str],
}
651
652fn extract_nodes_recursive(
653 node: Node,
654 source: &[u8],
655 nodes: &mut Vec<ExtractedNode>,
656 in_class: bool,
657 lang: Language,
658 kinds: &NodeKindSets<'_>,
659) {
660 let kind = node.kind();
661
662 if kinds.func.contains(&kind) {
664 if let Some(extracted) = extract_function_node(node, source, in_class, lang) {
665 nodes.push(extracted);
666 }
667 }
668 else if kinds.class.contains(&kind) {
670 if let Some(extracted) = extract_class_node(node, source, lang) {
671 nodes.push(extracted);
672 }
673 for child in node.children(&mut node.walk()) {
675 if kinds.body.contains(&child.kind()) {
676 extract_nodes_recursive(child, source, nodes, true, lang, kinds);
677 }
678 }
679 return; }
681
682 for child in node.children(&mut node.walk()) {
684 extract_nodes_recursive(child, source, nodes, in_class, lang, kinds);
685 }
686}
687
688fn extract_function_node(
689 node: Node,
690 source: &[u8],
691 is_method: bool,
692 lang: Language,
693) -> Option<ExtractedNode> {
694 let source_str = std::str::from_utf8(source).unwrap_or("");
696 let func_name = get_function_name(node, lang, source_str)?;
697
698 let params = node
700 .child_by_field_name("parameters")
701 .or_else(|| node.child_by_field_name("formal_parameters"))
702 .map(|p| node_text(p, source).to_string())
703 .unwrap_or_default();
704
705 let line = node.start_position().row as u32 + 1;
706 let end_line = node.end_position().row as u32 + 1;
707 let column = node.start_position().column as u32;
708 let body = node_text(node, source).to_string();
709
710 let mut extracted =
711 ExtractedNode::new(func_name, NodeKind::Function, line, end_line, column, body)
712 .with_params(params);
713
714 if is_method {
715 extracted = extracted.with_method_kind();
716 }
717
718 Some(extracted)
719}
720
721fn extract_class_node(node: Node, source: &[u8], lang: Language) -> Option<ExtractedNode> {
722 let class_name = node
724 .child_by_field_name("name")
725 .map(|n| node_text(n, source).to_string())
726 .or_else(|| {
727 let mut cursor = node.walk();
729 for child in node.children(&mut cursor) {
730 if child.kind() == "identifier"
731 || child.kind() == "type_identifier"
732 || child.kind() == "constant"
733 {
734 return Some(node_text(child, source).to_string());
735 }
736 }
737 None
738 })?;
739
740 if class_name.is_empty() {
742 return None;
743 }
744
745 if lang == Language::Elixir && node.kind() == "call" {
747 let first_child = node.child(0)?;
748 let first_text = node_text(first_child, source);
749 if first_text != "defmodule" {
750 return None;
751 }
752 if let Some(args) = node.child(1) {
754 let name = node_text(args, source).to_string();
755 if !name.is_empty() {
756 let line = node.start_position().row as u32 + 1;
757 let end_line = node.end_position().row as u32 + 1;
758 let column = node.start_position().column as u32;
759 let body = node_text(node, source).to_string();
760 return Some(ExtractedNode::new(
761 name,
762 NodeKind::Class,
763 line,
764 end_line,
765 column,
766 body,
767 ));
768 }
769 }
770 return None;
771 }
772
773 let line = node.start_position().row as u32 + 1;
774 let end_line = node.end_position().row as u32 + 1;
775 let column = node.start_position().column as u32;
776 let body = node_text(node, source).to_string();
777
778 Some(ExtractedNode::new(
779 class_name,
780 NodeKind::Class,
781 line,
782 end_line,
783 column,
784 body,
785 ))
786}
787
788fn detect_changes(
794 nodes_a: &[ExtractedNode],
795 nodes_b: &[ExtractedNode],
796 file_a: &Path,
797 file_b: &Path,
798 semantic_only: bool,
799) -> Vec<ASTChange> {
800 let mut changes = Vec::new();
801
802 let _map_a: HashMap<&str, &ExtractedNode> =
804 nodes_a.iter().map(|n| (n.name.as_str(), n)).collect();
805 let map_b: HashMap<&str, &ExtractedNode> =
806 nodes_b.iter().map(|n| (n.name.as_str(), n)).collect();
807
808 let mut matched_a: Vec<bool> = vec![false; nodes_a.len()];
810 let mut matched_b: Vec<bool> = vec![false; nodes_b.len()];
811
812 for (i, node_a) in nodes_a.iter().enumerate() {
814 let _ = node_a.end_line;
815 if let Some(&node_b) = map_b.get(node_a.name.as_str()) {
816 matched_a[i] = true;
818 if let Some(j) = nodes_b.iter().position(|n| n.name == node_a.name) {
819 matched_b[j] = true;
820 }
821
822 if node_a.normalized_body != node_b.normalized_body {
824 changes.push(ASTChange {
826 change_type: ChangeType::Update,
827 node_kind: node_a.kind,
828 name: Some(node_a.name.clone()),
829 old_location: Some(Location::with_column(
830 file_a.display().to_string(),
831 node_a.line,
832 node_a.column,
833 )),
834 new_location: Some(Location::with_column(
835 file_b.display().to_string(),
836 node_b.line,
837 node_b.column,
838 )),
839 old_text: Some(node_a.body.clone()),
840 new_text: Some(node_b.body.clone()),
841 similarity: Some(compute_similarity(
842 &node_a.normalized_body,
843 &node_b.normalized_body,
844 )),
845 children: None,
846 base_changes: None,
847 });
848 } else if node_a.line != node_b.line && !semantic_only {
849 changes.push(ASTChange {
851 change_type: ChangeType::Move,
852 node_kind: node_a.kind,
853 name: Some(node_a.name.clone()),
854 old_location: Some(Location::with_column(
855 file_a.display().to_string(),
856 node_a.line,
857 node_a.column,
858 )),
859 new_location: Some(Location::with_column(
860 file_b.display().to_string(),
861 node_b.line,
862 node_b.column,
863 )),
864 old_text: None,
865 new_text: None,
866 similarity: Some(1.0),
867 children: None,
868 base_changes: None,
869 });
870 }
871 }
872 }
873
874 let unmatched_a: Vec<(usize, &ExtractedNode)> = nodes_a
876 .iter()
877 .enumerate()
878 .filter(|(i, _)| !matched_a[*i])
879 .collect();
880 let unmatched_b: Vec<(usize, &ExtractedNode)> = nodes_b
881 .iter()
882 .enumerate()
883 .filter(|(i, _)| !matched_b[*i])
884 .collect();
885
886 let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
888
889 for (_, node_a) in &unmatched_a {
890 let mut best_match: Option<(usize, f64)> = None;
891
892 for (j, (_, node_b)) in unmatched_b.iter().enumerate() {
893 if used_b[j] {
894 continue;
895 }
896 if node_a.kind != node_b.kind {
897 continue;
898 }
899
900 let similarity = compute_similarity(&node_a.normalized_body, &node_b.normalized_body);
901 if similarity >= RENAME_SIMILARITY_THRESHOLD
902 && (best_match.is_none() || similarity > best_match.unwrap().1)
903 {
904 best_match = Some((j, similarity));
905 }
906 }
907
908 if let Some((j, similarity)) = best_match {
909 let (_, node_b) = unmatched_b[j];
910 used_b[j] = true;
911
912 changes.push(ASTChange {
914 change_type: ChangeType::Rename,
915 node_kind: node_a.kind,
916 name: Some(node_a.name.clone()),
917 old_location: Some(Location::with_column(
918 file_a.display().to_string(),
919 node_a.line,
920 node_a.column,
921 )),
922 new_location: Some(Location::with_column(
923 file_b.display().to_string(),
924 node_b.line,
925 node_b.column,
926 )),
927 old_text: Some(node_a.name.clone()),
928 new_text: Some(node_b.name.clone()),
929 similarity: Some(similarity),
930 children: None,
931 base_changes: None,
932 });
933 }
934 }
935
936 for (_, node_a) in &unmatched_a {
938 let is_renamed = changes
940 .iter()
941 .any(|c| c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&node_a.name));
942 if !is_renamed {
943 changes.push(ASTChange {
944 change_type: ChangeType::Delete,
945 node_kind: node_a.kind,
946 name: Some(node_a.name.clone()),
947 old_location: Some(Location::with_column(
948 file_a.display().to_string(),
949 node_a.line,
950 node_a.column,
951 )),
952 new_location: None,
953 old_text: None,
954 new_text: None,
955 similarity: None,
956 children: None,
957 base_changes: None,
958 });
959 }
960 }
961
962 for (j, (_, node_b)) in unmatched_b.iter().enumerate() {
964 if !used_b[j] {
965 changes.push(ASTChange {
966 change_type: ChangeType::Insert,
967 node_kind: node_b.kind,
968 name: Some(node_b.name.clone()),
969 old_location: None,
970 new_location: Some(Location::with_column(
971 file_b.display().to_string(),
972 node_b.line,
973 node_b.column,
974 )),
975 old_text: None,
976 new_text: None,
977 similarity: None,
978 children: None,
979 base_changes: None,
980 });
981 }
982 }
983
984 changes.sort_by_key(|c| match c.change_type {
986 ChangeType::Delete => 0,
987 ChangeType::Rename => 1,
988 ChangeType::Update => 2,
989 ChangeType::Move => 3,
990 ChangeType::Insert => 4,
991 _ => 5,
992 });
993
994 changes
995}
996
997fn compute_similarity(a: &str, b: &str) -> f64 {
1004 if a == b {
1005 return 1.0;
1006 }
1007 if a.is_empty() || b.is_empty() {
1008 return 0.0;
1009 }
1010
1011 let lines_a: std::collections::HashSet<&str> = a.lines().collect();
1013 let lines_b: std::collections::HashSet<&str> = b.lines().collect();
1014
1015 let intersection = lines_a.intersection(&lines_b).count();
1016 let union = lines_a.union(&lines_b).count();
1017
1018 let line_sim = if union == 0 {
1019 0.0
1020 } else {
1021 intersection as f64 / union as f64
1022 };
1023
1024 if line_sim == 0.0 && lines_a.len() <= 2 && lines_b.len() <= 2 {
1027 return char_jaccard_similarity(a, b);
1028 }
1029
1030 line_sim
1031}
1032
/// Jaccard index over the distinct byte bigrams of `a` and `b`.
///
/// When either input is shorter than two bytes no bigram exists, so the result
/// is `1.0` for equal inputs and `0.0` otherwise.
fn char_jaccard_similarity(a: &str, b: &str) -> f64 {
    /// Distinct two-byte windows of `text`.
    fn bigrams(text: &str) -> std::collections::HashSet<&[u8]> {
        text.as_bytes().windows(2).collect()
    }

    if a.len().min(b.len()) < 2 {
        return if a == b { 1.0 } else { 0.0 };
    }

    let left = bigrams(a);
    let right = bigrams(b);

    let shared = left.intersection(&right).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    match left.len() + right.len() - shared {
        0 => 0.0,
        total => shared as f64 / total as f64,
    }
}
1051
1052fn format_diff_text(report: &DiffReport) -> String {
1058 let mut out = String::new();
1059
1060 out.push_str("Diff Report\n");
1061 out.push_str("===========\n\n");
1062 out.push_str(&format!("File A: {}\n", report.file_a));
1063 out.push_str(&format!("File B: {}\n", report.file_b));
1064 out.push_str(&format!("Identical: {}\n\n", report.identical));
1065
1066 if report.identical {
1067 out.push_str("No structural changes detected.\n");
1068 return out;
1069 }
1070
1071 out.push_str("Changes:\n");
1072 out.push_str("--------\n");
1073
1074 for change in &report.changes {
1075 let change_type = match change.change_type {
1076 ChangeType::Insert => "+",
1077 ChangeType::Delete => "-",
1078 ChangeType::Update => "~",
1079 ChangeType::Move => ">",
1080 ChangeType::Rename => "R",
1081 ChangeType::Format => "F",
1082 ChangeType::Extract => "E",
1083 ChangeType::Inline => "I",
1084 };
1085
1086 let kind = match change.node_kind {
1087 NodeKind::Function => "function",
1088 NodeKind::Class => "class",
1089 NodeKind::Method => "method",
1090 NodeKind::Field => "field",
1091 NodeKind::Statement => "statement",
1092 NodeKind::Expression => "expression",
1093 NodeKind::Block => "block",
1094 };
1095
1096 let name = change.name.as_deref().unwrap_or("<unknown>");
1097
1098 match change.change_type {
1099 ChangeType::Insert => {
1100 if let Some(ref loc) = change.new_location {
1101 out.push_str(&format!(
1102 " {} {} {} at {}:{}\n",
1103 change_type, kind, name, loc.file, loc.line
1104 ));
1105 }
1106 }
1107 ChangeType::Delete => {
1108 if let Some(ref loc) = change.old_location {
1109 out.push_str(&format!(
1110 " {} {} {} at {}:{}\n",
1111 change_type, kind, name, loc.file, loc.line
1112 ));
1113 }
1114 }
1115 ChangeType::Update | ChangeType::Move => {
1116 if let (Some(ref old), Some(ref new)) = (&change.old_location, &change.new_location)
1117 {
1118 out.push_str(&format!(
1119 " {} {} {} from {}:{} to {}:{}\n",
1120 change_type, kind, name, old.file, old.line, new.file, new.line
1121 ));
1122 }
1123 }
1124 ChangeType::Rename => {
1125 let old_name = change.old_text.as_deref().unwrap_or(name);
1126 let new_name = change.new_text.as_deref().unwrap_or(name);
1127 out.push_str(&format!(
1128 " {} {} {} -> {}\n",
1129 change_type, kind, old_name, new_name
1130 ));
1131 }
1132 _ => {
1133 out.push_str(&format!(" {} {} {}\n", change_type, kind, name));
1134 }
1135 }
1136 }
1137
1138 if let Some(ref summary) = report.summary {
1139 out.push_str("\nSummary:\n");
1140 out.push_str("--------\n");
1141 out.push_str(&format!(" Total changes: {}\n", summary.total_changes));
1142 out.push_str(&format!(
1143 " Semantic changes: {}\n",
1144 summary.semantic_changes
1145 ));
1146 out.push_str(&format!(" Inserts: {}\n", summary.inserts));
1147 out.push_str(&format!(" Deletes: {}\n", summary.deletes));
1148 out.push_str(&format!(" Updates: {}\n", summary.updates));
1149 out.push_str(&format!(" Renames: {}\n", summary.renames));
1150 out.push_str(&format!(" Moves: {}\n", summary.moves));
1151 }
1152
1153 if let Some(ref file_changes) = report.file_changes {
1155 out.push_str("\nFile-Level Changes:\n");
1156 out.push_str("-------------------\n");
1157 for fc in file_changes {
1158 let change_type = match fc.change_type {
1159 ChangeType::Insert => "+",
1160 ChangeType::Delete => "-",
1161 ChangeType::Update => "~",
1162 _ => "?",
1163 };
1164 out.push_str(&format!(" {} {}\n", change_type, fc.relative_path));
1165 if let Some(ref sigs) = fc.signature_changes {
1166 for sig in sigs {
1167 out.push_str(&format!(" changed: {}\n", sig));
1168 }
1169 }
1170 }
1171 }
1172
1173 if let Some(ref module_changes) = report.module_changes {
1175 out.push_str("\nModule-Level Changes:\n");
1176 out.push_str("---------------------\n");
1177 for mc in module_changes {
1178 let change_type = match mc.change_type {
1179 ChangeType::Insert => "+",
1180 ChangeType::Delete => "-",
1181 ChangeType::Update => "~",
1182 _ => "?",
1183 };
1184 out.push_str(&format!(" {} {}\n", change_type, mc.module_path));
1185 for edge in &mc.imports_added {
1186 let names = if edge.imported_names.is_empty() {
1187 String::new()
1188 } else {
1189 format!(" ({})", edge.imported_names.join(", "))
1190 };
1191 out.push_str(&format!(" + import {}{}\n", edge.target_module, names));
1192 }
1193 for edge in &mc.imports_removed {
1194 let names = if edge.imported_names.is_empty() {
1195 String::new()
1196 } else {
1197 format!(" ({})", edge.imported_names.join(", "))
1198 };
1199 out.push_str(&format!(" - import {}{}\n", edge.target_module, names));
1200 }
1201 }
1202 }
1203
1204 if let Some(ref igs) = report.import_graph_summary {
1206 out.push_str("\nImport Graph Summary:\n");
1207 out.push_str("---------------------\n");
1208 out.push_str(&format!(" Edges in A: {}\n", igs.total_edges_a));
1209 out.push_str(&format!(" Edges in B: {}\n", igs.total_edges_b));
1210 out.push_str(&format!(" Edges added: {}\n", igs.edges_added));
1211 out.push_str(&format!(" Edges removed: {}\n", igs.edges_removed));
1212 out.push_str(&format!(
1213 " Modules with import changes: {}\n",
1214 igs.modules_with_import_changes
1215 ));
1216 }
1217
1218 if let Some(ref arch_changes) = report.arch_changes {
1220 out.push_str("\nArchitecture-Level Changes:\n");
1221 out.push_str("---------------------------\n");
1222 for ac in arch_changes {
1223 let change_label = match ac.change_type {
1224 ArchChangeType::LayerMigration => "migration",
1225 ArchChangeType::Added => "added",
1226 ArchChangeType::Removed => "removed",
1227 ArchChangeType::CompositionChanged => "composition changed",
1228 ArchChangeType::CycleIntroduced => "cycle introduced",
1229 ArchChangeType::CycleResolved => "cycle resolved",
1230 };
1231 out.push_str(&format!(" [{}] {}\n", change_label, ac.directory));
1232 if let (Some(ref old), Some(ref new)) = (&ac.old_layer, &ac.new_layer) {
1233 out.push_str(&format!(" {} -> {}\n", old, new));
1234 } else if let Some(ref new) = ac.new_layer {
1235 out.push_str(&format!(" -> {}\n", new));
1236 } else if let Some(ref old) = ac.old_layer {
1237 out.push_str(&format!(" {} ->\n", old));
1238 }
1239 if !ac.migrated_functions.is_empty() {
1240 out.push_str(&format!(
1241 " migrated: {}\n",
1242 ac.migrated_functions.join(", ")
1243 ));
1244 }
1245 }
1246 }
1247
1248 if let Some(ref arch_summary) = report.arch_summary {
1250 out.push_str("\nArchitecture Summary:\n");
1251 out.push_str("---------------------\n");
1252 out.push_str(&format!(
1253 " Layer migrations: {}\n",
1254 arch_summary.layer_migrations
1255 ));
1256 out.push_str(&format!(
1257 " Directories added: {}\n",
1258 arch_summary.directories_added
1259 ));
1260 out.push_str(&format!(
1261 " Directories removed: {}\n",
1262 arch_summary.directories_removed
1263 ));
1264 out.push_str(&format!(
1265 " Cycles introduced: {}\n",
1266 arch_summary.cycles_introduced
1267 ));
1268 out.push_str(&format!(
1269 " Cycles resolved: {}\n",
1270 arch_summary.cycles_resolved
1271 ));
1272 out.push_str(&format!(
1273 " Stability score: {}\n",
1274 arch_summary.stability_score
1275 ));
1276 }
1277
1278 out
1279}
1280
/// Returns the parser node kinds that are treated as statements for `lang`.
///
/// The returned names are compared against `Node::kind()` when the statement
/// tree is built: a node whose kind appears in this list becomes a tree node,
/// any other node is only searched for statements nested inside it.
///
/// The match is exhaustive over `Language`, so adding a language variant
/// forces a new entry here.
fn get_statement_node_kinds(lang: Language) -> &'static [&'static str] {
    match lang {
        Language::Python => &[
            "return_statement",
            "if_statement",
            "for_statement",
            "while_statement",
            "expression_statement",
            "assert_statement",
            "raise_statement",
            "try_statement",
            "with_statement",
            "assignment",
            "augmented_assignment",
            "delete_statement",
            "pass_statement",
            "break_statement",
            "continue_statement",
        ],
        // TypeScript and JavaScript share one statement list.
        Language::TypeScript | Language::JavaScript => &[
            "return_statement",
            "if_statement",
            "for_statement",
            "for_in_statement",
            "while_statement",
            "do_statement",
            "expression_statement",
            "variable_declaration",
            "lexical_declaration",
            "throw_statement",
            "try_statement",
            "switch_statement",
            "break_statement",
            "continue_statement",
        ],
        Language::Go => &[
            "return_statement",
            "if_statement",
            "for_statement",
            "expression_statement",
            "short_var_declaration",
            "var_declaration",
            "assignment_statement",
            "go_statement",
            "defer_statement",
            "select_statement",
            "switch_statement",
        ],
        // Rust control flow is expression-based, hence the `*_expression` kinds.
        Language::Rust => &[
            "let_declaration",
            "expression_statement",
            "return_expression",
            "if_expression",
            "for_expression",
            "while_expression",
            "loop_expression",
            "match_expression",
        ],
        Language::Java => &[
            "return_statement",
            "if_statement",
            "for_statement",
            "enhanced_for_statement",
            "while_statement",
            "do_statement",
            "expression_statement",
            "local_variable_declaration",
            "throw_statement",
            "try_statement",
            "switch_expression",
        ],
        // C and C++ share one statement list.
        Language::C | Language::Cpp => &[
            "return_statement",
            "if_statement",
            "for_statement",
            "while_statement",
            "do_statement",
            "expression_statement",
            "declaration",
            "switch_statement",
        ],
        Language::Ruby => &[
            "return",
            "if",
            "unless",
            "for",
            "while",
            "until",
            "assignment",
            "call",
            "begin",
        ],
        Language::Php => &[
            "return_statement",
            "if_statement",
            "for_statement",
            "foreach_statement",
            "while_statement",
            "expression_statement",
            "echo_statement",
            "throw_expression",
            "try_statement",
        ],
        Language::CSharp => &[
            "return_statement",
            "if_statement",
            "for_statement",
            "foreach_statement",
            "while_statement",
            "expression_statement",
            "local_declaration_statement",
            "throw_statement",
            "try_statement",
        ],
        Language::Kotlin => &[
            "property_declaration",
            "assignment",
            "if_expression",
            "for_statement",
            "while_statement",
            "do_while_statement",
            "return_expression",
            "throw_expression",
            "try_expression",
        ],
        Language::Scala => &[
            "val_definition",
            "var_definition",
            "if_expression",
            "for_expression",
            "while_expression",
            "return_expression",
            "throw_expression",
            "try_expression",
            "call_expression",
        ],
        Language::Swift => &[
            "value_binding_pattern",
            "if_statement",
            "for_in_statement",
            "while_statement",
            "return_statement",
            "throw_statement",
            "guard_statement",
            "switch_statement",
        ],
        Language::Elixir => &["call", "if", "case", "cond"],
        // Lua and Luau share one statement list.
        Language::Lua | Language::Luau => &[
            "return_statement",
            "if_statement",
            "for_statement",
            "while_statement",
            "variable_declaration",
            "assignment_statement",
            "function_call",
        ],
        Language::Ocaml => &[
            "let_binding",
            "if_expression",
            "match_expression",
            "application",
        ],
    }
}
1450
/// A node of the simplified statement tree that is compared between two
/// versions of a function.
#[derive(Debug, Clone)]
struct LabeledTreeNode {
    /// Comparison label; two nodes count as equal when their labels are equal.
    label: String,
    /// Statement nodes nested inside this one, in source order.
    children: Vec<LabeledTreeNode>,
    /// 1-based line on which the underlying syntax node starts.
    line: u32,
}
1461
/// One entry of a statement tree flattened into postorder
/// (children before their parent).
#[derive(Debug, Clone)]
struct PostorderNode {
    /// Label copied from the originating `LabeledTreeNode`.
    label: String,
    /// 1-based source line copied from the originating `LabeledTreeNode`.
    line: u32,
    /// Postorder index of the leftmost leaf of the subtree rooted at this
    /// node; for a leaf this is the node's own index.
    leftmost_leaf: usize,
}
1470
/// A single edit step between two postorder node lists.
///
/// All indices refer to positions in the postorder slices the ops were
/// derived from (`index_a` into the old list, `index_b` into the new list).
#[derive(Debug, Clone)]
enum EditOp {
    /// The node at `index_a` exists only in the old list.
    Delete { index_a: usize },
    /// The node at `index_b` exists only in the new list.
    Insert { index_b: usize },
    /// The node at `index_a` corresponds to the node at `index_b`, but their
    /// labels differ.
    Relabel { index_a: usize, index_b: usize },
}
1481
1482fn build_labeled_tree(node: Node, source: &[u8], statement_kinds: &[&str]) -> LabeledTreeNode {
1488 let label = build_node_label(node, source);
1489 let line = node.start_position().row as u32 + 1;
1490
1491 let mut children = Vec::new();
1492 let mut cursor = node.walk();
1493 for child in node.children(&mut cursor) {
1494 if statement_kinds.contains(&child.kind()) {
1495 children.push(build_labeled_tree(child, source, statement_kinds));
1497 } else {
1498 let nested = collect_nested_statements(child, source, statement_kinds);
1500 children.extend(nested);
1501 }
1502 }
1503
1504 LabeledTreeNode {
1505 label,
1506 children,
1507 line,
1508 }
1509}
1510
1511fn collect_nested_statements(
1513 node: Node,
1514 source: &[u8],
1515 statement_kinds: &[&str],
1516) -> Vec<LabeledTreeNode> {
1517 let mut result = Vec::new();
1518 let mut cursor = node.walk();
1519 for child in node.children(&mut cursor) {
1520 if statement_kinds.contains(&child.kind()) {
1521 result.push(build_labeled_tree(child, source, statement_kinds));
1522 } else {
1523 result.extend(collect_nested_statements(child, source, statement_kinds));
1524 }
1525 }
1526 result
1527}
1528
1529fn build_node_label(node: Node, source: &[u8]) -> String {
1534 let kind = node.kind();
1535 let text = node.utf8_text(source).unwrap_or("");
1536
1537 let first_line = text.lines().next().unwrap_or("").trim();
1540
1541 let significant = if first_line.len() > 120 {
1543 &first_line[..120]
1544 } else {
1545 first_line
1546 };
1547
1548 format!("{}:{}", kind, significant)
1549}
1550
1551fn extract_statement_tree(
1556 func_node: Node,
1557 source: &[u8],
1558 lang: Language,
1559 statement_kinds: &[&str],
1560) -> LabeledTreeNode {
1561 let body_node = find_function_body(func_node, lang);
1563
1564 match body_node {
1565 Some(body) => {
1566 let mut children = Vec::new();
1568 let mut cursor = body.walk();
1569 for child in body.children(&mut cursor) {
1570 if statement_kinds.contains(&child.kind()) {
1571 children.push(build_labeled_tree(child, source, statement_kinds));
1572 } else {
1573 children.extend(collect_nested_statements(child, source, statement_kinds));
1574 }
1575 }
1576
1577 LabeledTreeNode {
1578 label: format!("body:{}", func_node.kind()),
1579 children,
1580 line: body.start_position().row as u32 + 1,
1581 }
1582 }
1583 None => {
1584 build_labeled_tree(func_node, source, statement_kinds)
1586 }
1587 }
1588}
1589
1590fn find_function_body(func_node: Node, lang: Language) -> Option<Node> {
1592 if let Some(body) = func_node.child_by_field_name("body") {
1594 return Some(body);
1595 }
1596 if let Some(body) = func_node.child_by_field_name("block") {
1597 return Some(body);
1598 }
1599
1600 let body_kinds = match lang {
1602 Language::Python => &["block"][..],
1603 Language::TypeScript | Language::JavaScript => &["statement_block"],
1604 Language::Go => &["block"],
1605 Language::Rust => &["block"],
1606 Language::Java => &["block"],
1607 Language::C | Language::Cpp => &["compound_statement"],
1608 Language::Ruby => &["body_statement"],
1609 Language::Php => &["compound_statement"],
1610 Language::CSharp => &["block"],
1611 Language::Kotlin => &["function_body"],
1612 Language::Scala => &["block", "indented_block"],
1613 Language::Swift => &["function_body"],
1614 Language::Elixir => &["do_block"],
1615 Language::Lua | Language::Luau => &["block"],
1616 Language::Ocaml => &["let_binding"],
1617 };
1618
1619 let mut cursor = func_node.walk();
1620 let found = func_node
1621 .children(&mut cursor)
1622 .find(|&child| body_kinds.contains(&child.kind()));
1623 found
1624}
1625
1626fn count_tree_nodes(tree: &LabeledTreeNode) -> usize {
1628 1 + tree
1629 .children
1630 .iter()
1631 .map(count_tree_nodes)
1632 .sum::<usize>()
1633}
1634
1635fn flatten_postorder(tree: &LabeledTreeNode) -> Vec<PostorderNode> {
1641 let mut nodes = Vec::new();
1642 flatten_postorder_recursive(tree, &mut nodes);
1643 nodes
1644}
1645
1646fn flatten_postorder_recursive(tree: &LabeledTreeNode, nodes: &mut Vec<PostorderNode>) -> usize {
1647 if tree.children.is_empty() {
1648 let idx = nodes.len();
1650 nodes.push(PostorderNode {
1651 label: tree.label.clone(),
1652 line: tree.line,
1653 leftmost_leaf: idx,
1654 });
1655 return idx;
1656 }
1657
1658 let mut first_child_leftmost = usize::MAX;
1660 for (i, child) in tree.children.iter().enumerate() {
1661 let child_leftmost = flatten_postorder_recursive(child, nodes);
1662 if i == 0 {
1663 first_child_leftmost = child_leftmost;
1664 }
1665 }
1666
1667 nodes.push(PostorderNode {
1669 label: tree.label.clone(),
1670 line: tree.line,
1671 leftmost_leaf: first_child_leftmost,
1672 });
1673
1674 first_child_leftmost
1676}
1677
1678fn compute_keyroots(nodes: &[PostorderNode]) -> Vec<usize> {
1684 let n = nodes.len();
1685 if n == 0 {
1686 return Vec::new();
1687 }
1688
1689 let mut lr_map: HashMap<usize, usize> = HashMap::new();
1691 for (i, node) in nodes.iter().enumerate() {
1692 lr_map.insert(node.leftmost_leaf, i);
1693 }
1694
1695 let mut keyroots: Vec<usize> = lr_map.into_values().collect();
1696 keyroots.sort();
1697 keyroots
1698}
1699
/// Computes an edit script between two postorder-flattened statement trees.
///
/// Empty inputs short-circuit: both empty yields no ops, one empty yields
/// an insert (or delete) for every node of the other side.
///
/// Otherwise the function fills the tree-distance table `td` keyroot pair by
/// keyroot pair, using a per-pair forest-distance table `fd`, with unit cost
/// for insert, delete, and relabel.
///
/// NOTE(review): the returned ops are produced solely by
/// `derive_edit_ops_dp`, which works on the two postorder sequences; `td` is
/// only read while filling the tables and `td_ops` is written but never
/// read. As written, the table computation does not influence the result —
/// confirm whether a backtrace over `td`/`td_ops` is still planned or the
/// computation can be dropped.
fn zhang_shasha(nodes_a: &[PostorderNode], nodes_b: &[PostorderNode]) -> Vec<EditOp> {
    let na = nodes_a.len();
    let nb = nodes_b.len();

    if na == 0 && nb == 0 {
        return Vec::new();
    }
    if na == 0 {
        return (0..nb).map(|j| EditOp::Insert { index_b: j }).collect();
    }
    if nb == 0 {
        return (0..na).map(|i| EditOp::Delete { index_a: i }).collect();
    }

    let keyroots_a = compute_keyroots(nodes_a);
    let keyroots_b = compute_keyroots(nodes_b);

    // Tree distances, indexed by (postorder index + 1) on both axes.
    let mut td = vec![vec![0usize; nb + 1]; na + 1];
    // Operation chosen per cell: 0 = match, 1 = delete, 2 = insert, 3 = relabel.
    let mut td_ops = vec![vec![0u8; nb + 1]; na + 1];

    for &kr_a in &keyroots_a {
        for &kr_b in &keyroots_b {
            let la = nodes_a[kr_a].leftmost_leaf;
            let lb = nodes_b[kr_b].leftmost_leaf;

            // One row/column per node in the keyroot's subtree, plus one for
            // the empty forest at index 0.
            let rows = kr_a - la + 2;
            let cols = kr_b - lb + 2;
            let mut fd = vec![vec![0usize; cols]; rows];

            // Against an empty forest, the cost is the number of nodes.
            for i in 1..rows {
                fd[i][0] = fd[i - 1][0] + 1;
            }
            for j in 1..cols {
                fd[0][j] = fd[0][j - 1] + 1;
            }

            for i in 1..rows {
                for j in 1..cols {
                    // Translate forest-table coordinates to postorder indices.
                    let idx_a = la + i - 1;
                    let idx_b = lb + j - 1;

                    let cost_relabel = if nodes_a[idx_a].label == nodes_b[idx_b].label {
                        0
                    } else {
                        1
                    };

                    if nodes_a[idx_a].leftmost_leaf == la && nodes_b[idx_b].leftmost_leaf == lb {
                        // Both prefixes are whole trees: the forest distance
                        // is also the tree distance, so record it in `td`.
                        let delete = fd[i - 1][j] + 1;
                        let insert = fd[i][j - 1] + 1;
                        let relabel = fd[i - 1][j - 1] + cost_relabel;

                        // Ties prefer relabel/match, then delete, then insert.
                        if relabel <= delete && relabel <= insert {
                            fd[i][j] = relabel;
                            td[idx_a + 1][idx_b + 1] = relabel;
                            td_ops[idx_a + 1][idx_b + 1] = if cost_relabel == 0 { 0 } else { 3 };
                        } else if delete <= insert {
                            fd[i][j] = delete;
                            td[idx_a + 1][idx_b + 1] = delete;
                            td_ops[idx_a + 1][idx_b + 1] = 1;
                        } else {
                            fd[i][j] = insert;
                            td[idx_a + 1][idx_b + 1] = insert;
                            td_ops[idx_a + 1][idx_b + 1] = 2;
                        }
                    } else {
                        // General forest case: combine the forest distance
                        // before the two subtrees (`fd[p][q]`) with the
                        // tree distance stored for this pair in `td`.
                        let p = nodes_a[idx_a].leftmost_leaf - la;
                        let q = nodes_b[idx_b].leftmost_leaf - lb;

                        let delete = fd[i - 1][j] + 1;
                        let insert = fd[i][j - 1] + 1;
                        let tree_match = fd[p][q] + td[idx_a + 1][idx_b + 1];

                        if tree_match <= delete && tree_match <= insert {
                            fd[i][j] = tree_match;
                        } else if delete <= insert {
                            fd[i][j] = delete;
                        } else {
                            fd[i][j] = insert;
                        }
                    }
                }
            }
        }
    }

    // The edit script itself comes from a sequence alignment over the two
    // postorder lists, not from the tables above.
    let mut ops = Vec::new();
    derive_edit_ops_dp(nodes_a, nodes_b, &mut ops);
    ops
}
1802
1803fn derive_edit_ops_dp(nodes_a: &[PostorderNode], nodes_b: &[PostorderNode], ops: &mut Vec<EditOp>) {
1808 let na = nodes_a.len();
1809 let nb = nodes_b.len();
1810
1811 let mut dp = vec![vec![0usize; nb + 1]; na + 1];
1812 let mut choice = vec![vec![0u8; nb + 1]; na + 1];
1813
1814 for i in 1..=na {
1815 dp[i][0] = i;
1816 choice[i][0] = 1;
1817 }
1818 for j in 1..=nb {
1819 dp[0][j] = j;
1820 choice[0][j] = 2;
1821 }
1822
1823 for i in 1..=na {
1824 for j in 1..=nb {
1825 let cost = if nodes_a[i - 1].label == nodes_b[j - 1].label {
1826 0
1827 } else {
1828 1
1829 };
1830
1831 let del = dp[i - 1][j] + 1;
1832 let ins = dp[i][j - 1] + 1;
1833 let sub = dp[i - 1][j - 1] + cost;
1834
1835 if sub <= del && sub <= ins {
1836 dp[i][j] = sub;
1837 choice[i][j] = if cost == 0 { 0 } else { 3 };
1838 } else if del <= ins {
1839 dp[i][j] = del;
1840 choice[i][j] = 1;
1841 } else {
1842 dp[i][j] = ins;
1843 choice[i][j] = 2;
1844 }
1845 }
1846 }
1847
1848 let mut i = na;
1850 let mut j = nb;
1851 let mut rev_ops = Vec::new();
1852
1853 while i > 0 || j > 0 {
1854 if i > 0 && j > 0 && (choice[i][j] == 0 || choice[i][j] == 3) {
1855 if choice[i][j] == 3 {
1856 rev_ops.push(EditOp::Relabel {
1857 index_a: i - 1,
1858 index_b: j - 1,
1859 });
1860 }
1861 i -= 1;
1862 j -= 1;
1863 } else if i > 0 && (j == 0 || choice[i][j] == 1) {
1864 rev_ops.push(EditOp::Delete { index_a: i - 1 });
1865 i -= 1;
1866 } else if j > 0 {
1867 rev_ops.push(EditOp::Insert { index_b: j - 1 });
1868 j -= 1;
1869 }
1870 }
1871
1872 rev_ops.reverse();
1873 ops.extend(rev_ops);
1874}
1875
1876fn edit_ops_to_ast_changes(
1878 ops: &[EditOp],
1879 nodes_a: &[PostorderNode],
1880 nodes_b: &[PostorderNode],
1881 file_a: &Path,
1882 file_b: &Path,
1883) -> Vec<ASTChange> {
1884 let mut changes = Vec::new();
1885
1886 for op in ops {
1887 match op {
1888 EditOp::Delete { index_a } => {
1889 let node = &nodes_a[*index_a];
1890 let stmt_kind = node.label.split(':').next().unwrap_or("statement");
1891 changes.push(ASTChange {
1892 change_type: ChangeType::Delete,
1893 node_kind: NodeKind::Statement,
1894 name: Some(stmt_kind.to_string()),
1895 old_location: Some(Location::new(file_a.display().to_string(), node.line)),
1896 new_location: None,
1897 old_text: Some(node.label.clone()),
1898 new_text: None,
1899 similarity: None,
1900 children: None,
1901 base_changes: None,
1902 });
1903 }
1904 EditOp::Insert { index_b } => {
1905 let node = &nodes_b[*index_b];
1906 let stmt_kind = node.label.split(':').next().unwrap_or("statement");
1907 changes.push(ASTChange {
1908 change_type: ChangeType::Insert,
1909 node_kind: NodeKind::Statement,
1910 name: Some(stmt_kind.to_string()),
1911 old_location: None,
1912 new_location: Some(Location::new(file_b.display().to_string(), node.line)),
1913 old_text: None,
1914 new_text: Some(node.label.clone()),
1915 similarity: None,
1916 children: None,
1917 base_changes: None,
1918 });
1919 }
1920 EditOp::Relabel { index_a, index_b } => {
1921 let node_a = &nodes_a[*index_a];
1922 let node_b = &nodes_b[*index_b];
1923 let stmt_kind = node_a.label.split(':').next().unwrap_or("statement");
1924 changes.push(ASTChange {
1925 change_type: ChangeType::Update,
1926 node_kind: NodeKind::Statement,
1927 name: Some(stmt_kind.to_string()),
1928 old_location: Some(Location::new(file_a.display().to_string(), node_a.line)),
1929 new_location: Some(Location::new(file_b.display().to_string(), node_b.line)),
1930 old_text: Some(node_a.label.clone()),
1931 new_text: Some(node_b.label.clone()),
1932 similarity: None,
1933 children: None,
1934 base_changes: None,
1935 });
1936 }
1937 }
1938 }
1939
1940 changes
1941}
1942
/// Maximum number of statement-tree nodes (per side) for which a
/// statement-level diff is attempted. When either function's tree exceeds
/// this, the function is reported as updated without statement children.
const STATEMENT_FALLBACK_THRESHOLD: usize = 200;
1945
1946impl DiffArgs {
1947 fn run_statement_level_diff(&self) -> Result<DiffReport> {
1959 let lang = Language::from_path(&self.file_a).ok_or_else(|| {
1961 let ext = self
1962 .file_a
1963 .extension()
1964 .map(|e| e.to_string_lossy().to_string())
1965 .unwrap_or_else(|| "unknown".to_string());
1966 RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
1967 })?;
1968
1969 let source_a = fs::read_to_string(&self.file_a)?;
1971 let source_b = fs::read_to_string(&self.file_b)?;
1972
1973 let pool = ParserPool::new();
1975 let tree_a = pool.parse(&source_a, lang).map_err(|e| {
1976 RemainingError::parse_error(&self.file_a, format!("Failed to parse: {}", e))
1977 })?;
1978 let tree_b = pool.parse(&source_b, lang).map_err(|e| {
1979 RemainingError::parse_error(&self.file_b, format!("Failed to parse: {}", e))
1980 })?;
1981
1982 let funcs_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
1984 let funcs_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
1985
1986 let statement_kinds = get_statement_node_kinds(lang);
1987
1988 let map_b: HashMap<&str, (usize, &ExtractedNode)> = funcs_b
1990 .iter()
1991 .enumerate()
1992 .map(|(i, n)| (n.name.as_str(), (i, n)))
1993 .collect();
1994
1995 let mut matched_a: Vec<bool> = vec![false; funcs_a.len()];
1996 let mut matched_b: Vec<bool> = vec![false; funcs_b.len()];
1997 let mut changes = Vec::new();
1998
1999 for (i, func_a) in funcs_a.iter().enumerate() {
2001 if let Some(&(j, func_b)) = map_b.get(func_a.name.as_str()) {
2002 matched_a[i] = true;
2003 matched_b[j] = true;
2004
2005 if func_a.normalized_body != func_b.normalized_body {
2007 let func_node_a =
2009 find_function_node_by_line(tree_a.root_node(), func_a.line, lang);
2010 let func_node_b =
2011 find_function_node_by_line(tree_b.root_node(), func_b.line, lang);
2012
2013 let stmt_children = match (func_node_a, func_node_b) {
2014 (Some(node_a), Some(node_b)) => {
2015 let tree_a_stmts = extract_statement_tree(
2017 node_a,
2018 source_a.as_bytes(),
2019 lang,
2020 statement_kinds,
2021 );
2022 let tree_b_stmts = extract_statement_tree(
2023 node_b,
2024 source_b.as_bytes(),
2025 lang,
2026 statement_kinds,
2027 );
2028
2029 let count_a = count_tree_nodes(&tree_a_stmts);
2030 let count_b = count_tree_nodes(&tree_b_stmts);
2031
2032 if count_a > STATEMENT_FALLBACK_THRESHOLD
2034 || count_b > STATEMENT_FALLBACK_THRESHOLD
2035 {
2036 None
2038 } else {
2039 let po_a = flatten_postorder(&tree_a_stmts);
2041 let po_b = flatten_postorder(&tree_b_stmts);
2042
2043 let edit_ops = zhang_shasha(&po_a, &po_b);
2044
2045 if edit_ops.is_empty() {
2046 None
2047 } else {
2048 let stmt_changes = edit_ops_to_ast_changes(
2049 &edit_ops,
2050 &po_a,
2051 &po_b,
2052 &self.file_a,
2053 &self.file_b,
2054 );
2055 if stmt_changes.is_empty() {
2056 None
2057 } else {
2058 Some(stmt_changes)
2059 }
2060 }
2061 }
2062 }
2063 _ => None,
2064 };
2065
2066 changes.push(ASTChange {
2067 change_type: ChangeType::Update,
2068 node_kind: func_a.kind,
2069 name: Some(func_a.name.clone()),
2070 old_location: Some(Location::with_column(
2071 self.file_a.display().to_string(),
2072 func_a.line,
2073 func_a.column,
2074 )),
2075 new_location: Some(Location::with_column(
2076 self.file_b.display().to_string(),
2077 func_b.line,
2078 func_b.column,
2079 )),
2080 old_text: Some(func_a.body.clone()),
2081 new_text: Some(func_b.body.clone()),
2082 similarity: Some(compute_similarity(
2083 &func_a.normalized_body,
2084 &func_b.normalized_body,
2085 )),
2086 children: stmt_children,
2087 base_changes: None,
2088 });
2089 }
2090 }
2091 }
2092
2093 let unmatched_a: Vec<(usize, &ExtractedNode)> = funcs_a
2095 .iter()
2096 .enumerate()
2097 .filter(|(i, _)| !matched_a[*i])
2098 .collect();
2099 let unmatched_b: Vec<(usize, &ExtractedNode)> = funcs_b
2100 .iter()
2101 .enumerate()
2102 .filter(|(i, _)| !matched_b[*i])
2103 .collect();
2104
2105 let mut used_b = vec![false; unmatched_b.len()];
2106
2107 for (_, func_a) in &unmatched_a {
2108 let mut best_match: Option<(usize, f64)> = None;
2109 for (j, (_, func_b)) in unmatched_b.iter().enumerate() {
2110 if used_b[j] || func_a.kind != func_b.kind {
2111 continue;
2112 }
2113 let sim = compute_similarity(&func_a.normalized_body, &func_b.normalized_body);
2114 if sim >= RENAME_SIMILARITY_THRESHOLD
2115 && (best_match.is_none() || sim > best_match.unwrap().1)
2116 {
2117 best_match = Some((j, sim));
2118 }
2119 }
2120
2121 if let Some((j, sim)) = best_match {
2122 let (_, func_b) = unmatched_b[j];
2123 used_b[j] = true;
2124 changes.push(ASTChange {
2125 change_type: ChangeType::Rename,
2126 node_kind: func_a.kind,
2127 name: Some(func_a.name.clone()),
2128 old_location: Some(Location::with_column(
2129 self.file_a.display().to_string(),
2130 func_a.line,
2131 func_a.column,
2132 )),
2133 new_location: Some(Location::with_column(
2134 self.file_b.display().to_string(),
2135 func_b.line,
2136 func_b.column,
2137 )),
2138 old_text: Some(func_a.name.clone()),
2139 new_text: Some(func_b.name.clone()),
2140 similarity: Some(sim),
2141 children: None,
2142 base_changes: None,
2143 });
2144 }
2145 }
2146
2147 for (_, func_a) in &unmatched_a {
2149 let is_renamed = changes.iter().any(|c| {
2150 c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&func_a.name)
2151 });
2152 if !is_renamed {
2153 changes.push(ASTChange {
2154 change_type: ChangeType::Delete,
2155 node_kind: func_a.kind,
2156 name: Some(func_a.name.clone()),
2157 old_location: Some(Location::with_column(
2158 self.file_a.display().to_string(),
2159 func_a.line,
2160 func_a.column,
2161 )),
2162 new_location: None,
2163 old_text: None,
2164 new_text: None,
2165 similarity: None,
2166 children: None,
2167 base_changes: None,
2168 });
2169 }
2170 }
2171
2172 for (j, (_, func_b)) in unmatched_b.iter().enumerate() {
2174 if !used_b[j] {
2175 changes.push(ASTChange {
2176 change_type: ChangeType::Insert,
2177 node_kind: func_b.kind,
2178 name: Some(func_b.name.clone()),
2179 old_location: None,
2180 new_location: Some(Location::with_column(
2181 self.file_b.display().to_string(),
2182 func_b.line,
2183 func_b.column,
2184 )),
2185 old_text: None,
2186 new_text: None,
2187 similarity: None,
2188 children: None,
2189 base_changes: None,
2190 });
2191 }
2192 }
2193
2194 let mut summary = DiffSummary::default();
2196 for change in &changes {
2197 summary.total_changes += 1;
2198 if change.change_type != ChangeType::Format {
2199 summary.semantic_changes += 1;
2200 }
2201 match change.change_type {
2202 ChangeType::Insert => summary.inserts += 1,
2203 ChangeType::Delete => summary.deletes += 1,
2204 ChangeType::Update => summary.updates += 1,
2205 ChangeType::Move => summary.moves += 1,
2206 ChangeType::Rename => summary.renames += 1,
2207 ChangeType::Format => summary.formats += 1,
2208 ChangeType::Extract => summary.extracts += 1,
2209 ChangeType::Inline => {}
2210 }
2211 }
2212
2213 changes.sort_by_key(|c| match c.change_type {
2215 ChangeType::Delete => 0,
2216 ChangeType::Rename => 1,
2217 ChangeType::Update => 2,
2218 ChangeType::Move => 3,
2219 ChangeType::Insert => 4,
2220 _ => 5,
2221 });
2222
2223 Ok(DiffReport {
2224 file_a: self.file_a.display().to_string(),
2225 file_b: self.file_b.display().to_string(),
2226 identical: changes.is_empty(),
2227 changes,
2228 summary: Some(summary),
2229 granularity: DiffGranularity::Statement,
2230 file_changes: None,
2231 module_changes: None,
2232 import_graph_summary: None,
2233 arch_changes: None,
2234 arch_summary: None,
2235 })
2236 }
2237}
2238
2239fn find_function_node_by_line(root: Node, target_line: u32, lang: Language) -> Option<Node> {
2241 let func_kinds = get_function_node_kinds(lang);
2242 find_function_node_recursive(root, target_line, func_kinds)
2243}
2244
2245fn find_function_node_recursive<'a>(
2246 node: Node<'a>,
2247 target_line: u32,
2248 func_kinds: &[&str],
2249) -> Option<Node<'a>> {
2250 let line = node.start_position().row as u32 + 1;
2251
2252 if func_kinds.contains(&node.kind()) && line == target_line {
2253 return Some(node);
2254 }
2255
2256 let mut cursor = node.walk();
2257 for child in node.children(&mut cursor) {
2258 if let Some(found) = find_function_node_recursive(child, target_line, func_kinds) {
2259 return Some(found);
2260 }
2261 }
2262
2263 None
2264}
2265
/// A class-like definition extracted from a parse tree, together with its
/// members, for class-level diffing.
#[derive(Debug, Clone)]
struct ClassNode {
    /// Class name.
    name: String,
    /// 1-based line on which the definition starts.
    line: u32,
    /// 1-based line on which the definition ends.
    end_line: u32,
    /// Start column as reported by the parser (not incremented).
    column: u32,
    /// Full source text of the definition.
    body: String,
    /// `body` after `normalize_body`.
    normalized_body: String,
    /// Methods belonging to the class.
    methods: Vec<ExtractedNode>,
    /// Fields belonging to the class.
    fields: Vec<FieldNode>,
    /// Base types as returned by `extract_bases`.
    bases: Vec<String>,
}
2292
/// A field of a class, as stored in `ClassNode::fields`.
///
/// NOTE(review): the code that fills these values is outside this view; the
/// per-field notes below assume the same conventions as `ClassNode` —
/// confirm against `extract_class_members`.
#[derive(Debug, Clone)]
struct FieldNode {
    /// Field name.
    name: String,
    /// Source line of the field (presumably 1-based).
    line: u32,
    /// Source column of the field.
    column: u32,
    /// Source text of the field.
    body: String,
    /// Normalized form of `body` (presumably via `normalize_body`).
    normalized_body: String,
}
2307
2308pub fn run_class_diff(
2313 file_a: &Path,
2314 file_b: &Path,
2315 semantic_only: bool,
2316) -> Result<DiffReport> {
2317 if !file_a.exists() {
2319 return Err(RemainingError::file_not_found(file_a).into());
2320 }
2321 if !file_b.exists() {
2322 return Err(RemainingError::file_not_found(file_b).into());
2323 }
2324
2325 let lang = Language::from_path(file_a).ok_or_else(|| {
2327 let ext = file_a
2328 .extension()
2329 .map(|e| e.to_string_lossy().to_string())
2330 .unwrap_or_else(|| "unknown".to_string());
2331 RemainingError::parse_error(file_a, format!("Unsupported language: .{}", ext))
2332 })?;
2333
2334 let source_a = fs::read_to_string(file_a)?;
2336 let source_b = fs::read_to_string(file_b)?;
2337
2338 let pool = ParserPool::new();
2340 let tree_a = pool
2341 .parse(&source_a, lang)
2342 .map_err(|e| RemainingError::parse_error(file_a, format!("Failed to parse file: {}", e)))?;
2343 let tree_b = pool
2344 .parse(&source_b, lang)
2345 .map_err(|e| RemainingError::parse_error(file_b, format!("Failed to parse file: {}", e)))?;
2346
2347 let classes_a = extract_class_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
2349 let classes_b = extract_class_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
2350
2351 let changes = detect_class_changes(&classes_a, &classes_b, file_a, file_b, semantic_only);
2353
2354 let mut summary = DiffSummary::default();
2356 for change in &changes {
2357 summary.total_changes += 1;
2358 if change.change_type != ChangeType::Format {
2359 summary.semantic_changes += 1;
2360 }
2361 match change.change_type {
2362 ChangeType::Insert => summary.inserts += 1,
2363 ChangeType::Delete => summary.deletes += 1,
2364 ChangeType::Update => summary.updates += 1,
2365 ChangeType::Move => summary.moves += 1,
2366 ChangeType::Rename => summary.renames += 1,
2367 ChangeType::Format => summary.formats += 1,
2368 ChangeType::Extract => summary.extracts += 1,
2369 ChangeType::Inline => {}
2370 }
2371 }
2372
2373 let report = DiffReport {
2374 file_a: file_a.display().to_string(),
2375 file_b: file_b.display().to_string(),
2376 identical: changes.is_empty(),
2377 changes,
2378 summary: Some(summary),
2379 granularity: DiffGranularity::Class,
2380 file_changes: None,
2381 module_changes: None,
2382 import_graph_summary: None,
2383 arch_changes: None,
2384 arch_summary: None,
2385 };
2386
2387 Ok(report)
2388}
2389
2390fn run_class_diff_directory(dir_a: &Path, dir_b: &Path, semantic_only: bool) -> Result<DiffReport> {
2393 let files_a = collect_source_files(dir_a)?;
2394 let files_b = collect_source_files(dir_b)?;
2395
2396 let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
2397 let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
2398
2399 let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
2400
2401 let mut all_changes = Vec::new();
2402
2403 for rel_path in all_paths {
2404 match (map_a.get(rel_path), map_b.get(rel_path)) {
2405 (Some(path_a), Some(path_b)) => {
2406 match run_class_diff(path_a, path_b, semantic_only) {
2408 Ok(sub_report) => all_changes.extend(sub_report.changes),
2409 Err(_) => continue, }
2411 }
2412 (None, Some(_)) | (Some(_), None) => {
2413 continue;
2415 }
2416 (None, None) => unreachable!(),
2417 }
2418 }
2419
2420 let mut summary = DiffSummary::default();
2421 for change in &all_changes {
2422 summary.total_changes += 1;
2423 if change.change_type != ChangeType::Format {
2424 summary.semantic_changes += 1;
2425 }
2426 match change.change_type {
2427 ChangeType::Insert => summary.inserts += 1,
2428 ChangeType::Delete => summary.deletes += 1,
2429 ChangeType::Update => summary.updates += 1,
2430 ChangeType::Move => summary.moves += 1,
2431 ChangeType::Rename => summary.renames += 1,
2432 ChangeType::Format => summary.formats += 1,
2433 ChangeType::Extract => summary.extracts += 1,
2434 ChangeType::Inline => {}
2435 }
2436 }
2437
2438 Ok(DiffReport {
2439 file_a: dir_a.display().to_string(),
2440 file_b: dir_b.display().to_string(),
2441 identical: all_changes.is_empty(),
2442 changes: all_changes,
2443 summary: Some(summary),
2444 granularity: DiffGranularity::Class,
2445 file_changes: None,
2446 module_changes: None,
2447 import_graph_summary: None,
2448 arch_changes: None,
2449 arch_summary: None,
2450 })
2451}
2452
2453fn extract_class_nodes(root: Node, source: &[u8], lang: Language) -> Vec<ClassNode> {
2455 let mut classes = Vec::new();
2456 let class_kinds = get_class_node_kinds(lang);
2457 let func_kinds = get_function_node_kinds(lang);
2458 let body_kinds = get_class_body_kinds(lang);
2459
2460 extract_class_nodes_recursive(
2461 root,
2462 source,
2463 &mut classes,
2464 lang,
2465 func_kinds,
2466 class_kinds,
2467 body_kinds,
2468 );
2469
2470 if lang == Language::Go {
2473 associate_go_receiver_methods(root, source, lang, &mut classes);
2474 }
2475
2476 classes
2477}
2478
2479fn associate_go_receiver_methods(
2482 root: Node,
2483 source: &[u8],
2484 lang: Language,
2485 classes: &mut [ClassNode],
2486) {
2487 let source_str = std::str::from_utf8(source).unwrap_or("");
2488 let mut cursor = root.walk();
2489 for child in root.children(&mut cursor) {
2490 if child.kind() != "method_declaration" {
2491 continue;
2492 }
2493 let receiver_type = match extract_go_receiver_type(child, source) {
2495 Some(name) => name,
2496 None => continue,
2497 };
2498
2499 let method_name = match get_function_name(child, lang, source_str) {
2501 Some(name) => name,
2502 None => continue,
2503 };
2504
2505 let params = child
2506 .child_by_field_name("parameters")
2507 .map(|p| node_text(p, source).to_string())
2508 .unwrap_or_default();
2509
2510 let line = child.start_position().row as u32 + 1;
2511 let end_line = child.end_position().row as u32 + 1;
2512 let column = child.start_position().column as u32;
2513 let body = node_text(child, source).to_string();
2514
2515 let extracted =
2516 ExtractedNode::new(method_name, NodeKind::Method, line, end_line, column, body)
2517 .with_params(params)
2518 .with_method_kind();
2519
2520 for class in classes.iter_mut() {
2522 if class.name == receiver_type {
2523 class.methods.push(extracted);
2524 break;
2525 }
2526 }
2527 }
2528}
2529
2530fn extract_go_receiver_type(method_node: Node, source: &[u8]) -> Option<String> {
2535 let receiver = method_node.child_by_field_name("receiver")?;
2537 let mut recv_cursor = receiver.walk();
2538 for recv_child in receiver.children(&mut recv_cursor) {
2539 if recv_child.kind() == "parameter_declaration" {
2540 if let Some(type_node) = recv_child.child_by_field_name("type") {
2541 return extract_go_type_identifier(type_node, source);
2542 }
2543 }
2544 }
2545 None
2546}
2547
2548fn extract_go_type_identifier(type_node: Node, source: &[u8]) -> Option<String> {
2551 match type_node.kind() {
2552 "type_identifier" => Some(node_text(type_node, source).to_string()),
2553 "pointer_type" => {
2554 let mut cursor = type_node.walk();
2556 for child in type_node.children(&mut cursor) {
2557 if child.is_named() {
2558 return extract_go_type_identifier(child, source);
2559 }
2560 }
2561 None
2562 }
2563 _ => None,
2564 }
2565}
2566
2567fn extract_class_nodes_recursive(
2568 node: Node,
2569 source: &[u8],
2570 classes: &mut Vec<ClassNode>,
2571 lang: Language,
2572 func_kinds: &[&str],
2573 class_kinds: &[&str],
2574 body_kinds: &[&str],
2575) {
2576 let kind = node.kind();
2577
2578 if class_kinds.contains(&kind) {
2579 if let Some(class_node) = build_class_node(node, source, lang, func_kinds, body_kinds) {
2580 classes.push(class_node);
2581 }
2582 return; }
2584
2585 for child in node.children(&mut node.walk()) {
2586 extract_class_nodes_recursive(
2587 child,
2588 source,
2589 classes,
2590 lang,
2591 func_kinds,
2592 class_kinds,
2593 body_kinds,
2594 );
2595 }
2596}
2597
2598fn build_class_node(
2600 node: Node,
2601 source: &[u8],
2602 lang: Language,
2603 func_kinds: &[&str],
2604 body_kinds: &[&str],
2605) -> Option<ClassNode> {
2606 let class_name = node
2608 .child_by_field_name("name")
2609 .map(|n| node_text(n, source).to_string())
2610 .or_else(|| {
2611 if lang == Language::Go && node.kind() == "type_declaration" {
2614 let mut cursor = node.walk();
2615 for child in node.children(&mut cursor) {
2616 if child.kind() == "type_spec" {
2617 if let Some(name_node) = child.child_by_field_name("name") {
2618 return Some(node_text(name_node, source).to_string());
2619 }
2620 }
2621 }
2622 }
2623 let mut cursor = node.walk();
2625 for child in node.children(&mut cursor) {
2626 if child.kind() == "identifier"
2627 || child.kind() == "type_identifier"
2628 || child.kind() == "constant"
2629 {
2630 return Some(node_text(child, source).to_string());
2631 }
2632 }
2633 None
2634 })?;
2635
2636 if class_name.is_empty() {
2637 return None;
2638 }
2639
2640 let line = node.start_position().row as u32 + 1;
2641 let end_line = node.end_position().row as u32 + 1;
2642 let column = node.start_position().column as u32;
2643 let body = node_text(node, source).to_string();
2644 let normalized_body = normalize_body(&body);
2645
2646 let bases = extract_bases(node, source, lang);
2648
2649 let mut methods = Vec::new();
2651 let mut fields = Vec::new();
2652
2653 for child in node.children(&mut node.walk()) {
2654 if body_kinds.contains(&child.kind()) {
2655 extract_class_members(child, source, lang, func_kinds, &mut methods, &mut fields);
2656 }
2657 }
2658
2659 Some(ClassNode {
2660 name: class_name,
2661 line,
2662 end_line,
2663 column,
2664 body,
2665 normalized_body,
2666 methods,
2667 fields,
2668 bases,
2669 })
2670}
2671
2672fn extract_bases(node: Node, source: &[u8], lang: Language) -> Vec<String> {
2674 let mut bases = Vec::new();
2675
2676 match lang {
2677 Language::Python => {
2678 if let Some(superclasses) = node.child_by_field_name("superclasses") {
2681 for child in superclasses.children(&mut superclasses.walk()) {
2682 let text = node_text(child, source).trim().to_string();
2683 if !text.is_empty() && text != "(" && text != ")" && text != "," {
2684 bases.push(text);
2685 }
2686 }
2687 }
2688 }
2689 _ => {
2690 }
2693 }
2694
2695 bases
2696}
2697
2698fn extract_class_members(
2700 body_node: Node,
2701 source: &[u8],
2702 lang: Language,
2703 func_kinds: &[&str],
2704 methods: &mut Vec<ExtractedNode>,
2705 fields: &mut Vec<FieldNode>,
2706) {
2707 for child in body_node.children(&mut body_node.walk()) {
2708 let kind = child.kind();
2709
2710 if func_kinds.contains(&kind) {
2712 let source_str = std::str::from_utf8(source).unwrap_or("");
2713 if let Some(func_name) = get_function_name(child, lang, source_str) {
2714 let params = child
2715 .child_by_field_name("parameters")
2716 .or_else(|| child.child_by_field_name("formal_parameters"))
2717 .map(|p| node_text(p, source).to_string())
2718 .unwrap_or_default();
2719
2720 let line = child.start_position().row as u32 + 1;
2721 let end_line = child.end_position().row as u32 + 1;
2722 let column = child.start_position().column as u32;
2723 let body = node_text(child, source).to_string();
2724
2725 let extracted =
2726 ExtractedNode::new(func_name, NodeKind::Method, line, end_line, column, body)
2727 .with_params(params)
2728 .with_method_kind();
2729
2730 methods.push(extracted);
2731 }
2732 }
2733 else if kind == "expression_statement" {
2735 if let Some(field) = extract_field_from_statement(child, source, lang) {
2736 fields.push(field);
2737 }
2738 }
2739 }
2740}
2741
2742fn extract_field_from_statement(node: Node, source: &[u8], _lang: Language) -> Option<FieldNode> {
2744 for child in node.children(&mut node.walk()) {
2746 if child.kind() == "assignment" {
2747 if let Some(left) = child.child_by_field_name("left") {
2749 let name = node_text(left, source).trim().to_string();
2750 if !name.is_empty() && !name.contains('.') {
2751 let line = node.start_position().row as u32 + 1;
2753 let column = node.start_position().column as u32;
2754 let body = node_text(node, source).to_string();
2755 let normalized_body = body.trim().to_string();
2756
2757 return Some(FieldNode {
2758 name,
2759 line,
2760 column,
2761 body,
2762 normalized_body,
2763 });
2764 }
2765 }
2766 }
2767 }
2768 None
2769}
2770
2771fn detect_class_changes(
2773 classes_a: &[ClassNode],
2774 classes_b: &[ClassNode],
2775 file_a: &Path,
2776 file_b: &Path,
2777 _semantic_only: bool,
2778) -> Vec<ASTChange> {
2779 let mut changes = Vec::new();
2780
2781 let map_b: HashMap<&str, &ClassNode> = classes_b.iter().map(|c| (c.name.as_str(), c)).collect();
2783
2784 let mut matched_a: Vec<bool> = vec![false; classes_a.len()];
2786 let mut matched_b: Vec<bool> = vec![false; classes_b.len()];
2787
2788 for (i, class_a) in classes_a.iter().enumerate() {
2790 let _ = class_a.end_line;
2791 let _ = &class_a.body;
2792 let _ = &class_a.normalized_body;
2793 if let Some(&class_b) = map_b.get(class_a.name.as_str()) {
2794 matched_a[i] = true;
2795 if let Some(j) = classes_b.iter().position(|c| c.name == class_a.name) {
2796 matched_b[j] = true;
2797 }
2798
2799 if let Some(change) = diff_class_pair(class_a, class_b, file_a, file_b) {
2801 changes.push(change);
2802 }
2803 }
2804 }
2805
2806 let unmatched_a: Vec<(usize, &ClassNode)> = classes_a
2808 .iter()
2809 .enumerate()
2810 .filter(|(i, _)| !matched_a[*i])
2811 .collect();
2812 let unmatched_b: Vec<(usize, &ClassNode)> = classes_b
2813 .iter()
2814 .enumerate()
2815 .filter(|(i, _)| !matched_b[*i])
2816 .collect();
2817
2818 let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
2820
2821 for (_, class_a) in &unmatched_a {
2822 let mut best_match: Option<(usize, f64)> = None;
2823
2824 for (j, (_, class_b)) in unmatched_b.iter().enumerate() {
2825 if used_b[j] {
2826 continue;
2827 }
2828
2829 let similarity = compute_class_similarity(class_a, class_b);
2830 if similarity >= RENAME_SIMILARITY_THRESHOLD
2831 && (best_match.is_none() || similarity > best_match.unwrap().1)
2832 {
2833 best_match = Some((j, similarity));
2834 }
2835 }
2836
2837 if let Some((j, similarity)) = best_match {
2838 let (_, class_b) = unmatched_b[j];
2839 used_b[j] = true;
2840
2841 changes.push(ASTChange {
2842 change_type: ChangeType::Rename,
2843 node_kind: NodeKind::Class,
2844 name: Some(class_a.name.clone()),
2845 old_location: Some(Location::with_column(
2846 file_a.display().to_string(),
2847 class_a.line,
2848 class_a.column,
2849 )),
2850 new_location: Some(Location::with_column(
2851 file_b.display().to_string(),
2852 class_b.line,
2853 class_b.column,
2854 )),
2855 old_text: Some(class_a.name.clone()),
2856 new_text: Some(class_b.name.clone()),
2857 similarity: Some(similarity),
2858 children: None,
2859 base_changes: None,
2860 });
2861 }
2862 }
2863
2864 for (_, class_a) in &unmatched_a {
2866 let is_renamed = changes
2867 .iter()
2868 .any(|c| c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&class_a.name));
2869 if !is_renamed {
2870 changes.push(ASTChange {
2871 change_type: ChangeType::Delete,
2872 node_kind: NodeKind::Class,
2873 name: Some(class_a.name.clone()),
2874 old_location: Some(Location::with_column(
2875 file_a.display().to_string(),
2876 class_a.line,
2877 class_a.column,
2878 )),
2879 new_location: None,
2880 old_text: None,
2881 new_text: None,
2882 similarity: None,
2883 children: None,
2884 base_changes: None,
2885 });
2886 }
2887 }
2888
2889 for (j, (_, class_b)) in unmatched_b.iter().enumerate() {
2891 if !used_b[j] {
2892 changes.push(ASTChange {
2893 change_type: ChangeType::Insert,
2894 node_kind: NodeKind::Class,
2895 name: Some(class_b.name.clone()),
2896 old_location: None,
2897 new_location: Some(Location::with_column(
2898 file_b.display().to_string(),
2899 class_b.line,
2900 class_b.column,
2901 )),
2902 old_text: None,
2903 new_text: None,
2904 similarity: None,
2905 children: None,
2906 base_changes: None,
2907 });
2908 }
2909 }
2910
2911 changes.sort_by_key(|c| match c.change_type {
2913 ChangeType::Delete => 0,
2914 ChangeType::Rename => 1,
2915 ChangeType::Update => 2,
2916 ChangeType::Move => 3,
2917 ChangeType::Insert => 4,
2918 _ => 5,
2919 });
2920
2921 changes
2922}
2923
2924fn diff_class_pair(
2926 class_a: &ClassNode,
2927 class_b: &ClassNode,
2928 file_a: &Path,
2929 file_b: &Path,
2930) -> Option<ASTChange> {
2931 let mut children = Vec::new();
2932 let mut has_changes = false;
2933
2934 diff_methods(
2936 &class_a.methods,
2937 &class_b.methods,
2938 file_a,
2939 file_b,
2940 &mut children,
2941 );
2942
2943 diff_fields(
2945 &class_a.fields,
2946 &class_b.fields,
2947 file_a,
2948 file_b,
2949 &mut children,
2950 );
2951
2952 let base_changes = diff_bases(&class_a.bases, &class_b.bases);
2954
2955 if !children.is_empty() {
2956 has_changes = true;
2957 }
2958 if base_changes.is_some() {
2959 has_changes = true;
2960 }
2961
2962 if !has_changes {
2963 return None; }
2965
2966 Some(ASTChange {
2967 change_type: ChangeType::Update,
2968 node_kind: NodeKind::Class,
2969 name: Some(class_a.name.clone()),
2970 old_location: Some(Location::with_column(
2971 file_a.display().to_string(),
2972 class_a.line,
2973 class_a.column,
2974 )),
2975 new_location: Some(Location::with_column(
2976 file_b.display().to_string(),
2977 class_b.line,
2978 class_b.column,
2979 )),
2980 old_text: None,
2981 new_text: None,
2982 similarity: None,
2983 children: if children.is_empty() {
2984 None
2985 } else {
2986 Some(children)
2987 },
2988 base_changes,
2989 })
2990}
2991
2992fn diff_methods(
2994 methods_a: &[ExtractedNode],
2995 methods_b: &[ExtractedNode],
2996 file_a: &Path,
2997 file_b: &Path,
2998 children: &mut Vec<ASTChange>,
2999) {
3000 let map_b: HashMap<&str, &ExtractedNode> =
3001 methods_b.iter().map(|m| (m.name.as_str(), m)).collect();
3002
3003 let mut matched_a: Vec<bool> = vec![false; methods_a.len()];
3004 let mut matched_b: Vec<bool> = vec![false; methods_b.len()];
3005
3006 for (i, method_a) in methods_a.iter().enumerate() {
3008 if let Some(&method_b) = map_b.get(method_a.name.as_str()) {
3009 matched_a[i] = true;
3010 if let Some(j) = methods_b.iter().position(|m| m.name == method_a.name) {
3011 matched_b[j] = true;
3012 }
3013
3014 if method_a.normalized_body != method_b.normalized_body {
3016 children.push(ASTChange {
3017 change_type: ChangeType::Update,
3018 node_kind: NodeKind::Method,
3019 name: Some(method_a.name.clone()),
3020 old_location: Some(Location::with_column(
3021 file_a.display().to_string(),
3022 method_a.line,
3023 method_a.column,
3024 )),
3025 new_location: Some(Location::with_column(
3026 file_b.display().to_string(),
3027 method_b.line,
3028 method_b.column,
3029 )),
3030 old_text: None,
3031 new_text: None,
3032 similarity: Some(compute_similarity(
3033 &method_a.normalized_body,
3034 &method_b.normalized_body,
3035 )),
3036 children: None,
3037 base_changes: None,
3038 });
3039 }
3040 }
3041 }
3042
3043 let unmatched_a: Vec<&ExtractedNode> = methods_a
3045 .iter()
3046 .enumerate()
3047 .filter(|(i, _)| !matched_a[*i])
3048 .map(|(_, m)| m)
3049 .collect();
3050 let unmatched_b: Vec<&ExtractedNode> = methods_b
3051 .iter()
3052 .enumerate()
3053 .filter(|(i, _)| !matched_b[*i])
3054 .map(|(_, m)| m)
3055 .collect();
3056
3057 let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
3059
3060 for method_a in &unmatched_a {
3061 let mut best_match: Option<(usize, f64)> = None;
3062
3063 for (j, method_b) in unmatched_b.iter().enumerate() {
3064 if used_b[j] {
3065 continue;
3066 }
3067 let similarity =
3068 compute_similarity(&method_a.normalized_body, &method_b.normalized_body);
3069 if similarity >= RENAME_SIMILARITY_THRESHOLD
3070 && (best_match.is_none() || similarity > best_match.unwrap().1)
3071 {
3072 best_match = Some((j, similarity));
3073 }
3074 }
3075
3076 if let Some((j, similarity)) = best_match {
3077 let method_b = unmatched_b[j];
3078 used_b[j] = true;
3079
3080 children.push(ASTChange {
3081 change_type: ChangeType::Rename,
3082 node_kind: NodeKind::Method,
3083 name: Some(method_a.name.clone()),
3084 old_location: Some(Location::with_column(
3085 file_a.display().to_string(),
3086 method_a.line,
3087 method_a.column,
3088 )),
3089 new_location: Some(Location::with_column(
3090 file_b.display().to_string(),
3091 method_b.line,
3092 method_b.column,
3093 )),
3094 old_text: Some(method_a.name.clone()),
3095 new_text: Some(method_b.name.clone()),
3096 similarity: Some(similarity),
3097 children: None,
3098 base_changes: None,
3099 });
3100 }
3101 }
3102
3103 for method_a in &unmatched_a {
3105 let is_renamed = children.iter().any(|c| {
3106 c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&method_a.name)
3107 });
3108 if !is_renamed {
3109 children.push(ASTChange {
3110 change_type: ChangeType::Delete,
3111 node_kind: NodeKind::Method,
3112 name: Some(method_a.name.clone()),
3113 old_location: Some(Location::with_column(
3114 file_a.display().to_string(),
3115 method_a.line,
3116 method_a.column,
3117 )),
3118 new_location: None,
3119 old_text: None,
3120 new_text: None,
3121 similarity: None,
3122 children: None,
3123 base_changes: None,
3124 });
3125 }
3126 }
3127
3128 for (j, method_b) in unmatched_b.iter().enumerate() {
3130 if !used_b[j] {
3131 children.push(ASTChange {
3132 change_type: ChangeType::Insert,
3133 node_kind: NodeKind::Method,
3134 name: Some(method_b.name.clone()),
3135 old_location: None,
3136 new_location: Some(Location::with_column(
3137 file_b.display().to_string(),
3138 method_b.line,
3139 method_b.column,
3140 )),
3141 old_text: None,
3142 new_text: None,
3143 similarity: None,
3144 children: None,
3145 base_changes: None,
3146 });
3147 }
3148 }
3149}
3150
3151fn diff_fields(
3153 fields_a: &[FieldNode],
3154 fields_b: &[FieldNode],
3155 file_a: &Path,
3156 file_b: &Path,
3157 children: &mut Vec<ASTChange>,
3158) {
3159 let map_b: HashMap<&str, &FieldNode> = fields_b.iter().map(|f| (f.name.as_str(), f)).collect();
3160
3161 let mut matched_a: Vec<bool> = vec![false; fields_a.len()];
3162 let mut matched_b: Vec<bool> = vec![false; fields_b.len()];
3163
3164 for (i, field_a) in fields_a.iter().enumerate() {
3166 if let Some(&field_b) = map_b.get(field_a.name.as_str()) {
3167 matched_a[i] = true;
3168 if let Some(j) = fields_b.iter().position(|f| f.name == field_a.name) {
3169 matched_b[j] = true;
3170 }
3171
3172 if field_a.normalized_body != field_b.normalized_body {
3174 children.push(ASTChange {
3175 change_type: ChangeType::Update,
3176 node_kind: NodeKind::Field,
3177 name: Some(field_a.name.clone()),
3178 old_location: Some(Location::with_column(
3179 file_a.display().to_string(),
3180 field_a.line,
3181 field_a.column,
3182 )),
3183 new_location: Some(Location::with_column(
3184 file_b.display().to_string(),
3185 field_b.line,
3186 field_b.column,
3187 )),
3188 old_text: Some(field_a.body.trim().to_string()),
3189 new_text: Some(field_b.body.trim().to_string()),
3190 similarity: None,
3191 children: None,
3192 base_changes: None,
3193 });
3194 }
3195 }
3196 }
3197
3198 for (i, field_a) in fields_a.iter().enumerate() {
3200 if !matched_a[i] {
3201 children.push(ASTChange {
3202 change_type: ChangeType::Delete,
3203 node_kind: NodeKind::Field,
3204 name: Some(field_a.name.clone()),
3205 old_location: Some(Location::with_column(
3206 file_a.display().to_string(),
3207 field_a.line,
3208 field_a.column,
3209 )),
3210 new_location: None,
3211 old_text: None,
3212 new_text: None,
3213 similarity: None,
3214 children: None,
3215 base_changes: None,
3216 });
3217 }
3218 }
3219
3220 for (j, field_b) in fields_b.iter().enumerate() {
3222 if !matched_b[j] {
3223 children.push(ASTChange {
3224 change_type: ChangeType::Insert,
3225 node_kind: NodeKind::Field,
3226 name: Some(field_b.name.clone()),
3227 old_location: None,
3228 new_location: Some(Location::with_column(
3229 file_b.display().to_string(),
3230 field_b.line,
3231 field_b.column,
3232 )),
3233 old_text: None,
3234 new_text: None,
3235 similarity: None,
3236 children: None,
3237 base_changes: None,
3238 });
3239 }
3240 }
3241}
3242
3243fn diff_bases(bases_a: &[String], bases_b: &[String]) -> Option<BaseChanges> {
3245 let set_a: std::collections::HashSet<&String> = bases_a.iter().collect();
3246 let set_b: std::collections::HashSet<&String> = bases_b.iter().collect();
3247
3248 let added: Vec<String> = set_b.difference(&set_a).map(|s| (*s).clone()).collect();
3249 let removed: Vec<String> = set_a.difference(&set_b).map(|s| (*s).clone()).collect();
3250
3251 if added.is_empty() && removed.is_empty() {
3252 None
3253 } else {
3254 Some(BaseChanges { added, removed })
3255 }
3256}
3257
3258fn compute_class_similarity(class_a: &ClassNode, class_b: &ClassNode) -> f64 {
3260 let method_sigs_a: std::collections::HashSet<String> = class_a
3262 .methods
3263 .iter()
3264 .map(|m| format!("{}:{}", m.name, m.normalized_body))
3265 .collect();
3266 let method_sigs_b: std::collections::HashSet<String> = class_b
3267 .methods
3268 .iter()
3269 .map(|m| format!("{}:{}", m.name, m.normalized_body))
3270 .collect();
3271
3272 let field_sigs_a: std::collections::HashSet<String> = class_a
3273 .fields
3274 .iter()
3275 .map(|f| f.normalized_body.clone())
3276 .collect();
3277 let field_sigs_b: std::collections::HashSet<String> = class_b
3278 .fields
3279 .iter()
3280 .map(|f| f.normalized_body.clone())
3281 .collect();
3282
3283 let all_a: std::collections::HashSet<&String> =
3285 method_sigs_a.iter().chain(field_sigs_a.iter()).collect();
3286 let all_b: std::collections::HashSet<&String> =
3287 method_sigs_b.iter().chain(field_sigs_b.iter()).collect();
3288
3289 if all_a.is_empty() && all_b.is_empty() {
3290 return 1.0;
3292 }
3293
3294 let intersection = all_a.intersection(&all_b).count();
3295 let union = all_a.union(&all_b).count();
3296
3297 if union == 0 {
3298 0.0
3299 } else {
3300 intersection as f64 / union as f64
3301 }
3302}
3303
/// File extensions (without the leading dot) that the directory walk treats as
/// source files. Files with any other extension, or with none, are ignored.
const SOURCE_EXTENSIONS: &[&str] = &[
    "py", "rs", "ts", "tsx", "js", "jsx",
    "go", "java",
    "c", "h", "cpp", "hpp", "cc", "cxx",
    "rb", "php", "cs", "kt", "scala", "swift",
    "ex", "exs", "lua", "ml", "mli", "luau",
];
3313
3314fn collect_source_files(root: &Path) -> Result<Vec<(String, PathBuf)>> {
3316 let mut files = Vec::new();
3317 collect_source_files_recursive(root, root, &mut files)?;
3318 files.sort_by(|a, b| a.0.cmp(&b.0));
3319 Ok(files)
3320}
3321
3322fn collect_source_files_recursive(
3323 root: &Path,
3324 current: &Path,
3325 files: &mut Vec<(String, PathBuf)>,
3326) -> Result<()> {
3327 for entry in fs::read_dir(current)? {
3328 let entry = entry?;
3329 let path = entry.path();
3330 if path.is_dir() {
3331 collect_source_files_recursive(root, &path, files)?;
3332 } else if path.is_file() {
3333 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
3334 if SOURCE_EXTENSIONS.contains(&ext) {
3335 let rel = path
3336 .strip_prefix(root)
3337 .unwrap_or(&path)
3338 .to_string_lossy()
3339 .replace('\\', "/");
3340 files.push((rel, path));
3341 }
3342 }
3343 }
3344 }
3345 Ok(())
3346}
3347
3348fn compute_structural_fingerprint(path: &Path) -> Result<(u64, Vec<String>)> {
3354 let lang = match Language::from_path(path) {
3355 Some(l) => l,
3356 None => {
3357 let content = fs::read_to_string(path)?;
3359 let mut hasher = std::collections::hash_map::DefaultHasher::new();
3360 content.hash(&mut hasher);
3361 return Ok((hasher.finish(), vec![]));
3362 }
3363 };
3364
3365 let source = fs::read_to_string(path)?;
3366 let pool = ParserPool::new();
3367 let tree = match pool.parse(&source, lang) {
3368 Ok(t) => t,
3369 Err(_) => {
3370 let mut hasher = std::collections::hash_map::DefaultHasher::new();
3372 source.hash(&mut hasher);
3373 return Ok((hasher.finish(), vec![]));
3374 }
3375 };
3376
3377 let nodes = extract_nodes(tree.root_node(), source.as_bytes(), lang);
3378
3379 let mut signatures: Vec<String> = nodes
3383 .iter()
3384 .map(|n| {
3385 let kind = match n.kind {
3386 NodeKind::Function => "fn",
3387 NodeKind::Class => "class",
3388 NodeKind::Method => "method",
3389 NodeKind::Field => "field",
3390 _ => "other",
3391 };
3392 let sig = if n.params.is_empty() {
3393 format!("{}:{}", kind, n.name)
3394 } else {
3395 format!("{}:{}({})", kind, n.name, n.params)
3396 };
3397 let mut body_hasher = std::collections::hash_map::DefaultHasher::new();
3399 n.normalized_body.hash(&mut body_hasher);
3400 format!("{}|{}", sig, body_hasher.finish())
3401 })
3402 .collect();
3403 signatures.sort();
3404
3405 let mut hasher = std::collections::hash_map::DefaultHasher::new();
3406 for sig in &signatures {
3407 sig.hash(&mut hasher);
3408 }
3409 let fingerprint = hasher.finish();
3410
3411 Ok((fingerprint, signatures))
3412}
3413
3414fn run_file_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3416 let files_a = collect_source_files(dir_a)?;
3417 let files_b = collect_source_files(dir_b)?;
3418
3419 let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
3421 let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
3422
3423 let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
3424
3425 let mut file_changes = Vec::new();
3426 let mut has_any_change = false;
3427
3428 for rel_path in all_paths {
3429 match (map_a.get(rel_path), map_b.get(rel_path)) {
3430 (Some(path_a), Some(path_b)) => {
3431 let (fp_a, sigs_a) = compute_structural_fingerprint(path_a)?;
3433 let (fp_b, sigs_b) = compute_structural_fingerprint(path_b)?;
3434
3435 if fp_a == fp_b {
3436 } else {
3439 has_any_change = true;
3440 let set_a: HashSet<&String> = sigs_a.iter().collect();
3442 let set_b: HashSet<&String> = sigs_b.iter().collect();
3443 let changed: Vec<String> = set_a
3444 .symmetric_difference(&set_b)
3445 .map(|s| (*s).clone())
3446 .collect();
3447
3448 file_changes.push(FileLevelChange {
3449 relative_path: rel_path.to_string(),
3450 change_type: ChangeType::Update,
3451 old_fingerprint: Some(fp_a),
3452 new_fingerprint: Some(fp_b),
3453 signature_changes: if changed.is_empty() {
3454 None
3455 } else {
3456 Some(changed)
3457 },
3458 });
3459 }
3460 }
3461 (None, Some(path_b)) => {
3462 has_any_change = true;
3464 let (fp_b, _) = compute_structural_fingerprint(path_b)?;
3465 file_changes.push(FileLevelChange {
3466 relative_path: rel_path.to_string(),
3467 change_type: ChangeType::Insert,
3468 old_fingerprint: None,
3469 new_fingerprint: Some(fp_b),
3470 signature_changes: None,
3471 });
3472 }
3473 (Some(path_a), None) => {
3474 has_any_change = true;
3476 let (fp_a, _) = compute_structural_fingerprint(path_a)?;
3477 file_changes.push(FileLevelChange {
3478 relative_path: rel_path.to_string(),
3479 change_type: ChangeType::Delete,
3480 old_fingerprint: Some(fp_a),
3481 new_fingerprint: None,
3482 signature_changes: None,
3483 });
3484 }
3485 (None, None) => unreachable!(),
3486 }
3487 }
3488
3489 Ok(DiffReport {
3490 file_a: dir_a.display().to_string(),
3491 file_b: dir_b.display().to_string(),
3492 identical: !has_any_change,
3493 changes: Vec::new(),
3494 summary: None,
3495 granularity: DiffGranularity::File,
3496 file_changes: Some(file_changes),
3497 module_changes: None,
3498 import_graph_summary: None,
3499 arch_changes: None,
3500 arch_summary: None,
3501 })
3502}
3503
/// One import statement, as used internally while building import graphs.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct InternalImportEdge {
    /// Path of the importing file, relative to the scanned root.
    source_file: String,
    /// Module named by the import statement.
    target_module: String,
    /// Names imported from the module; empty when the module itself is
    /// imported.
    imported_names: Vec<String>,
}
3515
3516fn parse_python_imports(source: &str, relative_path: &str) -> Vec<InternalImportEdge> {
3522 let mut edges = Vec::new();
3523
3524 let from_re = Regex::new(r"(?m)^(?:\s*)from\s+([\w.]+)\s+import\s+(.+)$").unwrap();
3526 for cap in from_re.captures_iter(source) {
3527 let target = cap[1].to_string();
3528 let names_str = &cap[2];
3529 let names: Vec<String> = names_str
3530 .split(',')
3531 .map(|n| n.trim().to_string())
3532 .filter(|n| !n.is_empty())
3533 .collect();
3534 edges.push(InternalImportEdge {
3535 source_file: relative_path.to_string(),
3536 target_module: target,
3537 imported_names: names,
3538 });
3539 }
3540
3541 let import_re = Regex::new(r"(?m)^(?:\s*)import\s+([\w.]+)$").unwrap();
3543 for cap in import_re.captures_iter(source) {
3544 let target = cap[1].to_string();
3545 edges.push(InternalImportEdge {
3546 source_file: relative_path.to_string(),
3547 target_module: target,
3548 imported_names: vec![],
3549 });
3550 }
3551
3552 edges
3553}
3554
3555fn parse_file_imports(
3560 registry: &LanguageRegistry,
3561 source: &str,
3562 full_path: &Path,
3563 rel_path: &str,
3564) -> Vec<InternalImportEdge> {
3565 let ext = match full_path.extension().and_then(|e| e.to_str()) {
3566 Some(e) => format!(".{}", e),
3567 None => return Vec::new(),
3568 };
3569
3570 let is_python = ext == ".py" || ext == ".pyi";
3571
3572 if let Some(handler) = registry.get_by_extension(&ext) {
3574 if let Ok(import_defs) = handler.parse_imports(source, full_path) {
3575 return import_defs
3576 .into_iter()
3577 .map(|def| InternalImportEdge {
3578 source_file: rel_path.to_string(),
3579 target_module: def.module,
3580 imported_names: def.names,
3581 })
3582 .collect();
3583 }
3584 }
3585
3586 if is_python {
3588 return parse_python_imports(source, rel_path);
3589 }
3590
3591 Vec::new()
3592}
3593
3594fn build_import_graph(root: &Path) -> Result<Vec<InternalImportEdge>> {
3602 let files = collect_source_files(root)?;
3603 let registry = LanguageRegistry::with_defaults();
3604 let mut all_edges = Vec::new();
3605
3606 for (rel_path, full_path) in &files {
3607 let source = fs::read_to_string(full_path)?;
3608 let edges = parse_file_imports(®istry, &source, full_path, rel_path);
3609 all_edges.extend(edges);
3610 }
3611
3612 Ok(all_edges)
3613}
3614
3615fn to_public_edge(edge: &InternalImportEdge) -> ImportEdge {
3617 ImportEdge {
3618 source_file: edge.source_file.clone(),
3619 target_module: edge.target_module.clone(),
3620 imported_names: edge.imported_names.clone(),
3621 }
3622}
3623
3624fn edge_key(edge: &InternalImportEdge) -> String {
3626 format!(
3627 "{}->{}:{}",
3628 edge.source_file,
3629 edge.target_module,
3630 edge.imported_names.join(",")
3631 )
3632}
3633
3634fn run_module_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3636 let edges_a = build_import_graph(dir_a)?;
3638 let edges_b = build_import_graph(dir_b)?;
3639
3640 let keys_a: HashSet<String> = edges_a.iter().map(edge_key).collect();
3642 let keys_b: HashSet<String> = edges_b.iter().map(edge_key).collect();
3643
3644 let added_keys: HashSet<&String> = keys_b.difference(&keys_a).collect();
3646 let removed_keys: HashSet<&String> = keys_a.difference(&keys_b).collect();
3647
3648 let added_edges: Vec<&InternalImportEdge> = edges_b
3650 .iter()
3651 .filter(|e| added_keys.contains(&edge_key(e)))
3652 .collect();
3653 let removed_edges: Vec<&InternalImportEdge> = edges_a
3654 .iter()
3655 .filter(|e| removed_keys.contains(&edge_key(e)))
3656 .collect();
3657
3658 let files_a = collect_source_files(dir_a)?;
3660 let files_b = collect_source_files(dir_b)?;
3661 let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(r, p)| (r.as_str(), p)).collect();
3662 let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(r, p)| (r.as_str(), p)).collect();
3663 let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
3664
3665 let mut module_changes: Vec<ModuleLevelChange> = Vec::new();
3667 let mut modules_with_import_changes = 0usize;
3668
3669 for rel_path in &all_paths {
3670 let in_a = map_a.contains_key(rel_path);
3671 let in_b = map_b.contains_key(rel_path);
3672
3673 let change_type = if !in_a && in_b {
3675 ChangeType::Insert
3676 } else if in_a && !in_b {
3677 ChangeType::Delete
3678 } else {
3679 ChangeType::Update
3680 };
3681
3682 let mod_added: Vec<ImportEdge> = added_edges
3684 .iter()
3685 .filter(|e| e.source_file == *rel_path)
3686 .map(|e| to_public_edge(e))
3687 .collect();
3688 let mod_removed: Vec<ImportEdge> = removed_edges
3689 .iter()
3690 .filter(|e| e.source_file == *rel_path)
3691 .map(|e| to_public_edge(e))
3692 .collect();
3693
3694 let file_change = if in_a && in_b {
3696 let path_a = map_a[rel_path];
3697 let path_b = map_b[rel_path];
3698 let (fp_a, sigs_a) = compute_structural_fingerprint(path_a)?;
3699 let (fp_b, sigs_b) = compute_structural_fingerprint(path_b)?;
3700 if fp_a != fp_b {
3701 let set_a: HashSet<&String> = sigs_a.iter().collect();
3702 let set_b: HashSet<&String> = sigs_b.iter().collect();
3703 let changed: Vec<String> = set_a
3704 .symmetric_difference(&set_b)
3705 .map(|s| (*s).clone())
3706 .collect();
3707 Some(FileLevelChange {
3708 relative_path: rel_path.to_string(),
3709 change_type: ChangeType::Update,
3710 old_fingerprint: Some(fp_a),
3711 new_fingerprint: Some(fp_b),
3712 signature_changes: if changed.is_empty() {
3713 None
3714 } else {
3715 Some(changed)
3716 },
3717 })
3718 } else {
3719 None
3720 }
3721 } else {
3722 None
3723 };
3724
3725 let has_import_changes = !mod_added.is_empty() || !mod_removed.is_empty();
3727 let has_file_change = file_change.is_some();
3728 let is_new_or_deleted =
3729 change_type == ChangeType::Insert || change_type == ChangeType::Delete;
3730
3731 if has_import_changes || has_file_change || is_new_or_deleted {
3732 if has_import_changes {
3733 modules_with_import_changes += 1;
3734 }
3735
3736 let final_added = if change_type == ChangeType::Insert && mod_added.is_empty() {
3738 edges_b
3740 .iter()
3741 .filter(|e| e.source_file == *rel_path)
3742 .map(to_public_edge)
3743 .collect()
3744 } else {
3745 mod_added
3746 };
3747 let final_removed = if change_type == ChangeType::Delete && mod_removed.is_empty() {
3749 edges_a
3750 .iter()
3751 .filter(|e| e.source_file == *rel_path)
3752 .map(to_public_edge)
3753 .collect()
3754 } else {
3755 mod_removed
3756 };
3757
3758 let has_expanded_imports = !final_added.is_empty() || !final_removed.is_empty();
3760 if has_expanded_imports && !has_import_changes {
3761 modules_with_import_changes += 1;
3762 }
3763
3764 module_changes.push(ModuleLevelChange {
3765 module_path: rel_path.to_string(),
3766 change_type,
3767 imports_added: final_added,
3768 imports_removed: final_removed,
3769 file_change,
3770 });
3771 }
3772 }
3773
3774 let summary = ImportGraphSummary {
3775 total_edges_a: edges_a.len(),
3776 total_edges_b: edges_b.len(),
3777 edges_added: added_keys.len(),
3778 edges_removed: removed_keys.len(),
3779 modules_with_import_changes,
3780 };
3781
3782 let identical = module_changes.is_empty() && added_keys.is_empty() && removed_keys.is_empty();
3783
3784 Ok(DiffReport {
3785 file_a: dir_a.display().to_string(),
3786 file_b: dir_b.display().to_string(),
3787 identical,
3788 changes: Vec::new(),
3789 summary: None,
3790 granularity: DiffGranularity::Module,
3791 file_changes: None,
3792 module_changes: Some(module_changes),
3793 import_graph_summary: Some(summary),
3794 arch_changes: None,
3795 arch_summary: None,
3796 })
3797}
3798
/// Maps a directory name to an architectural layer label.
///
/// The comparison is case-insensitive. Names that match none of the known
/// aliases are labelled `"other"`.
fn classify_directory_layer(dir_name: &str) -> String {
    // Layer label paired with the directory names that map to it.
    const LAYERS: &[(&str, &[&str])] = &[
        (
            "api",
            &["api", "routes", "handlers", "endpoints", "views", "controllers"],
        ),
        ("core", &["core", "models", "domain", "entities"]),
        ("utility", &["utils", "helpers", "lib", "common", "shared"]),
        ("middleware", &["middleware", "interceptors", "filters"]),
        ("service", &["services", "service"]),
        ("test", &["tests", "test", "spec", "specs"]),
        ("config", &["config", "settings", "conf"]),
        (
            "data",
            &["db", "database", "migrations", "repositories", "repo"],
        ),
    ];

    let needle = dir_name.to_lowercase();
    LAYERS
        .iter()
        .find(|(_, aliases)| aliases.contains(&needle.as_str()))
        .map_or("other", |(layer, _)| *layer)
        .to_string()
}
3818
3819fn classify_by_import_flow(
3827 dir_name: &str,
3828 edges: &[InternalImportEdge],
3829 all_dirs: &HashSet<String>,
3830) -> String {
3831 let fan_out: usize = edges
3833 .iter()
3834 .filter(|e| {
3835 e.source_file
3836 .split('/')
3837 .next()
3838 .map(|d| d == dir_name)
3839 .unwrap_or(false)
3840 })
3841 .filter(|e| {
3842 let target_first = e
3844 .target_module
3845 .split('/')
3846 .next()
3847 .or_else(|| e.target_module.split('.').next())
3848 .unwrap_or("");
3849 all_dirs.contains(target_first) && target_first != dir_name
3850 })
3851 .map(|e| e.target_module.clone())
3852 .collect::<HashSet<_>>()
3853 .len();
3854
3855 let fan_in: usize = edges
3857 .iter()
3858 .filter(|e| {
3859 let source_dir = e.source_file.split('/').next().unwrap_or("");
3860 source_dir != dir_name
3861 })
3862 .filter(|e| {
3863 let target_first = e
3864 .target_module
3865 .split('/')
3866 .next()
3867 .or_else(|| e.target_module.split('.').next())
3868 .unwrap_or("");
3869 target_first == dir_name
3870 })
3871 .count();
3872
3873 if fan_in == 0 && fan_out == 0 {
3874 return "other".to_string();
3875 }
3876
3877 if fan_out > 0 && fan_in == 0 {
3879 "entry".to_string()
3880 } else if fan_in > fan_out * 2 {
3881 "utility".to_string()
3882 } else if fan_out > fan_in * 2 {
3883 "entry".to_string()
3884 } else {
3885 "service".to_string()
3886 }
3887}
3888
3889fn collect_arch_directories(root: &Path) -> Result<HashMap<String, String>> {
3896 let mut dirs: HashMap<String, String> = HashMap::new();
3897 let files = collect_source_files(root)?;
3898
3899 for (rel_path, _) in &files {
3901 if let Some(first_dir) = rel_path.split('/').next() {
3902 if rel_path.contains('/') && !dirs.contains_key(first_dir) {
3903 let layer = classify_directory_layer(first_dir);
3904 dirs.insert(first_dir.to_string(), layer);
3905 }
3906 }
3907 }
3908
3909 let other_dirs: Vec<String> = dirs
3911 .iter()
3912 .filter(|(_, layer)| *layer == "other")
3913 .map(|(name, _)| name.clone())
3914 .collect();
3915
3916 if !other_dirs.is_empty() {
3917 if let Ok(edges) = build_import_graph(root) {
3919 let all_dir_names: HashSet<String> = dirs.keys().cloned().collect();
3920 for dir_name in &other_dirs {
3921 let inferred = classify_by_import_flow(dir_name, &edges, &all_dir_names);
3922 if inferred != "other" {
3923 dirs.insert(dir_name.clone(), inferred);
3924 }
3925 }
3926 }
3927 }
3928
3929 Ok(dirs)
3930}
3931
3932fn run_arch_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3934 let dirs_a = collect_arch_directories(dir_a)?;
3935 let dirs_b = collect_arch_directories(dir_b)?;
3936
3937 let all_dirs: BTreeSet<&str> = dirs_a
3938 .keys()
3939 .chain(dirs_b.keys())
3940 .map(|s| s.as_str())
3941 .collect();
3942
3943 let mut arch_changes: Vec<ArchLevelChange> = Vec::new();
3944 let mut directories_added = 0usize;
3945 let mut directories_removed = 0usize;
3946 let mut layer_migrations = 0usize;
3947 let mut changed_dirs = 0usize;
3948 let total_dirs = all_dirs.len();
3949
3950 for dir_name in &all_dirs {
3951 let in_a = dirs_a.get(*dir_name);
3952 let in_b = dirs_b.get(*dir_name);
3953
3954 match (in_a, in_b) {
3955 (Some(layer_a), Some(layer_b)) => {
3956 if layer_a != layer_b {
3957 changed_dirs += 1;
3959 layer_migrations += 1;
3960 arch_changes.push(ArchLevelChange {
3961 directory: dir_name.to_string(),
3962 change_type: ArchChangeType::LayerMigration,
3963 old_layer: Some(layer_a.clone()),
3964 new_layer: Some(layer_b.clone()),
3965 migrated_functions: Vec::new(),
3966 });
3967 }
3968 }
3970 (None, Some(layer_b)) => {
3971 changed_dirs += 1;
3973 directories_added += 1;
3974 arch_changes.push(ArchLevelChange {
3975 directory: dir_name.to_string(),
3976 change_type: ArchChangeType::Added,
3977 old_layer: None,
3978 new_layer: Some(layer_b.clone()),
3979 migrated_functions: Vec::new(),
3980 });
3981 }
3982 (Some(layer_a), None) => {
3983 changed_dirs += 1;
3985 directories_removed += 1;
3986 arch_changes.push(ArchLevelChange {
3987 directory: dir_name.to_string(),
3988 change_type: ArchChangeType::Removed,
3989 old_layer: Some(layer_a.clone()),
3990 new_layer: None,
3991 migrated_functions: Vec::new(),
3992 });
3993 }
3994 (None, None) => unreachable!(),
3995 }
3996 }
3997
3998 let stability_score = if total_dirs == 0 {
3999 1.0
4000 } else {
4001 1.0 - (changed_dirs as f64 / total_dirs as f64)
4002 };
4003
4004 let summary = ArchDiffSummary {
4005 layer_migrations,
4006 directories_added,
4007 directories_removed,
4008 cycles_introduced: 0,
4009 cycles_resolved: 0,
4010 stability_score,
4011 };
4012
4013 let identical = arch_changes.is_empty();
4014
4015 Ok(DiffReport {
4016 file_a: dir_a.display().to_string(),
4017 file_b: dir_b.display().to_string(),
4018 identical,
4019 changes: Vec::new(),
4020 summary: None,
4021 granularity: DiffGranularity::Architecture,
4022 file_changes: None,
4023 module_changes: None,
4024 import_graph_summary: None,
4025 arch_changes: Some(arch_changes),
4026 arch_summary: Some(summary),
4027 })
4028}
4029
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline Python fixture: three top-level functions and a class with one method.
    const SAMPLE_A: &str = r#"
def original_function(x):
    return x * 2

def renamed_later(a, b):
    return a + b

def will_be_deleted():
    return "goodbye"

class OriginalClass:
    def method_one(self):
        return 1
"#;

    /// Revised fixture: `original_function` has a new body, `renamed_later`
    /// reappears as `better_name`, `will_be_deleted` is gone, and both
    /// `new_function` and `method_two` are new.
    const SAMPLE_B: &str = r#"
def original_function(x):
    # Modified implementation
    return x * 3

def better_name(a, b):
    return a + b

def new_function():
    return "hello"

class OriginalClass:
    def method_one(self):
        return 1

    def method_two(self):
        return 2
"#;

    /// Parses `source` as Python, panicking when no tree is produced.
    fn parse_python(source: &str) -> tree_sitter::Tree {
        ParserPool::new()
            .parse(source, Language::Python)
            .unwrap()
    }

    /// Diffs `SAMPLE_A` against `SAMPLE_B` with `semantic_only` disabled and
    /// returns the names of all reported changes whose type equals `kind`.
    fn sample_change_names(kind: ChangeType) -> Vec<Option<String>> {
        let tree_a = parse_python(SAMPLE_A);
        let tree_b = parse_python(SAMPLE_B);
        let nodes_a = extract_nodes(tree_a.root_node(), SAMPLE_A.as_bytes(), Language::Python);
        let nodes_b = extract_nodes(tree_b.root_node(), SAMPLE_B.as_bytes(), Language::Python);

        let file_a = PathBuf::from("a.py");
        let file_b = PathBuf::from("b.py");
        let changes = detect_changes(&nodes_a, &nodes_b, &file_a, &file_b, false);

        let names: Vec<Option<String>> = changes
            .iter()
            .filter(|change| change.change_type == kind)
            .map(|change| change.name.as_deref().map(str::to_owned))
            .collect();
        names
    }

    /// Asserts that `text` contains every needle, failing with the message
    /// paired with the first needle that is missing.
    #[track_caller]
    fn assert_contains_all(text: &str, expectations: &[(&str, &str)]) {
        for (needle, message) in expectations {
            assert!(text.contains(*needle), "{}", message);
        }
    }

    #[test]
    fn test_extract_nodes() {
        let tree = parse_python(SAMPLE_A);
        let extracted = extract_nodes(tree.root_node(), SAMPLE_A.as_bytes(), Language::Python);

        let count = extracted.len();
        assert!(count >= 5, "Expected at least 5 nodes, got {}", count);

        let has_node = |wanted: &str| extracted.iter().any(|node| node.name.as_str() == wanted);
        assert!(has_node("original_function"));
        assert!(has_node("renamed_later"));
        assert!(has_node("will_be_deleted"));
        assert!(has_node("OriginalClass"));
        assert!(has_node("method_one"));
    }

    #[test]
    fn test_detect_update() {
        let updated = sample_change_names(ChangeType::Update);
        assert!(!updated.is_empty(), "Should detect at least one update");

        let mentions_original = updated
            .iter()
            .any(|name| name.as_deref() == Some("original_function"));
        assert!(
            mentions_original,
            "original_function should be marked as updated"
        );
    }

    #[test]
    fn test_detect_insert() {
        let inserted = sample_change_names(ChangeType::Insert);
        assert!(!inserted.is_empty(), "Should detect insertions");
    }

    #[test]
    fn test_detect_delete() {
        let deleted = sample_change_names(ChangeType::Delete);
        assert!(!deleted.is_empty(), "Should detect deletions");

        let mentions_removed = deleted
            .iter()
            .any(|name| name.as_deref() == Some("will_be_deleted"));
        assert!(
            mentions_removed,
            "will_be_deleted should be marked as deleted"
        );
    }

    #[test]
    fn test_detect_rename() {
        let renamed = sample_change_names(ChangeType::Rename);
        assert!(!renamed.is_empty(), "Should detect renames");
    }

    #[test]
    fn test_identical_files() {
        // The same source is parsed twice so both sides are independent trees.
        let left_tree = parse_python(SAMPLE_A);
        let right_tree = parse_python(SAMPLE_A);
        let left_nodes =
            extract_nodes(left_tree.root_node(), SAMPLE_A.as_bytes(), Language::Python);
        let right_nodes =
            extract_nodes(right_tree.root_node(), SAMPLE_A.as_bytes(), Language::Python);

        let left_path = PathBuf::from("a.py");
        let right_path = PathBuf::from("b.py");
        let changes = detect_changes(&left_nodes, &right_nodes, &left_path, &right_path, true);

        assert!(
            changes.is_empty(),
            "Identical files should have no semantic changes"
        );
    }

    #[test]
    fn test_compute_similarity() {
        let same_text = compute_similarity("abc", "abc");
        assert_eq!(same_text, 1.0);

        let both_empty = compute_similarity("", "");
        assert_eq!(both_empty, 1.0);

        let one_line_differs = compute_similarity("a\nb\nc", "a\nb\nd");
        assert!(one_line_differs >= 0.5);
    }

    #[test]
    fn test_normalize_body() {
        let body = "def foo():\n # pure comment line\n return 1 # inline comment";
        let normalized = normalize_body(body);

        let has_comment_marker = normalized.contains('#');
        assert!(!has_comment_marker, "Comments should be removed");

        let has_signature = normalized.contains("def foo");
        assert!(!has_signature, "Signature should be skipped");

        assert!(normalized.contains("return 1"), "Body should remain");
    }

    #[test]
    fn test_format_diff_text_renders_file_changes() {
        let updated = FileLevelChange {
            relative_path: "src/main.py".to_owned(),
            change_type: ChangeType::Update,
            old_fingerprint: Some(12345),
            new_fingerprint: Some(67890),
            signature_changes: Some(vec!["fn foo()".to_owned()]),
        };
        let inserted = FileLevelChange {
            relative_path: "src/new_module.py".to_owned(),
            change_type: ChangeType::Insert,
            old_fingerprint: None,
            new_fingerprint: Some(11111),
            signature_changes: None,
        };
        let deleted = FileLevelChange {
            relative_path: "src/removed.py".to_owned(),
            change_type: ChangeType::Delete,
            old_fingerprint: Some(99999),
            new_fingerprint: None,
            signature_changes: None,
        };

        let mut report = DiffReport::new("dir_a/", "dir_b/");
        report.identical = false;
        report.file_changes = Some(vec![updated, inserted, deleted]);

        assert_contains_all(
            &format_diff_text(&report),
            &[
                ("File-Level Changes", "Should have file-level section header"),
                ("src/main.py", "Should mention updated file"),
                ("src/new_module.py", "Should mention added file"),
                ("src/removed.py", "Should mention removed file"),
            ],
        );
    }

    #[test]
    fn test_format_diff_text_renders_module_changes() {
        let added_import = ImportEdge {
            source_file: "src/utils.py".to_owned(),
            target_module: "os.path".to_owned(),
            imported_names: vec!["join".to_owned()],
        };
        let module_change = ModuleLevelChange {
            module_path: "src/utils.py".to_owned(),
            change_type: ChangeType::Update,
            imports_added: vec![added_import],
            imports_removed: vec![],
            file_change: None,
        };

        let mut report = DiffReport::new("dir_a/", "dir_b/");
        report.identical = false;
        report.module_changes = Some(vec![module_change]);

        assert_contains_all(
            &format_diff_text(&report),
            &[
                ("Module-Level Changes", "Should have module-level section header"),
                ("src/utils.py", "Should mention the module path"),
                ("os.path", "Should mention added import target"),
            ],
        );
    }

    #[test]
    fn test_format_diff_text_renders_import_graph_summary() {
        let graph_summary = ImportGraphSummary {
            total_edges_a: 10,
            total_edges_b: 15,
            edges_added: 7,
            edges_removed: 2,
            modules_with_import_changes: 3,
        };

        let mut report = DiffReport::new("dir_a/", "dir_b/");
        report.identical = false;
        report.import_graph_summary = Some(graph_summary);

        assert_contains_all(
            &format_diff_text(&report),
            &[
                ("Import Graph", "Should have import graph section"),
                ("7", "Should show edges added"),
                ("2", "Should show edges removed"),
            ],
        );
    }

    #[test]
    fn test_format_diff_text_renders_arch_changes() {
        let migration = ArchLevelChange {
            directory: "src/api/".to_owned(),
            change_type: ArchChangeType::LayerMigration,
            old_layer: Some("presentation".to_owned()),
            new_layer: Some("business".to_owned()),
            migrated_functions: vec!["handle_request".to_owned()],
        };
        let addition = ArchLevelChange {
            directory: "src/new_service/".to_owned(),
            change_type: ArchChangeType::Added,
            old_layer: None,
            new_layer: Some("service".to_owned()),
            migrated_functions: vec![],
        };

        let mut report = DiffReport::new("dir_a/", "dir_b/");
        report.identical = false;
        report.arch_changes = Some(vec![migration, addition]);

        assert_contains_all(
            &format_diff_text(&report),
            &[
                ("Architecture-Level Changes", "Should have arch section header"),
                ("src/api/", "Should mention migrated directory"),
                ("presentation", "Should show old layer"),
                ("business", "Should show new layer"),
                ("src/new_service/", "Should mention added directory"),
            ],
        );
    }

    #[test]
    fn test_format_diff_text_renders_arch_summary() {
        let arch_summary = ArchDiffSummary {
            layer_migrations: 2,
            directories_added: 1,
            directories_removed: 0,
            cycles_introduced: 1,
            cycles_resolved: 0,
            stability_score: 0.75,
        };

        let mut report = DiffReport::new("dir_a/", "dir_b/");
        report.identical = false;
        report.arch_summary = Some(arch_summary);

        assert_contains_all(
            &format_diff_text(&report),
            &[
                ("Architecture Summary", "Should have arch summary section"),
                ("0.75", "Should show stability score"),
            ],
        );
    }

    #[test]
    fn test_format_diff_text_identical_skips_higher_levels() {
        // A report flagged identical still carries a file change; the
        // assertions below require that change to stay out of the output.
        let stray_change = FileLevelChange {
            relative_path: "should_not_appear.py".to_owned(),
            change_type: ChangeType::Insert,
            old_fingerprint: None,
            new_fingerprint: Some(1),
            signature_changes: None,
        };

        let mut report = DiffReport::new("a.py", "b.py");
        report.identical = true;
        report.file_changes = Some(vec![stray_change]);

        let rendered = format_diff_text(&report);
        let leaked = rendered.contains("should_not_appear");
        assert!(!leaked, "Identical report should skip all change sections");
        assert!(
            rendered.contains("No structural changes"),
            "Should show identical message"
        );
    }
}