1use std::collections::{BTreeSet, HashMap, HashSet};
19use std::fs;
20use std::hash::{Hash, Hasher};
21use std::path::{Path, PathBuf};
22
23use anyhow::{bail, Result};
24use clap::Args;
25use regex::Regex;
26use tree_sitter::Node;
27
28use tldr_core::ast::function_finder::{get_function_name, get_function_node_kinds};
29use tldr_core::ast::parser::ParserPool;
30use tldr_core::callgraph::languages::LanguageRegistry;
31use tldr_core::types::Language;
32
33use super::error::RemainingError;
34use super::types::{
35 ASTChange, ArchChangeType, ArchDiffSummary, ArchLevelChange, BaseChanges, ChangeType,
36 DiffGranularity, DiffReport, DiffSummary, FileLevelChange, ImportEdge, ImportGraphSummary,
37 Location, ModuleLevelChange, NodeKind,
38};
39use crate::output::OutputFormat;
40
/// Minimum body similarity (as returned by `compute_similarity`, in `0.0..=1.0`)
/// that an unmatched node from A and an unmatched node from B must reach for
/// `detect_changes` to report them as a rename rather than a delete plus an insert.
const RENAME_SIMILARITY_THRESHOLD: f64 = 0.8;
47
// Command-line arguments of the diff command.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive turns `///`
// doc comments into help text, so adding them would change the CLI output.
#[derive(Debug, Args)]
pub struct DiffArgs {
    // "A" side of the comparison. Must exist; whether it has to be a file or a
    // directory depends on the selected granularity (see `run_to_report`).
    pub file_a: PathBuf,

    // "B" side of the comparison. Same requirements as `file_a`.
    pub file_b: PathBuf,

    // Level at which the two inputs are compared (`--granularity` / `-g`).
    #[arg(long, short = 'g', default_value = "function")]
    pub granularity: DiffGranularity,

    // Forwarded to the function- and class-level diffs. In `detect_changes`
    // it suppresses `Move` changes (same body, different line).
    #[arg(long)]
    pub semantic_only: bool,

    // Optional file to write the rendered report to (`--output` / `-O`).
    // When absent, `run` prints the report to stdout.
    #[arg(long, short = 'O')]
    pub output: Option<PathBuf>,
}
83
/// A function, method or class pulled out of a parsed syntax tree, carrying
/// just enough information for `detect_changes` to match and compare it.
#[derive(Debug, Clone)]
struct ExtractedNode {
    /// Identifier of the definition; used as the primary matching key.
    name: String,
    /// Function, method or class.
    kind: NodeKind,
    /// 1-based line of the node's start (tree-sitter row + 1).
    line: u32,
    /// 1-based line of the node's end (tree-sitter row + 1).
    end_line: u32,
    /// Start column exactly as reported by tree-sitter (not shifted by one).
    column: u32,
    /// Full source text of the node.
    body: String,
    /// `body` after `normalize_body`; this is what change detection compares.
    normalized_body: String,
    /// Source text of the parameter list, or empty when none was found.
    params: String,
    /// True when the definition was found inside a class body.
    is_method: bool,
}
110
111impl ExtractedNode {
112 fn new(
113 name: impl Into<String>,
114 kind: NodeKind,
115 line: u32,
116 end_line: u32,
117 column: u32,
118 body: impl Into<String>,
119 ) -> Self {
120 let body_str: String = body.into();
121 let normalized = normalize_body(&body_str);
122 Self {
123 name: name.into(),
124 kind,
125 line,
126 end_line,
127 column,
128 body: body_str,
129 normalized_body: normalized,
130 params: String::new(),
131 is_method: false,
132 }
133 }
134
135 fn with_params(mut self, params: impl Into<String>) -> Self {
136 self.params = params.into();
137 self
138 }
139
140 fn with_method_kind(mut self) -> Self {
141 self.is_method = true;
142 if self.kind == NodeKind::Function {
143 self.kind = NodeKind::Method;
144 }
145 self
146 }
147}
148
/// Reduces a definition's source text to a form that ignores layout and
/// `#` comments, so that purely cosmetic edits compare as equal.
///
/// The first line (the signature line) is skipped. On every remaining line a
/// trailing `#` comment is removed, surrounding whitespace is trimmed, and
/// lines that end up empty are dropped. The surviving lines are joined with `\n`.
///
/// A `#` only starts a comment when it is outside a single- or double-quoted
/// string on that line. Quote state is tracked character by character, with
/// backslash escapes honoured inside strings, so `s = "#"  # note` and
/// `s = "it's"  # note` both lose only the real comment. Strings spanning
/// several lines are not tracked.
fn normalize_body(body: &str) -> String {
    // Byte offset of the first `#` that is not inside a quoted string, if any.
    fn comment_start(line: &str) -> Option<usize> {
        let mut open_quote: Option<char> = None;
        let mut escaped = false;

        for (idx, ch) in line.char_indices() {
            if escaped {
                // The previous character was a backslash inside a string.
                escaped = false;
                continue;
            }
            match open_quote {
                Some(quote) => {
                    if ch == '\\' {
                        escaped = true;
                    } else if ch == quote {
                        open_quote = None;
                    }
                }
                None => match ch {
                    '\'' | '"' => open_quote = Some(ch),
                    '#' => return Some(idx),
                    _ => {}
                },
            }
        }
        None
    }

    body.lines()
        .skip(1) // the signature line is not part of the compared body
        .map(|line| match comment_start(line) {
            Some(pos) => line[..pos].trim(),
            None => line.trim(),
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join("\n")
}
178
179impl DiffArgs {
184 pub fn run_to_report(&self) -> Result<DiffReport> {
190 if !self.file_a.exists() {
192 return Err(RemainingError::file_not_found(&self.file_a).into());
193 }
194 if !self.file_b.exists() {
195 return Err(RemainingError::file_not_found(&self.file_b).into());
196 }
197
198 match self.granularity {
199 DiffGranularity::File => {
200 if !self.file_a.is_dir() || !self.file_b.is_dir() {
202 bail!("File-level (L6) diff requires directories, not individual files");
203 }
204 run_file_level_diff(&self.file_a, &self.file_b)
205 }
206 DiffGranularity::Module => {
207 if !self.file_a.is_dir() || !self.file_b.is_dir() {
209 bail!("Module-level (L7) diff requires directories, not individual files");
210 }
211 run_module_level_diff(&self.file_a, &self.file_b)
212 }
213 DiffGranularity::Architecture => {
214 if !self.file_a.is_dir() || !self.file_b.is_dir() {
216 bail!(
217 "Architecture-level (L8) diff requires directories, not individual files"
218 );
219 }
220 run_arch_level_diff(&self.file_a, &self.file_b)
221 }
222 DiffGranularity::Class => {
223 if self.file_a.is_dir() && self.file_b.is_dir() {
225 run_class_diff_directory(&self.file_a, &self.file_b, self.semantic_only)
226 } else {
227 run_class_diff(&self.file_a, &self.file_b, self.semantic_only)
228 }
229 }
230 DiffGranularity::Statement => {
231 self.run_statement_level_diff()
233 }
234 DiffGranularity::Token => {
235 self.run_token_level_diff()
237 }
238 DiffGranularity::Expression => {
239 self.run_expression_level_diff()
241 }
242 _ => {
243 self.run_function_level_diff()
245 }
246 }
247 }
248
249 pub fn run(&self, format: OutputFormat) -> Result<()> {
251 let report = self.run_to_report()?;
252
253 match format {
255 OutputFormat::Json => {
256 let json = serde_json::to_string_pretty(&report)?;
257 if let Some(ref output_path) = self.output {
258 fs::write(output_path, &json)?;
259 } else {
260 println!("{}", json);
261 }
262 }
263 OutputFormat::Text => {
264 let text = format_diff_text(&report);
265 if let Some(ref output_path) = self.output {
266 fs::write(output_path, &text)?;
267 } else {
268 println!("{}", text);
269 }
270 }
271 OutputFormat::Sarif | OutputFormat::Compact | OutputFormat::Dot => {
272 let json = serde_json::to_string_pretty(&report)?;
274 println!("{}", json);
275 }
276 }
277
278 Ok(())
279 }
280
281 fn run_function_level_diff(&self) -> Result<DiffReport> {
283 let lang = Language::from_path(&self.file_a).ok_or_else(|| {
285 let ext = self
286 .file_a
287 .extension()
288 .map(|e| e.to_string_lossy().to_string())
289 .unwrap_or_else(|| "unknown".to_string());
290 RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
291 })?;
292
293 let source_a = fs::read_to_string(&self.file_a)?;
295 let source_b = fs::read_to_string(&self.file_b)?;
296
297 let pool = ParserPool::new();
299 let tree_a = pool.parse(&source_a, lang).map_err(|e| {
300 RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
301 })?;
302 let tree_b = pool.parse(&source_b, lang).map_err(|e| {
303 RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
304 })?;
305
306 let nodes_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
308 let nodes_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
309
310 let changes = detect_changes(
312 &nodes_a,
313 &nodes_b,
314 &self.file_a,
315 &self.file_b,
316 self.semantic_only,
317 );
318
319 let mut summary = DiffSummary::default();
321 for change in &changes {
322 summary.total_changes += 1;
323 if change.change_type != ChangeType::Format {
324 summary.semantic_changes += 1;
325 }
326 match change.change_type {
327 ChangeType::Insert => summary.inserts += 1,
328 ChangeType::Delete => summary.deletes += 1,
329 ChangeType::Update => summary.updates += 1,
330 ChangeType::Move => summary.moves += 1,
331 ChangeType::Rename => summary.renames += 1,
332 ChangeType::Format => summary.formats += 1,
333 ChangeType::Extract => summary.extracts += 1,
334 ChangeType::Inline => {}
335 }
336 }
337
338 let report = DiffReport {
340 file_a: self.file_a.display().to_string(),
341 file_b: self.file_b.display().to_string(),
342 identical: changes.is_empty(),
343 changes,
344 summary: Some(summary),
345 granularity: self.granularity,
346 file_changes: None,
347 module_changes: None,
348 import_graph_summary: None,
349 arch_changes: None,
350 arch_summary: None,
351 };
352
353 Ok(report)
354 }
355
356 fn run_token_level_diff(&self) -> Result<DiffReport> {
365 use super::difftastic;
366 use typed_arena::Arena;
367
368 let lang = Language::from_path(&self.file_a).ok_or_else(|| {
370 let ext = self
371 .file_a
372 .extension()
373 .map(|e| e.to_string_lossy().to_string())
374 .unwrap_or_else(|| "unknown".to_string());
375 RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
376 })?;
377
378 let lhs_src = fs::read_to_string(&self.file_a)?;
380 let rhs_src = fs::read_to_string(&self.file_b)?;
381
382 let config = difftastic::lang_config::LangConfig::for_language(lang.as_str());
384
385 let pool = ParserPool::new();
387 let lhs_tree = pool.parse(&lhs_src, lang).map_err(|e| {
388 RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
389 })?;
390 let rhs_tree = pool.parse(&rhs_src, lang).map_err(|e| {
391 RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
392 })?;
393
394 let arena = Arena::new();
396 let (lhs_nodes, rhs_nodes) = difftastic::ts_to_syntax::prepare_syntax_trees(
397 &arena, &lhs_src, &rhs_src, &lhs_tree, &rhs_tree, &config,
398 );
399
400 let mut change_map = difftastic::changes::ChangeMap::default();
402
403 let chunks = difftastic::unchanged::mark_unchanged(&lhs_nodes, &rhs_nodes, &mut change_map);
405
406 for (lhs_chunk, rhs_chunk) in &chunks {
408 match (lhs_chunk.first(), rhs_chunk.first()) {
409 (Some(lhs_first), Some(rhs_first)) => {
410 if difftastic::dijkstra::mark_syntax(
411 Some(*lhs_first),
412 Some(*rhs_first),
413 &mut change_map,
414 difftastic::dijkstra::DEFAULT_GRAPH_LIMIT,
415 )
416 .is_err()
417 {
418 for node in lhs_chunk {
420 difftastic::changes::insert_deep_novel(node, &mut change_map);
421 }
422 for node in rhs_chunk {
423 difftastic::changes::insert_deep_novel(node, &mut change_map);
424 }
425 }
426 }
427 (Some(_), None) => {
428 for node in lhs_chunk {
430 difftastic::changes::insert_deep_novel(node, &mut change_map);
431 }
432 }
433 (None, Some(_)) => {
434 for node in rhs_chunk {
436 difftastic::changes::insert_deep_novel(node, &mut change_map);
437 }
438 }
439 (None, None) => {
440 }
442 }
443 }
444
445 difftastic::sliders::fix_all_sliders(&lhs_nodes, &mut change_map);
447 difftastic::sliders::fix_all_sliders(&rhs_nodes, &mut change_map);
448
449 let fa = self.file_a.display().to_string();
451 let fb = self.file_b.display().to_string();
452 Ok(difftastic::changemap_to_report::changemap_to_l1_report(
453 &lhs_nodes,
454 &rhs_nodes,
455 &change_map,
456 &fa,
457 &fb,
458 ))
459 }
460
461 fn run_expression_level_diff(&self) -> Result<DiffReport> {
467 use super::difftastic;
468 use typed_arena::Arena;
469
470 let lang = Language::from_path(&self.file_a).ok_or_else(|| {
472 let ext = self
473 .file_a
474 .extension()
475 .map(|e| e.to_string_lossy().to_string())
476 .unwrap_or_else(|| "unknown".to_string());
477 RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
478 })?;
479
480 let lhs_src = fs::read_to_string(&self.file_a)?;
482 let rhs_src = fs::read_to_string(&self.file_b)?;
483
484 let config = difftastic::lang_config::LangConfig::for_language(lang.as_str());
486
487 let pool = ParserPool::new();
489 let lhs_tree = pool.parse(&lhs_src, lang).map_err(|e| {
490 RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
491 })?;
492 let rhs_tree = pool.parse(&rhs_src, lang).map_err(|e| {
493 RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
494 })?;
495
496 let arena = Arena::new();
498 let (lhs_nodes, rhs_nodes) = difftastic::ts_to_syntax::prepare_syntax_trees(
499 &arena, &lhs_src, &rhs_src, &lhs_tree, &rhs_tree, &config,
500 );
501
502 let mut change_map = difftastic::changes::ChangeMap::default();
504
505 let chunks = difftastic::unchanged::mark_unchanged(&lhs_nodes, &rhs_nodes, &mut change_map);
507
508 for (lhs_chunk, rhs_chunk) in &chunks {
510 match (lhs_chunk.first(), rhs_chunk.first()) {
511 (Some(lhs_first), Some(rhs_first)) => {
512 if difftastic::dijkstra::mark_syntax(
513 Some(*lhs_first),
514 Some(*rhs_first),
515 &mut change_map,
516 difftastic::dijkstra::DEFAULT_GRAPH_LIMIT,
517 )
518 .is_err()
519 {
520 for node in lhs_chunk {
521 difftastic::changes::insert_deep_novel(node, &mut change_map);
522 }
523 for node in rhs_chunk {
524 difftastic::changes::insert_deep_novel(node, &mut change_map);
525 }
526 }
527 }
528 (Some(_), None) => {
529 for node in lhs_chunk {
530 difftastic::changes::insert_deep_novel(node, &mut change_map);
531 }
532 }
533 (None, Some(_)) => {
534 for node in rhs_chunk {
535 difftastic::changes::insert_deep_novel(node, &mut change_map);
536 }
537 }
538 (None, None) => {}
539 }
540 }
541
542 difftastic::sliders::fix_all_sliders(&lhs_nodes, &mut change_map);
544 difftastic::sliders::fix_all_sliders(&rhs_nodes, &mut change_map);
545
546 let fa = self.file_a.display().to_string();
548 let fb = self.file_b.display().to_string();
549 Ok(difftastic::changemap_to_report::changemap_to_l2_report(
550 &lhs_nodes,
551 &rhs_nodes,
552 &change_map,
553 &fa,
554 &fb,
555 ))
556 }
557}
558
559fn node_text<'a>(node: Node, source: &'a [u8]) -> &'a str {
565 node.utf8_text(source).unwrap_or("")
566}
567
568fn get_class_node_kinds(language: Language) -> &'static [&'static str] {
570 match language {
571 Language::Python => &["class_definition"],
572 Language::TypeScript | Language::JavaScript => &["class_declaration", "class"],
573 Language::Go => &["type_declaration"],
574 Language::Rust => &["struct_item", "enum_item", "impl_item"],
575 Language::Java => &[
576 "class_declaration",
577 "interface_declaration",
578 "enum_declaration",
579 ],
580 Language::C => &["struct_specifier", "enum_specifier"],
581 Language::Cpp => &["class_specifier", "struct_specifier", "enum_specifier"],
582 Language::Ruby => &["class", "module"],
583 Language::Php => &["class_declaration", "interface_declaration"],
584 Language::CSharp => &[
585 "class_declaration",
586 "interface_declaration",
587 "struct_declaration",
588 ],
589 Language::Kotlin => &["class_declaration", "object_declaration"],
590 Language::Scala => &["class_definition", "object_definition", "trait_definition"],
591 Language::Swift => &[
592 "class_declaration",
593 "struct_declaration",
594 "protocol_declaration",
595 ],
596 Language::Elixir => &["call"], Language::Lua | Language::Luau => &[], Language::Ocaml => &["module_definition", "type_definition"],
599 }
600}
601
602fn get_class_body_kinds(language: Language) -> &'static [&'static str] {
604 match language {
605 Language::Python => &["block"],
606 Language::TypeScript | Language::JavaScript => &["class_body"],
607 Language::Go => &[], Language::Rust => &["declaration_list"], Language::Java => &["class_body"],
610 Language::C | Language::Cpp => &["field_declaration_list"],
611 Language::Ruby => &["body_statement"],
612 Language::Php => &["declaration_list"],
613 Language::CSharp => &["declaration_list"],
614 Language::Kotlin => &["class_body"],
615 Language::Scala => &["template_body"],
616 Language::Swift => &["class_body"],
617 Language::Elixir => &["do_block"],
618 Language::Lua | Language::Luau => &[],
619 Language::Ocaml => &[],
620 }
621}
622
623fn extract_nodes(root: Node, source: &[u8], lang: Language) -> Vec<ExtractedNode> {
629 let mut nodes = Vec::new();
630 let kinds = NodeKindSets {
631 func: get_function_node_kinds(lang),
632 class: get_class_node_kinds(lang),
633 body: get_class_body_kinds(lang),
634 };
635 extract_nodes_recursive(root, source, &mut nodes, false, lang, &kinds);
636 nodes
637}
638
/// Node-kind tables for one language, built once in `extract_nodes` and
/// passed by reference through the recursive walk.
struct NodeKindSets<'a> {
    /// Kinds treated as function definitions.
    func: &'a [&'a str],
    /// Kinds treated as class-like definitions.
    class: &'a [&'a str],
    /// Kinds of the children of a class that are searched for members.
    body: &'a [&'a str],
}
644
645fn extract_nodes_recursive(
646 node: Node,
647 source: &[u8],
648 nodes: &mut Vec<ExtractedNode>,
649 in_class: bool,
650 lang: Language,
651 kinds: &NodeKindSets<'_>,
652) {
653 let kind = node.kind();
654
655 if lang == Language::Ocaml && kind == "value_definition" {
665 for child in node.children(&mut node.walk()) {
666 if child.kind() == "let_binding" && ocaml_let_binding_is_function(child) {
667 if let Some(extracted) = extract_function_node(child, source, in_class, lang) {
668 if extracted.name != "_" && extracted.name != "()" && !extracted.name.is_empty()
670 {
671 nodes.push(extracted);
672 }
673 }
674 }
675 }
676 return;
680 }
681 if lang == Language::Ocaml && kind == "let_binding" {
682 for child in node.children(&mut node.walk()) {
687 extract_nodes_recursive(child, source, nodes, in_class, lang, kinds);
688 }
689 return;
690 }
691
692 if lang == Language::Ocaml && kind == "value_specification" {
704 if let Some(extracted) = extract_ocaml_value_spec(node, source) {
705 nodes.push(extracted);
706 }
707 return;
708 }
709
710 if kinds.func.contains(&kind) {
712 if let Some(extracted) = extract_function_node(node, source, in_class, lang) {
713 nodes.push(extracted);
714 }
715 }
716 else if kinds.class.contains(&kind) {
718 if let Some(extracted) = extract_class_node(node, source, lang) {
719 nodes.push(extracted);
720 }
721 for child in node.children(&mut node.walk()) {
723 if kinds.body.contains(&child.kind()) {
724 extract_nodes_recursive(child, source, nodes, true, lang, kinds);
725 }
726 }
727 return; }
729
730 for child in node.children(&mut node.walk()) {
732 extract_nodes_recursive(child, source, nodes, in_class, lang, kinds);
733 }
734}
735
736fn ocaml_let_binding_is_function(node: Node) -> bool {
741 for child in node.children(&mut node.walk()) {
742 if child.kind() == "parameter" {
743 return true;
744 }
745 }
746 false
747}
748
749fn extract_ocaml_value_spec(node: Node, source: &[u8]) -> Option<ExtractedNode> {
761 let mut name = None;
763 for child in node.children(&mut node.walk()) {
764 if child.kind() == "value_name" {
765 name = Some(node_text(child, source).to_string());
766 break;
767 }
768 }
769 let name = name?;
770 if name.is_empty() {
771 return None;
772 }
773
774 let line = node.start_position().row as u32 + 1;
775 let end_line = node.end_position().row as u32 + 1;
776 let column = node.start_position().column as u32;
777 let body = node_text(node, source).to_string();
778
779 Some(ExtractedNode::new(
780 name,
781 NodeKind::Function,
782 line,
783 end_line,
784 column,
785 body,
786 ))
787}
788
789fn extract_function_node(
790 node: Node,
791 source: &[u8],
792 is_method: bool,
793 lang: Language,
794) -> Option<ExtractedNode> {
795 let source_str = std::str::from_utf8(source).unwrap_or("");
797 let func_name = get_function_name(node, lang, source_str)?;
798
799 let params = node
801 .child_by_field_name("parameters")
802 .or_else(|| node.child_by_field_name("formal_parameters"))
803 .map(|p| node_text(p, source).to_string())
804 .unwrap_or_default();
805
806 let line = node.start_position().row as u32 + 1;
807 let end_line = node.end_position().row as u32 + 1;
808 let column = node.start_position().column as u32;
809 let body = node_text(node, source).to_string();
810
811 let mut extracted =
812 ExtractedNode::new(func_name, NodeKind::Function, line, end_line, column, body)
813 .with_params(params);
814
815 if is_method {
816 extracted = extracted.with_method_kind();
817 }
818
819 Some(extracted)
820}
821
822fn extract_class_node(node: Node, source: &[u8], lang: Language) -> Option<ExtractedNode> {
823 let class_name = node
825 .child_by_field_name("name")
826 .map(|n| node_text(n, source).to_string())
827 .or_else(|| {
828 let mut cursor = node.walk();
830 for child in node.children(&mut cursor) {
831 if child.kind() == "identifier"
832 || child.kind() == "type_identifier"
833 || child.kind() == "constant"
834 {
835 return Some(node_text(child, source).to_string());
836 }
837 }
838 None
839 })?;
840
841 if class_name.is_empty() {
843 return None;
844 }
845
846 if lang == Language::Elixir && node.kind() == "call" {
848 let first_child = node.child(0)?;
849 let first_text = node_text(first_child, source);
850 if first_text != "defmodule" {
851 return None;
852 }
853 if let Some(args) = node.child(1) {
855 let name = node_text(args, source).to_string();
856 if !name.is_empty() {
857 let line = node.start_position().row as u32 + 1;
858 let end_line = node.end_position().row as u32 + 1;
859 let column = node.start_position().column as u32;
860 let body = node_text(node, source).to_string();
861 return Some(ExtractedNode::new(
862 name,
863 NodeKind::Class,
864 line,
865 end_line,
866 column,
867 body,
868 ));
869 }
870 }
871 return None;
872 }
873
874 let line = node.start_position().row as u32 + 1;
875 let end_line = node.end_position().row as u32 + 1;
876 let column = node.start_position().column as u32;
877 let body = node_text(node, source).to_string();
878
879 Some(ExtractedNode::new(
880 class_name,
881 NodeKind::Class,
882 line,
883 end_line,
884 column,
885 body,
886 ))
887}
888
889fn detect_changes(
895 nodes_a: &[ExtractedNode],
896 nodes_b: &[ExtractedNode],
897 file_a: &Path,
898 file_b: &Path,
899 semantic_only: bool,
900) -> Vec<ASTChange> {
901 let mut changes = Vec::new();
902
903 let mut index_b: HashMap<&str, Vec<usize>> = HashMap::new();
912 for (j, n) in nodes_b.iter().enumerate() {
913 index_b.entry(n.name.as_str()).or_default().push(j);
914 }
915
916 let mut matched_a: Vec<bool> = vec![false; nodes_a.len()];
918 let mut matched_b: Vec<bool> = vec![false; nodes_b.len()];
919
920 for (i, node_a) in nodes_a.iter().enumerate() {
927 let _ = node_a.end_line;
930 let candidates = match index_b.get(node_a.name.as_str()) {
931 Some(c) => c,
932 None => continue,
933 };
934
935 let chosen = candidates
936 .iter()
937 .copied()
938 .filter(|&j| !matched_b[j])
939 .min_by_key(|&j| {
940 let n_b = &nodes_b[j];
941 let kind_mismatch = (node_a.kind != n_b.kind) as u32;
946 let method_mismatch = (node_a.is_method != n_b.is_method) as u32;
947 let body_mismatch = (node_a.normalized_body != n_b.normalized_body) as u32;
948 let line_diff =
949 (node_a.line as i64 - n_b.line as i64).unsigned_abs() as u32;
950 (kind_mismatch, method_mismatch, body_mismatch, line_diff)
951 });
952
953 if let Some(j) = chosen {
954 matched_a[i] = true;
955 matched_b[j] = true;
956 let node_b = &nodes_b[j];
957
958 if node_a.normalized_body != node_b.normalized_body {
960 changes.push(ASTChange {
962 change_type: ChangeType::Update,
963 node_kind: node_a.kind,
964 name: Some(node_a.name.clone()),
965 old_location: Some(Location::with_column(
966 file_a.display().to_string(),
967 node_a.line,
968 node_a.column,
969 )),
970 new_location: Some(Location::with_column(
971 file_b.display().to_string(),
972 node_b.line,
973 node_b.column,
974 )),
975 old_text: Some(node_a.body.clone()),
976 new_text: Some(node_b.body.clone()),
977 similarity: Some(compute_similarity(
978 &node_a.normalized_body,
979 &node_b.normalized_body,
980 )),
981 children: None,
982 base_changes: None,
983 });
984 } else if node_a.line != node_b.line && !semantic_only {
985 changes.push(ASTChange {
987 change_type: ChangeType::Move,
988 node_kind: node_a.kind,
989 name: Some(node_a.name.clone()),
990 old_location: Some(Location::with_column(
991 file_a.display().to_string(),
992 node_a.line,
993 node_a.column,
994 )),
995 new_location: Some(Location::with_column(
996 file_b.display().to_string(),
997 node_b.line,
998 node_b.column,
999 )),
1000 old_text: None,
1001 new_text: None,
1002 similarity: Some(1.0),
1003 children: None,
1004 base_changes: None,
1005 });
1006 }
1007 }
1008 }
1009
1010 let unmatched_a: Vec<(usize, &ExtractedNode)> = nodes_a
1012 .iter()
1013 .enumerate()
1014 .filter(|(i, _)| !matched_a[*i])
1015 .collect();
1016 let unmatched_b: Vec<(usize, &ExtractedNode)> = nodes_b
1017 .iter()
1018 .enumerate()
1019 .filter(|(i, _)| !matched_b[*i])
1020 .collect();
1021
1022 let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
1024
1025 for (_, node_a) in &unmatched_a {
1026 let mut best_match: Option<(usize, f64)> = None;
1027
1028 for (j, (_, node_b)) in unmatched_b.iter().enumerate() {
1029 if used_b[j] {
1030 continue;
1031 }
1032 if node_a.kind != node_b.kind {
1033 continue;
1034 }
1035
1036 let similarity = compute_similarity(&node_a.normalized_body, &node_b.normalized_body);
1037 if similarity >= RENAME_SIMILARITY_THRESHOLD
1038 && (best_match.is_none() || similarity > best_match.unwrap().1)
1039 {
1040 best_match = Some((j, similarity));
1041 }
1042 }
1043
1044 if let Some((j, similarity)) = best_match {
1045 let (_, node_b) = unmatched_b[j];
1046 used_b[j] = true;
1047
1048 changes.push(ASTChange {
1050 change_type: ChangeType::Rename,
1051 node_kind: node_a.kind,
1052 name: Some(node_a.name.clone()),
1053 old_location: Some(Location::with_column(
1054 file_a.display().to_string(),
1055 node_a.line,
1056 node_a.column,
1057 )),
1058 new_location: Some(Location::with_column(
1059 file_b.display().to_string(),
1060 node_b.line,
1061 node_b.column,
1062 )),
1063 old_text: Some(node_a.name.clone()),
1064 new_text: Some(node_b.name.clone()),
1065 similarity: Some(similarity),
1066 children: None,
1067 base_changes: None,
1068 });
1069 }
1070 }
1071
1072 for (_, node_a) in &unmatched_a {
1074 let is_renamed = changes
1076 .iter()
1077 .any(|c| c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&node_a.name));
1078 if !is_renamed {
1079 changes.push(ASTChange {
1080 change_type: ChangeType::Delete,
1081 node_kind: node_a.kind,
1082 name: Some(node_a.name.clone()),
1083 old_location: Some(Location::with_column(
1084 file_a.display().to_string(),
1085 node_a.line,
1086 node_a.column,
1087 )),
1088 new_location: None,
1089 old_text: None,
1090 new_text: None,
1091 similarity: None,
1092 children: None,
1093 base_changes: None,
1094 });
1095 }
1096 }
1097
1098 for (j, (_, node_b)) in unmatched_b.iter().enumerate() {
1100 if !used_b[j] {
1101 changes.push(ASTChange {
1102 change_type: ChangeType::Insert,
1103 node_kind: node_b.kind,
1104 name: Some(node_b.name.clone()),
1105 old_location: None,
1106 new_location: Some(Location::with_column(
1107 file_b.display().to_string(),
1108 node_b.line,
1109 node_b.column,
1110 )),
1111 old_text: None,
1112 new_text: None,
1113 similarity: None,
1114 children: None,
1115 base_changes: None,
1116 });
1117 }
1118 }
1119
1120 changes.sort_by_key(|c| match c.change_type {
1122 ChangeType::Delete => 0,
1123 ChangeType::Rename => 1,
1124 ChangeType::Update => 2,
1125 ChangeType::Move => 3,
1126 ChangeType::Insert => 4,
1127 _ => 5,
1128 });
1129
1130 changes
1131}
1132
1133fn compute_similarity(a: &str, b: &str) -> f64 {
1140 if a == b {
1141 return 1.0;
1142 }
1143 if a.is_empty() || b.is_empty() {
1144 return 0.0;
1145 }
1146
1147 let lines_a: std::collections::HashSet<&str> = a.lines().collect();
1149 let lines_b: std::collections::HashSet<&str> = b.lines().collect();
1150
1151 let intersection = lines_a.intersection(&lines_b).count();
1152 let union = lines_a.union(&lines_b).count();
1153
1154 let line_sim = if union == 0 {
1155 0.0
1156 } else {
1157 intersection as f64 / union as f64
1158 };
1159
1160 if line_sim == 0.0 && lines_a.len() <= 2 && lines_b.len() <= 2 {
1163 return char_jaccard_similarity(a, b);
1164 }
1165
1166 line_sim
1167}
1168
/// Jaccard index over the sets of adjacent byte pairs (bigrams) of `a` and `b`.
///
/// Inputs shorter than two bytes have no bigrams; they score `1.0` when the
/// strings are equal and `0.0` otherwise.
fn char_jaccard_similarity(a: &str, b: &str) -> f64 {
    // Distinct two-byte windows of the UTF-8 encoding of `text`.
    fn bigrams(text: &str) -> std::collections::HashSet<&[u8]> {
        text.as_bytes().windows(2).collect()
    }

    if a.len() < 2 || b.len() < 2 {
        return if a == b { 1.0 } else { 0.0 };
    }

    let grams_a = bigrams(a);
    let grams_b = bigrams(b);

    let shared = grams_a.iter().filter(|gram| grams_b.contains(*gram)).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    let total = grams_a.len() + grams_b.len() - shared;

    match total {
        0 => 0.0,
        _ => shared as f64 / total as f64,
    }
}
1187
1188fn format_diff_text(report: &DiffReport) -> String {
1194 let mut out = String::new();
1195
1196 out.push_str("Diff Report\n");
1197 out.push_str("===========\n\n");
1198 out.push_str(&format!("File A: {}\n", report.file_a));
1199 out.push_str(&format!("File B: {}\n", report.file_b));
1200 out.push_str(&format!("Identical: {}\n\n", report.identical));
1201
1202 if report.identical {
1203 out.push_str("No structural changes detected.\n");
1204 return out;
1205 }
1206
1207 out.push_str("Changes:\n");
1208 out.push_str("--------\n");
1209
1210 for change in &report.changes {
1211 let change_type = match change.change_type {
1212 ChangeType::Insert => "+",
1213 ChangeType::Delete => "-",
1214 ChangeType::Update => "~",
1215 ChangeType::Move => ">",
1216 ChangeType::Rename => "R",
1217 ChangeType::Format => "F",
1218 ChangeType::Extract => "E",
1219 ChangeType::Inline => "I",
1220 };
1221
1222 let kind = match change.node_kind {
1223 NodeKind::Function => "function",
1224 NodeKind::Class => "class",
1225 NodeKind::Method => "method",
1226 NodeKind::Field => "field",
1227 NodeKind::Statement => "statement",
1228 NodeKind::Expression => "expression",
1229 NodeKind::Block => "block",
1230 };
1231
1232 let name = change.name.as_deref().unwrap_or("<unknown>");
1233
1234 match change.change_type {
1235 ChangeType::Insert => {
1236 if let Some(ref loc) = change.new_location {
1237 out.push_str(&format!(
1238 " {} {} {} at {}:{}\n",
1239 change_type, kind, name, loc.file, loc.line
1240 ));
1241 }
1242 }
1243 ChangeType::Delete => {
1244 if let Some(ref loc) = change.old_location {
1245 out.push_str(&format!(
1246 " {} {} {} at {}:{}\n",
1247 change_type, kind, name, loc.file, loc.line
1248 ));
1249 }
1250 }
1251 ChangeType::Update | ChangeType::Move => {
1252 if let (Some(ref old), Some(ref new)) = (&change.old_location, &change.new_location)
1253 {
1254 out.push_str(&format!(
1255 " {} {} {} from {}:{} to {}:{}\n",
1256 change_type, kind, name, old.file, old.line, new.file, new.line
1257 ));
1258 }
1259 }
1260 ChangeType::Rename => {
1261 let old_name = change.old_text.as_deref().unwrap_or(name);
1262 let new_name = change.new_text.as_deref().unwrap_or(name);
1263 out.push_str(&format!(
1264 " {} {} {} -> {}\n",
1265 change_type, kind, old_name, new_name
1266 ));
1267 }
1268 _ => {
1269 out.push_str(&format!(" {} {} {}\n", change_type, kind, name));
1270 }
1271 }
1272 }
1273
1274 if let Some(ref summary) = report.summary {
1275 out.push_str("\nSummary:\n");
1276 out.push_str("--------\n");
1277 out.push_str(&format!(" Total changes: {}\n", summary.total_changes));
1278 out.push_str(&format!(
1279 " Semantic changes: {}\n",
1280 summary.semantic_changes
1281 ));
1282 out.push_str(&format!(" Inserts: {}\n", summary.inserts));
1283 out.push_str(&format!(" Deletes: {}\n", summary.deletes));
1284 out.push_str(&format!(" Updates: {}\n", summary.updates));
1285 out.push_str(&format!(" Renames: {}\n", summary.renames));
1286 out.push_str(&format!(" Moves: {}\n", summary.moves));
1287 }
1288
1289 if let Some(ref file_changes) = report.file_changes {
1291 out.push_str("\nFile-Level Changes:\n");
1292 out.push_str("-------------------\n");
1293 for fc in file_changes {
1294 let change_type = match fc.change_type {
1295 ChangeType::Insert => "+",
1296 ChangeType::Delete => "-",
1297 ChangeType::Update => "~",
1298 _ => "?",
1299 };
1300 out.push_str(&format!(" {} {}\n", change_type, fc.relative_path));
1301 if let Some(ref sigs) = fc.signature_changes {
1302 for sig in sigs {
1303 out.push_str(&format!(" changed: {}\n", sig));
1304 }
1305 }
1306 }
1307 }
1308
1309 if let Some(ref module_changes) = report.module_changes {
1311 out.push_str("\nModule-Level Changes:\n");
1312 out.push_str("---------------------\n");
1313 for mc in module_changes {
1314 let change_type = match mc.change_type {
1315 ChangeType::Insert => "+",
1316 ChangeType::Delete => "-",
1317 ChangeType::Update => "~",
1318 _ => "?",
1319 };
1320 out.push_str(&format!(" {} {}\n", change_type, mc.module_path));
1321 for edge in &mc.imports_added {
1322 let names = if edge.imported_names.is_empty() {
1323 String::new()
1324 } else {
1325 format!(" ({})", edge.imported_names.join(", "))
1326 };
1327 out.push_str(&format!(" + import {}{}\n", edge.target_module, names));
1328 }
1329 for edge in &mc.imports_removed {
1330 let names = if edge.imported_names.is_empty() {
1331 String::new()
1332 } else {
1333 format!(" ({})", edge.imported_names.join(", "))
1334 };
1335 out.push_str(&format!(" - import {}{}\n", edge.target_module, names));
1336 }
1337 }
1338 }
1339
1340 if let Some(ref igs) = report.import_graph_summary {
1342 out.push_str("\nImport Graph Summary:\n");
1343 out.push_str("---------------------\n");
1344 out.push_str(&format!(" Edges in A: {}\n", igs.total_edges_a));
1345 out.push_str(&format!(" Edges in B: {}\n", igs.total_edges_b));
1346 out.push_str(&format!(" Edges added: {}\n", igs.edges_added));
1347 out.push_str(&format!(" Edges removed: {}\n", igs.edges_removed));
1348 out.push_str(&format!(
1349 " Modules with import changes: {}\n",
1350 igs.modules_with_import_changes
1351 ));
1352 }
1353
1354 if let Some(ref arch_changes) = report.arch_changes {
1356 out.push_str("\nArchitecture-Level Changes:\n");
1357 out.push_str("---------------------------\n");
1358 for ac in arch_changes {
1359 let change_label = match ac.change_type {
1360 ArchChangeType::LayerMigration => "migration",
1361 ArchChangeType::Added => "added",
1362 ArchChangeType::Removed => "removed",
1363 ArchChangeType::CompositionChanged => "composition changed",
1364 ArchChangeType::CycleIntroduced => "cycle introduced",
1365 ArchChangeType::CycleResolved => "cycle resolved",
1366 };
1367 out.push_str(&format!(" [{}] {}\n", change_label, ac.directory));
1368 if let (Some(ref old), Some(ref new)) = (&ac.old_layer, &ac.new_layer) {
1369 out.push_str(&format!(" {} -> {}\n", old, new));
1370 } else if let Some(ref new) = ac.new_layer {
1371 out.push_str(&format!(" -> {}\n", new));
1372 } else if let Some(ref old) = ac.old_layer {
1373 out.push_str(&format!(" {} ->\n", old));
1374 }
1375 if !ac.migrated_functions.is_empty() {
1376 out.push_str(&format!(
1377 " migrated: {}\n",
1378 ac.migrated_functions.join(", ")
1379 ));
1380 }
1381 }
1382 }
1383
1384 if let Some(ref arch_summary) = report.arch_summary {
1386 out.push_str("\nArchitecture Summary:\n");
1387 out.push_str("---------------------\n");
1388 out.push_str(&format!(
1389 " Layer migrations: {}\n",
1390 arch_summary.layer_migrations
1391 ));
1392 out.push_str(&format!(
1393 " Directories added: {}\n",
1394 arch_summary.directories_added
1395 ));
1396 out.push_str(&format!(
1397 " Directories removed: {}\n",
1398 arch_summary.directories_removed
1399 ));
1400 out.push_str(&format!(
1401 " Cycles introduced: {}\n",
1402 arch_summary.cycles_introduced
1403 ));
1404 out.push_str(&format!(
1405 " Cycles resolved: {}\n",
1406 arch_summary.cycles_resolved
1407 ));
1408 out.push_str(&format!(
1409 " Stability score: {}\n",
1410 arch_summary.stability_score
1411 ));
1412 }
1413
1414 out
1415}
1416
1417fn get_statement_node_kinds(lang: Language) -> &'static [&'static str] {
1423 match lang {
1424 Language::Python => &[
1425 "return_statement",
1426 "if_statement",
1427 "for_statement",
1428 "while_statement",
1429 "expression_statement",
1430 "assert_statement",
1431 "raise_statement",
1432 "try_statement",
1433 "with_statement",
1434 "assignment",
1435 "augmented_assignment",
1436 "delete_statement",
1437 "pass_statement",
1438 "break_statement",
1439 "continue_statement",
1440 ],
1441 Language::TypeScript | Language::JavaScript => &[
1442 "return_statement",
1443 "if_statement",
1444 "for_statement",
1445 "for_in_statement",
1446 "while_statement",
1447 "do_statement",
1448 "expression_statement",
1449 "variable_declaration",
1450 "lexical_declaration",
1451 "throw_statement",
1452 "try_statement",
1453 "switch_statement",
1454 "break_statement",
1455 "continue_statement",
1456 ],
1457 Language::Go => &[
1458 "return_statement",
1459 "if_statement",
1460 "for_statement",
1461 "expression_statement",
1462 "short_var_declaration",
1463 "var_declaration",
1464 "assignment_statement",
1465 "go_statement",
1466 "defer_statement",
1467 "select_statement",
1468 "switch_statement",
1469 ],
1470 Language::Rust => &[
1471 "let_declaration",
1472 "expression_statement",
1473 "return_expression",
1474 "if_expression",
1475 "for_expression",
1476 "while_expression",
1477 "loop_expression",
1478 "match_expression",
1479 ],
1480 Language::Java => &[
1481 "return_statement",
1482 "if_statement",
1483 "for_statement",
1484 "enhanced_for_statement",
1485 "while_statement",
1486 "do_statement",
1487 "expression_statement",
1488 "local_variable_declaration",
1489 "throw_statement",
1490 "try_statement",
1491 "switch_expression",
1492 ],
1493 Language::C | Language::Cpp => &[
1494 "return_statement",
1495 "if_statement",
1496 "for_statement",
1497 "while_statement",
1498 "do_statement",
1499 "expression_statement",
1500 "declaration",
1501 "switch_statement",
1502 ],
1503 Language::Ruby => &[
1504 "return",
1505 "if",
1506 "unless",
1507 "for",
1508 "while",
1509 "until",
1510 "assignment",
1511 "call",
1512 "begin",
1513 ],
1514 Language::Php => &[
1515 "return_statement",
1516 "if_statement",
1517 "for_statement",
1518 "foreach_statement",
1519 "while_statement",
1520 "expression_statement",
1521 "echo_statement",
1522 "throw_expression",
1523 "try_statement",
1524 ],
1525 Language::CSharp => &[
1526 "return_statement",
1527 "if_statement",
1528 "for_statement",
1529 "foreach_statement",
1530 "while_statement",
1531 "expression_statement",
1532 "local_declaration_statement",
1533 "throw_statement",
1534 "try_statement",
1535 ],
1536 Language::Kotlin => &[
1537 "property_declaration",
1538 "assignment",
1539 "if_expression",
1540 "for_statement",
1541 "while_statement",
1542 "do_while_statement",
1543 "return_expression",
1544 "throw_expression",
1545 "try_expression",
1546 ],
1547 Language::Scala => &[
1548 "val_definition",
1549 "var_definition",
1550 "if_expression",
1551 "for_expression",
1552 "while_expression",
1553 "return_expression",
1554 "throw_expression",
1555 "try_expression",
1556 "call_expression",
1557 ],
1558 Language::Swift => &[
1559 "value_binding_pattern",
1560 "if_statement",
1561 "for_in_statement",
1562 "while_statement",
1563 "return_statement",
1564 "throw_statement",
1565 "guard_statement",
1566 "switch_statement",
1567 ],
1568 Language::Elixir => &["call", "if", "case", "cond"],
1569 Language::Lua | Language::Luau => &[
1570 "return_statement",
1571 "if_statement",
1572 "for_statement",
1573 "while_statement",
1574 "variable_declaration",
1575 "assignment_statement",
1576 "function_call",
1577 ],
1578 Language::Ocaml => &[
1579 "let_binding",
1580 "if_expression",
1581 "match_expression",
1582 "application",
1583 ],
1584 }
1585}
1586
/// A node of the statement-level tree built from a function's syntax tree.
///
/// Only nodes whose kind is a recognized statement kind (plus the synthetic
/// root created for a function body) appear in this tree.
#[derive(Debug, Clone)]
struct LabeledTreeNode {
    /// Label used when comparing nodes: `"<kind>:<first line of text>"` for
    /// statements, `"body:<function kind>"` for the synthetic body root.
    label: String,
    /// Statement nodes nested below this one, in source order.
    children: Vec<LabeledTreeNode>,
    /// 1-based line on which the underlying syntax node starts.
    line: u32,
}
1597
/// One entry of a statement tree flattened in post-order (children before
/// their parent).
#[derive(Debug, Clone)]
struct PostorderNode {
    /// Label copied from the originating `LabeledTreeNode`.
    label: String,
    /// 1-based source line copied from the originating `LabeledTreeNode`.
    line: u32,
    /// Post-order index of the leftmost leaf of this node's subtree
    /// (the node's own index when it has no children).
    leftmost_leaf: usize,
}
1606
/// A single step of an edit script between two post-order node sequences.
///
/// Indices refer to positions in the respective `PostorderNode` slices.
#[derive(Debug, Clone)]
enum EditOp {
    /// The node at `index_a` exists only in the first sequence.
    Delete { index_a: usize },
    /// The node at `index_b` exists only in the second sequence.
    Insert { index_b: usize },
    /// The node at `index_a` is paired with the node at `index_b`, but their
    /// labels differ.
    Relabel { index_a: usize, index_b: usize },
}
1617
1618fn build_labeled_tree(node: Node, source: &[u8], statement_kinds: &[&str]) -> LabeledTreeNode {
1624 let label = build_node_label(node, source);
1625 let line = node.start_position().row as u32 + 1;
1626
1627 let mut children = Vec::new();
1628 let mut cursor = node.walk();
1629 for child in node.children(&mut cursor) {
1630 if statement_kinds.contains(&child.kind()) {
1631 children.push(build_labeled_tree(child, source, statement_kinds));
1633 } else {
1634 let nested = collect_nested_statements(child, source, statement_kinds);
1636 children.extend(nested);
1637 }
1638 }
1639
1640 LabeledTreeNode {
1641 label,
1642 children,
1643 line,
1644 }
1645}
1646
1647fn collect_nested_statements(
1649 node: Node,
1650 source: &[u8],
1651 statement_kinds: &[&str],
1652) -> Vec<LabeledTreeNode> {
1653 let mut result = Vec::new();
1654 let mut cursor = node.walk();
1655 for child in node.children(&mut cursor) {
1656 if statement_kinds.contains(&child.kind()) {
1657 result.push(build_labeled_tree(child, source, statement_kinds));
1658 } else {
1659 result.extend(collect_nested_statements(child, source, statement_kinds));
1660 }
1661 }
1662 result
1663}
1664
1665fn build_node_label(node: Node, source: &[u8]) -> String {
1670 let kind = node.kind();
1671 let text = node.utf8_text(source).unwrap_or("");
1672
1673 let first_line = text.lines().next().unwrap_or("").trim();
1676
1677 let significant = if first_line.len() > 120 {
1679 &first_line[..120]
1680 } else {
1681 first_line
1682 };
1683
1684 format!("{}:{}", kind, significant)
1685}
1686
1687fn extract_statement_tree(
1692 func_node: Node,
1693 source: &[u8],
1694 lang: Language,
1695 statement_kinds: &[&str],
1696) -> LabeledTreeNode {
1697 let body_node = find_function_body(func_node, lang);
1699
1700 match body_node {
1701 Some(body) => {
1702 let mut children = Vec::new();
1704 let mut cursor = body.walk();
1705 for child in body.children(&mut cursor) {
1706 if statement_kinds.contains(&child.kind()) {
1707 children.push(build_labeled_tree(child, source, statement_kinds));
1708 } else {
1709 children.extend(collect_nested_statements(child, source, statement_kinds));
1710 }
1711 }
1712
1713 LabeledTreeNode {
1714 label: format!("body:{}", func_node.kind()),
1715 children,
1716 line: body.start_position().row as u32 + 1,
1717 }
1718 }
1719 None => {
1720 build_labeled_tree(func_node, source, statement_kinds)
1722 }
1723 }
1724}
1725
1726fn find_function_body(func_node: Node, lang: Language) -> Option<Node> {
1728 if let Some(body) = func_node.child_by_field_name("body") {
1730 return Some(body);
1731 }
1732 if let Some(body) = func_node.child_by_field_name("block") {
1733 return Some(body);
1734 }
1735
1736 let body_kinds = match lang {
1738 Language::Python => &["block"][..],
1739 Language::TypeScript | Language::JavaScript => &["statement_block"],
1740 Language::Go => &["block"],
1741 Language::Rust => &["block"],
1742 Language::Java => &["block"],
1743 Language::C | Language::Cpp => &["compound_statement"],
1744 Language::Ruby => &["body_statement"],
1745 Language::Php => &["compound_statement"],
1746 Language::CSharp => &["block"],
1747 Language::Kotlin => &["function_body"],
1748 Language::Scala => &["block", "indented_block"],
1749 Language::Swift => &["function_body"],
1750 Language::Elixir => &["do_block"],
1751 Language::Lua | Language::Luau => &["block"],
1752 Language::Ocaml => &["let_binding"],
1753 };
1754
1755 let mut cursor = func_node.walk();
1756 let found = func_node
1757 .children(&mut cursor)
1758 .find(|&child| body_kinds.contains(&child.kind()));
1759 found
1760}
1761
1762fn count_tree_nodes(tree: &LabeledTreeNode) -> usize {
1764 1 + tree.children.iter().map(count_tree_nodes).sum::<usize>()
1765}
1766
1767fn flatten_postorder(tree: &LabeledTreeNode) -> Vec<PostorderNode> {
1773 let mut nodes = Vec::new();
1774 flatten_postorder_recursive(tree, &mut nodes);
1775 nodes
1776}
1777
1778fn flatten_postorder_recursive(tree: &LabeledTreeNode, nodes: &mut Vec<PostorderNode>) -> usize {
1779 if tree.children.is_empty() {
1780 let idx = nodes.len();
1782 nodes.push(PostorderNode {
1783 label: tree.label.clone(),
1784 line: tree.line,
1785 leftmost_leaf: idx,
1786 });
1787 return idx;
1788 }
1789
1790 let mut first_child_leftmost = usize::MAX;
1792 for (i, child) in tree.children.iter().enumerate() {
1793 let child_leftmost = flatten_postorder_recursive(child, nodes);
1794 if i == 0 {
1795 first_child_leftmost = child_leftmost;
1796 }
1797 }
1798
1799 nodes.push(PostorderNode {
1801 label: tree.label.clone(),
1802 line: tree.line,
1803 leftmost_leaf: first_child_leftmost,
1804 });
1805
1806 first_child_leftmost
1808}
1809
1810fn compute_keyroots(nodes: &[PostorderNode]) -> Vec<usize> {
1816 let n = nodes.len();
1817 if n == 0 {
1818 return Vec::new();
1819 }
1820
1821 let mut lr_map: HashMap<usize, usize> = HashMap::new();
1823 for (i, node) in nodes.iter().enumerate() {
1824 lr_map.insert(node.leftmost_leaf, i);
1825 }
1826
1827 let mut keyroots: Vec<usize> = lr_map.into_values().collect();
1828 keyroots.sort();
1829 keyroots
1830}
1831
1832fn zhang_shasha(nodes_a: &[PostorderNode], nodes_b: &[PostorderNode]) -> Vec<EditOp> {
1838 let na = nodes_a.len();
1839 let nb = nodes_b.len();
1840
1841 if na == 0 && nb == 0 {
1842 return Vec::new();
1843 }
1844 if na == 0 {
1845 return (0..nb).map(|j| EditOp::Insert { index_b: j }).collect();
1847 }
1848 if nb == 0 {
1849 return (0..na).map(|i| EditOp::Delete { index_a: i }).collect();
1851 }
1852
1853 let keyroots_a = compute_keyroots(nodes_a);
1854 let keyroots_b = compute_keyroots(nodes_b);
1855
1856 let mut td = vec![vec![0usize; nb + 1]; na + 1];
1858 let mut td_ops = vec![vec![0u8; nb + 1]; na + 1];
1860
1861 for &kr_a in &keyroots_a {
1862 for &kr_b in &keyroots_b {
1863 let la = nodes_a[kr_a].leftmost_leaf;
1864 let lb = nodes_b[kr_b].leftmost_leaf;
1865
1866 let rows = kr_a - la + 2;
1867 let cols = kr_b - lb + 2;
1868 let mut fd = vec![vec![0usize; cols]; rows];
1869
1870 for i in 1..rows {
1872 fd[i][0] = fd[i - 1][0] + 1;
1873 }
1874 for j in 1..cols {
1875 fd[0][j] = fd[0][j - 1] + 1;
1876 }
1877
1878 for i in 1..rows {
1879 for j in 1..cols {
1880 let idx_a = la + i - 1;
1881 let idx_b = lb + j - 1;
1882
1883 let cost_relabel = if nodes_a[idx_a].label == nodes_b[idx_b].label {
1884 0
1885 } else {
1886 1
1887 };
1888
1889 if nodes_a[idx_a].leftmost_leaf == la && nodes_b[idx_b].leftmost_leaf == lb {
1890 let delete = fd[i - 1][j] + 1;
1891 let insert = fd[i][j - 1] + 1;
1892 let relabel = fd[i - 1][j - 1] + cost_relabel;
1893
1894 if relabel <= delete && relabel <= insert {
1895 fd[i][j] = relabel;
1896 td[idx_a + 1][idx_b + 1] = relabel;
1897 td_ops[idx_a + 1][idx_b + 1] = if cost_relabel == 0 { 0 } else { 3 };
1898 } else if delete <= insert {
1899 fd[i][j] = delete;
1900 td[idx_a + 1][idx_b + 1] = delete;
1901 td_ops[idx_a + 1][idx_b + 1] = 1;
1902 } else {
1903 fd[i][j] = insert;
1904 td[idx_a + 1][idx_b + 1] = insert;
1905 td_ops[idx_a + 1][idx_b + 1] = 2;
1906 }
1907 } else {
1908 let p = nodes_a[idx_a].leftmost_leaf - la;
1909 let q = nodes_b[idx_b].leftmost_leaf - lb;
1910
1911 let delete = fd[i - 1][j] + 1;
1912 let insert = fd[i][j - 1] + 1;
1913 let tree_match = fd[p][q] + td[idx_a + 1][idx_b + 1];
1914
1915 if tree_match <= delete && tree_match <= insert {
1916 fd[i][j] = tree_match;
1917 } else if delete <= insert {
1918 fd[i][j] = delete;
1919 } else {
1920 fd[i][j] = insert;
1921 }
1922 }
1923 }
1924 }
1925 }
1926 }
1927
1928 let mut ops = Vec::new();
1931 derive_edit_ops_dp(nodes_a, nodes_b, &mut ops);
1932 ops
1933}
1934
1935fn derive_edit_ops_dp(nodes_a: &[PostorderNode], nodes_b: &[PostorderNode], ops: &mut Vec<EditOp>) {
1940 let na = nodes_a.len();
1941 let nb = nodes_b.len();
1942
1943 let mut dp = vec![vec![0usize; nb + 1]; na + 1];
1944 let mut choice = vec![vec![0u8; nb + 1]; na + 1];
1945
1946 for i in 1..=na {
1947 dp[i][0] = i;
1948 choice[i][0] = 1;
1949 }
1950 for j in 1..=nb {
1951 dp[0][j] = j;
1952 choice[0][j] = 2;
1953 }
1954
1955 for i in 1..=na {
1956 for j in 1..=nb {
1957 let cost = if nodes_a[i - 1].label == nodes_b[j - 1].label {
1958 0
1959 } else {
1960 1
1961 };
1962
1963 let del = dp[i - 1][j] + 1;
1964 let ins = dp[i][j - 1] + 1;
1965 let sub = dp[i - 1][j - 1] + cost;
1966
1967 if sub <= del && sub <= ins {
1968 dp[i][j] = sub;
1969 choice[i][j] = if cost == 0 { 0 } else { 3 };
1970 } else if del <= ins {
1971 dp[i][j] = del;
1972 choice[i][j] = 1;
1973 } else {
1974 dp[i][j] = ins;
1975 choice[i][j] = 2;
1976 }
1977 }
1978 }
1979
1980 let mut i = na;
1982 let mut j = nb;
1983 let mut rev_ops = Vec::new();
1984
1985 while i > 0 || j > 0 {
1986 if i > 0 && j > 0 && (choice[i][j] == 0 || choice[i][j] == 3) {
1987 if choice[i][j] == 3 {
1988 rev_ops.push(EditOp::Relabel {
1989 index_a: i - 1,
1990 index_b: j - 1,
1991 });
1992 }
1993 i -= 1;
1994 j -= 1;
1995 } else if i > 0 && (j == 0 || choice[i][j] == 1) {
1996 rev_ops.push(EditOp::Delete { index_a: i - 1 });
1997 i -= 1;
1998 } else if j > 0 {
1999 rev_ops.push(EditOp::Insert { index_b: j - 1 });
2000 j -= 1;
2001 }
2002 }
2003
2004 rev_ops.reverse();
2005 ops.extend(rev_ops);
2006}
2007
2008fn edit_ops_to_ast_changes(
2010 ops: &[EditOp],
2011 nodes_a: &[PostorderNode],
2012 nodes_b: &[PostorderNode],
2013 file_a: &Path,
2014 file_b: &Path,
2015) -> Vec<ASTChange> {
2016 let mut changes = Vec::new();
2017
2018 for op in ops {
2019 match op {
2020 EditOp::Delete { index_a } => {
2021 let node = &nodes_a[*index_a];
2022 let stmt_kind = node.label.split(':').next().unwrap_or("statement");
2023 changes.push(ASTChange {
2024 change_type: ChangeType::Delete,
2025 node_kind: NodeKind::Statement,
2026 name: Some(stmt_kind.to_string()),
2027 old_location: Some(Location::new(file_a.display().to_string(), node.line)),
2028 new_location: None,
2029 old_text: Some(node.label.clone()),
2030 new_text: None,
2031 similarity: None,
2032 children: None,
2033 base_changes: None,
2034 });
2035 }
2036 EditOp::Insert { index_b } => {
2037 let node = &nodes_b[*index_b];
2038 let stmt_kind = node.label.split(':').next().unwrap_or("statement");
2039 changes.push(ASTChange {
2040 change_type: ChangeType::Insert,
2041 node_kind: NodeKind::Statement,
2042 name: Some(stmt_kind.to_string()),
2043 old_location: None,
2044 new_location: Some(Location::new(file_b.display().to_string(), node.line)),
2045 old_text: None,
2046 new_text: Some(node.label.clone()),
2047 similarity: None,
2048 children: None,
2049 base_changes: None,
2050 });
2051 }
2052 EditOp::Relabel { index_a, index_b } => {
2053 let node_a = &nodes_a[*index_a];
2054 let node_b = &nodes_b[*index_b];
2055 let stmt_kind = node_a.label.split(':').next().unwrap_or("statement");
2056 changes.push(ASTChange {
2057 change_type: ChangeType::Update,
2058 node_kind: NodeKind::Statement,
2059 name: Some(stmt_kind.to_string()),
2060 old_location: Some(Location::new(file_a.display().to_string(), node_a.line)),
2061 new_location: Some(Location::new(file_b.display().to_string(), node_b.line)),
2062 old_text: Some(node_a.label.clone()),
2063 new_text: Some(node_b.label.clone()),
2064 similarity: None,
2065 children: None,
2066 base_changes: None,
2067 });
2068 }
2069 }
2070 }
2071
2072 changes
2073}
2074
2075const STATEMENT_FALLBACK_THRESHOLD: usize = 200;
2077
2078impl DiffArgs {
2079 fn run_statement_level_diff(&self) -> Result<DiffReport> {
2091 let lang = Language::from_path(&self.file_a).ok_or_else(|| {
2093 let ext = self
2094 .file_a
2095 .extension()
2096 .map(|e| e.to_string_lossy().to_string())
2097 .unwrap_or_else(|| "unknown".to_string());
2098 RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
2099 })?;
2100
2101 let source_a = fs::read_to_string(&self.file_a)?;
2103 let source_b = fs::read_to_string(&self.file_b)?;
2104
2105 let pool = ParserPool::new();
2107 let tree_a = pool.parse(&source_a, lang).map_err(|e| {
2108 RemainingError::parse_error(&self.file_a, format!("Failed to parse: {}", e))
2109 })?;
2110 let tree_b = pool.parse(&source_b, lang).map_err(|e| {
2111 RemainingError::parse_error(&self.file_b, format!("Failed to parse: {}", e))
2112 })?;
2113
2114 let funcs_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
2116 let funcs_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
2117
2118 let statement_kinds = get_statement_node_kinds(lang);
2119
2120 let map_b: HashMap<&str, (usize, &ExtractedNode)> = funcs_b
2122 .iter()
2123 .enumerate()
2124 .map(|(i, n)| (n.name.as_str(), (i, n)))
2125 .collect();
2126
2127 let mut matched_a: Vec<bool> = vec![false; funcs_a.len()];
2128 let mut matched_b: Vec<bool> = vec![false; funcs_b.len()];
2129 let mut changes = Vec::new();
2130
2131 for (i, func_a) in funcs_a.iter().enumerate() {
2133 if let Some(&(j, func_b)) = map_b.get(func_a.name.as_str()) {
2134 matched_a[i] = true;
2135 matched_b[j] = true;
2136
2137 if func_a.normalized_body != func_b.normalized_body {
2139 let func_node_a =
2141 find_function_node_by_line(tree_a.root_node(), func_a.line, lang);
2142 let func_node_b =
2143 find_function_node_by_line(tree_b.root_node(), func_b.line, lang);
2144
2145 let stmt_children = match (func_node_a, func_node_b) {
2146 (Some(node_a), Some(node_b)) => {
2147 let tree_a_stmts = extract_statement_tree(
2149 node_a,
2150 source_a.as_bytes(),
2151 lang,
2152 statement_kinds,
2153 );
2154 let tree_b_stmts = extract_statement_tree(
2155 node_b,
2156 source_b.as_bytes(),
2157 lang,
2158 statement_kinds,
2159 );
2160
2161 let count_a = count_tree_nodes(&tree_a_stmts);
2162 let count_b = count_tree_nodes(&tree_b_stmts);
2163
2164 if count_a > STATEMENT_FALLBACK_THRESHOLD
2166 || count_b > STATEMENT_FALLBACK_THRESHOLD
2167 {
2168 None
2170 } else {
2171 let po_a = flatten_postorder(&tree_a_stmts);
2173 let po_b = flatten_postorder(&tree_b_stmts);
2174
2175 let edit_ops = zhang_shasha(&po_a, &po_b);
2176
2177 if edit_ops.is_empty() {
2178 None
2179 } else {
2180 let stmt_changes = edit_ops_to_ast_changes(
2181 &edit_ops,
2182 &po_a,
2183 &po_b,
2184 &self.file_a,
2185 &self.file_b,
2186 );
2187 if stmt_changes.is_empty() {
2188 None
2189 } else {
2190 Some(stmt_changes)
2191 }
2192 }
2193 }
2194 }
2195 _ => None,
2196 };
2197
2198 changes.push(ASTChange {
2199 change_type: ChangeType::Update,
2200 node_kind: func_a.kind,
2201 name: Some(func_a.name.clone()),
2202 old_location: Some(Location::with_column(
2203 self.file_a.display().to_string(),
2204 func_a.line,
2205 func_a.column,
2206 )),
2207 new_location: Some(Location::with_column(
2208 self.file_b.display().to_string(),
2209 func_b.line,
2210 func_b.column,
2211 )),
2212 old_text: Some(func_a.body.clone()),
2213 new_text: Some(func_b.body.clone()),
2214 similarity: Some(compute_similarity(
2215 &func_a.normalized_body,
2216 &func_b.normalized_body,
2217 )),
2218 children: stmt_children,
2219 base_changes: None,
2220 });
2221 }
2222 }
2223 }
2224
2225 let unmatched_a: Vec<(usize, &ExtractedNode)> = funcs_a
2227 .iter()
2228 .enumerate()
2229 .filter(|(i, _)| !matched_a[*i])
2230 .collect();
2231 let unmatched_b: Vec<(usize, &ExtractedNode)> = funcs_b
2232 .iter()
2233 .enumerate()
2234 .filter(|(i, _)| !matched_b[*i])
2235 .collect();
2236
2237 let mut used_b = vec![false; unmatched_b.len()];
2238
2239 for (_, func_a) in &unmatched_a {
2240 let mut best_match: Option<(usize, f64)> = None;
2241 for (j, (_, func_b)) in unmatched_b.iter().enumerate() {
2242 if used_b[j] || func_a.kind != func_b.kind {
2243 continue;
2244 }
2245 let sim = compute_similarity(&func_a.normalized_body, &func_b.normalized_body);
2246 if sim >= RENAME_SIMILARITY_THRESHOLD
2247 && (best_match.is_none() || sim > best_match.unwrap().1)
2248 {
2249 best_match = Some((j, sim));
2250 }
2251 }
2252
2253 if let Some((j, sim)) = best_match {
2254 let (_, func_b) = unmatched_b[j];
2255 used_b[j] = true;
2256 changes.push(ASTChange {
2257 change_type: ChangeType::Rename,
2258 node_kind: func_a.kind,
2259 name: Some(func_a.name.clone()),
2260 old_location: Some(Location::with_column(
2261 self.file_a.display().to_string(),
2262 func_a.line,
2263 func_a.column,
2264 )),
2265 new_location: Some(Location::with_column(
2266 self.file_b.display().to_string(),
2267 func_b.line,
2268 func_b.column,
2269 )),
2270 old_text: Some(func_a.name.clone()),
2271 new_text: Some(func_b.name.clone()),
2272 similarity: Some(sim),
2273 children: None,
2274 base_changes: None,
2275 });
2276 }
2277 }
2278
2279 for (_, func_a) in &unmatched_a {
2281 let is_renamed = changes.iter().any(|c| {
2282 c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&func_a.name)
2283 });
2284 if !is_renamed {
2285 changes.push(ASTChange {
2286 change_type: ChangeType::Delete,
2287 node_kind: func_a.kind,
2288 name: Some(func_a.name.clone()),
2289 old_location: Some(Location::with_column(
2290 self.file_a.display().to_string(),
2291 func_a.line,
2292 func_a.column,
2293 )),
2294 new_location: None,
2295 old_text: None,
2296 new_text: None,
2297 similarity: None,
2298 children: None,
2299 base_changes: None,
2300 });
2301 }
2302 }
2303
2304 for (j, (_, func_b)) in unmatched_b.iter().enumerate() {
2306 if !used_b[j] {
2307 changes.push(ASTChange {
2308 change_type: ChangeType::Insert,
2309 node_kind: func_b.kind,
2310 name: Some(func_b.name.clone()),
2311 old_location: None,
2312 new_location: Some(Location::with_column(
2313 self.file_b.display().to_string(),
2314 func_b.line,
2315 func_b.column,
2316 )),
2317 old_text: None,
2318 new_text: None,
2319 similarity: None,
2320 children: None,
2321 base_changes: None,
2322 });
2323 }
2324 }
2325
2326 let mut summary = DiffSummary::default();
2328 for change in &changes {
2329 summary.total_changes += 1;
2330 if change.change_type != ChangeType::Format {
2331 summary.semantic_changes += 1;
2332 }
2333 match change.change_type {
2334 ChangeType::Insert => summary.inserts += 1,
2335 ChangeType::Delete => summary.deletes += 1,
2336 ChangeType::Update => summary.updates += 1,
2337 ChangeType::Move => summary.moves += 1,
2338 ChangeType::Rename => summary.renames += 1,
2339 ChangeType::Format => summary.formats += 1,
2340 ChangeType::Extract => summary.extracts += 1,
2341 ChangeType::Inline => {}
2342 }
2343 }
2344
2345 changes.sort_by_key(|c| match c.change_type {
2347 ChangeType::Delete => 0,
2348 ChangeType::Rename => 1,
2349 ChangeType::Update => 2,
2350 ChangeType::Move => 3,
2351 ChangeType::Insert => 4,
2352 _ => 5,
2353 });
2354
2355 Ok(DiffReport {
2356 file_a: self.file_a.display().to_string(),
2357 file_b: self.file_b.display().to_string(),
2358 identical: changes.is_empty(),
2359 changes,
2360 summary: Some(summary),
2361 granularity: DiffGranularity::Statement,
2362 file_changes: None,
2363 module_changes: None,
2364 import_graph_summary: None,
2365 arch_changes: None,
2366 arch_summary: None,
2367 })
2368 }
2369}
2370
2371fn find_function_node_by_line(root: Node, target_line: u32, lang: Language) -> Option<Node> {
2373 let func_kinds = get_function_node_kinds(lang);
2374 find_function_node_recursive(root, target_line, func_kinds)
2375}
2376
2377fn find_function_node_recursive<'a>(
2378 node: Node<'a>,
2379 target_line: u32,
2380 func_kinds: &[&str],
2381) -> Option<Node<'a>> {
2382 let line = node.start_position().row as u32 + 1;
2383
2384 if func_kinds.contains(&node.kind()) && line == target_line {
2385 return Some(node);
2386 }
2387
2388 let mut cursor = node.walk();
2389 for child in node.children(&mut cursor) {
2390 if let Some(found) = find_function_node_recursive(child, target_line, func_kinds) {
2391 return Some(found);
2392 }
2393 }
2394
2395 None
2396}
2397
/// A class-like definition extracted from a source file for class-level
/// diffing.
///
/// NOTE(review): the constructor for this type is outside this section; field
/// descriptions marked "presumably" are inferred from names and from the
/// parallel `ExtractedNode` type and should be confirmed there.
#[derive(Debug, Clone)]
struct ClassNode {
    /// Name of the class; for Go this is compared against method receiver
    /// type names to attach receiver methods.
    name: String,
    /// Presumably the 1-based line on which the definition starts.
    line: u32,
    /// Presumably the 1-based line on which the definition ends.
    end_line: u32,
    /// Presumably the start column of the definition.
    column: u32,
    /// Presumably the full source text of the definition.
    body: String,
    /// Presumably `body` after normalization, used for comparisons.
    normalized_body: String,
    /// Methods belonging to the class. For Go, top-level methods whose
    /// receiver type matches `name` are appended here.
    methods: Vec<ExtractedNode>,
    /// Presumably the fields declared by the class.
    fields: Vec<FieldNode>,
    /// Presumably the names of base classes / implemented types.
    bases: Vec<String>,
}
2424
/// A field of a class-like definition.
///
/// NOTE(review): this type is constructed outside this section; the field
/// descriptions below are inferred from names and should be confirmed there.
#[derive(Debug, Clone)]
struct FieldNode {
    /// Presumably the field's name.
    name: String,
    /// Presumably the 1-based line on which the field is declared.
    line: u32,
    /// Presumably the start column of the declaration.
    column: u32,
    /// Presumably the source text of the declaration.
    body: String,
    /// Presumably `body` after normalization, used for comparisons.
    normalized_body: String,
}
2439
2440pub fn run_class_diff(file_a: &Path, file_b: &Path, semantic_only: bool) -> Result<DiffReport> {
2445 if !file_a.exists() {
2447 return Err(RemainingError::file_not_found(file_a).into());
2448 }
2449 if !file_b.exists() {
2450 return Err(RemainingError::file_not_found(file_b).into());
2451 }
2452
2453 let lang = Language::from_path(file_a).ok_or_else(|| {
2455 let ext = file_a
2456 .extension()
2457 .map(|e| e.to_string_lossy().to_string())
2458 .unwrap_or_else(|| "unknown".to_string());
2459 RemainingError::parse_error(file_a, format!("Unsupported language: .{}", ext))
2460 })?;
2461
2462 let source_a = fs::read_to_string(file_a)?;
2464 let source_b = fs::read_to_string(file_b)?;
2465
2466 let pool = ParserPool::new();
2468 let tree_a = pool
2469 .parse(&source_a, lang)
2470 .map_err(|e| RemainingError::parse_error(file_a, format!("Failed to parse file: {}", e)))?;
2471 let tree_b = pool
2472 .parse(&source_b, lang)
2473 .map_err(|e| RemainingError::parse_error(file_b, format!("Failed to parse file: {}", e)))?;
2474
2475 let classes_a = extract_class_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
2477 let classes_b = extract_class_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
2478
2479 let changes = detect_class_changes(&classes_a, &classes_b, file_a, file_b, semantic_only);
2481
2482 let mut summary = DiffSummary::default();
2484 for change in &changes {
2485 summary.total_changes += 1;
2486 if change.change_type != ChangeType::Format {
2487 summary.semantic_changes += 1;
2488 }
2489 match change.change_type {
2490 ChangeType::Insert => summary.inserts += 1,
2491 ChangeType::Delete => summary.deletes += 1,
2492 ChangeType::Update => summary.updates += 1,
2493 ChangeType::Move => summary.moves += 1,
2494 ChangeType::Rename => summary.renames += 1,
2495 ChangeType::Format => summary.formats += 1,
2496 ChangeType::Extract => summary.extracts += 1,
2497 ChangeType::Inline => {}
2498 }
2499 }
2500
2501 let report = DiffReport {
2502 file_a: file_a.display().to_string(),
2503 file_b: file_b.display().to_string(),
2504 identical: changes.is_empty(),
2505 changes,
2506 summary: Some(summary),
2507 granularity: DiffGranularity::Class,
2508 file_changes: None,
2509 module_changes: None,
2510 import_graph_summary: None,
2511 arch_changes: None,
2512 arch_summary: None,
2513 };
2514
2515 Ok(report)
2516}
2517
2518fn run_class_diff_directory(dir_a: &Path, dir_b: &Path, semantic_only: bool) -> Result<DiffReport> {
2521 let files_a = collect_source_files(dir_a)?;
2522 let files_b = collect_source_files(dir_b)?;
2523
2524 let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
2525 let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
2526
2527 let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
2528
2529 let mut all_changes = Vec::new();
2530
2531 for rel_path in all_paths {
2532 match (map_a.get(rel_path), map_b.get(rel_path)) {
2533 (Some(path_a), Some(path_b)) => {
2534 match run_class_diff(path_a, path_b, semantic_only) {
2536 Ok(sub_report) => all_changes.extend(sub_report.changes),
2537 Err(_) => continue, }
2539 }
2540 (None, Some(_)) | (Some(_), None) => {
2541 continue;
2543 }
2544 (None, None) => unreachable!(),
2545 }
2546 }
2547
2548 let mut summary = DiffSummary::default();
2549 for change in &all_changes {
2550 summary.total_changes += 1;
2551 if change.change_type != ChangeType::Format {
2552 summary.semantic_changes += 1;
2553 }
2554 match change.change_type {
2555 ChangeType::Insert => summary.inserts += 1,
2556 ChangeType::Delete => summary.deletes += 1,
2557 ChangeType::Update => summary.updates += 1,
2558 ChangeType::Move => summary.moves += 1,
2559 ChangeType::Rename => summary.renames += 1,
2560 ChangeType::Format => summary.formats += 1,
2561 ChangeType::Extract => summary.extracts += 1,
2562 ChangeType::Inline => {}
2563 }
2564 }
2565
2566 Ok(DiffReport {
2567 file_a: dir_a.display().to_string(),
2568 file_b: dir_b.display().to_string(),
2569 identical: all_changes.is_empty(),
2570 changes: all_changes,
2571 summary: Some(summary),
2572 granularity: DiffGranularity::Class,
2573 file_changes: None,
2574 module_changes: None,
2575 import_graph_summary: None,
2576 arch_changes: None,
2577 arch_summary: None,
2578 })
2579}
2580
2581fn extract_class_nodes(root: Node, source: &[u8], lang: Language) -> Vec<ClassNode> {
2583 let mut classes = Vec::new();
2584 let class_kinds = get_class_node_kinds(lang);
2585 let func_kinds = get_function_node_kinds(lang);
2586 let body_kinds = get_class_body_kinds(lang);
2587
2588 extract_class_nodes_recursive(
2589 root,
2590 source,
2591 &mut classes,
2592 lang,
2593 func_kinds,
2594 class_kinds,
2595 body_kinds,
2596 );
2597
2598 if lang == Language::Go {
2601 associate_go_receiver_methods(root, source, lang, &mut classes);
2602 }
2603
2604 classes
2605}
2606
2607fn associate_go_receiver_methods(
2610 root: Node,
2611 source: &[u8],
2612 lang: Language,
2613 classes: &mut [ClassNode],
2614) {
2615 let source_str = std::str::from_utf8(source).unwrap_or("");
2616 let mut cursor = root.walk();
2617 for child in root.children(&mut cursor) {
2618 if child.kind() != "method_declaration" {
2619 continue;
2620 }
2621 let receiver_type = match extract_go_receiver_type(child, source) {
2623 Some(name) => name,
2624 None => continue,
2625 };
2626
2627 let method_name = match get_function_name(child, lang, source_str) {
2629 Some(name) => name,
2630 None => continue,
2631 };
2632
2633 let params = child
2634 .child_by_field_name("parameters")
2635 .map(|p| node_text(p, source).to_string())
2636 .unwrap_or_default();
2637
2638 let line = child.start_position().row as u32 + 1;
2639 let end_line = child.end_position().row as u32 + 1;
2640 let column = child.start_position().column as u32;
2641 let body = node_text(child, source).to_string();
2642
2643 let extracted =
2644 ExtractedNode::new(method_name, NodeKind::Method, line, end_line, column, body)
2645 .with_params(params)
2646 .with_method_kind();
2647
2648 for class in classes.iter_mut() {
2650 if class.name == receiver_type {
2651 class.methods.push(extracted);
2652 break;
2653 }
2654 }
2655 }
2656}
2657
2658fn extract_go_receiver_type(method_node: Node, source: &[u8]) -> Option<String> {
2663 let receiver = method_node.child_by_field_name("receiver")?;
2665 let mut recv_cursor = receiver.walk();
2666 for recv_child in receiver.children(&mut recv_cursor) {
2667 if recv_child.kind() == "parameter_declaration" {
2668 if let Some(type_node) = recv_child.child_by_field_name("type") {
2669 return extract_go_type_identifier(type_node, source);
2670 }
2671 }
2672 }
2673 None
2674}
2675
2676fn extract_go_type_identifier(type_node: Node, source: &[u8]) -> Option<String> {
2679 match type_node.kind() {
2680 "type_identifier" => Some(node_text(type_node, source).to_string()),
2681 "pointer_type" => {
2682 let mut cursor = type_node.walk();
2684 for child in type_node.children(&mut cursor) {
2685 if child.is_named() {
2686 return extract_go_type_identifier(child, source);
2687 }
2688 }
2689 None
2690 }
2691 _ => None,
2692 }
2693}
2694
2695fn extract_class_nodes_recursive(
2696 node: Node,
2697 source: &[u8],
2698 classes: &mut Vec<ClassNode>,
2699 lang: Language,
2700 func_kinds: &[&str],
2701 class_kinds: &[&str],
2702 body_kinds: &[&str],
2703) {
2704 let kind = node.kind();
2705
2706 if class_kinds.contains(&kind) {
2707 if let Some(class_node) = build_class_node(node, source, lang, func_kinds, body_kinds) {
2708 classes.push(class_node);
2709 }
2710 return; }
2712
2713 for child in node.children(&mut node.walk()) {
2714 extract_class_nodes_recursive(
2715 child,
2716 source,
2717 classes,
2718 lang,
2719 func_kinds,
2720 class_kinds,
2721 body_kinds,
2722 );
2723 }
2724}
2725
2726fn build_class_node(
2728 node: Node,
2729 source: &[u8],
2730 lang: Language,
2731 func_kinds: &[&str],
2732 body_kinds: &[&str],
2733) -> Option<ClassNode> {
2734 let class_name = node
2736 .child_by_field_name("name")
2737 .map(|n| node_text(n, source).to_string())
2738 .or_else(|| {
2739 if lang == Language::Go && node.kind() == "type_declaration" {
2742 let mut cursor = node.walk();
2743 for child in node.children(&mut cursor) {
2744 if child.kind() == "type_spec" {
2745 if let Some(name_node) = child.child_by_field_name("name") {
2746 return Some(node_text(name_node, source).to_string());
2747 }
2748 }
2749 }
2750 }
2751 let mut cursor = node.walk();
2753 for child in node.children(&mut cursor) {
2754 if child.kind() == "identifier"
2755 || child.kind() == "type_identifier"
2756 || child.kind() == "constant"
2757 {
2758 return Some(node_text(child, source).to_string());
2759 }
2760 }
2761 None
2762 })?;
2763
2764 if class_name.is_empty() {
2765 return None;
2766 }
2767
2768 let line = node.start_position().row as u32 + 1;
2769 let end_line = node.end_position().row as u32 + 1;
2770 let column = node.start_position().column as u32;
2771 let body = node_text(node, source).to_string();
2772 let normalized_body = normalize_body(&body);
2773
2774 let bases = extract_bases(node, source, lang);
2776
2777 let mut methods = Vec::new();
2779 let mut fields = Vec::new();
2780
2781 for child in node.children(&mut node.walk()) {
2782 if body_kinds.contains(&child.kind()) {
2783 extract_class_members(child, source, lang, func_kinds, &mut methods, &mut fields);
2784 }
2785 }
2786
2787 Some(ClassNode {
2788 name: class_name,
2789 line,
2790 end_line,
2791 column,
2792 body,
2793 normalized_body,
2794 methods,
2795 fields,
2796 bases,
2797 })
2798}
2799
2800fn extract_bases(node: Node, source: &[u8], lang: Language) -> Vec<String> {
2802 let mut bases = Vec::new();
2803
2804 match lang {
2805 Language::Python => {
2806 if let Some(superclasses) = node.child_by_field_name("superclasses") {
2809 for child in superclasses.children(&mut superclasses.walk()) {
2810 let text = node_text(child, source).trim().to_string();
2811 if !text.is_empty() && text != "(" && text != ")" && text != "," {
2812 bases.push(text);
2813 }
2814 }
2815 }
2816 }
2817 _ => {
2818 }
2821 }
2822
2823 bases
2824}
2825
2826fn extract_class_members(
2828 body_node: Node,
2829 source: &[u8],
2830 lang: Language,
2831 func_kinds: &[&str],
2832 methods: &mut Vec<ExtractedNode>,
2833 fields: &mut Vec<FieldNode>,
2834) {
2835 for child in body_node.children(&mut body_node.walk()) {
2836 let kind = child.kind();
2837
2838 if func_kinds.contains(&kind) {
2840 let source_str = std::str::from_utf8(source).unwrap_or("");
2841 if let Some(func_name) = get_function_name(child, lang, source_str) {
2842 let params = child
2843 .child_by_field_name("parameters")
2844 .or_else(|| child.child_by_field_name("formal_parameters"))
2845 .map(|p| node_text(p, source).to_string())
2846 .unwrap_or_default();
2847
2848 let line = child.start_position().row as u32 + 1;
2849 let end_line = child.end_position().row as u32 + 1;
2850 let column = child.start_position().column as u32;
2851 let body = node_text(child, source).to_string();
2852
2853 let extracted =
2854 ExtractedNode::new(func_name, NodeKind::Method, line, end_line, column, body)
2855 .with_params(params)
2856 .with_method_kind();
2857
2858 methods.push(extracted);
2859 }
2860 }
2861 else if kind == "expression_statement" {
2863 if let Some(field) = extract_field_from_statement(child, source, lang) {
2864 fields.push(field);
2865 }
2866 }
2867 }
2868}
2869
2870fn extract_field_from_statement(node: Node, source: &[u8], _lang: Language) -> Option<FieldNode> {
2872 for child in node.children(&mut node.walk()) {
2874 if child.kind() == "assignment" {
2875 if let Some(left) = child.child_by_field_name("left") {
2877 let name = node_text(left, source).trim().to_string();
2878 if !name.is_empty() && !name.contains('.') {
2879 let line = node.start_position().row as u32 + 1;
2881 let column = node.start_position().column as u32;
2882 let body = node_text(node, source).to_string();
2883 let normalized_body = body.trim().to_string();
2884
2885 return Some(FieldNode {
2886 name,
2887 line,
2888 column,
2889 body,
2890 normalized_body,
2891 });
2892 }
2893 }
2894 }
2895 }
2896 None
2897}
2898
2899fn detect_class_changes(
2901 classes_a: &[ClassNode],
2902 classes_b: &[ClassNode],
2903 file_a: &Path,
2904 file_b: &Path,
2905 _semantic_only: bool,
2906) -> Vec<ASTChange> {
2907 let mut changes = Vec::new();
2908
2909 let mut index_b: HashMap<&str, Vec<usize>> = HashMap::new();
2918 for (j, c) in classes_b.iter().enumerate() {
2919 index_b.entry(c.name.as_str()).or_default().push(j);
2920 }
2921
2922 let mut matched_a: Vec<bool> = vec![false; classes_a.len()];
2924 let mut matched_b: Vec<bool> = vec![false; classes_b.len()];
2925
2926 for (i, class_a) in classes_a.iter().enumerate() {
2936 let candidates = match index_b.get(class_a.name.as_str()) {
2937 Some(c) => c,
2938 None => continue,
2939 };
2940
2941 let chosen = candidates
2942 .iter()
2943 .copied()
2944 .filter(|&j| !matched_b[j])
2945 .min_by_key(|&j| {
2946 let c_b = &classes_b[j];
2947 let body_mismatch = (class_a.normalized_body != c_b.normalized_body) as u32;
2950 let raw_body_mismatch = (class_a.body != c_b.body) as u32;
2951 let span_a = (class_a.end_line as i64 - class_a.line as i64).unsigned_abs() as u32;
2952 let span_b = (c_b.end_line as i64 - c_b.line as i64).unsigned_abs() as u32;
2953 let span_diff = (span_a as i64 - span_b as i64).unsigned_abs() as u32;
2954 let line_diff = (class_a.line as i64 - c_b.line as i64).unsigned_abs() as u32;
2955 (body_mismatch, raw_body_mismatch, span_diff, line_diff)
2956 });
2957
2958 if let Some(j) = chosen {
2959 matched_a[i] = true;
2960 matched_b[j] = true;
2961 let class_b = &classes_b[j];
2962
2963 if let Some(change) = diff_class_pair(class_a, class_b, file_a, file_b) {
2965 changes.push(change);
2966 }
2967 }
2968 }
2969
2970 let unmatched_a: Vec<(usize, &ClassNode)> = classes_a
2972 .iter()
2973 .enumerate()
2974 .filter(|(i, _)| !matched_a[*i])
2975 .collect();
2976 let unmatched_b: Vec<(usize, &ClassNode)> = classes_b
2977 .iter()
2978 .enumerate()
2979 .filter(|(i, _)| !matched_b[*i])
2980 .collect();
2981
2982 let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
2984
2985 for (_, class_a) in &unmatched_a {
2986 let mut best_match: Option<(usize, f64)> = None;
2987
2988 for (j, (_, class_b)) in unmatched_b.iter().enumerate() {
2989 if used_b[j] {
2990 continue;
2991 }
2992
2993 let similarity = compute_class_similarity(class_a, class_b);
2994 if similarity >= RENAME_SIMILARITY_THRESHOLD
2995 && (best_match.is_none() || similarity > best_match.unwrap().1)
2996 {
2997 best_match = Some((j, similarity));
2998 }
2999 }
3000
3001 if let Some((j, similarity)) = best_match {
3002 let (_, class_b) = unmatched_b[j];
3003 used_b[j] = true;
3004
3005 changes.push(ASTChange {
3006 change_type: ChangeType::Rename,
3007 node_kind: NodeKind::Class,
3008 name: Some(class_a.name.clone()),
3009 old_location: Some(Location::with_column(
3010 file_a.display().to_string(),
3011 class_a.line,
3012 class_a.column,
3013 )),
3014 new_location: Some(Location::with_column(
3015 file_b.display().to_string(),
3016 class_b.line,
3017 class_b.column,
3018 )),
3019 old_text: Some(class_a.name.clone()),
3020 new_text: Some(class_b.name.clone()),
3021 similarity: Some(similarity),
3022 children: None,
3023 base_changes: None,
3024 });
3025 }
3026 }
3027
3028 for (_, class_a) in &unmatched_a {
3030 let is_renamed = changes
3031 .iter()
3032 .any(|c| c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&class_a.name));
3033 if !is_renamed {
3034 changes.push(ASTChange {
3035 change_type: ChangeType::Delete,
3036 node_kind: NodeKind::Class,
3037 name: Some(class_a.name.clone()),
3038 old_location: Some(Location::with_column(
3039 file_a.display().to_string(),
3040 class_a.line,
3041 class_a.column,
3042 )),
3043 new_location: None,
3044 old_text: None,
3045 new_text: None,
3046 similarity: None,
3047 children: None,
3048 base_changes: None,
3049 });
3050 }
3051 }
3052
3053 for (j, (_, class_b)) in unmatched_b.iter().enumerate() {
3055 if !used_b[j] {
3056 changes.push(ASTChange {
3057 change_type: ChangeType::Insert,
3058 node_kind: NodeKind::Class,
3059 name: Some(class_b.name.clone()),
3060 old_location: None,
3061 new_location: Some(Location::with_column(
3062 file_b.display().to_string(),
3063 class_b.line,
3064 class_b.column,
3065 )),
3066 old_text: None,
3067 new_text: None,
3068 similarity: None,
3069 children: None,
3070 base_changes: None,
3071 });
3072 }
3073 }
3074
3075 changes.sort_by_key(|c| match c.change_type {
3077 ChangeType::Delete => 0,
3078 ChangeType::Rename => 1,
3079 ChangeType::Update => 2,
3080 ChangeType::Move => 3,
3081 ChangeType::Insert => 4,
3082 _ => 5,
3083 });
3084
3085 changes
3086}
3087
3088fn diff_class_pair(
3090 class_a: &ClassNode,
3091 class_b: &ClassNode,
3092 file_a: &Path,
3093 file_b: &Path,
3094) -> Option<ASTChange> {
3095 let mut children = Vec::new();
3096 let mut has_changes = false;
3097
3098 diff_methods(
3100 &class_a.methods,
3101 &class_b.methods,
3102 file_a,
3103 file_b,
3104 &mut children,
3105 );
3106
3107 diff_fields(
3109 &class_a.fields,
3110 &class_b.fields,
3111 file_a,
3112 file_b,
3113 &mut children,
3114 );
3115
3116 let base_changes = diff_bases(&class_a.bases, &class_b.bases);
3118
3119 if !children.is_empty() {
3120 has_changes = true;
3121 }
3122 if base_changes.is_some() {
3123 has_changes = true;
3124 }
3125
3126 if !has_changes {
3127 return None; }
3129
3130 Some(ASTChange {
3131 change_type: ChangeType::Update,
3132 node_kind: NodeKind::Class,
3133 name: Some(class_a.name.clone()),
3134 old_location: Some(Location::with_column(
3135 file_a.display().to_string(),
3136 class_a.line,
3137 class_a.column,
3138 )),
3139 new_location: Some(Location::with_column(
3140 file_b.display().to_string(),
3141 class_b.line,
3142 class_b.column,
3143 )),
3144 old_text: None,
3145 new_text: None,
3146 similarity: None,
3147 children: if children.is_empty() {
3148 None
3149 } else {
3150 Some(children)
3151 },
3152 base_changes,
3153 })
3154}
3155
3156fn diff_methods(
3158 methods_a: &[ExtractedNode],
3159 methods_b: &[ExtractedNode],
3160 file_a: &Path,
3161 file_b: &Path,
3162 children: &mut Vec<ASTChange>,
3163) {
3164 let map_b: HashMap<&str, &ExtractedNode> =
3165 methods_b.iter().map(|m| (m.name.as_str(), m)).collect();
3166
3167 let mut matched_a: Vec<bool> = vec![false; methods_a.len()];
3168 let mut matched_b: Vec<bool> = vec![false; methods_b.len()];
3169
3170 for (i, method_a) in methods_a.iter().enumerate() {
3172 if let Some(&method_b) = map_b.get(method_a.name.as_str()) {
3173 matched_a[i] = true;
3174 if let Some(j) = methods_b.iter().position(|m| m.name == method_a.name) {
3175 matched_b[j] = true;
3176 }
3177
3178 if method_a.normalized_body != method_b.normalized_body {
3180 children.push(ASTChange {
3181 change_type: ChangeType::Update,
3182 node_kind: NodeKind::Method,
3183 name: Some(method_a.name.clone()),
3184 old_location: Some(Location::with_column(
3185 file_a.display().to_string(),
3186 method_a.line,
3187 method_a.column,
3188 )),
3189 new_location: Some(Location::with_column(
3190 file_b.display().to_string(),
3191 method_b.line,
3192 method_b.column,
3193 )),
3194 old_text: None,
3195 new_text: None,
3196 similarity: Some(compute_similarity(
3197 &method_a.normalized_body,
3198 &method_b.normalized_body,
3199 )),
3200 children: None,
3201 base_changes: None,
3202 });
3203 }
3204 }
3205 }
3206
3207 let unmatched_a: Vec<&ExtractedNode> = methods_a
3209 .iter()
3210 .enumerate()
3211 .filter(|(i, _)| !matched_a[*i])
3212 .map(|(_, m)| m)
3213 .collect();
3214 let unmatched_b: Vec<&ExtractedNode> = methods_b
3215 .iter()
3216 .enumerate()
3217 .filter(|(i, _)| !matched_b[*i])
3218 .map(|(_, m)| m)
3219 .collect();
3220
3221 let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
3223
3224 for method_a in &unmatched_a {
3225 let mut best_match: Option<(usize, f64)> = None;
3226
3227 for (j, method_b) in unmatched_b.iter().enumerate() {
3228 if used_b[j] {
3229 continue;
3230 }
3231 let similarity =
3232 compute_similarity(&method_a.normalized_body, &method_b.normalized_body);
3233 if similarity >= RENAME_SIMILARITY_THRESHOLD
3234 && (best_match.is_none() || similarity > best_match.unwrap().1)
3235 {
3236 best_match = Some((j, similarity));
3237 }
3238 }
3239
3240 if let Some((j, similarity)) = best_match {
3241 let method_b = unmatched_b[j];
3242 used_b[j] = true;
3243
3244 children.push(ASTChange {
3245 change_type: ChangeType::Rename,
3246 node_kind: NodeKind::Method,
3247 name: Some(method_a.name.clone()),
3248 old_location: Some(Location::with_column(
3249 file_a.display().to_string(),
3250 method_a.line,
3251 method_a.column,
3252 )),
3253 new_location: Some(Location::with_column(
3254 file_b.display().to_string(),
3255 method_b.line,
3256 method_b.column,
3257 )),
3258 old_text: Some(method_a.name.clone()),
3259 new_text: Some(method_b.name.clone()),
3260 similarity: Some(similarity),
3261 children: None,
3262 base_changes: None,
3263 });
3264 }
3265 }
3266
3267 for method_a in &unmatched_a {
3269 let is_renamed = children.iter().any(|c| {
3270 c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&method_a.name)
3271 });
3272 if !is_renamed {
3273 children.push(ASTChange {
3274 change_type: ChangeType::Delete,
3275 node_kind: NodeKind::Method,
3276 name: Some(method_a.name.clone()),
3277 old_location: Some(Location::with_column(
3278 file_a.display().to_string(),
3279 method_a.line,
3280 method_a.column,
3281 )),
3282 new_location: None,
3283 old_text: None,
3284 new_text: None,
3285 similarity: None,
3286 children: None,
3287 base_changes: None,
3288 });
3289 }
3290 }
3291
3292 for (j, method_b) in unmatched_b.iter().enumerate() {
3294 if !used_b[j] {
3295 children.push(ASTChange {
3296 change_type: ChangeType::Insert,
3297 node_kind: NodeKind::Method,
3298 name: Some(method_b.name.clone()),
3299 old_location: None,
3300 new_location: Some(Location::with_column(
3301 file_b.display().to_string(),
3302 method_b.line,
3303 method_b.column,
3304 )),
3305 old_text: None,
3306 new_text: None,
3307 similarity: None,
3308 children: None,
3309 base_changes: None,
3310 });
3311 }
3312 }
3313}
3314
3315fn diff_fields(
3317 fields_a: &[FieldNode],
3318 fields_b: &[FieldNode],
3319 file_a: &Path,
3320 file_b: &Path,
3321 children: &mut Vec<ASTChange>,
3322) {
3323 let map_b: HashMap<&str, &FieldNode> = fields_b.iter().map(|f| (f.name.as_str(), f)).collect();
3324
3325 let mut matched_a: Vec<bool> = vec![false; fields_a.len()];
3326 let mut matched_b: Vec<bool> = vec![false; fields_b.len()];
3327
3328 for (i, field_a) in fields_a.iter().enumerate() {
3330 if let Some(&field_b) = map_b.get(field_a.name.as_str()) {
3331 matched_a[i] = true;
3332 if let Some(j) = fields_b.iter().position(|f| f.name == field_a.name) {
3333 matched_b[j] = true;
3334 }
3335
3336 if field_a.normalized_body != field_b.normalized_body {
3338 children.push(ASTChange {
3339 change_type: ChangeType::Update,
3340 node_kind: NodeKind::Field,
3341 name: Some(field_a.name.clone()),
3342 old_location: Some(Location::with_column(
3343 file_a.display().to_string(),
3344 field_a.line,
3345 field_a.column,
3346 )),
3347 new_location: Some(Location::with_column(
3348 file_b.display().to_string(),
3349 field_b.line,
3350 field_b.column,
3351 )),
3352 old_text: Some(field_a.body.trim().to_string()),
3353 new_text: Some(field_b.body.trim().to_string()),
3354 similarity: None,
3355 children: None,
3356 base_changes: None,
3357 });
3358 }
3359 }
3360 }
3361
3362 for (i, field_a) in fields_a.iter().enumerate() {
3364 if !matched_a[i] {
3365 children.push(ASTChange {
3366 change_type: ChangeType::Delete,
3367 node_kind: NodeKind::Field,
3368 name: Some(field_a.name.clone()),
3369 old_location: Some(Location::with_column(
3370 file_a.display().to_string(),
3371 field_a.line,
3372 field_a.column,
3373 )),
3374 new_location: None,
3375 old_text: None,
3376 new_text: None,
3377 similarity: None,
3378 children: None,
3379 base_changes: None,
3380 });
3381 }
3382 }
3383
3384 for (j, field_b) in fields_b.iter().enumerate() {
3386 if !matched_b[j] {
3387 children.push(ASTChange {
3388 change_type: ChangeType::Insert,
3389 node_kind: NodeKind::Field,
3390 name: Some(field_b.name.clone()),
3391 old_location: None,
3392 new_location: Some(Location::with_column(
3393 file_b.display().to_string(),
3394 field_b.line,
3395 field_b.column,
3396 )),
3397 old_text: None,
3398 new_text: None,
3399 similarity: None,
3400 children: None,
3401 base_changes: None,
3402 });
3403 }
3404 }
3405}
3406
3407fn diff_bases(bases_a: &[String], bases_b: &[String]) -> Option<BaseChanges> {
3409 let set_a: std::collections::HashSet<&String> = bases_a.iter().collect();
3410 let set_b: std::collections::HashSet<&String> = bases_b.iter().collect();
3411
3412 let added: Vec<String> = set_b.difference(&set_a).map(|s| (*s).clone()).collect();
3413 let removed: Vec<String> = set_a.difference(&set_b).map(|s| (*s).clone()).collect();
3414
3415 if added.is_empty() && removed.is_empty() {
3416 None
3417 } else {
3418 Some(BaseChanges { added, removed })
3419 }
3420}
3421
3422fn compute_class_similarity(class_a: &ClassNode, class_b: &ClassNode) -> f64 {
3424 let method_sigs_a: std::collections::HashSet<String> = class_a
3426 .methods
3427 .iter()
3428 .map(|m| format!("{}:{}", m.name, m.normalized_body))
3429 .collect();
3430 let method_sigs_b: std::collections::HashSet<String> = class_b
3431 .methods
3432 .iter()
3433 .map(|m| format!("{}:{}", m.name, m.normalized_body))
3434 .collect();
3435
3436 let field_sigs_a: std::collections::HashSet<String> = class_a
3437 .fields
3438 .iter()
3439 .map(|f| f.normalized_body.clone())
3440 .collect();
3441 let field_sigs_b: std::collections::HashSet<String> = class_b
3442 .fields
3443 .iter()
3444 .map(|f| f.normalized_body.clone())
3445 .collect();
3446
3447 let all_a: std::collections::HashSet<&String> =
3449 method_sigs_a.iter().chain(field_sigs_a.iter()).collect();
3450 let all_b: std::collections::HashSet<&String> =
3451 method_sigs_b.iter().chain(field_sigs_b.iter()).collect();
3452
3453 if all_a.is_empty() && all_b.is_empty() {
3454 return 1.0;
3456 }
3457
3458 let intersection = all_a.intersection(&all_b).count();
3459 let union = all_a.union(&all_b).count();
3460
3461 if union == 0 {
3462 0.0
3463 } else {
3464 intersection as f64 / union as f64
3465 }
3466}
3467
/// File extensions (without the leading dot) that the directory walkers treat
/// as source files. Matching is exact and case-sensitive.
const SOURCE_EXTENSIONS: &[&str] = &[
    "py",
    "rs",
    "ts",
    "tsx",
    "js",
    "jsx",
    "go",
    "java",
    "c",
    "h",
    "cpp",
    "hpp",
    "cc",
    "cxx",
    "rb",
    "php",
    "cs",
    "kt",
    "scala",
    "swift",
    "ex",
    "exs",
    "lua",
    "ml",
    "mli",
    "luau",
];
3477
3478fn collect_source_files(root: &Path) -> Result<Vec<(String, PathBuf)>> {
3480 let mut files = Vec::new();
3481 collect_source_files_recursive(root, root, &mut files)?;
3482 files.sort_by(|a, b| a.0.cmp(&b.0));
3483 Ok(files)
3484}
3485
3486fn collect_source_files_recursive(
3487 root: &Path,
3488 current: &Path,
3489 files: &mut Vec<(String, PathBuf)>,
3490) -> Result<()> {
3491 for entry in fs::read_dir(current)? {
3492 let entry = entry?;
3493 let path = entry.path();
3494 if path.is_dir() {
3495 collect_source_files_recursive(root, &path, files)?;
3496 } else if path.is_file() {
3497 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
3498 if SOURCE_EXTENSIONS.contains(&ext) {
3499 let rel = path
3500 .strip_prefix(root)
3501 .unwrap_or(&path)
3502 .to_string_lossy()
3503 .replace('\\', "/");
3504 files.push((rel, path));
3505 }
3506 }
3507 }
3508 }
3509 Ok(())
3510}
3511
3512fn compute_structural_fingerprint(path: &Path) -> Result<(u64, Vec<String>)> {
3518 let lang = match Language::from_path(path) {
3519 Some(l) => l,
3520 None => {
3521 let content = fs::read_to_string(path)?;
3523 let mut hasher = std::collections::hash_map::DefaultHasher::new();
3524 content.hash(&mut hasher);
3525 return Ok((hasher.finish(), vec![]));
3526 }
3527 };
3528
3529 let source = fs::read_to_string(path)?;
3530 let pool = ParserPool::new();
3531 let tree = match pool.parse(&source, lang) {
3532 Ok(t) => t,
3533 Err(_) => {
3534 let mut hasher = std::collections::hash_map::DefaultHasher::new();
3536 source.hash(&mut hasher);
3537 return Ok((hasher.finish(), vec![]));
3538 }
3539 };
3540
3541 let nodes = extract_nodes(tree.root_node(), source.as_bytes(), lang);
3542
3543 let mut signatures: Vec<String> = nodes
3547 .iter()
3548 .map(|n| {
3549 let kind = match n.kind {
3550 NodeKind::Function => "fn",
3551 NodeKind::Class => "class",
3552 NodeKind::Method => "method",
3553 NodeKind::Field => "field",
3554 _ => "other",
3555 };
3556 let sig = if n.params.is_empty() {
3557 format!("{}:{}", kind, n.name)
3558 } else {
3559 format!("{}:{}({})", kind, n.name, n.params)
3560 };
3561 let mut body_hasher = std::collections::hash_map::DefaultHasher::new();
3563 n.normalized_body.hash(&mut body_hasher);
3564 format!("{}|{}", sig, body_hasher.finish())
3565 })
3566 .collect();
3567 signatures.sort();
3568
3569 let mut hasher = std::collections::hash_map::DefaultHasher::new();
3570 for sig in &signatures {
3571 sig.hash(&mut hasher);
3572 }
3573 let fingerprint = hasher.finish();
3574
3575 Ok((fingerprint, signatures))
3576}
3577
3578fn run_file_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3580 let files_a = collect_source_files(dir_a)?;
3581 let files_b = collect_source_files(dir_b)?;
3582
3583 let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
3585 let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
3586
3587 let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
3588
3589 let mut file_changes = Vec::new();
3590 let mut has_any_change = false;
3591
3592 for rel_path in all_paths {
3593 match (map_a.get(rel_path), map_b.get(rel_path)) {
3594 (Some(path_a), Some(path_b)) => {
3595 let (fp_a, sigs_a) = compute_structural_fingerprint(path_a)?;
3597 let (fp_b, sigs_b) = compute_structural_fingerprint(path_b)?;
3598
3599 if fp_a == fp_b {
3600 } else {
3603 has_any_change = true;
3604 let set_a: HashSet<&String> = sigs_a.iter().collect();
3606 let set_b: HashSet<&String> = sigs_b.iter().collect();
3607 let changed: Vec<String> = set_a
3608 .symmetric_difference(&set_b)
3609 .map(|s| (*s).clone())
3610 .collect();
3611
3612 file_changes.push(FileLevelChange {
3613 relative_path: rel_path.to_string(),
3614 change_type: ChangeType::Update,
3615 old_fingerprint: Some(fp_a),
3616 new_fingerprint: Some(fp_b),
3617 signature_changes: if changed.is_empty() {
3618 None
3619 } else {
3620 Some(changed)
3621 },
3622 });
3623 }
3624 }
3625 (None, Some(path_b)) => {
3626 has_any_change = true;
3628 let (fp_b, _) = compute_structural_fingerprint(path_b)?;
3629 file_changes.push(FileLevelChange {
3630 relative_path: rel_path.to_string(),
3631 change_type: ChangeType::Insert,
3632 old_fingerprint: None,
3633 new_fingerprint: Some(fp_b),
3634 signature_changes: None,
3635 });
3636 }
3637 (Some(path_a), None) => {
3638 has_any_change = true;
3640 let (fp_a, _) = compute_structural_fingerprint(path_a)?;
3641 file_changes.push(FileLevelChange {
3642 relative_path: rel_path.to_string(),
3643 change_type: ChangeType::Delete,
3644 old_fingerprint: Some(fp_a),
3645 new_fingerprint: None,
3646 signature_changes: None,
3647 });
3648 }
3649 (None, None) => unreachable!(),
3650 }
3651 }
3652
3653 Ok(DiffReport {
3654 file_a: dir_a.display().to_string(),
3655 file_b: dir_b.display().to_string(),
3656 identical: !has_any_change,
3657 changes: Vec::new(),
3658 summary: None,
3659 granularity: DiffGranularity::File,
3660 file_changes: Some(file_changes),
3661 module_changes: None,
3662 import_graph_summary: None,
3663 arch_changes: None,
3664 arch_summary: None,
3665 })
3666}
3667
/// One import statement, recorded as an edge of the import graph.
///
/// Two edges are equal (and hash alike) only when all three fields match,
/// including the order of `imported_names`.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct InternalImportEdge {
    /// Path of the importing file, relative to the scanned root.
    source_file: String,
    /// Module named by the import statement.
    target_module: String,
    /// Names imported from the module; empty when none were recorded.
    imported_names: Vec<String>,
}
3679
3680fn parse_python_imports(source: &str, relative_path: &str) -> Vec<InternalImportEdge> {
3686 let mut edges = Vec::new();
3687
3688 let from_re = Regex::new(r"(?m)^(?:\s*)from\s+([\w.]+)\s+import\s+(.+)$").unwrap();
3690 for cap in from_re.captures_iter(source) {
3691 let target = cap[1].to_string();
3692 let names_str = &cap[2];
3693 let names: Vec<String> = names_str
3694 .split(',')
3695 .map(|n| n.trim().to_string())
3696 .filter(|n| !n.is_empty())
3697 .collect();
3698 edges.push(InternalImportEdge {
3699 source_file: relative_path.to_string(),
3700 target_module: target,
3701 imported_names: names,
3702 });
3703 }
3704
3705 let import_re = Regex::new(r"(?m)^(?:\s*)import\s+([\w.]+)$").unwrap();
3707 for cap in import_re.captures_iter(source) {
3708 let target = cap[1].to_string();
3709 edges.push(InternalImportEdge {
3710 source_file: relative_path.to_string(),
3711 target_module: target,
3712 imported_names: vec![],
3713 });
3714 }
3715
3716 edges
3717}
3718
3719fn parse_file_imports(
3724 registry: &LanguageRegistry,
3725 source: &str,
3726 full_path: &Path,
3727 rel_path: &str,
3728) -> Vec<InternalImportEdge> {
3729 let ext = match full_path.extension().and_then(|e| e.to_str()) {
3730 Some(e) => format!(".{}", e),
3731 None => return Vec::new(),
3732 };
3733
3734 let is_python = ext == ".py" || ext == ".pyi";
3735
3736 if let Some(handler) = registry.get_by_extension(&ext) {
3738 if let Ok(import_defs) = handler.parse_imports(source, full_path) {
3739 return import_defs
3740 .into_iter()
3741 .map(|def| InternalImportEdge {
3742 source_file: rel_path.to_string(),
3743 target_module: def.module,
3744 imported_names: def.names,
3745 })
3746 .collect();
3747 }
3748 }
3749
3750 if is_python {
3752 return parse_python_imports(source, rel_path);
3753 }
3754
3755 Vec::new()
3756}
3757
3758fn build_import_graph(root: &Path) -> Result<Vec<InternalImportEdge>> {
3766 let files = collect_source_files(root)?;
3767 let registry = LanguageRegistry::with_defaults();
3768 let mut all_edges = Vec::new();
3769
3770 for (rel_path, full_path) in &files {
3771 let source = fs::read_to_string(full_path)?;
3772 let edges = parse_file_imports(®istry, &source, full_path, rel_path);
3773 all_edges.extend(edges);
3774 }
3775
3776 Ok(all_edges)
3777}
3778
3779fn to_public_edge(edge: &InternalImportEdge) -> ImportEdge {
3781 ImportEdge {
3782 source_file: edge.source_file.clone(),
3783 target_module: edge.target_module.clone(),
3784 imported_names: edge.imported_names.clone(),
3785 }
3786}
3787
3788fn edge_key(edge: &InternalImportEdge) -> String {
3790 format!(
3791 "{}->{}:{}",
3792 edge.source_file,
3793 edge.target_module,
3794 edge.imported_names.join(",")
3795 )
3796}
3797
3798fn run_module_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3800 let edges_a = build_import_graph(dir_a)?;
3802 let edges_b = build_import_graph(dir_b)?;
3803
3804 let keys_a: HashSet<String> = edges_a.iter().map(edge_key).collect();
3806 let keys_b: HashSet<String> = edges_b.iter().map(edge_key).collect();
3807
3808 let added_keys: HashSet<&String> = keys_b.difference(&keys_a).collect();
3810 let removed_keys: HashSet<&String> = keys_a.difference(&keys_b).collect();
3811
3812 let added_edges: Vec<&InternalImportEdge> = edges_b
3814 .iter()
3815 .filter(|e| added_keys.contains(&edge_key(e)))
3816 .collect();
3817 let removed_edges: Vec<&InternalImportEdge> = edges_a
3818 .iter()
3819 .filter(|e| removed_keys.contains(&edge_key(e)))
3820 .collect();
3821
3822 let files_a = collect_source_files(dir_a)?;
3824 let files_b = collect_source_files(dir_b)?;
3825 let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(r, p)| (r.as_str(), p)).collect();
3826 let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(r, p)| (r.as_str(), p)).collect();
3827 let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
3828
3829 let mut module_changes: Vec<ModuleLevelChange> = Vec::new();
3831 let mut modules_with_import_changes = 0usize;
3832
3833 for rel_path in &all_paths {
3834 let in_a = map_a.contains_key(rel_path);
3835 let in_b = map_b.contains_key(rel_path);
3836
3837 let change_type = if !in_a && in_b {
3839 ChangeType::Insert
3840 } else if in_a && !in_b {
3841 ChangeType::Delete
3842 } else {
3843 ChangeType::Update
3844 };
3845
3846 let mod_added: Vec<ImportEdge> = added_edges
3848 .iter()
3849 .filter(|e| e.source_file == *rel_path)
3850 .map(|e| to_public_edge(e))
3851 .collect();
3852 let mod_removed: Vec<ImportEdge> = removed_edges
3853 .iter()
3854 .filter(|e| e.source_file == *rel_path)
3855 .map(|e| to_public_edge(e))
3856 .collect();
3857
3858 let file_change = if in_a && in_b {
3860 let path_a = map_a[rel_path];
3861 let path_b = map_b[rel_path];
3862 let (fp_a, sigs_a) = compute_structural_fingerprint(path_a)?;
3863 let (fp_b, sigs_b) = compute_structural_fingerprint(path_b)?;
3864 if fp_a != fp_b {
3865 let set_a: HashSet<&String> = sigs_a.iter().collect();
3866 let set_b: HashSet<&String> = sigs_b.iter().collect();
3867 let changed: Vec<String> = set_a
3868 .symmetric_difference(&set_b)
3869 .map(|s| (*s).clone())
3870 .collect();
3871 Some(FileLevelChange {
3872 relative_path: rel_path.to_string(),
3873 change_type: ChangeType::Update,
3874 old_fingerprint: Some(fp_a),
3875 new_fingerprint: Some(fp_b),
3876 signature_changes: if changed.is_empty() {
3877 None
3878 } else {
3879 Some(changed)
3880 },
3881 })
3882 } else {
3883 None
3884 }
3885 } else {
3886 None
3887 };
3888
3889 let has_import_changes = !mod_added.is_empty() || !mod_removed.is_empty();
3891 let has_file_change = file_change.is_some();
3892 let is_new_or_deleted =
3893 change_type == ChangeType::Insert || change_type == ChangeType::Delete;
3894
3895 if has_import_changes || has_file_change || is_new_or_deleted {
3896 if has_import_changes {
3897 modules_with_import_changes += 1;
3898 }
3899
3900 let final_added = if change_type == ChangeType::Insert && mod_added.is_empty() {
3902 edges_b
3904 .iter()
3905 .filter(|e| e.source_file == *rel_path)
3906 .map(to_public_edge)
3907 .collect()
3908 } else {
3909 mod_added
3910 };
3911 let final_removed = if change_type == ChangeType::Delete && mod_removed.is_empty() {
3913 edges_a
3914 .iter()
3915 .filter(|e| e.source_file == *rel_path)
3916 .map(to_public_edge)
3917 .collect()
3918 } else {
3919 mod_removed
3920 };
3921
3922 let has_expanded_imports = !final_added.is_empty() || !final_removed.is_empty();
3924 if has_expanded_imports && !has_import_changes {
3925 modules_with_import_changes += 1;
3926 }
3927
3928 module_changes.push(ModuleLevelChange {
3929 module_path: rel_path.to_string(),
3930 change_type,
3931 imports_added: final_added,
3932 imports_removed: final_removed,
3933 file_change,
3934 });
3935 }
3936 }
3937
3938 let summary = ImportGraphSummary {
3939 total_edges_a: edges_a.len(),
3940 total_edges_b: edges_b.len(),
3941 edges_added: added_keys.len(),
3942 edges_removed: removed_keys.len(),
3943 modules_with_import_changes,
3944 };
3945
3946 let identical = module_changes.is_empty() && added_keys.is_empty() && removed_keys.is_empty();
3947
3948 Ok(DiffReport {
3949 file_a: dir_a.display().to_string(),
3950 file_b: dir_b.display().to_string(),
3951 identical,
3952 changes: Vec::new(),
3953 summary: None,
3954 granularity: DiffGranularity::Module,
3955 file_changes: None,
3956 module_changes: Some(module_changes),
3957 import_graph_summary: Some(summary),
3958 arch_changes: None,
3959 arch_summary: None,
3960 })
3961}
3962
/// Maps a directory name onto a coarse architectural layer label.
///
/// The name is lower-cased before it is matched, so the lookup is
/// case-insensitive. Any name that is not one of the known conventions is
/// labelled `"other"`.
fn classify_directory_layer(dir_name: &str) -> String {
    let label = match dir_name.to_lowercase().as_str() {
        "api" | "routes" | "handlers" | "endpoints" | "views" | "controllers" => "api",
        "core" | "models" | "domain" | "entities" => "core",
        "utils" | "helpers" | "lib" | "common" | "shared" => "utility",
        "middleware" | "interceptors" | "filters" => "middleware",
        "services" | "service" => "service",
        "tests" | "test" | "spec" | "specs" => "test",
        "config" | "settings" | "conf" => "config",
        "db" | "database" | "migrations" | "repositories" | "repo" => "data",
        _ => "other",
    };
    label.to_owned()
}
3982
3983fn classify_by_import_flow(
3991 dir_name: &str,
3992 edges: &[InternalImportEdge],
3993 all_dirs: &HashSet<String>,
3994) -> String {
3995 let fan_out: usize = edges
3997 .iter()
3998 .filter(|e| {
3999 e.source_file
4000 .split('/')
4001 .next()
4002 .map(|d| d == dir_name)
4003 .unwrap_or(false)
4004 })
4005 .filter(|e| {
4006 let target_first = e
4008 .target_module
4009 .split('/')
4010 .next()
4011 .or_else(|| e.target_module.split('.').next())
4012 .unwrap_or("");
4013 all_dirs.contains(target_first) && target_first != dir_name
4014 })
4015 .map(|e| e.target_module.clone())
4016 .collect::<HashSet<_>>()
4017 .len();
4018
4019 let fan_in: usize = edges
4021 .iter()
4022 .filter(|e| {
4023 let source_dir = e.source_file.split('/').next().unwrap_or("");
4024 source_dir != dir_name
4025 })
4026 .filter(|e| {
4027 let target_first = e
4028 .target_module
4029 .split('/')
4030 .next()
4031 .or_else(|| e.target_module.split('.').next())
4032 .unwrap_or("");
4033 target_first == dir_name
4034 })
4035 .count();
4036
4037 if fan_in == 0 && fan_out == 0 {
4038 return "other".to_string();
4039 }
4040
4041 if fan_out > 0 && fan_in == 0 {
4043 "entry".to_string()
4044 } else if fan_in > fan_out * 2 {
4045 "utility".to_string()
4046 } else if fan_out > fan_in * 2 {
4047 "entry".to_string()
4048 } else {
4049 "service".to_string()
4050 }
4051}
4052
4053fn collect_arch_directories(root: &Path) -> Result<HashMap<String, String>> {
4060 let mut dirs: HashMap<String, String> = HashMap::new();
4061 let files = collect_source_files(root)?;
4062
4063 for (rel_path, _) in &files {
4065 if let Some(first_dir) = rel_path.split('/').next() {
4066 if rel_path.contains('/') && !dirs.contains_key(first_dir) {
4067 let layer = classify_directory_layer(first_dir);
4068 dirs.insert(first_dir.to_string(), layer);
4069 }
4070 }
4071 }
4072
4073 let other_dirs: Vec<String> = dirs
4075 .iter()
4076 .filter(|(_, layer)| *layer == "other")
4077 .map(|(name, _)| name.clone())
4078 .collect();
4079
4080 if !other_dirs.is_empty() {
4081 if let Ok(edges) = build_import_graph(root) {
4083 let all_dir_names: HashSet<String> = dirs.keys().cloned().collect();
4084 for dir_name in &other_dirs {
4085 let inferred = classify_by_import_flow(dir_name, &edges, &all_dir_names);
4086 if inferred != "other" {
4087 dirs.insert(dir_name.clone(), inferred);
4088 }
4089 }
4090 }
4091 }
4092
4093 Ok(dirs)
4094}
4095
4096fn run_arch_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
4098 let dirs_a = collect_arch_directories(dir_a)?;
4099 let dirs_b = collect_arch_directories(dir_b)?;
4100
4101 let all_dirs: BTreeSet<&str> = dirs_a
4102 .keys()
4103 .chain(dirs_b.keys())
4104 .map(|s| s.as_str())
4105 .collect();
4106
4107 let mut arch_changes: Vec<ArchLevelChange> = Vec::new();
4108 let mut directories_added = 0usize;
4109 let mut directories_removed = 0usize;
4110 let mut layer_migrations = 0usize;
4111 let mut changed_dirs = 0usize;
4112 let total_dirs = all_dirs.len();
4113
4114 for dir_name in &all_dirs {
4115 let in_a = dirs_a.get(*dir_name);
4116 let in_b = dirs_b.get(*dir_name);
4117
4118 match (in_a, in_b) {
4119 (Some(layer_a), Some(layer_b)) => {
4120 if layer_a != layer_b {
4121 changed_dirs += 1;
4123 layer_migrations += 1;
4124 arch_changes.push(ArchLevelChange {
4125 directory: dir_name.to_string(),
4126 change_type: ArchChangeType::LayerMigration,
4127 old_layer: Some(layer_a.clone()),
4128 new_layer: Some(layer_b.clone()),
4129 migrated_functions: Vec::new(),
4130 });
4131 }
4132 }
4134 (None, Some(layer_b)) => {
4135 changed_dirs += 1;
4137 directories_added += 1;
4138 arch_changes.push(ArchLevelChange {
4139 directory: dir_name.to_string(),
4140 change_type: ArchChangeType::Added,
4141 old_layer: None,
4142 new_layer: Some(layer_b.clone()),
4143 migrated_functions: Vec::new(),
4144 });
4145 }
4146 (Some(layer_a), None) => {
4147 changed_dirs += 1;
4149 directories_removed += 1;
4150 arch_changes.push(ArchLevelChange {
4151 directory: dir_name.to_string(),
4152 change_type: ArchChangeType::Removed,
4153 old_layer: Some(layer_a.clone()),
4154 new_layer: None,
4155 migrated_functions: Vec::new(),
4156 });
4157 }
4158 (None, None) => unreachable!(),
4159 }
4160 }
4161
4162 let stability_score = if total_dirs == 0 {
4163 1.0
4164 } else {
4165 1.0 - (changed_dirs as f64 / total_dirs as f64)
4166 };
4167
4168 let summary = ArchDiffSummary {
4169 layer_migrations,
4170 directories_added,
4171 directories_removed,
4172 cycles_introduced: 0,
4173 cycles_resolved: 0,
4174 stability_score,
4175 };
4176
4177 let identical = arch_changes.is_empty();
4178
4179 Ok(DiffReport {
4180 file_a: dir_a.display().to_string(),
4181 file_b: dir_b.display().to_string(),
4182 identical,
4183 changes: Vec::new(),
4184 summary: None,
4185 granularity: DiffGranularity::Architecture,
4186 file_changes: None,
4187 module_changes: None,
4188 import_graph_summary: None,
4189 arch_changes: Some(arch_changes),
4190 arch_summary: Some(summary),
4191 })
4192}
4193
4194#[cfg(test)]
4199mod tests {
4200 use super::*;
4201
4202 const SAMPLE_A: &str = r#"
4203def original_function(x):
4204 return x * 2
4205
4206def renamed_later(a, b):
4207 return a + b
4208
4209def will_be_deleted():
4210 return "goodbye"
4211
4212class OriginalClass:
4213 def method_one(self):
4214 return 1
4215"#;
4216
4217 const SAMPLE_B: &str = r#"
4218def original_function(x):
4219 # Modified implementation
4220 return x * 3
4221
4222def better_name(a, b):
4223 return a + b
4224
4225def new_function():
4226 return "hello"
4227
4228class OriginalClass:
4229 def method_one(self):
4230 return 1
4231
4232 def method_two(self):
4233 return 2
4234"#;
4235
4236 fn parse_python(source: &str) -> tree_sitter::Tree {
4238 let pool = ParserPool::new();
4239 pool.parse(source, Language::Python).unwrap()
4240 }
4241
4242 #[test]
4243 fn test_extract_nodes() {
4244 let tree = parse_python(SAMPLE_A);
4245 let nodes = extract_nodes(tree.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4246
4247 assert!(
4249 nodes.len() >= 5,
4250 "Expected at least 5 nodes, got {}",
4251 nodes.len()
4252 );
4253
4254 let names: Vec<&str> = nodes.iter().map(|n| n.name.as_str()).collect();
4255 assert!(names.contains(&"original_function"));
4256 assert!(names.contains(&"renamed_later"));
4257 assert!(names.contains(&"will_be_deleted"));
4258 assert!(names.contains(&"OriginalClass"));
4259 assert!(names.contains(&"method_one"));
4260 }
4261
4262 #[test]
4263 fn test_detect_update() {
4264 let tree_a = parse_python(SAMPLE_A);
4265 let tree_b = parse_python(SAMPLE_B);
4266
4267 let nodes_a = extract_nodes(tree_a.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4268 let nodes_b = extract_nodes(tree_b.root_node(), SAMPLE_B.as_bytes(), Language::Python);
4269
4270 let file_a = PathBuf::from("a.py");
4271 let file_b = PathBuf::from("b.py");
4272 let changes = detect_changes(&nodes_a, &nodes_b, &file_a, &file_b, false);
4273
4274 let updates: Vec<_> = changes
4276 .iter()
4277 .filter(|c| c.change_type == ChangeType::Update)
4278 .collect();
4279 assert!(!updates.is_empty(), "Should detect at least one update");
4280 assert!(
4281 updates
4282 .iter()
4283 .any(|c| c.name.as_deref() == Some("original_function")),
4284 "original_function should be marked as updated"
4285 );
4286 }
4287
4288 #[test]
4289 fn test_detect_insert() {
4290 let tree_a = parse_python(SAMPLE_A);
4291 let tree_b = parse_python(SAMPLE_B);
4292
4293 let nodes_a = extract_nodes(tree_a.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4294 let nodes_b = extract_nodes(tree_b.root_node(), SAMPLE_B.as_bytes(), Language::Python);
4295
4296 let file_a = PathBuf::from("a.py");
4297 let file_b = PathBuf::from("b.py");
4298 let changes = detect_changes(&nodes_a, &nodes_b, &file_a, &file_b, false);
4299
4300 let inserts: Vec<_> = changes
4302 .iter()
4303 .filter(|c| c.change_type == ChangeType::Insert)
4304 .collect();
4305 assert!(!inserts.is_empty(), "Should detect insertions");
4306 }
4307
4308 #[test]
4309 fn test_detect_delete() {
4310 let tree_a = parse_python(SAMPLE_A);
4311 let tree_b = parse_python(SAMPLE_B);
4312
4313 let nodes_a = extract_nodes(tree_a.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4314 let nodes_b = extract_nodes(tree_b.root_node(), SAMPLE_B.as_bytes(), Language::Python);
4315
4316 let file_a = PathBuf::from("a.py");
4317 let file_b = PathBuf::from("b.py");
4318 let changes = detect_changes(&nodes_a, &nodes_b, &file_a, &file_b, false);
4319
4320 let deletes: Vec<_> = changes
4322 .iter()
4323 .filter(|c| c.change_type == ChangeType::Delete)
4324 .collect();
4325 assert!(!deletes.is_empty(), "Should detect deletions");
4326 assert!(
4327 deletes
4328 .iter()
4329 .any(|c| c.name.as_deref() == Some("will_be_deleted")),
4330 "will_be_deleted should be marked as deleted"
4331 );
4332 }
4333
4334 #[test]
4335 fn test_detect_rename() {
4336 let tree_a = parse_python(SAMPLE_A);
4337 let tree_b = parse_python(SAMPLE_B);
4338
4339 let nodes_a = extract_nodes(tree_a.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4340 let nodes_b = extract_nodes(tree_b.root_node(), SAMPLE_B.as_bytes(), Language::Python);
4341
4342 let file_a = PathBuf::from("a.py");
4343 let file_b = PathBuf::from("b.py");
4344 let changes = detect_changes(&nodes_a, &nodes_b, &file_a, &file_b, false);
4345
4346 let renames: Vec<_> = changes
4348 .iter()
4349 .filter(|c| c.change_type == ChangeType::Rename)
4350 .collect();
4351 assert!(!renames.is_empty(), "Should detect renames");
4352 }
4353
4354 #[test]
4355 fn test_identical_files() {
4356 let tree_a = parse_python(SAMPLE_A);
4357 let tree_b = parse_python(SAMPLE_A); let nodes_a = extract_nodes(tree_a.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4360 let nodes_b = extract_nodes(tree_b.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4361
4362 let file_a = PathBuf::from("a.py");
4363 let file_b = PathBuf::from("b.py");
4364 let changes = detect_changes(&nodes_a, &nodes_b, &file_a, &file_b, true); assert!(
4367 changes.is_empty(),
4368 "Identical files should have no semantic changes"
4369 );
4370 }
4371
4372 #[test]
4373 fn test_compute_similarity() {
4374 assert_eq!(compute_similarity("abc", "abc"), 1.0);
4375 assert_eq!(compute_similarity("", ""), 1.0); assert!(compute_similarity("a\nb\nc", "a\nb\nd") >= 0.5); }
4378
4379 #[test]
4380 fn test_normalize_body() {
4381 let body = "def foo():\n # pure comment line\n return 1 # inline comment";
4383 let normalized = normalize_body(body);
4384 assert!(!normalized.contains('#'), "Comments should be removed");
4387 assert!(
4388 !normalized.contains("def foo"),
4389 "Signature should be skipped"
4390 );
4391 assert!(normalized.contains("return 1"), "Body should remain");
4392 }
4393
4394 #[test]
4399 fn test_format_diff_text_renders_file_changes() {
4400 let mut report = DiffReport::new("dir_a/", "dir_b/");
4401 report.identical = false;
4402 report.file_changes = Some(vec![
4403 FileLevelChange {
4404 relative_path: "src/main.py".to_string(),
4405 change_type: ChangeType::Update,
4406 old_fingerprint: Some(12345),
4407 new_fingerprint: Some(67890),
4408 signature_changes: Some(vec!["fn foo()".to_string()]),
4409 },
4410 FileLevelChange {
4411 relative_path: "src/new_module.py".to_string(),
4412 change_type: ChangeType::Insert,
4413 old_fingerprint: None,
4414 new_fingerprint: Some(11111),
4415 signature_changes: None,
4416 },
4417 FileLevelChange {
4418 relative_path: "src/removed.py".to_string(),
4419 change_type: ChangeType::Delete,
4420 old_fingerprint: Some(99999),
4421 new_fingerprint: None,
4422 signature_changes: None,
4423 },
4424 ]);
4425
4426 let text = format_diff_text(&report);
4427 assert!(
4428 text.contains("File-Level Changes"),
4429 "Should have file-level section header"
4430 );
4431 assert!(text.contains("src/main.py"), "Should mention updated file");
4432 assert!(
4433 text.contains("src/new_module.py"),
4434 "Should mention added file"
4435 );
4436 assert!(
4437 text.contains("src/removed.py"),
4438 "Should mention removed file"
4439 );
4440 }
4441
4442 #[test]
4443 fn test_format_diff_text_renders_module_changes() {
4444 let mut report = DiffReport::new("dir_a/", "dir_b/");
4445 report.identical = false;
4446 report.module_changes = Some(vec![ModuleLevelChange {
4447 module_path: "src/utils.py".to_string(),
4448 change_type: ChangeType::Update,
4449 imports_added: vec![ImportEdge {
4450 source_file: "src/utils.py".to_string(),
4451 target_module: "os.path".to_string(),
4452 imported_names: vec!["join".to_string()],
4453 }],
4454 imports_removed: vec![],
4455 file_change: None,
4456 }]);
4457
4458 let text = format_diff_text(&report);
4459 assert!(
4460 text.contains("Module-Level Changes"),
4461 "Should have module-level section header"
4462 );
4463 assert!(
4464 text.contains("src/utils.py"),
4465 "Should mention the module path"
4466 );
4467 assert!(
4468 text.contains("os.path"),
4469 "Should mention added import target"
4470 );
4471 }
4472
4473 #[test]
4474 fn test_format_diff_text_renders_import_graph_summary() {
4475 let mut report = DiffReport::new("dir_a/", "dir_b/");
4476 report.identical = false;
4477 report.import_graph_summary = Some(ImportGraphSummary {
4478 total_edges_a: 10,
4479 total_edges_b: 15,
4480 edges_added: 7,
4481 edges_removed: 2,
4482 modules_with_import_changes: 3,
4483 });
4484
4485 let text = format_diff_text(&report);
4486 assert!(
4487 text.contains("Import Graph"),
4488 "Should have import graph section"
4489 );
4490 assert!(text.contains("7"), "Should show edges added");
4491 assert!(text.contains("2"), "Should show edges removed");
4492 }
4493
4494 #[test]
4495 fn test_format_diff_text_renders_arch_changes() {
4496 let mut report = DiffReport::new("dir_a/", "dir_b/");
4497 report.identical = false;
4498 report.arch_changes = Some(vec![
4499 ArchLevelChange {
4500 directory: "src/api/".to_string(),
4501 change_type: ArchChangeType::LayerMigration,
4502 old_layer: Some("presentation".to_string()),
4503 new_layer: Some("business".to_string()),
4504 migrated_functions: vec!["handle_request".to_string()],
4505 },
4506 ArchLevelChange {
4507 directory: "src/new_service/".to_string(),
4508 change_type: ArchChangeType::Added,
4509 old_layer: None,
4510 new_layer: Some("service".to_string()),
4511 migrated_functions: vec![],
4512 },
4513 ]);
4514
4515 let text = format_diff_text(&report);
4516 assert!(
4517 text.contains("Architecture-Level Changes"),
4518 "Should have arch section header"
4519 );
4520 assert!(
4521 text.contains("src/api/"),
4522 "Should mention migrated directory"
4523 );
4524 assert!(text.contains("presentation"), "Should show old layer");
4525 assert!(text.contains("business"), "Should show new layer");
4526 assert!(
4527 text.contains("src/new_service/"),
4528 "Should mention added directory"
4529 );
4530 }
4531
4532 #[test]
4533 fn test_format_diff_text_renders_arch_summary() {
4534 let mut report = DiffReport::new("dir_a/", "dir_b/");
4535 report.identical = false;
4536 report.arch_summary = Some(ArchDiffSummary {
4537 layer_migrations: 2,
4538 directories_added: 1,
4539 directories_removed: 0,
4540 cycles_introduced: 1,
4541 cycles_resolved: 0,
4542 stability_score: 0.75,
4543 });
4544
4545 let text = format_diff_text(&report);
4546 assert!(
4547 text.contains("Architecture Summary"),
4548 "Should have arch summary section"
4549 );
4550 assert!(text.contains("0.75"), "Should show stability score");
4551 }
4552
4553 #[test]
4554 fn test_format_diff_text_identical_skips_higher_levels() {
4555 let mut report = DiffReport::new("a.py", "b.py");
4558 report.identical = true;
4559 report.file_changes = Some(vec![FileLevelChange {
4560 relative_path: "should_not_appear.py".to_string(),
4561 change_type: ChangeType::Insert,
4562 old_fingerprint: None,
4563 new_fingerprint: Some(1),
4564 signature_changes: None,
4565 }]);
4566
4567 let text = format_diff_text(&report);
4568 assert!(
4569 !text.contains("should_not_appear"),
4570 "Identical report should skip all change sections"
4571 );
4572 assert!(
4573 text.contains("No structural changes"),
4574 "Should show identical message"
4575 );
4576 }
4577}