1use std::collections::{BTreeSet, HashMap, HashSet};
19use std::fs;
20use std::hash::{Hash, Hasher};
21use std::path::{Path, PathBuf};
22
23use anyhow::{bail, Result};
24use clap::Args;
25use regex::Regex;
26use tree_sitter::Node;
27
28use tldr_core::ast::function_finder::{get_function_name, get_function_node_kinds};
29use tldr_core::ast::parser::ParserPool;
30use tldr_core::callgraph::languages::LanguageRegistry;
31use tldr_core::types::Language;
32
33use super::error::RemainingError;
34use super::types::{
35 ASTChange, ArchChangeType, ArchDiffSummary, ArchLevelChange, BaseChanges, ChangeType,
36 DiffGranularity, DiffReport, DiffSummary, FileLevelChange, ImportEdge, ImportGraphSummary,
37 Location, ModuleLevelChange, NodeKind,
38};
39use crate::output::OutputFormat;
40
/// Minimum body similarity (as returned by `compute_similarity`, in `0.0..=1.0`)
/// at which an unmatched node from file A and an unmatched node of the same
/// kind from file B are reported as a rename instead of a delete plus insert.
const RENAME_SIMILARITY_THRESHOLD: f64 = 0.8;
47
// Command-line arguments for the structural diff command.
//
// NOTE: plain `//` comments are used here on purpose. On a clap `Args`
// struct, `///` doc comments are turned into user-visible help text, so
// adding them would change the program's output.
#[derive(Debug, Args)]
pub struct DiffArgs {
    // First ("old") input path. May be a file or a directory depending on the
    // selected granularity; locations in it are reported as `old_location`.
    pub file_a: PathBuf,

    // Second ("new") input path; locations in it are reported as `new_location`.
    pub file_b: PathBuf,

    // Level at which the two inputs are compared (`-g` / `--granularity`).
    // Defaults to function-level diffing.
    #[arg(long, short = 'g', default_value = "function")]
    pub granularity: DiffGranularity,

    // When set, the function-level diff does not report pure moves (same
    // normalized body, different line). Also forwarded to the class-level diff.
    #[arg(long)]
    pub semantic_only: bool,

    // Optional file to write the rendered report to (`-O` / `--output`)
    // instead of printing it to stdout.
    #[arg(long, short = 'O')]
    pub output: Option<PathBuf>,
}
83
/// A function, method, or class definition pulled out of a parsed source file,
/// carrying everything the function-level diff needs to compare it.
#[derive(Debug, Clone)]
struct ExtractedNode {
    /// Identifier of the definition; nodes from the two files are paired by it.
    name: String,
    /// Kind of definition (this file creates `Function`, `Method` and `Class`).
    kind: NodeKind,
    /// 1-based line on which the definition starts.
    line: u32,
    /// 1-based line on which the definition ends.
    end_line: u32,
    /// Start column copied from the tree-sitter start position (not
    /// incremented, unlike `line`).
    column: u32,
    /// Full source text of the definition.
    body: String,
    /// `body` passed through `normalize_body`; used for equality and
    /// similarity checks so formatting-only edits are ignored.
    normalized_body: String,
    /// Source text of the parameter list, or empty when none was found.
    params: String,
    /// True when the node was extracted while inside a class body.
    is_method: bool,
}
110
111impl ExtractedNode {
112 fn new(
113 name: impl Into<String>,
114 kind: NodeKind,
115 line: u32,
116 end_line: u32,
117 column: u32,
118 body: impl Into<String>,
119 ) -> Self {
120 let body_str: String = body.into();
121 let normalized = normalize_body(&body_str);
122 Self {
123 name: name.into(),
124 kind,
125 line,
126 end_line,
127 column,
128 body: body_str,
129 normalized_body: normalized,
130 params: String::new(),
131 is_method: false,
132 }
133 }
134
135 fn with_params(mut self, params: impl Into<String>) -> Self {
136 self.params = params.into();
137 self
138 }
139
140 fn with_method_kind(mut self) -> Self {
141 self.is_method = true;
142 if self.kind == NodeKind::Function {
143 self.kind = NodeKind::Method;
144 }
145 self
146 }
147}
148
/// Reduces a definition's source text to a form suitable for comparison.
///
/// The first line (the signature) is dropped, trailing `#` comments are cut
/// off, every remaining line is trimmed, and blank lines are discarded. The
/// surviving lines are joined with `\n`.
///
/// A `#` is only treated as a comment start when the text before the first
/// `#` on the line contains an even number of `'` and an even number of `"`,
/// i.e. when the `#` does not appear to sit inside a string literal.
fn normalize_body(body: &str) -> String {
    let mut kept: Vec<&str> = Vec::new();

    for raw in body.lines().skip(1) {
        let code = match raw.find('#') {
            Some(hash_at) => {
                let prefix = &raw[..hash_at];
                let quotes_balanced = prefix.matches('\'').count() % 2 == 0
                    && prefix.matches('"').count() % 2 == 0;
                if quotes_balanced {
                    prefix
                } else {
                    raw
                }
            }
            None => raw,
        };

        let code = code.trim();
        if !code.is_empty() {
            kept.push(code);
        }
    }

    kept.join("\n")
}
178
179impl DiffArgs {
184 pub fn run_to_report(&self) -> Result<DiffReport> {
190 if !self.file_a.exists() {
192 return Err(RemainingError::file_not_found(&self.file_a).into());
193 }
194 if !self.file_b.exists() {
195 return Err(RemainingError::file_not_found(&self.file_b).into());
196 }
197
198 match self.granularity {
199 DiffGranularity::File => {
200 if !self.file_a.is_dir() || !self.file_b.is_dir() {
202 bail!("File-level (L6) diff requires directories, not individual files");
203 }
204 run_file_level_diff(&self.file_a, &self.file_b)
205 }
206 DiffGranularity::Module => {
207 if !self.file_a.is_dir() || !self.file_b.is_dir() {
209 bail!("Module-level (L7) diff requires directories, not individual files");
210 }
211 run_module_level_diff(&self.file_a, &self.file_b)
212 }
213 DiffGranularity::Architecture => {
214 if !self.file_a.is_dir() || !self.file_b.is_dir() {
216 bail!(
217 "Architecture-level (L8) diff requires directories, not individual files"
218 );
219 }
220 run_arch_level_diff(&self.file_a, &self.file_b)
221 }
222 DiffGranularity::Class => {
223 if self.file_a.is_dir() && self.file_b.is_dir() {
225 run_class_diff_directory(&self.file_a, &self.file_b, self.semantic_only)
226 } else {
227 run_class_diff(&self.file_a, &self.file_b, self.semantic_only)
228 }
229 }
230 DiffGranularity::Statement => {
231 self.run_statement_level_diff()
233 }
234 DiffGranularity::Token => {
235 self.run_token_level_diff()
237 }
238 DiffGranularity::Expression => {
239 self.run_expression_level_diff()
241 }
242 _ => {
243 self.run_function_level_diff()
245 }
246 }
247 }
248
249 pub fn run(&self, format: OutputFormat) -> Result<()> {
251 let report = self.run_to_report()?;
252
253 match format {
255 OutputFormat::Json => {
256 let json = serde_json::to_string_pretty(&report)?;
257 if let Some(ref output_path) = self.output {
258 fs::write(output_path, &json)?;
259 } else {
260 println!("{}", json);
261 }
262 }
263 OutputFormat::Text => {
264 let text = format_diff_text(&report);
265 if let Some(ref output_path) = self.output {
266 fs::write(output_path, &text)?;
267 } else {
268 println!("{}", text);
269 }
270 }
271 OutputFormat::Sarif | OutputFormat::Compact | OutputFormat::Dot => {
272 let json = serde_json::to_string_pretty(&report)?;
274 println!("{}", json);
275 }
276 }
277
278 Ok(())
279 }
280
281 fn run_function_level_diff(&self) -> Result<DiffReport> {
283 let lang = Language::from_path(&self.file_a).ok_or_else(|| {
285 let ext = self
286 .file_a
287 .extension()
288 .map(|e| e.to_string_lossy().to_string())
289 .unwrap_or_else(|| "unknown".to_string());
290 RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
291 })?;
292
293 let source_a = fs::read_to_string(&self.file_a)?;
295 let source_b = fs::read_to_string(&self.file_b)?;
296
297 let pool = ParserPool::new();
299 let tree_a = pool.parse(&source_a, lang).map_err(|e| {
300 RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
301 })?;
302 let tree_b = pool.parse(&source_b, lang).map_err(|e| {
303 RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
304 })?;
305
306 let nodes_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
308 let nodes_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
309
310 let changes = detect_changes(
312 &nodes_a,
313 &nodes_b,
314 &self.file_a,
315 &self.file_b,
316 self.semantic_only,
317 );
318
319 let mut summary = DiffSummary::default();
321 for change in &changes {
322 summary.total_changes += 1;
323 if change.change_type != ChangeType::Format {
324 summary.semantic_changes += 1;
325 }
326 match change.change_type {
327 ChangeType::Insert => summary.inserts += 1,
328 ChangeType::Delete => summary.deletes += 1,
329 ChangeType::Update => summary.updates += 1,
330 ChangeType::Move => summary.moves += 1,
331 ChangeType::Rename => summary.renames += 1,
332 ChangeType::Format => summary.formats += 1,
333 ChangeType::Extract => summary.extracts += 1,
334 ChangeType::Inline => {}
335 }
336 }
337
338 let report = DiffReport {
340 file_a: self.file_a.display().to_string(),
341 file_b: self.file_b.display().to_string(),
342 identical: changes.is_empty(),
343 changes,
344 summary: Some(summary),
345 granularity: self.granularity,
346 file_changes: None,
347 module_changes: None,
348 import_graph_summary: None,
349 arch_changes: None,
350 arch_summary: None,
351 };
352
353 Ok(report)
354 }
355
356 fn run_token_level_diff(&self) -> Result<DiffReport> {
365 use super::difftastic;
366 use typed_arena::Arena;
367
368 let lang = Language::from_path(&self.file_a).ok_or_else(|| {
370 let ext = self
371 .file_a
372 .extension()
373 .map(|e| e.to_string_lossy().to_string())
374 .unwrap_or_else(|| "unknown".to_string());
375 RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
376 })?;
377
378 let lhs_src = fs::read_to_string(&self.file_a)?;
380 let rhs_src = fs::read_to_string(&self.file_b)?;
381
382 let config = difftastic::lang_config::LangConfig::for_language(lang.as_str());
384
385 let pool = ParserPool::new();
387 let lhs_tree = pool.parse(&lhs_src, lang).map_err(|e| {
388 RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
389 })?;
390 let rhs_tree = pool.parse(&rhs_src, lang).map_err(|e| {
391 RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
392 })?;
393
394 let arena = Arena::new();
396 let (lhs_nodes, rhs_nodes) = difftastic::ts_to_syntax::prepare_syntax_trees(
397 &arena, &lhs_src, &rhs_src, &lhs_tree, &rhs_tree, &config,
398 );
399
400 let mut change_map = difftastic::changes::ChangeMap::default();
402
403 let chunks = difftastic::unchanged::mark_unchanged(&lhs_nodes, &rhs_nodes, &mut change_map);
405
406 for (lhs_chunk, rhs_chunk) in &chunks {
408 match (lhs_chunk.first(), rhs_chunk.first()) {
409 (Some(lhs_first), Some(rhs_first)) => {
410 if difftastic::dijkstra::mark_syntax(
411 Some(*lhs_first),
412 Some(*rhs_first),
413 &mut change_map,
414 difftastic::dijkstra::DEFAULT_GRAPH_LIMIT,
415 )
416 .is_err()
417 {
418 for node in lhs_chunk {
420 difftastic::changes::insert_deep_novel(node, &mut change_map);
421 }
422 for node in rhs_chunk {
423 difftastic::changes::insert_deep_novel(node, &mut change_map);
424 }
425 }
426 }
427 (Some(_), None) => {
428 for node in lhs_chunk {
430 difftastic::changes::insert_deep_novel(node, &mut change_map);
431 }
432 }
433 (None, Some(_)) => {
434 for node in rhs_chunk {
436 difftastic::changes::insert_deep_novel(node, &mut change_map);
437 }
438 }
439 (None, None) => {
440 }
442 }
443 }
444
445 difftastic::sliders::fix_all_sliders(&lhs_nodes, &mut change_map);
447 difftastic::sliders::fix_all_sliders(&rhs_nodes, &mut change_map);
448
449 let fa = self.file_a.display().to_string();
451 let fb = self.file_b.display().to_string();
452 Ok(difftastic::changemap_to_report::changemap_to_l1_report(
453 &lhs_nodes,
454 &rhs_nodes,
455 &change_map,
456 &fa,
457 &fb,
458 ))
459 }
460
461 fn run_expression_level_diff(&self) -> Result<DiffReport> {
467 use super::difftastic;
468 use typed_arena::Arena;
469
470 let lang = Language::from_path(&self.file_a).ok_or_else(|| {
472 let ext = self
473 .file_a
474 .extension()
475 .map(|e| e.to_string_lossy().to_string())
476 .unwrap_or_else(|| "unknown".to_string());
477 RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
478 })?;
479
480 let lhs_src = fs::read_to_string(&self.file_a)?;
482 let rhs_src = fs::read_to_string(&self.file_b)?;
483
484 let config = difftastic::lang_config::LangConfig::for_language(lang.as_str());
486
487 let pool = ParserPool::new();
489 let lhs_tree = pool.parse(&lhs_src, lang).map_err(|e| {
490 RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
491 })?;
492 let rhs_tree = pool.parse(&rhs_src, lang).map_err(|e| {
493 RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
494 })?;
495
496 let arena = Arena::new();
498 let (lhs_nodes, rhs_nodes) = difftastic::ts_to_syntax::prepare_syntax_trees(
499 &arena, &lhs_src, &rhs_src, &lhs_tree, &rhs_tree, &config,
500 );
501
502 let mut change_map = difftastic::changes::ChangeMap::default();
504
505 let chunks = difftastic::unchanged::mark_unchanged(&lhs_nodes, &rhs_nodes, &mut change_map);
507
508 for (lhs_chunk, rhs_chunk) in &chunks {
510 match (lhs_chunk.first(), rhs_chunk.first()) {
511 (Some(lhs_first), Some(rhs_first)) => {
512 if difftastic::dijkstra::mark_syntax(
513 Some(*lhs_first),
514 Some(*rhs_first),
515 &mut change_map,
516 difftastic::dijkstra::DEFAULT_GRAPH_LIMIT,
517 )
518 .is_err()
519 {
520 for node in lhs_chunk {
521 difftastic::changes::insert_deep_novel(node, &mut change_map);
522 }
523 for node in rhs_chunk {
524 difftastic::changes::insert_deep_novel(node, &mut change_map);
525 }
526 }
527 }
528 (Some(_), None) => {
529 for node in lhs_chunk {
530 difftastic::changes::insert_deep_novel(node, &mut change_map);
531 }
532 }
533 (None, Some(_)) => {
534 for node in rhs_chunk {
535 difftastic::changes::insert_deep_novel(node, &mut change_map);
536 }
537 }
538 (None, None) => {}
539 }
540 }
541
542 difftastic::sliders::fix_all_sliders(&lhs_nodes, &mut change_map);
544 difftastic::sliders::fix_all_sliders(&rhs_nodes, &mut change_map);
545
546 let fa = self.file_a.display().to_string();
548 let fb = self.file_b.display().to_string();
549 Ok(difftastic::changemap_to_report::changemap_to_l2_report(
550 &lhs_nodes,
551 &rhs_nodes,
552 &change_map,
553 &fa,
554 &fb,
555 ))
556 }
557}
558
559fn node_text<'a>(node: Node, source: &'a [u8]) -> &'a str {
565 node.utf8_text(source).unwrap_or("")
566}
567
/// Returns the tree-sitter node kinds that the function-level diff treats as
/// class-like containers for `language`.
///
/// An empty slice means no class-like nodes are extracted for that language.
fn get_class_node_kinds(language: Language) -> &'static [&'static str] {
    match language {
        Language::Python => &["class_definition"],
        Language::TypeScript | Language::JavaScript => &["class_declaration", "class"],
        Language::Go => &["type_declaration"],
        Language::Rust => &["struct_item", "enum_item", "impl_item"],
        Language::Java => &[
            "class_declaration",
            "interface_declaration",
            "enum_declaration",
        ],
        Language::C => &["struct_specifier", "enum_specifier"],
        Language::Cpp => &["class_specifier", "struct_specifier", "enum_specifier"],
        Language::Ruby => &["class", "module"],
        Language::Php => &["class_declaration", "interface_declaration"],
        Language::CSharp => &[
            "class_declaration",
            "interface_declaration",
            "struct_declaration",
        ],
        Language::Kotlin => &["class_declaration", "object_declaration"],
        Language::Scala => &["class_definition", "object_definition", "trait_definition"],
        Language::Swift => &[
            "class_declaration",
            "struct_declaration",
            "protocol_declaration",
        ],
        // Every `call` is a candidate; `extract_class_node` keeps only those
        // whose first child reads `defmodule`.
        Language::Elixir => &["call"],
        // No class-like node kinds are listed for Lua and Luau.
        Language::Lua | Language::Luau => &[],
        Language::Ocaml => &["module_definition", "type_definition"],
    }
}
601
602fn get_class_body_kinds(language: Language) -> &'static [&'static str] {
604 match language {
605 Language::Python => &["block"],
606 Language::TypeScript | Language::JavaScript => &["class_body"],
607 Language::Go => &[], Language::Rust => &["declaration_list"], Language::Java => &["class_body"],
610 Language::C | Language::Cpp => &["field_declaration_list"],
611 Language::Ruby => &["body_statement"],
612 Language::Php => &["declaration_list"],
613 Language::CSharp => &["declaration_list"],
614 Language::Kotlin => &["class_body"],
615 Language::Scala => &["template_body"],
616 Language::Swift => &["class_body"],
617 Language::Elixir => &["do_block"],
618 Language::Lua | Language::Luau => &[],
619 Language::Ocaml => &[],
620 }
621}
622
623fn extract_nodes(root: Node, source: &[u8], lang: Language) -> Vec<ExtractedNode> {
629 let mut nodes = Vec::new();
630 let kinds = NodeKindSets {
631 func: get_function_node_kinds(lang),
632 class: get_class_node_kinds(lang),
633 body: get_class_body_kinds(lang),
634 };
635 extract_nodes_recursive(root, source, &mut nodes, false, lang, &kinds);
636 nodes
637}
638
/// The three node-kind tables for one language, bundled so the recursive walk
/// can take them as a single argument.
struct NodeKindSets<'a> {
    /// Node kinds treated as function definitions.
    func: &'a [&'a str],
    /// Node kinds treated as class-like containers.
    class: &'a [&'a str],
    /// Node kinds of the class child that is descended into to find members.
    body: &'a [&'a str],
}
644
645fn extract_nodes_recursive(
646 node: Node,
647 source: &[u8],
648 nodes: &mut Vec<ExtractedNode>,
649 in_class: bool,
650 lang: Language,
651 kinds: &NodeKindSets<'_>,
652) {
653 let kind = node.kind();
654
655 if lang == Language::Ocaml && kind == "value_definition" {
665 for child in node.children(&mut node.walk()) {
666 if child.kind() == "let_binding" && ocaml_let_binding_is_function(child) {
667 if let Some(extracted) = extract_function_node(child, source, in_class, lang) {
668 if extracted.name != "_" && extracted.name != "()" && !extracted.name.is_empty()
670 {
671 nodes.push(extracted);
672 }
673 }
674 }
675 }
676 return;
680 }
681 if lang == Language::Ocaml && kind == "let_binding" {
682 for child in node.children(&mut node.walk()) {
687 extract_nodes_recursive(child, source, nodes, in_class, lang, kinds);
688 }
689 return;
690 }
691
692 if kinds.func.contains(&kind) {
694 if let Some(extracted) = extract_function_node(node, source, in_class, lang) {
695 nodes.push(extracted);
696 }
697 }
698 else if kinds.class.contains(&kind) {
700 if let Some(extracted) = extract_class_node(node, source, lang) {
701 nodes.push(extracted);
702 }
703 for child in node.children(&mut node.walk()) {
705 if kinds.body.contains(&child.kind()) {
706 extract_nodes_recursive(child, source, nodes, true, lang, kinds);
707 }
708 }
709 return; }
711
712 for child in node.children(&mut node.walk()) {
714 extract_nodes_recursive(child, source, nodes, in_class, lang, kinds);
715 }
716}
717
718fn ocaml_let_binding_is_function(node: Node) -> bool {
723 for child in node.children(&mut node.walk()) {
724 if child.kind() == "parameter" {
725 return true;
726 }
727 }
728 false
729}
730
731fn extract_function_node(
732 node: Node,
733 source: &[u8],
734 is_method: bool,
735 lang: Language,
736) -> Option<ExtractedNode> {
737 let source_str = std::str::from_utf8(source).unwrap_or("");
739 let func_name = get_function_name(node, lang, source_str)?;
740
741 let params = node
743 .child_by_field_name("parameters")
744 .or_else(|| node.child_by_field_name("formal_parameters"))
745 .map(|p| node_text(p, source).to_string())
746 .unwrap_or_default();
747
748 let line = node.start_position().row as u32 + 1;
749 let end_line = node.end_position().row as u32 + 1;
750 let column = node.start_position().column as u32;
751 let body = node_text(node, source).to_string();
752
753 let mut extracted =
754 ExtractedNode::new(func_name, NodeKind::Function, line, end_line, column, body)
755 .with_params(params);
756
757 if is_method {
758 extracted = extracted.with_method_kind();
759 }
760
761 Some(extracted)
762}
763
764fn extract_class_node(node: Node, source: &[u8], lang: Language) -> Option<ExtractedNode> {
765 let class_name = node
767 .child_by_field_name("name")
768 .map(|n| node_text(n, source).to_string())
769 .or_else(|| {
770 let mut cursor = node.walk();
772 for child in node.children(&mut cursor) {
773 if child.kind() == "identifier"
774 || child.kind() == "type_identifier"
775 || child.kind() == "constant"
776 {
777 return Some(node_text(child, source).to_string());
778 }
779 }
780 None
781 })?;
782
783 if class_name.is_empty() {
785 return None;
786 }
787
788 if lang == Language::Elixir && node.kind() == "call" {
790 let first_child = node.child(0)?;
791 let first_text = node_text(first_child, source);
792 if first_text != "defmodule" {
793 return None;
794 }
795 if let Some(args) = node.child(1) {
797 let name = node_text(args, source).to_string();
798 if !name.is_empty() {
799 let line = node.start_position().row as u32 + 1;
800 let end_line = node.end_position().row as u32 + 1;
801 let column = node.start_position().column as u32;
802 let body = node_text(node, source).to_string();
803 return Some(ExtractedNode::new(
804 name,
805 NodeKind::Class,
806 line,
807 end_line,
808 column,
809 body,
810 ));
811 }
812 }
813 return None;
814 }
815
816 let line = node.start_position().row as u32 + 1;
817 let end_line = node.end_position().row as u32 + 1;
818 let column = node.start_position().column as u32;
819 let body = node_text(node, source).to_string();
820
821 Some(ExtractedNode::new(
822 class_name,
823 NodeKind::Class,
824 line,
825 end_line,
826 column,
827 body,
828 ))
829}
830
831fn detect_changes(
837 nodes_a: &[ExtractedNode],
838 nodes_b: &[ExtractedNode],
839 file_a: &Path,
840 file_b: &Path,
841 semantic_only: bool,
842) -> Vec<ASTChange> {
843 let mut changes = Vec::new();
844
845 let _map_a: HashMap<&str, &ExtractedNode> =
847 nodes_a.iter().map(|n| (n.name.as_str(), n)).collect();
848 let map_b: HashMap<&str, &ExtractedNode> =
849 nodes_b.iter().map(|n| (n.name.as_str(), n)).collect();
850
851 let mut matched_a: Vec<bool> = vec![false; nodes_a.len()];
853 let mut matched_b: Vec<bool> = vec![false; nodes_b.len()];
854
855 for (i, node_a) in nodes_a.iter().enumerate() {
857 let _ = node_a.end_line;
858 if let Some(&node_b) = map_b.get(node_a.name.as_str()) {
859 matched_a[i] = true;
861 if let Some(j) = nodes_b.iter().position(|n| n.name == node_a.name) {
862 matched_b[j] = true;
863 }
864
865 if node_a.normalized_body != node_b.normalized_body {
867 changes.push(ASTChange {
869 change_type: ChangeType::Update,
870 node_kind: node_a.kind,
871 name: Some(node_a.name.clone()),
872 old_location: Some(Location::with_column(
873 file_a.display().to_string(),
874 node_a.line,
875 node_a.column,
876 )),
877 new_location: Some(Location::with_column(
878 file_b.display().to_string(),
879 node_b.line,
880 node_b.column,
881 )),
882 old_text: Some(node_a.body.clone()),
883 new_text: Some(node_b.body.clone()),
884 similarity: Some(compute_similarity(
885 &node_a.normalized_body,
886 &node_b.normalized_body,
887 )),
888 children: None,
889 base_changes: None,
890 });
891 } else if node_a.line != node_b.line && !semantic_only {
892 changes.push(ASTChange {
894 change_type: ChangeType::Move,
895 node_kind: node_a.kind,
896 name: Some(node_a.name.clone()),
897 old_location: Some(Location::with_column(
898 file_a.display().to_string(),
899 node_a.line,
900 node_a.column,
901 )),
902 new_location: Some(Location::with_column(
903 file_b.display().to_string(),
904 node_b.line,
905 node_b.column,
906 )),
907 old_text: None,
908 new_text: None,
909 similarity: Some(1.0),
910 children: None,
911 base_changes: None,
912 });
913 }
914 }
915 }
916
917 let unmatched_a: Vec<(usize, &ExtractedNode)> = nodes_a
919 .iter()
920 .enumerate()
921 .filter(|(i, _)| !matched_a[*i])
922 .collect();
923 let unmatched_b: Vec<(usize, &ExtractedNode)> = nodes_b
924 .iter()
925 .enumerate()
926 .filter(|(i, _)| !matched_b[*i])
927 .collect();
928
929 let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
931
932 for (_, node_a) in &unmatched_a {
933 let mut best_match: Option<(usize, f64)> = None;
934
935 for (j, (_, node_b)) in unmatched_b.iter().enumerate() {
936 if used_b[j] {
937 continue;
938 }
939 if node_a.kind != node_b.kind {
940 continue;
941 }
942
943 let similarity = compute_similarity(&node_a.normalized_body, &node_b.normalized_body);
944 if similarity >= RENAME_SIMILARITY_THRESHOLD
945 && (best_match.is_none() || similarity > best_match.unwrap().1)
946 {
947 best_match = Some((j, similarity));
948 }
949 }
950
951 if let Some((j, similarity)) = best_match {
952 let (_, node_b) = unmatched_b[j];
953 used_b[j] = true;
954
955 changes.push(ASTChange {
957 change_type: ChangeType::Rename,
958 node_kind: node_a.kind,
959 name: Some(node_a.name.clone()),
960 old_location: Some(Location::with_column(
961 file_a.display().to_string(),
962 node_a.line,
963 node_a.column,
964 )),
965 new_location: Some(Location::with_column(
966 file_b.display().to_string(),
967 node_b.line,
968 node_b.column,
969 )),
970 old_text: Some(node_a.name.clone()),
971 new_text: Some(node_b.name.clone()),
972 similarity: Some(similarity),
973 children: None,
974 base_changes: None,
975 });
976 }
977 }
978
979 for (_, node_a) in &unmatched_a {
981 let is_renamed = changes
983 .iter()
984 .any(|c| c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&node_a.name));
985 if !is_renamed {
986 changes.push(ASTChange {
987 change_type: ChangeType::Delete,
988 node_kind: node_a.kind,
989 name: Some(node_a.name.clone()),
990 old_location: Some(Location::with_column(
991 file_a.display().to_string(),
992 node_a.line,
993 node_a.column,
994 )),
995 new_location: None,
996 old_text: None,
997 new_text: None,
998 similarity: None,
999 children: None,
1000 base_changes: None,
1001 });
1002 }
1003 }
1004
1005 for (j, (_, node_b)) in unmatched_b.iter().enumerate() {
1007 if !used_b[j] {
1008 changes.push(ASTChange {
1009 change_type: ChangeType::Insert,
1010 node_kind: node_b.kind,
1011 name: Some(node_b.name.clone()),
1012 old_location: None,
1013 new_location: Some(Location::with_column(
1014 file_b.display().to_string(),
1015 node_b.line,
1016 node_b.column,
1017 )),
1018 old_text: None,
1019 new_text: None,
1020 similarity: None,
1021 children: None,
1022 base_changes: None,
1023 });
1024 }
1025 }
1026
1027 changes.sort_by_key(|c| match c.change_type {
1029 ChangeType::Delete => 0,
1030 ChangeType::Rename => 1,
1031 ChangeType::Update => 2,
1032 ChangeType::Move => 3,
1033 ChangeType::Insert => 4,
1034 _ => 5,
1035 });
1036
1037 changes
1038}
1039
1040fn compute_similarity(a: &str, b: &str) -> f64 {
1047 if a == b {
1048 return 1.0;
1049 }
1050 if a.is_empty() || b.is_empty() {
1051 return 0.0;
1052 }
1053
1054 let lines_a: std::collections::HashSet<&str> = a.lines().collect();
1056 let lines_b: std::collections::HashSet<&str> = b.lines().collect();
1057
1058 let intersection = lines_a.intersection(&lines_b).count();
1059 let union = lines_a.union(&lines_b).count();
1060
1061 let line_sim = if union == 0 {
1062 0.0
1063 } else {
1064 intersection as f64 / union as f64
1065 };
1066
1067 if line_sim == 0.0 && lines_a.len() <= 2 && lines_b.len() <= 2 {
1070 return char_jaccard_similarity(a, b);
1071 }
1072
1073 line_sim
1074}
1075
/// Jaccard index over the sets of distinct adjacent byte pairs of `a` and `b`.
///
/// Inputs shorter than two bytes have no pairs, so they are compared for
/// plain equality instead (`1.0` when equal, `0.0` otherwise). The pairs are
/// taken over UTF-8 bytes, not over characters.
fn char_jaccard_similarity(a: &str, b: &str) -> f64 {
    let (bytes_a, bytes_b) = (a.as_bytes(), b.as_bytes());

    if bytes_a.len() < 2 || bytes_b.len() < 2 {
        return if a == b { 1.0 } else { 0.0 };
    }

    let pairs_a: HashSet<&[u8]> = bytes_a.windows(2).collect();
    let pairs_b: HashSet<&[u8]> = bytes_b.windows(2).collect();

    let shared = pairs_a.intersection(&pairs_b).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    match pairs_a.len() + pairs_b.len() - shared {
        0 => 0.0,
        combined => shared as f64 / combined as f64,
    }
}
1094
1095fn format_diff_text(report: &DiffReport) -> String {
1101 let mut out = String::new();
1102
1103 out.push_str("Diff Report\n");
1104 out.push_str("===========\n\n");
1105 out.push_str(&format!("File A: {}\n", report.file_a));
1106 out.push_str(&format!("File B: {}\n", report.file_b));
1107 out.push_str(&format!("Identical: {}\n\n", report.identical));
1108
1109 if report.identical {
1110 out.push_str("No structural changes detected.\n");
1111 return out;
1112 }
1113
1114 out.push_str("Changes:\n");
1115 out.push_str("--------\n");
1116
1117 for change in &report.changes {
1118 let change_type = match change.change_type {
1119 ChangeType::Insert => "+",
1120 ChangeType::Delete => "-",
1121 ChangeType::Update => "~",
1122 ChangeType::Move => ">",
1123 ChangeType::Rename => "R",
1124 ChangeType::Format => "F",
1125 ChangeType::Extract => "E",
1126 ChangeType::Inline => "I",
1127 };
1128
1129 let kind = match change.node_kind {
1130 NodeKind::Function => "function",
1131 NodeKind::Class => "class",
1132 NodeKind::Method => "method",
1133 NodeKind::Field => "field",
1134 NodeKind::Statement => "statement",
1135 NodeKind::Expression => "expression",
1136 NodeKind::Block => "block",
1137 };
1138
1139 let name = change.name.as_deref().unwrap_or("<unknown>");
1140
1141 match change.change_type {
1142 ChangeType::Insert => {
1143 if let Some(ref loc) = change.new_location {
1144 out.push_str(&format!(
1145 " {} {} {} at {}:{}\n",
1146 change_type, kind, name, loc.file, loc.line
1147 ));
1148 }
1149 }
1150 ChangeType::Delete => {
1151 if let Some(ref loc) = change.old_location {
1152 out.push_str(&format!(
1153 " {} {} {} at {}:{}\n",
1154 change_type, kind, name, loc.file, loc.line
1155 ));
1156 }
1157 }
1158 ChangeType::Update | ChangeType::Move => {
1159 if let (Some(ref old), Some(ref new)) = (&change.old_location, &change.new_location)
1160 {
1161 out.push_str(&format!(
1162 " {} {} {} from {}:{} to {}:{}\n",
1163 change_type, kind, name, old.file, old.line, new.file, new.line
1164 ));
1165 }
1166 }
1167 ChangeType::Rename => {
1168 let old_name = change.old_text.as_deref().unwrap_or(name);
1169 let new_name = change.new_text.as_deref().unwrap_or(name);
1170 out.push_str(&format!(
1171 " {} {} {} -> {}\n",
1172 change_type, kind, old_name, new_name
1173 ));
1174 }
1175 _ => {
1176 out.push_str(&format!(" {} {} {}\n", change_type, kind, name));
1177 }
1178 }
1179 }
1180
1181 if let Some(ref summary) = report.summary {
1182 out.push_str("\nSummary:\n");
1183 out.push_str("--------\n");
1184 out.push_str(&format!(" Total changes: {}\n", summary.total_changes));
1185 out.push_str(&format!(
1186 " Semantic changes: {}\n",
1187 summary.semantic_changes
1188 ));
1189 out.push_str(&format!(" Inserts: {}\n", summary.inserts));
1190 out.push_str(&format!(" Deletes: {}\n", summary.deletes));
1191 out.push_str(&format!(" Updates: {}\n", summary.updates));
1192 out.push_str(&format!(" Renames: {}\n", summary.renames));
1193 out.push_str(&format!(" Moves: {}\n", summary.moves));
1194 }
1195
1196 if let Some(ref file_changes) = report.file_changes {
1198 out.push_str("\nFile-Level Changes:\n");
1199 out.push_str("-------------------\n");
1200 for fc in file_changes {
1201 let change_type = match fc.change_type {
1202 ChangeType::Insert => "+",
1203 ChangeType::Delete => "-",
1204 ChangeType::Update => "~",
1205 _ => "?",
1206 };
1207 out.push_str(&format!(" {} {}\n", change_type, fc.relative_path));
1208 if let Some(ref sigs) = fc.signature_changes {
1209 for sig in sigs {
1210 out.push_str(&format!(" changed: {}\n", sig));
1211 }
1212 }
1213 }
1214 }
1215
1216 if let Some(ref module_changes) = report.module_changes {
1218 out.push_str("\nModule-Level Changes:\n");
1219 out.push_str("---------------------\n");
1220 for mc in module_changes {
1221 let change_type = match mc.change_type {
1222 ChangeType::Insert => "+",
1223 ChangeType::Delete => "-",
1224 ChangeType::Update => "~",
1225 _ => "?",
1226 };
1227 out.push_str(&format!(" {} {}\n", change_type, mc.module_path));
1228 for edge in &mc.imports_added {
1229 let names = if edge.imported_names.is_empty() {
1230 String::new()
1231 } else {
1232 format!(" ({})", edge.imported_names.join(", "))
1233 };
1234 out.push_str(&format!(" + import {}{}\n", edge.target_module, names));
1235 }
1236 for edge in &mc.imports_removed {
1237 let names = if edge.imported_names.is_empty() {
1238 String::new()
1239 } else {
1240 format!(" ({})", edge.imported_names.join(", "))
1241 };
1242 out.push_str(&format!(" - import {}{}\n", edge.target_module, names));
1243 }
1244 }
1245 }
1246
1247 if let Some(ref igs) = report.import_graph_summary {
1249 out.push_str("\nImport Graph Summary:\n");
1250 out.push_str("---------------------\n");
1251 out.push_str(&format!(" Edges in A: {}\n", igs.total_edges_a));
1252 out.push_str(&format!(" Edges in B: {}\n", igs.total_edges_b));
1253 out.push_str(&format!(" Edges added: {}\n", igs.edges_added));
1254 out.push_str(&format!(" Edges removed: {}\n", igs.edges_removed));
1255 out.push_str(&format!(
1256 " Modules with import changes: {}\n",
1257 igs.modules_with_import_changes
1258 ));
1259 }
1260
1261 if let Some(ref arch_changes) = report.arch_changes {
1263 out.push_str("\nArchitecture-Level Changes:\n");
1264 out.push_str("---------------------------\n");
1265 for ac in arch_changes {
1266 let change_label = match ac.change_type {
1267 ArchChangeType::LayerMigration => "migration",
1268 ArchChangeType::Added => "added",
1269 ArchChangeType::Removed => "removed",
1270 ArchChangeType::CompositionChanged => "composition changed",
1271 ArchChangeType::CycleIntroduced => "cycle introduced",
1272 ArchChangeType::CycleResolved => "cycle resolved",
1273 };
1274 out.push_str(&format!(" [{}] {}\n", change_label, ac.directory));
1275 if let (Some(ref old), Some(ref new)) = (&ac.old_layer, &ac.new_layer) {
1276 out.push_str(&format!(" {} -> {}\n", old, new));
1277 } else if let Some(ref new) = ac.new_layer {
1278 out.push_str(&format!(" -> {}\n", new));
1279 } else if let Some(ref old) = ac.old_layer {
1280 out.push_str(&format!(" {} ->\n", old));
1281 }
1282 if !ac.migrated_functions.is_empty() {
1283 out.push_str(&format!(
1284 " migrated: {}\n",
1285 ac.migrated_functions.join(", ")
1286 ));
1287 }
1288 }
1289 }
1290
1291 if let Some(ref arch_summary) = report.arch_summary {
1293 out.push_str("\nArchitecture Summary:\n");
1294 out.push_str("---------------------\n");
1295 out.push_str(&format!(
1296 " Layer migrations: {}\n",
1297 arch_summary.layer_migrations
1298 ));
1299 out.push_str(&format!(
1300 " Directories added: {}\n",
1301 arch_summary.directories_added
1302 ));
1303 out.push_str(&format!(
1304 " Directories removed: {}\n",
1305 arch_summary.directories_removed
1306 ));
1307 out.push_str(&format!(
1308 " Cycles introduced: {}\n",
1309 arch_summary.cycles_introduced
1310 ));
1311 out.push_str(&format!(
1312 " Cycles resolved: {}\n",
1313 arch_summary.cycles_resolved
1314 ));
1315 out.push_str(&format!(
1316 " Stability score: {}\n",
1317 arch_summary.stability_score
1318 ));
1319 }
1320
1321 out
1322}
1323
1324fn get_statement_node_kinds(lang: Language) -> &'static [&'static str] {
1330 match lang {
1331 Language::Python => &[
1332 "return_statement",
1333 "if_statement",
1334 "for_statement",
1335 "while_statement",
1336 "expression_statement",
1337 "assert_statement",
1338 "raise_statement",
1339 "try_statement",
1340 "with_statement",
1341 "assignment",
1342 "augmented_assignment",
1343 "delete_statement",
1344 "pass_statement",
1345 "break_statement",
1346 "continue_statement",
1347 ],
1348 Language::TypeScript | Language::JavaScript => &[
1349 "return_statement",
1350 "if_statement",
1351 "for_statement",
1352 "for_in_statement",
1353 "while_statement",
1354 "do_statement",
1355 "expression_statement",
1356 "variable_declaration",
1357 "lexical_declaration",
1358 "throw_statement",
1359 "try_statement",
1360 "switch_statement",
1361 "break_statement",
1362 "continue_statement",
1363 ],
1364 Language::Go => &[
1365 "return_statement",
1366 "if_statement",
1367 "for_statement",
1368 "expression_statement",
1369 "short_var_declaration",
1370 "var_declaration",
1371 "assignment_statement",
1372 "go_statement",
1373 "defer_statement",
1374 "select_statement",
1375 "switch_statement",
1376 ],
1377 Language::Rust => &[
1378 "let_declaration",
1379 "expression_statement",
1380 "return_expression",
1381 "if_expression",
1382 "for_expression",
1383 "while_expression",
1384 "loop_expression",
1385 "match_expression",
1386 ],
1387 Language::Java => &[
1388 "return_statement",
1389 "if_statement",
1390 "for_statement",
1391 "enhanced_for_statement",
1392 "while_statement",
1393 "do_statement",
1394 "expression_statement",
1395 "local_variable_declaration",
1396 "throw_statement",
1397 "try_statement",
1398 "switch_expression",
1399 ],
1400 Language::C | Language::Cpp => &[
1401 "return_statement",
1402 "if_statement",
1403 "for_statement",
1404 "while_statement",
1405 "do_statement",
1406 "expression_statement",
1407 "declaration",
1408 "switch_statement",
1409 ],
1410 Language::Ruby => &[
1411 "return",
1412 "if",
1413 "unless",
1414 "for",
1415 "while",
1416 "until",
1417 "assignment",
1418 "call",
1419 "begin",
1420 ],
1421 Language::Php => &[
1422 "return_statement",
1423 "if_statement",
1424 "for_statement",
1425 "foreach_statement",
1426 "while_statement",
1427 "expression_statement",
1428 "echo_statement",
1429 "throw_expression",
1430 "try_statement",
1431 ],
1432 Language::CSharp => &[
1433 "return_statement",
1434 "if_statement",
1435 "for_statement",
1436 "foreach_statement",
1437 "while_statement",
1438 "expression_statement",
1439 "local_declaration_statement",
1440 "throw_statement",
1441 "try_statement",
1442 ],
1443 Language::Kotlin => &[
1444 "property_declaration",
1445 "assignment",
1446 "if_expression",
1447 "for_statement",
1448 "while_statement",
1449 "do_while_statement",
1450 "return_expression",
1451 "throw_expression",
1452 "try_expression",
1453 ],
1454 Language::Scala => &[
1455 "val_definition",
1456 "var_definition",
1457 "if_expression",
1458 "for_expression",
1459 "while_expression",
1460 "return_expression",
1461 "throw_expression",
1462 "try_expression",
1463 "call_expression",
1464 ],
1465 Language::Swift => &[
1466 "value_binding_pattern",
1467 "if_statement",
1468 "for_in_statement",
1469 "while_statement",
1470 "return_statement",
1471 "throw_statement",
1472 "guard_statement",
1473 "switch_statement",
1474 ],
1475 Language::Elixir => &["call", "if", "case", "cond"],
1476 Language::Lua | Language::Luau => &[
1477 "return_statement",
1478 "if_statement",
1479 "for_statement",
1480 "while_statement",
1481 "variable_declaration",
1482 "assignment_statement",
1483 "function_call",
1484 ],
1485 Language::Ocaml => &[
1486 "let_binding",
1487 "if_expression",
1488 "match_expression",
1489 "application",
1490 ],
1491 }
1492}
1493
/// A node of the statement tree built for one function.
///
/// Only nodes whose kind is in the language's statement-kind list become
/// tree nodes; everything in between is skipped and its statement
/// descendants are attached to the nearest statement ancestor.
#[derive(Debug, Clone)]
struct LabeledTreeNode {
    /// Comparison label: `"<kind>:<first line of text>"` for statement nodes,
    /// or `"body:<function kind>"` for the synthetic root of a function body.
    label: String,
    /// Nested statement nodes, in source order.
    children: Vec<LabeledTreeNode>,
    /// 1-based line on which the underlying syntax node starts.
    line: u32,
}
1504
/// One entry of a statement tree flattened in post-order (children before
/// their parent).
#[derive(Debug, Clone)]
struct PostorderNode {
    /// Label copied from the originating `LabeledTreeNode`.
    label: String,
    /// 1-based source line copied from the originating `LabeledTreeNode`.
    line: u32,
    /// Post-order index of the leftmost leaf below this node; for a leaf this
    /// is the node's own index.
    leftmost_leaf: usize,
}
1513
/// A single step of an edit script between two post-order node sequences.
///
/// `index_a` indexes the "old" sequence and `index_b` the "new" sequence.
#[derive(Debug, Clone)]
enum EditOp {
    /// The node at `index_a` exists only in the old sequence.
    Delete { index_a: usize },
    /// The node at `index_b` exists only in the new sequence.
    Insert { index_b: usize },
    /// The node at `index_a` is paired with the node at `index_b`, but their
    /// labels differ.
    Relabel { index_a: usize, index_b: usize },
}
1524
1525fn build_labeled_tree(node: Node, source: &[u8], statement_kinds: &[&str]) -> LabeledTreeNode {
1531 let label = build_node_label(node, source);
1532 let line = node.start_position().row as u32 + 1;
1533
1534 let mut children = Vec::new();
1535 let mut cursor = node.walk();
1536 for child in node.children(&mut cursor) {
1537 if statement_kinds.contains(&child.kind()) {
1538 children.push(build_labeled_tree(child, source, statement_kinds));
1540 } else {
1541 let nested = collect_nested_statements(child, source, statement_kinds);
1543 children.extend(nested);
1544 }
1545 }
1546
1547 LabeledTreeNode {
1548 label,
1549 children,
1550 line,
1551 }
1552}
1553
1554fn collect_nested_statements(
1556 node: Node,
1557 source: &[u8],
1558 statement_kinds: &[&str],
1559) -> Vec<LabeledTreeNode> {
1560 let mut result = Vec::new();
1561 let mut cursor = node.walk();
1562 for child in node.children(&mut cursor) {
1563 if statement_kinds.contains(&child.kind()) {
1564 result.push(build_labeled_tree(child, source, statement_kinds));
1565 } else {
1566 result.extend(collect_nested_statements(child, source, statement_kinds));
1567 }
1568 }
1569 result
1570}
1571
1572fn build_node_label(node: Node, source: &[u8]) -> String {
1577 let kind = node.kind();
1578 let text = node.utf8_text(source).unwrap_or("");
1579
1580 let first_line = text.lines().next().unwrap_or("").trim();
1583
1584 let significant = if first_line.len() > 120 {
1586 &first_line[..120]
1587 } else {
1588 first_line
1589 };
1590
1591 format!("{}:{}", kind, significant)
1592}
1593
1594fn extract_statement_tree(
1599 func_node: Node,
1600 source: &[u8],
1601 lang: Language,
1602 statement_kinds: &[&str],
1603) -> LabeledTreeNode {
1604 let body_node = find_function_body(func_node, lang);
1606
1607 match body_node {
1608 Some(body) => {
1609 let mut children = Vec::new();
1611 let mut cursor = body.walk();
1612 for child in body.children(&mut cursor) {
1613 if statement_kinds.contains(&child.kind()) {
1614 children.push(build_labeled_tree(child, source, statement_kinds));
1615 } else {
1616 children.extend(collect_nested_statements(child, source, statement_kinds));
1617 }
1618 }
1619
1620 LabeledTreeNode {
1621 label: format!("body:{}", func_node.kind()),
1622 children,
1623 line: body.start_position().row as u32 + 1,
1624 }
1625 }
1626 None => {
1627 build_labeled_tree(func_node, source, statement_kinds)
1629 }
1630 }
1631}
1632
1633fn find_function_body(func_node: Node, lang: Language) -> Option<Node> {
1635 if let Some(body) = func_node.child_by_field_name("body") {
1637 return Some(body);
1638 }
1639 if let Some(body) = func_node.child_by_field_name("block") {
1640 return Some(body);
1641 }
1642
1643 let body_kinds = match lang {
1645 Language::Python => &["block"][..],
1646 Language::TypeScript | Language::JavaScript => &["statement_block"],
1647 Language::Go => &["block"],
1648 Language::Rust => &["block"],
1649 Language::Java => &["block"],
1650 Language::C | Language::Cpp => &["compound_statement"],
1651 Language::Ruby => &["body_statement"],
1652 Language::Php => &["compound_statement"],
1653 Language::CSharp => &["block"],
1654 Language::Kotlin => &["function_body"],
1655 Language::Scala => &["block", "indented_block"],
1656 Language::Swift => &["function_body"],
1657 Language::Elixir => &["do_block"],
1658 Language::Lua | Language::Luau => &["block"],
1659 Language::Ocaml => &["let_binding"],
1660 };
1661
1662 let mut cursor = func_node.walk();
1663 let found = func_node
1664 .children(&mut cursor)
1665 .find(|&child| body_kinds.contains(&child.kind()));
1666 found
1667}
1668
1669fn count_tree_nodes(tree: &LabeledTreeNode) -> usize {
1671 1 + tree.children.iter().map(count_tree_nodes).sum::<usize>()
1672}
1673
1674fn flatten_postorder(tree: &LabeledTreeNode) -> Vec<PostorderNode> {
1680 let mut nodes = Vec::new();
1681 flatten_postorder_recursive(tree, &mut nodes);
1682 nodes
1683}
1684
1685fn flatten_postorder_recursive(tree: &LabeledTreeNode, nodes: &mut Vec<PostorderNode>) -> usize {
1686 if tree.children.is_empty() {
1687 let idx = nodes.len();
1689 nodes.push(PostorderNode {
1690 label: tree.label.clone(),
1691 line: tree.line,
1692 leftmost_leaf: idx,
1693 });
1694 return idx;
1695 }
1696
1697 let mut first_child_leftmost = usize::MAX;
1699 for (i, child) in tree.children.iter().enumerate() {
1700 let child_leftmost = flatten_postorder_recursive(child, nodes);
1701 if i == 0 {
1702 first_child_leftmost = child_leftmost;
1703 }
1704 }
1705
1706 nodes.push(PostorderNode {
1708 label: tree.label.clone(),
1709 line: tree.line,
1710 leftmost_leaf: first_child_leftmost,
1711 });
1712
1713 first_child_leftmost
1715}
1716
1717fn compute_keyroots(nodes: &[PostorderNode]) -> Vec<usize> {
1723 let n = nodes.len();
1724 if n == 0 {
1725 return Vec::new();
1726 }
1727
1728 let mut lr_map: HashMap<usize, usize> = HashMap::new();
1730 for (i, node) in nodes.iter().enumerate() {
1731 lr_map.insert(node.leftmost_leaf, i);
1732 }
1733
1734 let mut keyroots: Vec<usize> = lr_map.into_values().collect();
1735 keyroots.sort();
1736 keyroots
1737}
1738
/// Produces an edit script that turns the post-order sequence `nodes_a` into
/// `nodes_b`.
///
/// If either side is empty the script is pure inserts or pure deletes.
/// Otherwise the function fills Zhang-Shasha style tree-distance tables over
/// all keyroot pairs and then obtains the returned operations from
/// `derive_edit_ops_dp`.
///
/// NOTE(review): `td` and `td_ops` are never read after the keyroot loops.
/// The returned operations come only from `derive_edit_ops_dp`, which receives
/// just the two node slices, so the tables do not influence the result.
/// Confirm whether a tree-aware backtrace was intended.
fn zhang_shasha(nodes_a: &[PostorderNode], nodes_b: &[PostorderNode]) -> Vec<EditOp> {
    let na = nodes_a.len();
    let nb = nodes_b.len();

    // Trivial cases: nothing to compare, or one side is empty.
    if na == 0 && nb == 0 {
        return Vec::new();
    }
    if na == 0 {
        return (0..nb).map(|j| EditOp::Insert { index_b: j }).collect();
    }
    if nb == 0 {
        return (0..na).map(|i| EditOp::Delete { index_a: i }).collect();
    }

    let keyroots_a = compute_keyroots(nodes_a);
    let keyroots_b = compute_keyroots(nodes_b);

    // td[i + 1][j + 1]: distance recorded for the subtrees rooted at post-order
    // indices i and j.
    let mut td = vec![vec![0usize; nb + 1]; na + 1];
    // td_ops uses the same indexing and stores the operation chosen for the
    // cell: 0 = labels match, 1 = delete, 2 = insert, 3 = relabel.
    let mut td_ops = vec![vec![0u8; nb + 1]; na + 1];

    for &kr_a in &keyroots_a {
        for &kr_b in &keyroots_b {
            // Leftmost leaves delimit the post-order ranges covered by the two
            // keyroot subtrees.
            let la = nodes_a[kr_a].leftmost_leaf;
            let lb = nodes_b[kr_b].leftmost_leaf;

            // One extra row/column for the empty-forest prefix.
            let rows = kr_a - la + 2;
            let cols = kr_b - lb + 2;
            let mut fd = vec![vec![0usize; cols]; rows];

            // Borders: deleting or inserting every node of a prefix.
            for i in 1..rows {
                fd[i][0] = fd[i - 1][0] + 1;
            }
            for j in 1..cols {
                fd[0][j] = fd[0][j - 1] + 1;
            }

            for i in 1..rows {
                for j in 1..cols {
                    // Map forest-table coordinates back to post-order indices.
                    let idx_a = la + i - 1;
                    let idx_b = lb + j - 1;

                    let cost_relabel = if nodes_a[idx_a].label == nodes_b[idx_b].label {
                        0
                    } else {
                        1
                    };

                    if nodes_a[idx_a].leftmost_leaf == la && nodes_b[idx_b].leftmost_leaf == lb {
                        // Both prefixes are whole subtrees: the cell value is
                        // also stored in `td`.
                        let delete = fd[i - 1][j] + 1;
                        let insert = fd[i][j - 1] + 1;
                        let relabel = fd[i - 1][j - 1] + cost_relabel;

                        // Ties prefer relabel/match, then delete, then insert.
                        if relabel <= delete && relabel <= insert {
                            fd[i][j] = relabel;
                            td[idx_a + 1][idx_b + 1] = relabel;
                            td_ops[idx_a + 1][idx_b + 1] = if cost_relabel == 0 { 0 } else { 3 };
                        } else if delete <= insert {
                            fd[i][j] = delete;
                            td[idx_a + 1][idx_b + 1] = delete;
                            td_ops[idx_a + 1][idx_b + 1] = 1;
                        } else {
                            fd[i][j] = insert;
                            td[idx_a + 1][idx_b + 1] = insert;
                            td_ops[idx_a + 1][idx_b + 1] = 2;
                        }
                    } else {
                        // General forest case: combine the forests to the left
                        // of the two subtrees with the stored `td` value.
                        let p = nodes_a[idx_a].leftmost_leaf - la;
                        let q = nodes_b[idx_b].leftmost_leaf - lb;

                        let delete = fd[i - 1][j] + 1;
                        let insert = fd[i][j - 1] + 1;
                        let tree_match = fd[p][q] + td[idx_a + 1][idx_b + 1];

                        if tree_match <= delete && tree_match <= insert {
                            fd[i][j] = tree_match;
                        } else if delete <= insert {
                            fd[i][j] = delete;
                        } else {
                            fd[i][j] = insert;
                        }
                    }
                }
            }
        }
    }

    // The edit script itself is derived separately from the two sequences.
    let mut ops = Vec::new();
    derive_edit_ops_dp(nodes_a, nodes_b, &mut ops);
    ops
}
1841
1842fn derive_edit_ops_dp(nodes_a: &[PostorderNode], nodes_b: &[PostorderNode], ops: &mut Vec<EditOp>) {
1847 let na = nodes_a.len();
1848 let nb = nodes_b.len();
1849
1850 let mut dp = vec![vec![0usize; nb + 1]; na + 1];
1851 let mut choice = vec![vec![0u8; nb + 1]; na + 1];
1852
1853 for i in 1..=na {
1854 dp[i][0] = i;
1855 choice[i][0] = 1;
1856 }
1857 for j in 1..=nb {
1858 dp[0][j] = j;
1859 choice[0][j] = 2;
1860 }
1861
1862 for i in 1..=na {
1863 for j in 1..=nb {
1864 let cost = if nodes_a[i - 1].label == nodes_b[j - 1].label {
1865 0
1866 } else {
1867 1
1868 };
1869
1870 let del = dp[i - 1][j] + 1;
1871 let ins = dp[i][j - 1] + 1;
1872 let sub = dp[i - 1][j - 1] + cost;
1873
1874 if sub <= del && sub <= ins {
1875 dp[i][j] = sub;
1876 choice[i][j] = if cost == 0 { 0 } else { 3 };
1877 } else if del <= ins {
1878 dp[i][j] = del;
1879 choice[i][j] = 1;
1880 } else {
1881 dp[i][j] = ins;
1882 choice[i][j] = 2;
1883 }
1884 }
1885 }
1886
1887 let mut i = na;
1889 let mut j = nb;
1890 let mut rev_ops = Vec::new();
1891
1892 while i > 0 || j > 0 {
1893 if i > 0 && j > 0 && (choice[i][j] == 0 || choice[i][j] == 3) {
1894 if choice[i][j] == 3 {
1895 rev_ops.push(EditOp::Relabel {
1896 index_a: i - 1,
1897 index_b: j - 1,
1898 });
1899 }
1900 i -= 1;
1901 j -= 1;
1902 } else if i > 0 && (j == 0 || choice[i][j] == 1) {
1903 rev_ops.push(EditOp::Delete { index_a: i - 1 });
1904 i -= 1;
1905 } else if j > 0 {
1906 rev_ops.push(EditOp::Insert { index_b: j - 1 });
1907 j -= 1;
1908 }
1909 }
1910
1911 rev_ops.reverse();
1912 ops.extend(rev_ops);
1913}
1914
1915fn edit_ops_to_ast_changes(
1917 ops: &[EditOp],
1918 nodes_a: &[PostorderNode],
1919 nodes_b: &[PostorderNode],
1920 file_a: &Path,
1921 file_b: &Path,
1922) -> Vec<ASTChange> {
1923 let mut changes = Vec::new();
1924
1925 for op in ops {
1926 match op {
1927 EditOp::Delete { index_a } => {
1928 let node = &nodes_a[*index_a];
1929 let stmt_kind = node.label.split(':').next().unwrap_or("statement");
1930 changes.push(ASTChange {
1931 change_type: ChangeType::Delete,
1932 node_kind: NodeKind::Statement,
1933 name: Some(stmt_kind.to_string()),
1934 old_location: Some(Location::new(file_a.display().to_string(), node.line)),
1935 new_location: None,
1936 old_text: Some(node.label.clone()),
1937 new_text: None,
1938 similarity: None,
1939 children: None,
1940 base_changes: None,
1941 });
1942 }
1943 EditOp::Insert { index_b } => {
1944 let node = &nodes_b[*index_b];
1945 let stmt_kind = node.label.split(':').next().unwrap_or("statement");
1946 changes.push(ASTChange {
1947 change_type: ChangeType::Insert,
1948 node_kind: NodeKind::Statement,
1949 name: Some(stmt_kind.to_string()),
1950 old_location: None,
1951 new_location: Some(Location::new(file_b.display().to_string(), node.line)),
1952 old_text: None,
1953 new_text: Some(node.label.clone()),
1954 similarity: None,
1955 children: None,
1956 base_changes: None,
1957 });
1958 }
1959 EditOp::Relabel { index_a, index_b } => {
1960 let node_a = &nodes_a[*index_a];
1961 let node_b = &nodes_b[*index_b];
1962 let stmt_kind = node_a.label.split(':').next().unwrap_or("statement");
1963 changes.push(ASTChange {
1964 change_type: ChangeType::Update,
1965 node_kind: NodeKind::Statement,
1966 name: Some(stmt_kind.to_string()),
1967 old_location: Some(Location::new(file_a.display().to_string(), node_a.line)),
1968 new_location: Some(Location::new(file_b.display().to_string(), node_b.line)),
1969 old_text: Some(node_a.label.clone()),
1970 new_text: Some(node_b.label.clone()),
1971 similarity: None,
1972 children: None,
1973 base_changes: None,
1974 });
1975 }
1976 }
1977 }
1978
1979 changes
1980}
1981
/// Maximum number of statement-tree nodes per function for which a
/// statement-level edit script is computed. When either version of a function
/// exceeds this, the function is still reported as updated but without
/// per-statement children.
const STATEMENT_FALLBACK_THRESHOLD: usize = 200;
1984
1985impl DiffArgs {
1986 fn run_statement_level_diff(&self) -> Result<DiffReport> {
1998 let lang = Language::from_path(&self.file_a).ok_or_else(|| {
2000 let ext = self
2001 .file_a
2002 .extension()
2003 .map(|e| e.to_string_lossy().to_string())
2004 .unwrap_or_else(|| "unknown".to_string());
2005 RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
2006 })?;
2007
2008 let source_a = fs::read_to_string(&self.file_a)?;
2010 let source_b = fs::read_to_string(&self.file_b)?;
2011
2012 let pool = ParserPool::new();
2014 let tree_a = pool.parse(&source_a, lang).map_err(|e| {
2015 RemainingError::parse_error(&self.file_a, format!("Failed to parse: {}", e))
2016 })?;
2017 let tree_b = pool.parse(&source_b, lang).map_err(|e| {
2018 RemainingError::parse_error(&self.file_b, format!("Failed to parse: {}", e))
2019 })?;
2020
2021 let funcs_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
2023 let funcs_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
2024
2025 let statement_kinds = get_statement_node_kinds(lang);
2026
2027 let map_b: HashMap<&str, (usize, &ExtractedNode)> = funcs_b
2029 .iter()
2030 .enumerate()
2031 .map(|(i, n)| (n.name.as_str(), (i, n)))
2032 .collect();
2033
2034 let mut matched_a: Vec<bool> = vec![false; funcs_a.len()];
2035 let mut matched_b: Vec<bool> = vec![false; funcs_b.len()];
2036 let mut changes = Vec::new();
2037
2038 for (i, func_a) in funcs_a.iter().enumerate() {
2040 if let Some(&(j, func_b)) = map_b.get(func_a.name.as_str()) {
2041 matched_a[i] = true;
2042 matched_b[j] = true;
2043
2044 if func_a.normalized_body != func_b.normalized_body {
2046 let func_node_a =
2048 find_function_node_by_line(tree_a.root_node(), func_a.line, lang);
2049 let func_node_b =
2050 find_function_node_by_line(tree_b.root_node(), func_b.line, lang);
2051
2052 let stmt_children = match (func_node_a, func_node_b) {
2053 (Some(node_a), Some(node_b)) => {
2054 let tree_a_stmts = extract_statement_tree(
2056 node_a,
2057 source_a.as_bytes(),
2058 lang,
2059 statement_kinds,
2060 );
2061 let tree_b_stmts = extract_statement_tree(
2062 node_b,
2063 source_b.as_bytes(),
2064 lang,
2065 statement_kinds,
2066 );
2067
2068 let count_a = count_tree_nodes(&tree_a_stmts);
2069 let count_b = count_tree_nodes(&tree_b_stmts);
2070
2071 if count_a > STATEMENT_FALLBACK_THRESHOLD
2073 || count_b > STATEMENT_FALLBACK_THRESHOLD
2074 {
2075 None
2077 } else {
2078 let po_a = flatten_postorder(&tree_a_stmts);
2080 let po_b = flatten_postorder(&tree_b_stmts);
2081
2082 let edit_ops = zhang_shasha(&po_a, &po_b);
2083
2084 if edit_ops.is_empty() {
2085 None
2086 } else {
2087 let stmt_changes = edit_ops_to_ast_changes(
2088 &edit_ops,
2089 &po_a,
2090 &po_b,
2091 &self.file_a,
2092 &self.file_b,
2093 );
2094 if stmt_changes.is_empty() {
2095 None
2096 } else {
2097 Some(stmt_changes)
2098 }
2099 }
2100 }
2101 }
2102 _ => None,
2103 };
2104
2105 changes.push(ASTChange {
2106 change_type: ChangeType::Update,
2107 node_kind: func_a.kind,
2108 name: Some(func_a.name.clone()),
2109 old_location: Some(Location::with_column(
2110 self.file_a.display().to_string(),
2111 func_a.line,
2112 func_a.column,
2113 )),
2114 new_location: Some(Location::with_column(
2115 self.file_b.display().to_string(),
2116 func_b.line,
2117 func_b.column,
2118 )),
2119 old_text: Some(func_a.body.clone()),
2120 new_text: Some(func_b.body.clone()),
2121 similarity: Some(compute_similarity(
2122 &func_a.normalized_body,
2123 &func_b.normalized_body,
2124 )),
2125 children: stmt_children,
2126 base_changes: None,
2127 });
2128 }
2129 }
2130 }
2131
2132 let unmatched_a: Vec<(usize, &ExtractedNode)> = funcs_a
2134 .iter()
2135 .enumerate()
2136 .filter(|(i, _)| !matched_a[*i])
2137 .collect();
2138 let unmatched_b: Vec<(usize, &ExtractedNode)> = funcs_b
2139 .iter()
2140 .enumerate()
2141 .filter(|(i, _)| !matched_b[*i])
2142 .collect();
2143
2144 let mut used_b = vec![false; unmatched_b.len()];
2145
2146 for (_, func_a) in &unmatched_a {
2147 let mut best_match: Option<(usize, f64)> = None;
2148 for (j, (_, func_b)) in unmatched_b.iter().enumerate() {
2149 if used_b[j] || func_a.kind != func_b.kind {
2150 continue;
2151 }
2152 let sim = compute_similarity(&func_a.normalized_body, &func_b.normalized_body);
2153 if sim >= RENAME_SIMILARITY_THRESHOLD
2154 && (best_match.is_none() || sim > best_match.unwrap().1)
2155 {
2156 best_match = Some((j, sim));
2157 }
2158 }
2159
2160 if let Some((j, sim)) = best_match {
2161 let (_, func_b) = unmatched_b[j];
2162 used_b[j] = true;
2163 changes.push(ASTChange {
2164 change_type: ChangeType::Rename,
2165 node_kind: func_a.kind,
2166 name: Some(func_a.name.clone()),
2167 old_location: Some(Location::with_column(
2168 self.file_a.display().to_string(),
2169 func_a.line,
2170 func_a.column,
2171 )),
2172 new_location: Some(Location::with_column(
2173 self.file_b.display().to_string(),
2174 func_b.line,
2175 func_b.column,
2176 )),
2177 old_text: Some(func_a.name.clone()),
2178 new_text: Some(func_b.name.clone()),
2179 similarity: Some(sim),
2180 children: None,
2181 base_changes: None,
2182 });
2183 }
2184 }
2185
2186 for (_, func_a) in &unmatched_a {
2188 let is_renamed = changes.iter().any(|c| {
2189 c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&func_a.name)
2190 });
2191 if !is_renamed {
2192 changes.push(ASTChange {
2193 change_type: ChangeType::Delete,
2194 node_kind: func_a.kind,
2195 name: Some(func_a.name.clone()),
2196 old_location: Some(Location::with_column(
2197 self.file_a.display().to_string(),
2198 func_a.line,
2199 func_a.column,
2200 )),
2201 new_location: None,
2202 old_text: None,
2203 new_text: None,
2204 similarity: None,
2205 children: None,
2206 base_changes: None,
2207 });
2208 }
2209 }
2210
2211 for (j, (_, func_b)) in unmatched_b.iter().enumerate() {
2213 if !used_b[j] {
2214 changes.push(ASTChange {
2215 change_type: ChangeType::Insert,
2216 node_kind: func_b.kind,
2217 name: Some(func_b.name.clone()),
2218 old_location: None,
2219 new_location: Some(Location::with_column(
2220 self.file_b.display().to_string(),
2221 func_b.line,
2222 func_b.column,
2223 )),
2224 old_text: None,
2225 new_text: None,
2226 similarity: None,
2227 children: None,
2228 base_changes: None,
2229 });
2230 }
2231 }
2232
2233 let mut summary = DiffSummary::default();
2235 for change in &changes {
2236 summary.total_changes += 1;
2237 if change.change_type != ChangeType::Format {
2238 summary.semantic_changes += 1;
2239 }
2240 match change.change_type {
2241 ChangeType::Insert => summary.inserts += 1,
2242 ChangeType::Delete => summary.deletes += 1,
2243 ChangeType::Update => summary.updates += 1,
2244 ChangeType::Move => summary.moves += 1,
2245 ChangeType::Rename => summary.renames += 1,
2246 ChangeType::Format => summary.formats += 1,
2247 ChangeType::Extract => summary.extracts += 1,
2248 ChangeType::Inline => {}
2249 }
2250 }
2251
2252 changes.sort_by_key(|c| match c.change_type {
2254 ChangeType::Delete => 0,
2255 ChangeType::Rename => 1,
2256 ChangeType::Update => 2,
2257 ChangeType::Move => 3,
2258 ChangeType::Insert => 4,
2259 _ => 5,
2260 });
2261
2262 Ok(DiffReport {
2263 file_a: self.file_a.display().to_string(),
2264 file_b: self.file_b.display().to_string(),
2265 identical: changes.is_empty(),
2266 changes,
2267 summary: Some(summary),
2268 granularity: DiffGranularity::Statement,
2269 file_changes: None,
2270 module_changes: None,
2271 import_graph_summary: None,
2272 arch_changes: None,
2273 arch_summary: None,
2274 })
2275 }
2276}
2277
2278fn find_function_node_by_line(root: Node, target_line: u32, lang: Language) -> Option<Node> {
2280 let func_kinds = get_function_node_kinds(lang);
2281 find_function_node_recursive(root, target_line, func_kinds)
2282}
2283
2284fn find_function_node_recursive<'a>(
2285 node: Node<'a>,
2286 target_line: u32,
2287 func_kinds: &[&str],
2288) -> Option<Node<'a>> {
2289 let line = node.start_position().row as u32 + 1;
2290
2291 if func_kinds.contains(&node.kind()) && line == target_line {
2292 return Some(node);
2293 }
2294
2295 let mut cursor = node.walk();
2296 for child in node.children(&mut cursor) {
2297 if let Some(found) = find_function_node_recursive(child, target_line, func_kinds) {
2298 return Some(found);
2299 }
2300 }
2301
2302 None
2303}
2304
/// A class-like declaration extracted from a syntax tree, together with its
/// members, used for class-level diffing.
#[derive(Debug, Clone)]
struct ClassNode {
    /// Declared name of the class.
    name: String,
    /// 1-based line on which the declaration starts.
    line: u32,
    /// 1-based line on which the declaration ends.
    end_line: u32,
    /// Start column of the declaration, as reported by the parser.
    column: u32,
    /// Full source text of the declaration.
    body: String,
    /// `body` after `normalize_body`.
    normalized_body: String,
    /// Methods belonging to the class. For Go these include top-level
    /// `method_declaration`s whose receiver type names this class.
    methods: Vec<ExtractedNode>,
    /// Fields belonging to the class.
    fields: Vec<FieldNode>,
    /// Base types as returned by `extract_bases`.
    bases: Vec<String>,
}
2331
/// A field of a `ClassNode`.
// NOTE(review): the code that populates this struct is not in this part of
// the file; the per-field notes assume it mirrors how `ClassNode` is built.
// Confirm.
#[derive(Debug, Clone)]
struct FieldNode {
    /// Field name.
    name: String,
    /// Source line of the field (presumably 1-based, like `ClassNode::line`).
    line: u32,
    /// Start column of the field.
    column: u32,
    /// Source text of the field.
    body: String,
    /// Normalized form of `body` used for comparison.
    normalized_body: String,
}
2346
2347pub fn run_class_diff(file_a: &Path, file_b: &Path, semantic_only: bool) -> Result<DiffReport> {
2352 if !file_a.exists() {
2354 return Err(RemainingError::file_not_found(file_a).into());
2355 }
2356 if !file_b.exists() {
2357 return Err(RemainingError::file_not_found(file_b).into());
2358 }
2359
2360 let lang = Language::from_path(file_a).ok_or_else(|| {
2362 let ext = file_a
2363 .extension()
2364 .map(|e| e.to_string_lossy().to_string())
2365 .unwrap_or_else(|| "unknown".to_string());
2366 RemainingError::parse_error(file_a, format!("Unsupported language: .{}", ext))
2367 })?;
2368
2369 let source_a = fs::read_to_string(file_a)?;
2371 let source_b = fs::read_to_string(file_b)?;
2372
2373 let pool = ParserPool::new();
2375 let tree_a = pool
2376 .parse(&source_a, lang)
2377 .map_err(|e| RemainingError::parse_error(file_a, format!("Failed to parse file: {}", e)))?;
2378 let tree_b = pool
2379 .parse(&source_b, lang)
2380 .map_err(|e| RemainingError::parse_error(file_b, format!("Failed to parse file: {}", e)))?;
2381
2382 let classes_a = extract_class_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
2384 let classes_b = extract_class_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
2385
2386 let changes = detect_class_changes(&classes_a, &classes_b, file_a, file_b, semantic_only);
2388
2389 let mut summary = DiffSummary::default();
2391 for change in &changes {
2392 summary.total_changes += 1;
2393 if change.change_type != ChangeType::Format {
2394 summary.semantic_changes += 1;
2395 }
2396 match change.change_type {
2397 ChangeType::Insert => summary.inserts += 1,
2398 ChangeType::Delete => summary.deletes += 1,
2399 ChangeType::Update => summary.updates += 1,
2400 ChangeType::Move => summary.moves += 1,
2401 ChangeType::Rename => summary.renames += 1,
2402 ChangeType::Format => summary.formats += 1,
2403 ChangeType::Extract => summary.extracts += 1,
2404 ChangeType::Inline => {}
2405 }
2406 }
2407
2408 let report = DiffReport {
2409 file_a: file_a.display().to_string(),
2410 file_b: file_b.display().to_string(),
2411 identical: changes.is_empty(),
2412 changes,
2413 summary: Some(summary),
2414 granularity: DiffGranularity::Class,
2415 file_changes: None,
2416 module_changes: None,
2417 import_graph_summary: None,
2418 arch_changes: None,
2419 arch_summary: None,
2420 };
2421
2422 Ok(report)
2423}
2424
2425fn run_class_diff_directory(dir_a: &Path, dir_b: &Path, semantic_only: bool) -> Result<DiffReport> {
2428 let files_a = collect_source_files(dir_a)?;
2429 let files_b = collect_source_files(dir_b)?;
2430
2431 let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
2432 let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
2433
2434 let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
2435
2436 let mut all_changes = Vec::new();
2437
2438 for rel_path in all_paths {
2439 match (map_a.get(rel_path), map_b.get(rel_path)) {
2440 (Some(path_a), Some(path_b)) => {
2441 match run_class_diff(path_a, path_b, semantic_only) {
2443 Ok(sub_report) => all_changes.extend(sub_report.changes),
2444 Err(_) => continue, }
2446 }
2447 (None, Some(_)) | (Some(_), None) => {
2448 continue;
2450 }
2451 (None, None) => unreachable!(),
2452 }
2453 }
2454
2455 let mut summary = DiffSummary::default();
2456 for change in &all_changes {
2457 summary.total_changes += 1;
2458 if change.change_type != ChangeType::Format {
2459 summary.semantic_changes += 1;
2460 }
2461 match change.change_type {
2462 ChangeType::Insert => summary.inserts += 1,
2463 ChangeType::Delete => summary.deletes += 1,
2464 ChangeType::Update => summary.updates += 1,
2465 ChangeType::Move => summary.moves += 1,
2466 ChangeType::Rename => summary.renames += 1,
2467 ChangeType::Format => summary.formats += 1,
2468 ChangeType::Extract => summary.extracts += 1,
2469 ChangeType::Inline => {}
2470 }
2471 }
2472
2473 Ok(DiffReport {
2474 file_a: dir_a.display().to_string(),
2475 file_b: dir_b.display().to_string(),
2476 identical: all_changes.is_empty(),
2477 changes: all_changes,
2478 summary: Some(summary),
2479 granularity: DiffGranularity::Class,
2480 file_changes: None,
2481 module_changes: None,
2482 import_graph_summary: None,
2483 arch_changes: None,
2484 arch_summary: None,
2485 })
2486}
2487
2488fn extract_class_nodes(root: Node, source: &[u8], lang: Language) -> Vec<ClassNode> {
2490 let mut classes = Vec::new();
2491 let class_kinds = get_class_node_kinds(lang);
2492 let func_kinds = get_function_node_kinds(lang);
2493 let body_kinds = get_class_body_kinds(lang);
2494
2495 extract_class_nodes_recursive(
2496 root,
2497 source,
2498 &mut classes,
2499 lang,
2500 func_kinds,
2501 class_kinds,
2502 body_kinds,
2503 );
2504
2505 if lang == Language::Go {
2508 associate_go_receiver_methods(root, source, lang, &mut classes);
2509 }
2510
2511 classes
2512}
2513
2514fn associate_go_receiver_methods(
2517 root: Node,
2518 source: &[u8],
2519 lang: Language,
2520 classes: &mut [ClassNode],
2521) {
2522 let source_str = std::str::from_utf8(source).unwrap_or("");
2523 let mut cursor = root.walk();
2524 for child in root.children(&mut cursor) {
2525 if child.kind() != "method_declaration" {
2526 continue;
2527 }
2528 let receiver_type = match extract_go_receiver_type(child, source) {
2530 Some(name) => name,
2531 None => continue,
2532 };
2533
2534 let method_name = match get_function_name(child, lang, source_str) {
2536 Some(name) => name,
2537 None => continue,
2538 };
2539
2540 let params = child
2541 .child_by_field_name("parameters")
2542 .map(|p| node_text(p, source).to_string())
2543 .unwrap_or_default();
2544
2545 let line = child.start_position().row as u32 + 1;
2546 let end_line = child.end_position().row as u32 + 1;
2547 let column = child.start_position().column as u32;
2548 let body = node_text(child, source).to_string();
2549
2550 let extracted =
2551 ExtractedNode::new(method_name, NodeKind::Method, line, end_line, column, body)
2552 .with_params(params)
2553 .with_method_kind();
2554
2555 for class in classes.iter_mut() {
2557 if class.name == receiver_type {
2558 class.methods.push(extracted);
2559 break;
2560 }
2561 }
2562 }
2563}
2564
2565fn extract_go_receiver_type(method_node: Node, source: &[u8]) -> Option<String> {
2570 let receiver = method_node.child_by_field_name("receiver")?;
2572 let mut recv_cursor = receiver.walk();
2573 for recv_child in receiver.children(&mut recv_cursor) {
2574 if recv_child.kind() == "parameter_declaration" {
2575 if let Some(type_node) = recv_child.child_by_field_name("type") {
2576 return extract_go_type_identifier(type_node, source);
2577 }
2578 }
2579 }
2580 None
2581}
2582
2583fn extract_go_type_identifier(type_node: Node, source: &[u8]) -> Option<String> {
2586 match type_node.kind() {
2587 "type_identifier" => Some(node_text(type_node, source).to_string()),
2588 "pointer_type" => {
2589 let mut cursor = type_node.walk();
2591 for child in type_node.children(&mut cursor) {
2592 if child.is_named() {
2593 return extract_go_type_identifier(child, source);
2594 }
2595 }
2596 None
2597 }
2598 _ => None,
2599 }
2600}
2601
2602fn extract_class_nodes_recursive(
2603 node: Node,
2604 source: &[u8],
2605 classes: &mut Vec<ClassNode>,
2606 lang: Language,
2607 func_kinds: &[&str],
2608 class_kinds: &[&str],
2609 body_kinds: &[&str],
2610) {
2611 let kind = node.kind();
2612
2613 if class_kinds.contains(&kind) {
2614 if let Some(class_node) = build_class_node(node, source, lang, func_kinds, body_kinds) {
2615 classes.push(class_node);
2616 }
2617 return; }
2619
2620 for child in node.children(&mut node.walk()) {
2621 extract_class_nodes_recursive(
2622 child,
2623 source,
2624 classes,
2625 lang,
2626 func_kinds,
2627 class_kinds,
2628 body_kinds,
2629 );
2630 }
2631}
2632
2633fn build_class_node(
2635 node: Node,
2636 source: &[u8],
2637 lang: Language,
2638 func_kinds: &[&str],
2639 body_kinds: &[&str],
2640) -> Option<ClassNode> {
2641 let class_name = node
2643 .child_by_field_name("name")
2644 .map(|n| node_text(n, source).to_string())
2645 .or_else(|| {
2646 if lang == Language::Go && node.kind() == "type_declaration" {
2649 let mut cursor = node.walk();
2650 for child in node.children(&mut cursor) {
2651 if child.kind() == "type_spec" {
2652 if let Some(name_node) = child.child_by_field_name("name") {
2653 return Some(node_text(name_node, source).to_string());
2654 }
2655 }
2656 }
2657 }
2658 let mut cursor = node.walk();
2660 for child in node.children(&mut cursor) {
2661 if child.kind() == "identifier"
2662 || child.kind() == "type_identifier"
2663 || child.kind() == "constant"
2664 {
2665 return Some(node_text(child, source).to_string());
2666 }
2667 }
2668 None
2669 })?;
2670
2671 if class_name.is_empty() {
2672 return None;
2673 }
2674
2675 let line = node.start_position().row as u32 + 1;
2676 let end_line = node.end_position().row as u32 + 1;
2677 let column = node.start_position().column as u32;
2678 let body = node_text(node, source).to_string();
2679 let normalized_body = normalize_body(&body);
2680
2681 let bases = extract_bases(node, source, lang);
2683
2684 let mut methods = Vec::new();
2686 let mut fields = Vec::new();
2687
2688 for child in node.children(&mut node.walk()) {
2689 if body_kinds.contains(&child.kind()) {
2690 extract_class_members(child, source, lang, func_kinds, &mut methods, &mut fields);
2691 }
2692 }
2693
2694 Some(ClassNode {
2695 name: class_name,
2696 line,
2697 end_line,
2698 column,
2699 body,
2700 normalized_body,
2701 methods,
2702 fields,
2703 bases,
2704 })
2705}
2706
2707fn extract_bases(node: Node, source: &[u8], lang: Language) -> Vec<String> {
2709 let mut bases = Vec::new();
2710
2711 match lang {
2712 Language::Python => {
2713 if let Some(superclasses) = node.child_by_field_name("superclasses") {
2716 for child in superclasses.children(&mut superclasses.walk()) {
2717 let text = node_text(child, source).trim().to_string();
2718 if !text.is_empty() && text != "(" && text != ")" && text != "," {
2719 bases.push(text);
2720 }
2721 }
2722 }
2723 }
2724 _ => {
2725 }
2728 }
2729
2730 bases
2731}
2732
2733fn extract_class_members(
2735 body_node: Node,
2736 source: &[u8],
2737 lang: Language,
2738 func_kinds: &[&str],
2739 methods: &mut Vec<ExtractedNode>,
2740 fields: &mut Vec<FieldNode>,
2741) {
2742 for child in body_node.children(&mut body_node.walk()) {
2743 let kind = child.kind();
2744
2745 if func_kinds.contains(&kind) {
2747 let source_str = std::str::from_utf8(source).unwrap_or("");
2748 if let Some(func_name) = get_function_name(child, lang, source_str) {
2749 let params = child
2750 .child_by_field_name("parameters")
2751 .or_else(|| child.child_by_field_name("formal_parameters"))
2752 .map(|p| node_text(p, source).to_string())
2753 .unwrap_or_default();
2754
2755 let line = child.start_position().row as u32 + 1;
2756 let end_line = child.end_position().row as u32 + 1;
2757 let column = child.start_position().column as u32;
2758 let body = node_text(child, source).to_string();
2759
2760 let extracted =
2761 ExtractedNode::new(func_name, NodeKind::Method, line, end_line, column, body)
2762 .with_params(params)
2763 .with_method_kind();
2764
2765 methods.push(extracted);
2766 }
2767 }
2768 else if kind == "expression_statement" {
2770 if let Some(field) = extract_field_from_statement(child, source, lang) {
2771 fields.push(field);
2772 }
2773 }
2774 }
2775}
2776
2777fn extract_field_from_statement(node: Node, source: &[u8], _lang: Language) -> Option<FieldNode> {
2779 for child in node.children(&mut node.walk()) {
2781 if child.kind() == "assignment" {
2782 if let Some(left) = child.child_by_field_name("left") {
2784 let name = node_text(left, source).trim().to_string();
2785 if !name.is_empty() && !name.contains('.') {
2786 let line = node.start_position().row as u32 + 1;
2788 let column = node.start_position().column as u32;
2789 let body = node_text(node, source).to_string();
2790 let normalized_body = body.trim().to_string();
2791
2792 return Some(FieldNode {
2793 name,
2794 line,
2795 column,
2796 body,
2797 normalized_body,
2798 });
2799 }
2800 }
2801 }
2802 }
2803 None
2804}
2805
2806fn detect_class_changes(
2808 classes_a: &[ClassNode],
2809 classes_b: &[ClassNode],
2810 file_a: &Path,
2811 file_b: &Path,
2812 _semantic_only: bool,
2813) -> Vec<ASTChange> {
2814 let mut changes = Vec::new();
2815
2816 let map_b: HashMap<&str, &ClassNode> = classes_b.iter().map(|c| (c.name.as_str(), c)).collect();
2818
2819 let mut matched_a: Vec<bool> = vec![false; classes_a.len()];
2821 let mut matched_b: Vec<bool> = vec![false; classes_b.len()];
2822
2823 for (i, class_a) in classes_a.iter().enumerate() {
2825 let _ = class_a.end_line;
2826 let _ = &class_a.body;
2827 let _ = &class_a.normalized_body;
2828 if let Some(&class_b) = map_b.get(class_a.name.as_str()) {
2829 matched_a[i] = true;
2830 if let Some(j) = classes_b.iter().position(|c| c.name == class_a.name) {
2831 matched_b[j] = true;
2832 }
2833
2834 if let Some(change) = diff_class_pair(class_a, class_b, file_a, file_b) {
2836 changes.push(change);
2837 }
2838 }
2839 }
2840
2841 let unmatched_a: Vec<(usize, &ClassNode)> = classes_a
2843 .iter()
2844 .enumerate()
2845 .filter(|(i, _)| !matched_a[*i])
2846 .collect();
2847 let unmatched_b: Vec<(usize, &ClassNode)> = classes_b
2848 .iter()
2849 .enumerate()
2850 .filter(|(i, _)| !matched_b[*i])
2851 .collect();
2852
2853 let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
2855
2856 for (_, class_a) in &unmatched_a {
2857 let mut best_match: Option<(usize, f64)> = None;
2858
2859 for (j, (_, class_b)) in unmatched_b.iter().enumerate() {
2860 if used_b[j] {
2861 continue;
2862 }
2863
2864 let similarity = compute_class_similarity(class_a, class_b);
2865 if similarity >= RENAME_SIMILARITY_THRESHOLD
2866 && (best_match.is_none() || similarity > best_match.unwrap().1)
2867 {
2868 best_match = Some((j, similarity));
2869 }
2870 }
2871
2872 if let Some((j, similarity)) = best_match {
2873 let (_, class_b) = unmatched_b[j];
2874 used_b[j] = true;
2875
2876 changes.push(ASTChange {
2877 change_type: ChangeType::Rename,
2878 node_kind: NodeKind::Class,
2879 name: Some(class_a.name.clone()),
2880 old_location: Some(Location::with_column(
2881 file_a.display().to_string(),
2882 class_a.line,
2883 class_a.column,
2884 )),
2885 new_location: Some(Location::with_column(
2886 file_b.display().to_string(),
2887 class_b.line,
2888 class_b.column,
2889 )),
2890 old_text: Some(class_a.name.clone()),
2891 new_text: Some(class_b.name.clone()),
2892 similarity: Some(similarity),
2893 children: None,
2894 base_changes: None,
2895 });
2896 }
2897 }
2898
2899 for (_, class_a) in &unmatched_a {
2901 let is_renamed = changes
2902 .iter()
2903 .any(|c| c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&class_a.name));
2904 if !is_renamed {
2905 changes.push(ASTChange {
2906 change_type: ChangeType::Delete,
2907 node_kind: NodeKind::Class,
2908 name: Some(class_a.name.clone()),
2909 old_location: Some(Location::with_column(
2910 file_a.display().to_string(),
2911 class_a.line,
2912 class_a.column,
2913 )),
2914 new_location: None,
2915 old_text: None,
2916 new_text: None,
2917 similarity: None,
2918 children: None,
2919 base_changes: None,
2920 });
2921 }
2922 }
2923
2924 for (j, (_, class_b)) in unmatched_b.iter().enumerate() {
2926 if !used_b[j] {
2927 changes.push(ASTChange {
2928 change_type: ChangeType::Insert,
2929 node_kind: NodeKind::Class,
2930 name: Some(class_b.name.clone()),
2931 old_location: None,
2932 new_location: Some(Location::with_column(
2933 file_b.display().to_string(),
2934 class_b.line,
2935 class_b.column,
2936 )),
2937 old_text: None,
2938 new_text: None,
2939 similarity: None,
2940 children: None,
2941 base_changes: None,
2942 });
2943 }
2944 }
2945
2946 changes.sort_by_key(|c| match c.change_type {
2948 ChangeType::Delete => 0,
2949 ChangeType::Rename => 1,
2950 ChangeType::Update => 2,
2951 ChangeType::Move => 3,
2952 ChangeType::Insert => 4,
2953 _ => 5,
2954 });
2955
2956 changes
2957}
2958
2959fn diff_class_pair(
2961 class_a: &ClassNode,
2962 class_b: &ClassNode,
2963 file_a: &Path,
2964 file_b: &Path,
2965) -> Option<ASTChange> {
2966 let mut children = Vec::new();
2967 let mut has_changes = false;
2968
2969 diff_methods(
2971 &class_a.methods,
2972 &class_b.methods,
2973 file_a,
2974 file_b,
2975 &mut children,
2976 );
2977
2978 diff_fields(
2980 &class_a.fields,
2981 &class_b.fields,
2982 file_a,
2983 file_b,
2984 &mut children,
2985 );
2986
2987 let base_changes = diff_bases(&class_a.bases, &class_b.bases);
2989
2990 if !children.is_empty() {
2991 has_changes = true;
2992 }
2993 if base_changes.is_some() {
2994 has_changes = true;
2995 }
2996
2997 if !has_changes {
2998 return None; }
3000
3001 Some(ASTChange {
3002 change_type: ChangeType::Update,
3003 node_kind: NodeKind::Class,
3004 name: Some(class_a.name.clone()),
3005 old_location: Some(Location::with_column(
3006 file_a.display().to_string(),
3007 class_a.line,
3008 class_a.column,
3009 )),
3010 new_location: Some(Location::with_column(
3011 file_b.display().to_string(),
3012 class_b.line,
3013 class_b.column,
3014 )),
3015 old_text: None,
3016 new_text: None,
3017 similarity: None,
3018 children: if children.is_empty() {
3019 None
3020 } else {
3021 Some(children)
3022 },
3023 base_changes,
3024 })
3025}
3026
3027fn diff_methods(
3029 methods_a: &[ExtractedNode],
3030 methods_b: &[ExtractedNode],
3031 file_a: &Path,
3032 file_b: &Path,
3033 children: &mut Vec<ASTChange>,
3034) {
3035 let map_b: HashMap<&str, &ExtractedNode> =
3036 methods_b.iter().map(|m| (m.name.as_str(), m)).collect();
3037
3038 let mut matched_a: Vec<bool> = vec![false; methods_a.len()];
3039 let mut matched_b: Vec<bool> = vec![false; methods_b.len()];
3040
3041 for (i, method_a) in methods_a.iter().enumerate() {
3043 if let Some(&method_b) = map_b.get(method_a.name.as_str()) {
3044 matched_a[i] = true;
3045 if let Some(j) = methods_b.iter().position(|m| m.name == method_a.name) {
3046 matched_b[j] = true;
3047 }
3048
3049 if method_a.normalized_body != method_b.normalized_body {
3051 children.push(ASTChange {
3052 change_type: ChangeType::Update,
3053 node_kind: NodeKind::Method,
3054 name: Some(method_a.name.clone()),
3055 old_location: Some(Location::with_column(
3056 file_a.display().to_string(),
3057 method_a.line,
3058 method_a.column,
3059 )),
3060 new_location: Some(Location::with_column(
3061 file_b.display().to_string(),
3062 method_b.line,
3063 method_b.column,
3064 )),
3065 old_text: None,
3066 new_text: None,
3067 similarity: Some(compute_similarity(
3068 &method_a.normalized_body,
3069 &method_b.normalized_body,
3070 )),
3071 children: None,
3072 base_changes: None,
3073 });
3074 }
3075 }
3076 }
3077
3078 let unmatched_a: Vec<&ExtractedNode> = methods_a
3080 .iter()
3081 .enumerate()
3082 .filter(|(i, _)| !matched_a[*i])
3083 .map(|(_, m)| m)
3084 .collect();
3085 let unmatched_b: Vec<&ExtractedNode> = methods_b
3086 .iter()
3087 .enumerate()
3088 .filter(|(i, _)| !matched_b[*i])
3089 .map(|(_, m)| m)
3090 .collect();
3091
3092 let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
3094
3095 for method_a in &unmatched_a {
3096 let mut best_match: Option<(usize, f64)> = None;
3097
3098 for (j, method_b) in unmatched_b.iter().enumerate() {
3099 if used_b[j] {
3100 continue;
3101 }
3102 let similarity =
3103 compute_similarity(&method_a.normalized_body, &method_b.normalized_body);
3104 if similarity >= RENAME_SIMILARITY_THRESHOLD
3105 && (best_match.is_none() || similarity > best_match.unwrap().1)
3106 {
3107 best_match = Some((j, similarity));
3108 }
3109 }
3110
3111 if let Some((j, similarity)) = best_match {
3112 let method_b = unmatched_b[j];
3113 used_b[j] = true;
3114
3115 children.push(ASTChange {
3116 change_type: ChangeType::Rename,
3117 node_kind: NodeKind::Method,
3118 name: Some(method_a.name.clone()),
3119 old_location: Some(Location::with_column(
3120 file_a.display().to_string(),
3121 method_a.line,
3122 method_a.column,
3123 )),
3124 new_location: Some(Location::with_column(
3125 file_b.display().to_string(),
3126 method_b.line,
3127 method_b.column,
3128 )),
3129 old_text: Some(method_a.name.clone()),
3130 new_text: Some(method_b.name.clone()),
3131 similarity: Some(similarity),
3132 children: None,
3133 base_changes: None,
3134 });
3135 }
3136 }
3137
3138 for method_a in &unmatched_a {
3140 let is_renamed = children.iter().any(|c| {
3141 c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&method_a.name)
3142 });
3143 if !is_renamed {
3144 children.push(ASTChange {
3145 change_type: ChangeType::Delete,
3146 node_kind: NodeKind::Method,
3147 name: Some(method_a.name.clone()),
3148 old_location: Some(Location::with_column(
3149 file_a.display().to_string(),
3150 method_a.line,
3151 method_a.column,
3152 )),
3153 new_location: None,
3154 old_text: None,
3155 new_text: None,
3156 similarity: None,
3157 children: None,
3158 base_changes: None,
3159 });
3160 }
3161 }
3162
3163 for (j, method_b) in unmatched_b.iter().enumerate() {
3165 if !used_b[j] {
3166 children.push(ASTChange {
3167 change_type: ChangeType::Insert,
3168 node_kind: NodeKind::Method,
3169 name: Some(method_b.name.clone()),
3170 old_location: None,
3171 new_location: Some(Location::with_column(
3172 file_b.display().to_string(),
3173 method_b.line,
3174 method_b.column,
3175 )),
3176 old_text: None,
3177 new_text: None,
3178 similarity: None,
3179 children: None,
3180 base_changes: None,
3181 });
3182 }
3183 }
3184}
3185
3186fn diff_fields(
3188 fields_a: &[FieldNode],
3189 fields_b: &[FieldNode],
3190 file_a: &Path,
3191 file_b: &Path,
3192 children: &mut Vec<ASTChange>,
3193) {
3194 let map_b: HashMap<&str, &FieldNode> = fields_b.iter().map(|f| (f.name.as_str(), f)).collect();
3195
3196 let mut matched_a: Vec<bool> = vec![false; fields_a.len()];
3197 let mut matched_b: Vec<bool> = vec![false; fields_b.len()];
3198
3199 for (i, field_a) in fields_a.iter().enumerate() {
3201 if let Some(&field_b) = map_b.get(field_a.name.as_str()) {
3202 matched_a[i] = true;
3203 if let Some(j) = fields_b.iter().position(|f| f.name == field_a.name) {
3204 matched_b[j] = true;
3205 }
3206
3207 if field_a.normalized_body != field_b.normalized_body {
3209 children.push(ASTChange {
3210 change_type: ChangeType::Update,
3211 node_kind: NodeKind::Field,
3212 name: Some(field_a.name.clone()),
3213 old_location: Some(Location::with_column(
3214 file_a.display().to_string(),
3215 field_a.line,
3216 field_a.column,
3217 )),
3218 new_location: Some(Location::with_column(
3219 file_b.display().to_string(),
3220 field_b.line,
3221 field_b.column,
3222 )),
3223 old_text: Some(field_a.body.trim().to_string()),
3224 new_text: Some(field_b.body.trim().to_string()),
3225 similarity: None,
3226 children: None,
3227 base_changes: None,
3228 });
3229 }
3230 }
3231 }
3232
3233 for (i, field_a) in fields_a.iter().enumerate() {
3235 if !matched_a[i] {
3236 children.push(ASTChange {
3237 change_type: ChangeType::Delete,
3238 node_kind: NodeKind::Field,
3239 name: Some(field_a.name.clone()),
3240 old_location: Some(Location::with_column(
3241 file_a.display().to_string(),
3242 field_a.line,
3243 field_a.column,
3244 )),
3245 new_location: None,
3246 old_text: None,
3247 new_text: None,
3248 similarity: None,
3249 children: None,
3250 base_changes: None,
3251 });
3252 }
3253 }
3254
3255 for (j, field_b) in fields_b.iter().enumerate() {
3257 if !matched_b[j] {
3258 children.push(ASTChange {
3259 change_type: ChangeType::Insert,
3260 node_kind: NodeKind::Field,
3261 name: Some(field_b.name.clone()),
3262 old_location: None,
3263 new_location: Some(Location::with_column(
3264 file_b.display().to_string(),
3265 field_b.line,
3266 field_b.column,
3267 )),
3268 old_text: None,
3269 new_text: None,
3270 similarity: None,
3271 children: None,
3272 base_changes: None,
3273 });
3274 }
3275 }
3276}
3277
3278fn diff_bases(bases_a: &[String], bases_b: &[String]) -> Option<BaseChanges> {
3280 let set_a: std::collections::HashSet<&String> = bases_a.iter().collect();
3281 let set_b: std::collections::HashSet<&String> = bases_b.iter().collect();
3282
3283 let added: Vec<String> = set_b.difference(&set_a).map(|s| (*s).clone()).collect();
3284 let removed: Vec<String> = set_a.difference(&set_b).map(|s| (*s).clone()).collect();
3285
3286 if added.is_empty() && removed.is_empty() {
3287 None
3288 } else {
3289 Some(BaseChanges { added, removed })
3290 }
3291}
3292
3293fn compute_class_similarity(class_a: &ClassNode, class_b: &ClassNode) -> f64 {
3295 let method_sigs_a: std::collections::HashSet<String> = class_a
3297 .methods
3298 .iter()
3299 .map(|m| format!("{}:{}", m.name, m.normalized_body))
3300 .collect();
3301 let method_sigs_b: std::collections::HashSet<String> = class_b
3302 .methods
3303 .iter()
3304 .map(|m| format!("{}:{}", m.name, m.normalized_body))
3305 .collect();
3306
3307 let field_sigs_a: std::collections::HashSet<String> = class_a
3308 .fields
3309 .iter()
3310 .map(|f| f.normalized_body.clone())
3311 .collect();
3312 let field_sigs_b: std::collections::HashSet<String> = class_b
3313 .fields
3314 .iter()
3315 .map(|f| f.normalized_body.clone())
3316 .collect();
3317
3318 let all_a: std::collections::HashSet<&String> =
3320 method_sigs_a.iter().chain(field_sigs_a.iter()).collect();
3321 let all_b: std::collections::HashSet<&String> =
3322 method_sigs_b.iter().chain(field_sigs_b.iter()).collect();
3323
3324 if all_a.is_empty() && all_b.is_empty() {
3325 return 1.0;
3327 }
3328
3329 let intersection = all_a.intersection(&all_b).count();
3330 let union = all_a.union(&all_b).count();
3331
3332 if union == 0 {
3333 0.0
3334 } else {
3335 intersection as f64 / union as f64
3336 }
3337}
3338
/// File extensions (without the leading dot) that directory traversal treats
/// as source files.
///
/// `collect_source_files_recursive` keeps a file only when its extension
/// appears in this list; the comparison is an exact string match.
const SOURCE_EXTENSIONS: &[&str] = &[
    "py", "rs", "ts", "tsx", "js", "jsx", "go", "java", "c", "h", "cpp", "hpp", "cc", "cxx", "rb",
    "php", "cs", "kt", "scala", "swift", "ex", "exs", "lua", "ml", "mli", "luau",
];
3348
3349fn collect_source_files(root: &Path) -> Result<Vec<(String, PathBuf)>> {
3351 let mut files = Vec::new();
3352 collect_source_files_recursive(root, root, &mut files)?;
3353 files.sort_by(|a, b| a.0.cmp(&b.0));
3354 Ok(files)
3355}
3356
3357fn collect_source_files_recursive(
3358 root: &Path,
3359 current: &Path,
3360 files: &mut Vec<(String, PathBuf)>,
3361) -> Result<()> {
3362 for entry in fs::read_dir(current)? {
3363 let entry = entry?;
3364 let path = entry.path();
3365 if path.is_dir() {
3366 collect_source_files_recursive(root, &path, files)?;
3367 } else if path.is_file() {
3368 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
3369 if SOURCE_EXTENSIONS.contains(&ext) {
3370 let rel = path
3371 .strip_prefix(root)
3372 .unwrap_or(&path)
3373 .to_string_lossy()
3374 .replace('\\', "/");
3375 files.push((rel, path));
3376 }
3377 }
3378 }
3379 }
3380 Ok(())
3381}
3382
3383fn compute_structural_fingerprint(path: &Path) -> Result<(u64, Vec<String>)> {
3389 let lang = match Language::from_path(path) {
3390 Some(l) => l,
3391 None => {
3392 let content = fs::read_to_string(path)?;
3394 let mut hasher = std::collections::hash_map::DefaultHasher::new();
3395 content.hash(&mut hasher);
3396 return Ok((hasher.finish(), vec![]));
3397 }
3398 };
3399
3400 let source = fs::read_to_string(path)?;
3401 let pool = ParserPool::new();
3402 let tree = match pool.parse(&source, lang) {
3403 Ok(t) => t,
3404 Err(_) => {
3405 let mut hasher = std::collections::hash_map::DefaultHasher::new();
3407 source.hash(&mut hasher);
3408 return Ok((hasher.finish(), vec![]));
3409 }
3410 };
3411
3412 let nodes = extract_nodes(tree.root_node(), source.as_bytes(), lang);
3413
3414 let mut signatures: Vec<String> = nodes
3418 .iter()
3419 .map(|n| {
3420 let kind = match n.kind {
3421 NodeKind::Function => "fn",
3422 NodeKind::Class => "class",
3423 NodeKind::Method => "method",
3424 NodeKind::Field => "field",
3425 _ => "other",
3426 };
3427 let sig = if n.params.is_empty() {
3428 format!("{}:{}", kind, n.name)
3429 } else {
3430 format!("{}:{}({})", kind, n.name, n.params)
3431 };
3432 let mut body_hasher = std::collections::hash_map::DefaultHasher::new();
3434 n.normalized_body.hash(&mut body_hasher);
3435 format!("{}|{}", sig, body_hasher.finish())
3436 })
3437 .collect();
3438 signatures.sort();
3439
3440 let mut hasher = std::collections::hash_map::DefaultHasher::new();
3441 for sig in &signatures {
3442 sig.hash(&mut hasher);
3443 }
3444 let fingerprint = hasher.finish();
3445
3446 Ok((fingerprint, signatures))
3447}
3448
3449fn run_file_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3451 let files_a = collect_source_files(dir_a)?;
3452 let files_b = collect_source_files(dir_b)?;
3453
3454 let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
3456 let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
3457
3458 let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
3459
3460 let mut file_changes = Vec::new();
3461 let mut has_any_change = false;
3462
3463 for rel_path in all_paths {
3464 match (map_a.get(rel_path), map_b.get(rel_path)) {
3465 (Some(path_a), Some(path_b)) => {
3466 let (fp_a, sigs_a) = compute_structural_fingerprint(path_a)?;
3468 let (fp_b, sigs_b) = compute_structural_fingerprint(path_b)?;
3469
3470 if fp_a == fp_b {
3471 } else {
3474 has_any_change = true;
3475 let set_a: HashSet<&String> = sigs_a.iter().collect();
3477 let set_b: HashSet<&String> = sigs_b.iter().collect();
3478 let changed: Vec<String> = set_a
3479 .symmetric_difference(&set_b)
3480 .map(|s| (*s).clone())
3481 .collect();
3482
3483 file_changes.push(FileLevelChange {
3484 relative_path: rel_path.to_string(),
3485 change_type: ChangeType::Update,
3486 old_fingerprint: Some(fp_a),
3487 new_fingerprint: Some(fp_b),
3488 signature_changes: if changed.is_empty() {
3489 None
3490 } else {
3491 Some(changed)
3492 },
3493 });
3494 }
3495 }
3496 (None, Some(path_b)) => {
3497 has_any_change = true;
3499 let (fp_b, _) = compute_structural_fingerprint(path_b)?;
3500 file_changes.push(FileLevelChange {
3501 relative_path: rel_path.to_string(),
3502 change_type: ChangeType::Insert,
3503 old_fingerprint: None,
3504 new_fingerprint: Some(fp_b),
3505 signature_changes: None,
3506 });
3507 }
3508 (Some(path_a), None) => {
3509 has_any_change = true;
3511 let (fp_a, _) = compute_structural_fingerprint(path_a)?;
3512 file_changes.push(FileLevelChange {
3513 relative_path: rel_path.to_string(),
3514 change_type: ChangeType::Delete,
3515 old_fingerprint: Some(fp_a),
3516 new_fingerprint: None,
3517 signature_changes: None,
3518 });
3519 }
3520 (None, None) => unreachable!(),
3521 }
3522 }
3523
3524 Ok(DiffReport {
3525 file_a: dir_a.display().to_string(),
3526 file_b: dir_b.display().to_string(),
3527 identical: !has_any_change,
3528 changes: Vec::new(),
3529 summary: None,
3530 granularity: DiffGranularity::File,
3531 file_changes: Some(file_changes),
3532 module_changes: None,
3533 import_graph_summary: None,
3534 arch_changes: None,
3535 arch_summary: None,
3536 })
3537}
3538
/// One import statement found while building the import graph.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct InternalImportEdge {
    /// Relative path of the file that contains the import.
    source_file: String,
    /// Module named by the import statement, as written in the source.
    target_module: String,
    /// Names imported from the module; empty for a plain `import module`.
    imported_names: Vec<String>,
}
3550
3551fn parse_python_imports(source: &str, relative_path: &str) -> Vec<InternalImportEdge> {
3557 let mut edges = Vec::new();
3558
3559 let from_re = Regex::new(r"(?m)^(?:\s*)from\s+([\w.]+)\s+import\s+(.+)$").unwrap();
3561 for cap in from_re.captures_iter(source) {
3562 let target = cap[1].to_string();
3563 let names_str = &cap[2];
3564 let names: Vec<String> = names_str
3565 .split(',')
3566 .map(|n| n.trim().to_string())
3567 .filter(|n| !n.is_empty())
3568 .collect();
3569 edges.push(InternalImportEdge {
3570 source_file: relative_path.to_string(),
3571 target_module: target,
3572 imported_names: names,
3573 });
3574 }
3575
3576 let import_re = Regex::new(r"(?m)^(?:\s*)import\s+([\w.]+)$").unwrap();
3578 for cap in import_re.captures_iter(source) {
3579 let target = cap[1].to_string();
3580 edges.push(InternalImportEdge {
3581 source_file: relative_path.to_string(),
3582 target_module: target,
3583 imported_names: vec![],
3584 });
3585 }
3586
3587 edges
3588}
3589
3590fn parse_file_imports(
3595 registry: &LanguageRegistry,
3596 source: &str,
3597 full_path: &Path,
3598 rel_path: &str,
3599) -> Vec<InternalImportEdge> {
3600 let ext = match full_path.extension().and_then(|e| e.to_str()) {
3601 Some(e) => format!(".{}", e),
3602 None => return Vec::new(),
3603 };
3604
3605 let is_python = ext == ".py" || ext == ".pyi";
3606
3607 if let Some(handler) = registry.get_by_extension(&ext) {
3609 if let Ok(import_defs) = handler.parse_imports(source, full_path) {
3610 return import_defs
3611 .into_iter()
3612 .map(|def| InternalImportEdge {
3613 source_file: rel_path.to_string(),
3614 target_module: def.module,
3615 imported_names: def.names,
3616 })
3617 .collect();
3618 }
3619 }
3620
3621 if is_python {
3623 return parse_python_imports(source, rel_path);
3624 }
3625
3626 Vec::new()
3627}
3628
3629fn build_import_graph(root: &Path) -> Result<Vec<InternalImportEdge>> {
3637 let files = collect_source_files(root)?;
3638 let registry = LanguageRegistry::with_defaults();
3639 let mut all_edges = Vec::new();
3640
3641 for (rel_path, full_path) in &files {
3642 let source = fs::read_to_string(full_path)?;
3643 let edges = parse_file_imports(®istry, &source, full_path, rel_path);
3644 all_edges.extend(edges);
3645 }
3646
3647 Ok(all_edges)
3648}
3649
3650fn to_public_edge(edge: &InternalImportEdge) -> ImportEdge {
3652 ImportEdge {
3653 source_file: edge.source_file.clone(),
3654 target_module: edge.target_module.clone(),
3655 imported_names: edge.imported_names.clone(),
3656 }
3657}
3658
3659fn edge_key(edge: &InternalImportEdge) -> String {
3661 format!(
3662 "{}->{}:{}",
3663 edge.source_file,
3664 edge.target_module,
3665 edge.imported_names.join(",")
3666 )
3667}
3668
3669fn run_module_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3671 let edges_a = build_import_graph(dir_a)?;
3673 let edges_b = build_import_graph(dir_b)?;
3674
3675 let keys_a: HashSet<String> = edges_a.iter().map(edge_key).collect();
3677 let keys_b: HashSet<String> = edges_b.iter().map(edge_key).collect();
3678
3679 let added_keys: HashSet<&String> = keys_b.difference(&keys_a).collect();
3681 let removed_keys: HashSet<&String> = keys_a.difference(&keys_b).collect();
3682
3683 let added_edges: Vec<&InternalImportEdge> = edges_b
3685 .iter()
3686 .filter(|e| added_keys.contains(&edge_key(e)))
3687 .collect();
3688 let removed_edges: Vec<&InternalImportEdge> = edges_a
3689 .iter()
3690 .filter(|e| removed_keys.contains(&edge_key(e)))
3691 .collect();
3692
3693 let files_a = collect_source_files(dir_a)?;
3695 let files_b = collect_source_files(dir_b)?;
3696 let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(r, p)| (r.as_str(), p)).collect();
3697 let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(r, p)| (r.as_str(), p)).collect();
3698 let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
3699
3700 let mut module_changes: Vec<ModuleLevelChange> = Vec::new();
3702 let mut modules_with_import_changes = 0usize;
3703
3704 for rel_path in &all_paths {
3705 let in_a = map_a.contains_key(rel_path);
3706 let in_b = map_b.contains_key(rel_path);
3707
3708 let change_type = if !in_a && in_b {
3710 ChangeType::Insert
3711 } else if in_a && !in_b {
3712 ChangeType::Delete
3713 } else {
3714 ChangeType::Update
3715 };
3716
3717 let mod_added: Vec<ImportEdge> = added_edges
3719 .iter()
3720 .filter(|e| e.source_file == *rel_path)
3721 .map(|e| to_public_edge(e))
3722 .collect();
3723 let mod_removed: Vec<ImportEdge> = removed_edges
3724 .iter()
3725 .filter(|e| e.source_file == *rel_path)
3726 .map(|e| to_public_edge(e))
3727 .collect();
3728
3729 let file_change = if in_a && in_b {
3731 let path_a = map_a[rel_path];
3732 let path_b = map_b[rel_path];
3733 let (fp_a, sigs_a) = compute_structural_fingerprint(path_a)?;
3734 let (fp_b, sigs_b) = compute_structural_fingerprint(path_b)?;
3735 if fp_a != fp_b {
3736 let set_a: HashSet<&String> = sigs_a.iter().collect();
3737 let set_b: HashSet<&String> = sigs_b.iter().collect();
3738 let changed: Vec<String> = set_a
3739 .symmetric_difference(&set_b)
3740 .map(|s| (*s).clone())
3741 .collect();
3742 Some(FileLevelChange {
3743 relative_path: rel_path.to_string(),
3744 change_type: ChangeType::Update,
3745 old_fingerprint: Some(fp_a),
3746 new_fingerprint: Some(fp_b),
3747 signature_changes: if changed.is_empty() {
3748 None
3749 } else {
3750 Some(changed)
3751 },
3752 })
3753 } else {
3754 None
3755 }
3756 } else {
3757 None
3758 };
3759
3760 let has_import_changes = !mod_added.is_empty() || !mod_removed.is_empty();
3762 let has_file_change = file_change.is_some();
3763 let is_new_or_deleted =
3764 change_type == ChangeType::Insert || change_type == ChangeType::Delete;
3765
3766 if has_import_changes || has_file_change || is_new_or_deleted {
3767 if has_import_changes {
3768 modules_with_import_changes += 1;
3769 }
3770
3771 let final_added = if change_type == ChangeType::Insert && mod_added.is_empty() {
3773 edges_b
3775 .iter()
3776 .filter(|e| e.source_file == *rel_path)
3777 .map(to_public_edge)
3778 .collect()
3779 } else {
3780 mod_added
3781 };
3782 let final_removed = if change_type == ChangeType::Delete && mod_removed.is_empty() {
3784 edges_a
3785 .iter()
3786 .filter(|e| e.source_file == *rel_path)
3787 .map(to_public_edge)
3788 .collect()
3789 } else {
3790 mod_removed
3791 };
3792
3793 let has_expanded_imports = !final_added.is_empty() || !final_removed.is_empty();
3795 if has_expanded_imports && !has_import_changes {
3796 modules_with_import_changes += 1;
3797 }
3798
3799 module_changes.push(ModuleLevelChange {
3800 module_path: rel_path.to_string(),
3801 change_type,
3802 imports_added: final_added,
3803 imports_removed: final_removed,
3804 file_change,
3805 });
3806 }
3807 }
3808
3809 let summary = ImportGraphSummary {
3810 total_edges_a: edges_a.len(),
3811 total_edges_b: edges_b.len(),
3812 edges_added: added_keys.len(),
3813 edges_removed: removed_keys.len(),
3814 modules_with_import_changes,
3815 };
3816
3817 let identical = module_changes.is_empty() && added_keys.is_empty() && removed_keys.is_empty();
3818
3819 Ok(DiffReport {
3820 file_a: dir_a.display().to_string(),
3821 file_b: dir_b.display().to_string(),
3822 identical,
3823 changes: Vec::new(),
3824 summary: None,
3825 granularity: DiffGranularity::Module,
3826 file_changes: None,
3827 module_changes: Some(module_changes),
3828 import_graph_summary: Some(summary),
3829 arch_changes: None,
3830 arch_summary: None,
3831 })
3832}
3833
/// Maps a directory name to an architectural layer label.
///
/// The name is lower-cased and looked up in a fixed table of conventional
/// directory names; anything not in the table is labelled `other`.
fn classify_directory_layer(dir_name: &str) -> String {
    let layer = match dir_name.to_lowercase().as_str() {
        "api" | "routes" | "handlers" | "endpoints" | "views" | "controllers" => "api",
        "core" | "models" | "domain" | "entities" => "core",
        "utils" | "helpers" | "lib" | "common" | "shared" => "utility",
        "middleware" | "interceptors" | "filters" => "middleware",
        "services" | "service" => "service",
        "tests" | "test" | "spec" | "specs" => "test",
        "config" | "settings" | "conf" => "config",
        "db" | "database" | "migrations" | "repositories" | "repo" => "data",
        _ => "other",
    };
    layer.to_string()
}
3853
3854fn classify_by_import_flow(
3862 dir_name: &str,
3863 edges: &[InternalImportEdge],
3864 all_dirs: &HashSet<String>,
3865) -> String {
3866 let fan_out: usize = edges
3868 .iter()
3869 .filter(|e| {
3870 e.source_file
3871 .split('/')
3872 .next()
3873 .map(|d| d == dir_name)
3874 .unwrap_or(false)
3875 })
3876 .filter(|e| {
3877 let target_first = e
3879 .target_module
3880 .split('/')
3881 .next()
3882 .or_else(|| e.target_module.split('.').next())
3883 .unwrap_or("");
3884 all_dirs.contains(target_first) && target_first != dir_name
3885 })
3886 .map(|e| e.target_module.clone())
3887 .collect::<HashSet<_>>()
3888 .len();
3889
3890 let fan_in: usize = edges
3892 .iter()
3893 .filter(|e| {
3894 let source_dir = e.source_file.split('/').next().unwrap_or("");
3895 source_dir != dir_name
3896 })
3897 .filter(|e| {
3898 let target_first = e
3899 .target_module
3900 .split('/')
3901 .next()
3902 .or_else(|| e.target_module.split('.').next())
3903 .unwrap_or("");
3904 target_first == dir_name
3905 })
3906 .count();
3907
3908 if fan_in == 0 && fan_out == 0 {
3909 return "other".to_string();
3910 }
3911
3912 if fan_out > 0 && fan_in == 0 {
3914 "entry".to_string()
3915 } else if fan_in > fan_out * 2 {
3916 "utility".to_string()
3917 } else if fan_out > fan_in * 2 {
3918 "entry".to_string()
3919 } else {
3920 "service".to_string()
3921 }
3922}
3923
3924fn collect_arch_directories(root: &Path) -> Result<HashMap<String, String>> {
3931 let mut dirs: HashMap<String, String> = HashMap::new();
3932 let files = collect_source_files(root)?;
3933
3934 for (rel_path, _) in &files {
3936 if let Some(first_dir) = rel_path.split('/').next() {
3937 if rel_path.contains('/') && !dirs.contains_key(first_dir) {
3938 let layer = classify_directory_layer(first_dir);
3939 dirs.insert(first_dir.to_string(), layer);
3940 }
3941 }
3942 }
3943
3944 let other_dirs: Vec<String> = dirs
3946 .iter()
3947 .filter(|(_, layer)| *layer == "other")
3948 .map(|(name, _)| name.clone())
3949 .collect();
3950
3951 if !other_dirs.is_empty() {
3952 if let Ok(edges) = build_import_graph(root) {
3954 let all_dir_names: HashSet<String> = dirs.keys().cloned().collect();
3955 for dir_name in &other_dirs {
3956 let inferred = classify_by_import_flow(dir_name, &edges, &all_dir_names);
3957 if inferred != "other" {
3958 dirs.insert(dir_name.clone(), inferred);
3959 }
3960 }
3961 }
3962 }
3963
3964 Ok(dirs)
3965}
3966
3967fn run_arch_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3969 let dirs_a = collect_arch_directories(dir_a)?;
3970 let dirs_b = collect_arch_directories(dir_b)?;
3971
3972 let all_dirs: BTreeSet<&str> = dirs_a
3973 .keys()
3974 .chain(dirs_b.keys())
3975 .map(|s| s.as_str())
3976 .collect();
3977
3978 let mut arch_changes: Vec<ArchLevelChange> = Vec::new();
3979 let mut directories_added = 0usize;
3980 let mut directories_removed = 0usize;
3981 let mut layer_migrations = 0usize;
3982 let mut changed_dirs = 0usize;
3983 let total_dirs = all_dirs.len();
3984
3985 for dir_name in &all_dirs {
3986 let in_a = dirs_a.get(*dir_name);
3987 let in_b = dirs_b.get(*dir_name);
3988
3989 match (in_a, in_b) {
3990 (Some(layer_a), Some(layer_b)) => {
3991 if layer_a != layer_b {
3992 changed_dirs += 1;
3994 layer_migrations += 1;
3995 arch_changes.push(ArchLevelChange {
3996 directory: dir_name.to_string(),
3997 change_type: ArchChangeType::LayerMigration,
3998 old_layer: Some(layer_a.clone()),
3999 new_layer: Some(layer_b.clone()),
4000 migrated_functions: Vec::new(),
4001 });
4002 }
4003 }
4005 (None, Some(layer_b)) => {
4006 changed_dirs += 1;
4008 directories_added += 1;
4009 arch_changes.push(ArchLevelChange {
4010 directory: dir_name.to_string(),
4011 change_type: ArchChangeType::Added,
4012 old_layer: None,
4013 new_layer: Some(layer_b.clone()),
4014 migrated_functions: Vec::new(),
4015 });
4016 }
4017 (Some(layer_a), None) => {
4018 changed_dirs += 1;
4020 directories_removed += 1;
4021 arch_changes.push(ArchLevelChange {
4022 directory: dir_name.to_string(),
4023 change_type: ArchChangeType::Removed,
4024 old_layer: Some(layer_a.clone()),
4025 new_layer: None,
4026 migrated_functions: Vec::new(),
4027 });
4028 }
4029 (None, None) => unreachable!(),
4030 }
4031 }
4032
4033 let stability_score = if total_dirs == 0 {
4034 1.0
4035 } else {
4036 1.0 - (changed_dirs as f64 / total_dirs as f64)
4037 };
4038
4039 let summary = ArchDiffSummary {
4040 layer_migrations,
4041 directories_added,
4042 directories_removed,
4043 cycles_introduced: 0,
4044 cycles_resolved: 0,
4045 stability_score,
4046 };
4047
4048 let identical = arch_changes.is_empty();
4049
4050 Ok(DiffReport {
4051 file_a: dir_a.display().to_string(),
4052 file_b: dir_b.display().to_string(),
4053 identical,
4054 changes: Vec::new(),
4055 summary: None,
4056 granularity: DiffGranularity::Architecture,
4057 file_changes: None,
4058 module_changes: None,
4059 import_graph_summary: None,
4060 arch_changes: Some(arch_changes),
4061 arch_summary: Some(summary),
4062 })
4063}
4064
#[cfg(test)]
mod tests {
    use super::*;

    /// "Before" Python fixture: three top-level functions and a class with
    /// one method.
    const SAMPLE_A: &str = r#"
def original_function(x):
 return x * 2

def renamed_later(a, b):
 return a + b

def will_be_deleted():
 return "goodbye"

class OriginalClass:
 def method_one(self):
 return 1
"#;

    /// "After" Python fixture: `original_function` has a different body,
    /// `renamed_later` appears as `better_name`, `will_be_deleted` is gone,
    /// and `new_function` plus `method_two` are new.
    const SAMPLE_B: &str = r#"
def original_function(x):
 # Modified implementation
 return x * 3

def better_name(a, b):
 return a + b

def new_function():
 return "hello"

class OriginalClass:
 def method_one(self):
 return 1

 def method_two(self):
 return 2
"#;

    /// Parses `source` as Python with a fresh parser pool, panicking on failure.
    fn parse_python(source: &str) -> tree_sitter::Tree {
        ParserPool::new().parse(source, Language::Python).unwrap()
    }

    /// Extraction finds every function, method and class in the baseline fixture.
    #[test]
    fn test_extract_nodes() {
        let parsed = parse_python(SAMPLE_A);
        let extracted = extract_nodes(parsed.root_node(), SAMPLE_A.as_bytes(), Language::Python);

        let found = extracted.len();
        assert!(found >= 5, "Expected at least 5 nodes, got {}", found);

        let names: Vec<&str> = extracted.iter().map(|node| node.name.as_str()).collect();
        assert!(names.contains(&"original_function"));
        assert!(names.contains(&"renamed_later"));
        assert!(names.contains(&"will_be_deleted"));
        assert!(names.contains(&"OriginalClass"));
        assert!(names.contains(&"method_one"));
    }

    /// A function whose body differs between fixtures is reported as an update.
    #[test]
    fn test_detect_update() {
        let before_tree = parse_python(SAMPLE_A);
        let after_tree = parse_python(SAMPLE_B);
        let before = extract_nodes(
            before_tree.root_node(),
            SAMPLE_A.as_bytes(),
            Language::Python,
        );
        let after = extract_nodes(after_tree.root_node(), SAMPLE_B.as_bytes(), Language::Python);

        let path_a = PathBuf::from("a.py");
        let path_b = PathBuf::from("b.py");
        let changes = detect_changes(&before, &after, &path_a, &path_b, false);

        let updated_names: Vec<Option<&str>> = changes
            .iter()
            .filter(|c| c.change_type == ChangeType::Update)
            .map(|c| c.name.as_deref())
            .collect();
        assert!(
            !updated_names.is_empty(),
            "Should detect at least one update"
        );
        assert!(
            updated_names.contains(&Some("original_function")),
            "original_function should be marked as updated"
        );
    }

    /// At least one insertion is reported between the two fixtures.
    #[test]
    fn test_detect_insert() {
        let before_tree = parse_python(SAMPLE_A);
        let after_tree = parse_python(SAMPLE_B);
        let before = extract_nodes(
            before_tree.root_node(),
            SAMPLE_A.as_bytes(),
            Language::Python,
        );
        let after = extract_nodes(after_tree.root_node(), SAMPLE_B.as_bytes(), Language::Python);

        let path_a = PathBuf::from("a.py");
        let path_b = PathBuf::from("b.py");
        let changes = detect_changes(&before, &after, &path_a, &path_b, false);

        let saw_insert = changes
            .iter()
            .any(|c| c.change_type == ChangeType::Insert);
        assert!(saw_insert, "Should detect insertions");
    }

    /// The function missing from the second fixture is reported as deleted.
    #[test]
    fn test_detect_delete() {
        let before_tree = parse_python(SAMPLE_A);
        let after_tree = parse_python(SAMPLE_B);
        let before = extract_nodes(
            before_tree.root_node(),
            SAMPLE_A.as_bytes(),
            Language::Python,
        );
        let after = extract_nodes(after_tree.root_node(), SAMPLE_B.as_bytes(), Language::Python);

        let path_a = PathBuf::from("a.py");
        let path_b = PathBuf::from("b.py");
        let changes = detect_changes(&before, &after, &path_a, &path_b, false);

        let deleted_names: Vec<Option<&str>> = changes
            .iter()
            .filter(|c| c.change_type == ChangeType::Delete)
            .map(|c| c.name.as_deref())
            .collect();
        assert!(!deleted_names.is_empty(), "Should detect deletions");
        assert!(
            deleted_names.contains(&Some("will_be_deleted")),
            "will_be_deleted should be marked as deleted"
        );
    }

    /// At least one rename is reported between the two fixtures.
    #[test]
    fn test_detect_rename() {
        let before_tree = parse_python(SAMPLE_A);
        let after_tree = parse_python(SAMPLE_B);
        let before = extract_nodes(
            before_tree.root_node(),
            SAMPLE_A.as_bytes(),
            Language::Python,
        );
        let after = extract_nodes(after_tree.root_node(), SAMPLE_B.as_bytes(), Language::Python);

        let path_a = PathBuf::from("a.py");
        let path_b = PathBuf::from("b.py");
        let changes = detect_changes(&before, &after, &path_a, &path_b, false);

        let saw_rename = changes
            .iter()
            .any(|c| c.change_type == ChangeType::Rename);
        assert!(saw_rename, "Should detect renames");
    }

    /// Diffing a source against itself in semantic-only mode yields no changes.
    #[test]
    fn test_identical_files() {
        // Both sides are parsed from the same fixture.
        let first_tree = parse_python(SAMPLE_A);
        let second_tree = parse_python(SAMPLE_A);
        let first = extract_nodes(
            first_tree.root_node(),
            SAMPLE_A.as_bytes(),
            Language::Python,
        );
        let second = extract_nodes(
            second_tree.root_node(),
            SAMPLE_A.as_bytes(),
            Language::Python,
        );

        let path_a = PathBuf::from("a.py");
        let path_b = PathBuf::from("b.py");
        let changes = detect_changes(&first, &second, &path_a, &path_b, true);

        assert!(
            changes.is_empty(),
            "Identical files should have no semantic changes"
        );
    }

    /// Similarity is 1.0 for equal inputs (including two empty strings) and
    /// at least 0.5 when two of three lines agree.
    #[test]
    fn test_compute_similarity() {
        let same_text = compute_similarity("abc", "abc");
        assert_eq!(same_text, 1.0);

        let both_empty = compute_similarity("", "");
        assert_eq!(both_empty, 1.0);

        assert!(compute_similarity("a\nb\nc", "a\nb\nd") >= 0.5);
    }

    /// Normalization drops comments and the signature line but keeps the body.
    #[test]
    fn test_normalize_body() {
        let normalized =
            normalize_body("def foo():\n # pure comment line\n return 1 # inline comment");

        assert!(!normalized.contains('#'), "Comments should be removed");
        assert!(
            !normalized.contains("def foo"),
            "Signature should be skipped"
        );
        assert!(normalized.contains("return 1"), "Body should remain");
    }

    /// The text renderer lists every file-level change under its own header.
    #[test]
    fn test_format_diff_text_renders_file_changes() {
        let updated = FileLevelChange {
            relative_path: "src/main.py".to_string(),
            change_type: ChangeType::Update,
            old_fingerprint: Some(12345),
            new_fingerprint: Some(67890),
            signature_changes: Some(vec!["fn foo()".to_string()]),
        };
        let added = FileLevelChange {
            relative_path: "src/new_module.py".to_string(),
            change_type: ChangeType::Insert,
            old_fingerprint: None,
            new_fingerprint: Some(11111),
            signature_changes: None,
        };
        let removed = FileLevelChange {
            relative_path: "src/removed.py".to_string(),
            change_type: ChangeType::Delete,
            old_fingerprint: Some(99999),
            new_fingerprint: None,
            signature_changes: None,
        };

        let mut report = DiffReport::new("dir_a/", "dir_b/");
        report.identical = false;
        report.file_changes = Some(vec![updated, added, removed]);

        let text = format_diff_text(&report);
        for (needle, why) in [
            (
                "File-Level Changes",
                "Should have file-level section header",
            ),
            ("src/main.py", "Should mention updated file"),
            ("src/new_module.py", "Should mention added file"),
            ("src/removed.py", "Should mention removed file"),
        ] {
            assert!(text.contains(needle), "{}", why);
        }
    }

    /// The text renderer shows the module path and its added import target.
    #[test]
    fn test_format_diff_text_renders_module_changes() {
        let added_import = ImportEdge {
            source_file: "src/utils.py".to_string(),
            target_module: "os.path".to_string(),
            imported_names: vec!["join".to_string()],
        };
        let module_change = ModuleLevelChange {
            module_path: "src/utils.py".to_string(),
            change_type: ChangeType::Update,
            imports_added: vec![added_import],
            imports_removed: vec![],
            file_change: None,
        };

        let mut report = DiffReport::new("dir_a/", "dir_b/");
        report.identical = false;
        report.module_changes = Some(vec![module_change]);

        let text = format_diff_text(&report);
        for (needle, why) in [
            (
                "Module-Level Changes",
                "Should have module-level section header",
            ),
            ("src/utils.py", "Should mention the module path"),
            ("os.path", "Should mention added import target"),
        ] {
            assert!(text.contains(needle), "{}", why);
        }
    }

    /// The text renderer includes the import-graph section and its edge counts.
    #[test]
    fn test_format_diff_text_renders_import_graph_summary() {
        let graph_summary = ImportGraphSummary {
            total_edges_a: 10,
            total_edges_b: 15,
            edges_added: 7,
            edges_removed: 2,
            modules_with_import_changes: 3,
        };

        let mut report = DiffReport::new("dir_a/", "dir_b/");
        report.identical = false;
        report.import_graph_summary = Some(graph_summary);

        let text = format_diff_text(&report);
        for (needle, why) in [
            ("Import Graph", "Should have import graph section"),
            ("7", "Should show edges added"),
            ("2", "Should show edges removed"),
        ] {
            assert!(text.contains(needle), "{}", why);
        }
    }

    /// The text renderer shows migrated and added directories with their layers.
    #[test]
    fn test_format_diff_text_renders_arch_changes() {
        let migration = ArchLevelChange {
            directory: "src/api/".to_string(),
            change_type: ArchChangeType::LayerMigration,
            old_layer: Some("presentation".to_string()),
            new_layer: Some("business".to_string()),
            migrated_functions: vec!["handle_request".to_string()],
        };
        let addition = ArchLevelChange {
            directory: "src/new_service/".to_string(),
            change_type: ArchChangeType::Added,
            old_layer: None,
            new_layer: Some("service".to_string()),
            migrated_functions: vec![],
        };

        let mut report = DiffReport::new("dir_a/", "dir_b/");
        report.identical = false;
        report.arch_changes = Some(vec![migration, addition]);

        let text = format_diff_text(&report);
        for (needle, why) in [
            (
                "Architecture-Level Changes",
                "Should have arch section header",
            ),
            ("src/api/", "Should mention migrated directory"),
            ("presentation", "Should show old layer"),
            ("business", "Should show new layer"),
            ("src/new_service/", "Should mention added directory"),
        ] {
            assert!(text.contains(needle), "{}", why);
        }
    }

    /// The text renderer includes the architecture summary and stability score.
    #[test]
    fn test_format_diff_text_renders_arch_summary() {
        let arch_summary = ArchDiffSummary {
            layer_migrations: 2,
            directories_added: 1,
            directories_removed: 0,
            cycles_introduced: 1,
            cycles_resolved: 0,
            stability_score: 0.75,
        };

        let mut report = DiffReport::new("dir_a/", "dir_b/");
        report.identical = false;
        report.arch_summary = Some(arch_summary);

        let text = format_diff_text(&report);
        for (needle, why) in [
            ("Architecture Summary", "Should have arch summary section"),
            ("0.75", "Should show stability score"),
        ] {
            assert!(text.contains(needle), "{}", why);
        }
    }

    /// A report flagged identical renders no change sections, even if change
    /// data is attached to it.
    #[test]
    fn test_format_diff_text_identical_skips_higher_levels() {
        let stray_change = FileLevelChange {
            relative_path: "should_not_appear.py".to_string(),
            change_type: ChangeType::Insert,
            old_fingerprint: None,
            new_fingerprint: Some(1),
            signature_changes: None,
        };

        let mut report = DiffReport::new("a.py", "b.py");
        report.identical = true;
        report.file_changes = Some(vec![stray_change]);

        let text = format_diff_text(&report);
        assert!(
            !text.contains("should_not_appear"),
            "Identical report should skip all change sections"
        );
        assert!(
            text.contains("No structural changes"),
            "Should show identical message"
        );
    }
}