Skip to main content

tldr_cli/commands/remaining/
diff.rs

1//! Diff command - AST-aware structural diff
2//!
3//! Compares two source files at the AST level, detecting:
4//! - Insert: new function/class/method
5//! - Delete: removed function/class/method
6//! - Update: modified body
7//! - Move: same content, different location
8//! - Rename: same body, different name
9//!
10//! # Example
11//!
12//! ```bash
13//! tldr diff old.py new.py
14//! tldr diff old.py new.py --semantic-only
15//! tldr diff old.py new.py --format text
16//! ```
17
18use std::collections::{BTreeSet, HashMap, HashSet};
19use std::fs;
20use std::hash::{Hash, Hasher};
21use std::path::{Path, PathBuf};
22
23use anyhow::{bail, Result};
24use clap::Args;
25use regex::Regex;
26use tree_sitter::Node;
27
28use tldr_core::ast::function_finder::{get_function_name, get_function_node_kinds};
29use tldr_core::ast::parser::ParserPool;
30use tldr_core::callgraph::languages::LanguageRegistry;
31use tldr_core::types::Language;
32
33use super::error::RemainingError;
34use super::types::{
35    ASTChange, ArchChangeType, ArchDiffSummary, ArchLevelChange, BaseChanges, ChangeType,
36    DiffGranularity, DiffReport, DiffSummary, FileLevelChange, ImportEdge, ImportGraphSummary,
37    Location, ModuleLevelChange, NodeKind,
38};
39use crate::output::OutputFormat;
40
41// =============================================================================
42// Constants
43// =============================================================================
44
45/// Similarity threshold for detecting renames (0.0-1.0)
46const RENAME_SIMILARITY_THRESHOLD: f64 = 0.8;
47
48// =============================================================================
49// CLI Arguments
50// =============================================================================
51
52/// AST-aware structural diff between two files
53///
54/// Compares two source files at the AST level, detecting structural changes
55/// like inserted, deleted, updated, moved, and renamed functions/classes.
56///
57/// # Example
58///
59/// ```bash
60/// tldr diff old.py new.py
61/// tldr diff old.py new.py --semantic-only
62/// ```
63#[derive(Debug, Args)]
64pub struct DiffArgs {
65    /// First file (or directory for L6/L7/L8) to compare
66    pub file_a: PathBuf,
67
68    /// Second file (or directory for L6/L7/L8) to compare
69    pub file_b: PathBuf,
70
71    /// Diff granularity level
72    #[arg(long, short = 'g', default_value = "function")]
73    pub granularity: DiffGranularity,
74
75    /// Exclude formatting-only changes (comments, whitespace)
76    #[arg(long)]
77    pub semantic_only: bool,
78
79    /// Output file (optional, stdout if not specified)
80    #[arg(long, short = 'O')]
81    pub output: Option<PathBuf>,
82}
83
84// =============================================================================
85// Extracted Function Info
86// =============================================================================
87
88/// Information about an extracted function/class/method
89#[derive(Debug, Clone)]
90struct ExtractedNode {
91    /// Name of the function/class
92    name: String,
93    /// Kind of node
94    kind: NodeKind,
95    /// Line number (1-indexed)
96    line: u32,
97    /// End line number (1-indexed)
98    end_line: u32,
99    /// Column
100    column: u32,
101    /// Full source text (body)
102    body: String,
103    /// Normalized body (whitespace-insensitive)
104    normalized_body: String,
105    /// Parameters (for functions)
106    params: String,
107    /// Whether this is a method (inside a class)
108    is_method: bool,
109}
110
111impl ExtractedNode {
112    fn new(
113        name: impl Into<String>,
114        kind: NodeKind,
115        line: u32,
116        end_line: u32,
117        column: u32,
118        body: impl Into<String>,
119    ) -> Self {
120        let body_str: String = body.into();
121        let normalized = normalize_body(&body_str);
122        Self {
123            name: name.into(),
124            kind,
125            line,
126            end_line,
127            column,
128            body: body_str,
129            normalized_body: normalized,
130            params: String::new(),
131            is_method: false,
132        }
133    }
134
135    fn with_params(mut self, params: impl Into<String>) -> Self {
136        self.params = params.into();
137        self
138    }
139
140    fn with_method_kind(mut self) -> Self {
141        self.is_method = true;
142        if self.kind == NodeKind::Function {
143            self.kind = NodeKind::Method;
144        }
145        self
146    }
147}
148
/// Normalize a definition's source text for comparison.
///
/// The first line (the signature, e.g. `def foo():` or `class Bar:`) is
/// dropped so that rename detection compares only the body. Every remaining
/// line has its trailing `#` comment removed, is trimmed, and is discarded if
/// nothing is left. The surviving lines are joined with `\n`.
///
/// A `#` only starts a comment when it is outside a quoted string. String
/// state is tracked per line with a small scanner (single and double quotes,
/// backslash escapes) rather than by counting quote characters, so that
/// `"it's"  # note` loses its comment and `"#"  # note` keeps the `#` that is
/// part of the literal. Strings spanning several lines are not tracked.
fn normalize_body(body: &str) -> String {
    /// Return `line` without its trailing `#` comment, if it has one.
    fn strip_hash_comment(line: &str) -> &str {
        // The quote character of the string literal we are inside, if any.
        let mut open_quote: Option<char> = None;
        // True when the previous character was a backslash inside a string.
        let mut escaped = false;

        for (idx, ch) in line.char_indices() {
            if escaped {
                escaped = false;
                continue;
            }
            match (open_quote, ch) {
                (Some(_), '\\') => escaped = true,
                (Some(quote), c) if c == quote => open_quote = None,
                (None, '\'') | (None, '"') => open_quote = Some(ch),
                // `#` is ASCII, so `idx` is always a valid slice boundary.
                (None, '#') => return &line[..idx],
                _ => {}
            }
        }
        line
    }

    body.lines()
        .skip(1) // Skip signature line (def foo(): or class Bar:)
        .map(|line| strip_hash_comment(line).trim())
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join("\n")
}
178
179// =============================================================================
180// Implementation
181// =============================================================================
182
183impl DiffArgs {
184    /// Run the diff command and return the structured report.
185    ///
186    /// This is the internal workhorse: it dispatches to the appropriate
187    /// algorithm based on `self.granularity` and returns a `DiffReport`
188    /// without any output formatting.
189    pub fn run_to_report(&self) -> Result<DiffReport> {
190        // Validate paths exist
191        if !self.file_a.exists() {
192            return Err(RemainingError::file_not_found(&self.file_a).into());
193        }
194        if !self.file_b.exists() {
195            return Err(RemainingError::file_not_found(&self.file_b).into());
196        }
197
198        match self.granularity {
199            DiffGranularity::File => {
200                // L6: directory-level structural fingerprint diff
201                if !self.file_a.is_dir() || !self.file_b.is_dir() {
202                    bail!("File-level (L6) diff requires directories, not individual files");
203                }
204                run_file_level_diff(&self.file_a, &self.file_b)
205            }
206            DiffGranularity::Module => {
207                // L7: module-level import graph diff
208                if !self.file_a.is_dir() || !self.file_b.is_dir() {
209                    bail!("Module-level (L7) diff requires directories, not individual files");
210                }
211                run_module_level_diff(&self.file_a, &self.file_b)
212            }
213            DiffGranularity::Architecture => {
214                // L8: architecture-level diff
215                if !self.file_a.is_dir() || !self.file_b.is_dir() {
216                    bail!(
217                        "Architecture-level (L8) diff requires directories, not individual files"
218                    );
219                }
220                run_arch_level_diff(&self.file_a, &self.file_b)
221            }
222            DiffGranularity::Class => {
223                // L5: class-level diff (supports both files and directories)
224                if self.file_a.is_dir() && self.file_b.is_dir() {
225                    run_class_diff_directory(&self.file_a, &self.file_b, self.semantic_only)
226                } else {
227                    run_class_diff(&self.file_a, &self.file_b, self.semantic_only)
228                }
229            }
230            DiffGranularity::Statement => {
231                // L3: statement-level diff (Zhang-Shasha tree edit distance)
232                self.run_statement_level_diff()
233            }
234            DiffGranularity::Token => {
235                // L1: token-level diff using difftastic graph-based algorithm
236                self.run_token_level_diff()
237            }
238            DiffGranularity::Expression => {
239                // L2: expression-level diff (stub -- uses L1 until Phase 6)
240                self.run_expression_level_diff()
241            }
242            _ => {
243                // L4 and below: function-level diff (original behavior)
244                self.run_function_level_diff()
245            }
246        }
247    }
248
249    /// Run the diff command with output formatting.
250    pub fn run(&self, format: OutputFormat) -> Result<()> {
251        let report = self.run_to_report()?;
252
253        // Output
254        match format {
255            OutputFormat::Json => {
256                let json = serde_json::to_string_pretty(&report)?;
257                if let Some(ref output_path) = self.output {
258                    fs::write(output_path, &json)?;
259                } else {
260                    println!("{}", json);
261                }
262            }
263            OutputFormat::Text => {
264                let text = format_diff_text(&report);
265                if let Some(ref output_path) = self.output {
266                    fs::write(output_path, &text)?;
267                } else {
268                    println!("{}", text);
269                }
270            }
271            OutputFormat::Sarif | OutputFormat::Compact | OutputFormat::Dot => {
272                // Other formats not supported for diff, fall back to JSON
273                let json = serde_json::to_string_pretty(&report)?;
274                println!("{}", json);
275            }
276        }
277
278        Ok(())
279    }
280
281    /// Original L4 function-level diff implementation.
282    fn run_function_level_diff(&self) -> Result<DiffReport> {
283        // Detect language from file_a extension
284        let lang = Language::from_path(&self.file_a).ok_or_else(|| {
285            let ext = self
286                .file_a
287                .extension()
288                .map(|e| e.to_string_lossy().to_string())
289                .unwrap_or_else(|| "unknown".to_string());
290            RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
291        })?;
292
293        // Read file contents
294        let source_a = fs::read_to_string(&self.file_a)?;
295        let source_b = fs::read_to_string(&self.file_b)?;
296
297        // Parse both files using language-aware parser
298        let pool = ParserPool::new();
299        let tree_a = pool.parse(&source_a, lang).map_err(|e| {
300            RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
301        })?;
302        let tree_b = pool.parse(&source_b, lang).map_err(|e| {
303            RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
304        })?;
305
306        // Extract nodes from both files
307        let nodes_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
308        let nodes_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
309
310        // Detect changes
311        let changes = detect_changes(
312            &nodes_a,
313            &nodes_b,
314            &self.file_a,
315            &self.file_b,
316            self.semantic_only,
317        );
318
319        // Build summary
320        let mut summary = DiffSummary::default();
321        for change in &changes {
322            summary.total_changes += 1;
323            if change.change_type != ChangeType::Format {
324                summary.semantic_changes += 1;
325            }
326            match change.change_type {
327                ChangeType::Insert => summary.inserts += 1,
328                ChangeType::Delete => summary.deletes += 1,
329                ChangeType::Update => summary.updates += 1,
330                ChangeType::Move => summary.moves += 1,
331                ChangeType::Rename => summary.renames += 1,
332                ChangeType::Format => summary.formats += 1,
333                ChangeType::Extract => summary.extracts += 1,
334                ChangeType::Inline => {}
335            }
336        }
337
338        // Build report
339        let report = DiffReport {
340            file_a: self.file_a.display().to_string(),
341            file_b: self.file_b.display().to_string(),
342            identical: changes.is_empty(),
343            changes,
344            summary: Some(summary),
345            granularity: self.granularity,
346            file_changes: None,
347            module_changes: None,
348            import_graph_summary: None,
349            arch_changes: None,
350            arch_summary: None,
351        };
352
353        Ok(report)
354    }
355
356    /// L1 Token-level diff using difftastic's graph-based algorithm.
357    ///
358    /// Pipeline:
359    /// 1. Read files and detect language
360    /// 2. Parse with tree-sitter
361    /// 3. Convert to difftastic Syntax trees
362    /// 4. Run unchanged marking, Dijkstra graph diff, slider fixup
363    /// 5. Convert ChangeMap to DiffReport via changemap_to_report
364    fn run_token_level_diff(&self) -> Result<DiffReport> {
365        use super::difftastic;
366        use typed_arena::Arena;
367
368        // Detect language from file_a extension
369        let lang = Language::from_path(&self.file_a).ok_or_else(|| {
370            let ext = self
371                .file_a
372                .extension()
373                .map(|e| e.to_string_lossy().to_string())
374                .unwrap_or_else(|| "unknown".to_string());
375            RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
376        })?;
377
378        // Read file contents
379        let lhs_src = fs::read_to_string(&self.file_a)?;
380        let rhs_src = fs::read_to_string(&self.file_b)?;
381
382        // Get language config for difftastic tree-sitter conversion
383        let config = difftastic::lang_config::LangConfig::for_language(lang.as_str());
384
385        // Parse both files using existing tree-sitter infrastructure
386        let pool = ParserPool::new();
387        let lhs_tree = pool.parse(&lhs_src, lang).map_err(|e| {
388            RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
389        })?;
390        let rhs_tree = pool.parse(&rhs_src, lang).map_err(|e| {
391            RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
392        })?;
393
394        // Convert tree-sitter trees to difftastic Syntax trees
395        let arena = Arena::new();
396        let (lhs_nodes, rhs_nodes) = difftastic::ts_to_syntax::prepare_syntax_trees(
397            &arena, &lhs_src, &rhs_src, &lhs_tree, &rhs_tree, &config,
398        );
399
400        // Run diff pipeline
401        let mut change_map = difftastic::changes::ChangeMap::default();
402
403        // Phase 1: Mark unchanged nodes (structural matching)
404        let chunks = difftastic::unchanged::mark_unchanged(&lhs_nodes, &rhs_nodes, &mut change_map);
405
406        // Phase 2: Run Dijkstra graph diff on each changed chunk
407        for (lhs_chunk, rhs_chunk) in &chunks {
408            match (lhs_chunk.first(), rhs_chunk.first()) {
409                (Some(lhs_first), Some(rhs_first)) => {
410                    if difftastic::dijkstra::mark_syntax(
411                        Some(*lhs_first),
412                        Some(*rhs_first),
413                        &mut change_map,
414                        difftastic::dijkstra::DEFAULT_GRAPH_LIMIT,
415                    )
416                    .is_err()
417                    {
418                        // Graph limit exceeded -- mark all nodes as Novel
419                        for node in lhs_chunk {
420                            difftastic::changes::insert_deep_novel(node, &mut change_map);
421                        }
422                        for node in rhs_chunk {
423                            difftastic::changes::insert_deep_novel(node, &mut change_map);
424                        }
425                    }
426                }
427                (Some(_), None) => {
428                    // LHS has nodes, RHS is empty -- all LHS nodes are Novel (deleted)
429                    for node in lhs_chunk {
430                        difftastic::changes::insert_deep_novel(node, &mut change_map);
431                    }
432                }
433                (None, Some(_)) => {
434                    // RHS has nodes, LHS is empty -- all RHS nodes are Novel (inserted)
435                    for node in rhs_chunk {
436                        difftastic::changes::insert_deep_novel(node, &mut change_map);
437                    }
438                }
439                (None, None) => {
440                    // Both sides empty -- nothing to do
441                }
442            }
443        }
444
445        // Phase 3: Fix sliders for better alignment
446        difftastic::sliders::fix_all_sliders(&lhs_nodes, &mut change_map);
447        difftastic::sliders::fix_all_sliders(&rhs_nodes, &mut change_map);
448
449        // Convert to DiffReport
450        let fa = self.file_a.display().to_string();
451        let fb = self.file_b.display().to_string();
452        Ok(difftastic::changemap_to_report::changemap_to_l1_report(
453            &lhs_nodes,
454            &rhs_nodes,
455            &change_map,
456            &fa,
457            &fb,
458        ))
459    }
460
461    /// L2 Expression-level diff using difftastic with expression grouping.
462    ///
463    /// Same diff pipeline as L1 (unchanged marking, Dijkstra, slider fixup)
464    /// but converts the ChangeMap via `changemap_to_l2_report`, which groups
465    /// token changes under their nearest `Syntax::List` parent.
466    fn run_expression_level_diff(&self) -> Result<DiffReport> {
467        use super::difftastic;
468        use typed_arena::Arena;
469
470        // Detect language from file_a extension
471        let lang = Language::from_path(&self.file_a).ok_or_else(|| {
472            let ext = self
473                .file_a
474                .extension()
475                .map(|e| e.to_string_lossy().to_string())
476                .unwrap_or_else(|| "unknown".to_string());
477            RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
478        })?;
479
480        // Read file contents
481        let lhs_src = fs::read_to_string(&self.file_a)?;
482        let rhs_src = fs::read_to_string(&self.file_b)?;
483
484        // Get language config for difftastic tree-sitter conversion
485        let config = difftastic::lang_config::LangConfig::for_language(lang.as_str());
486
487        // Parse both files using existing tree-sitter infrastructure
488        let pool = ParserPool::new();
489        let lhs_tree = pool.parse(&lhs_src, lang).map_err(|e| {
490            RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
491        })?;
492        let rhs_tree = pool.parse(&rhs_src, lang).map_err(|e| {
493            RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
494        })?;
495
496        // Convert tree-sitter trees to difftastic Syntax trees
497        let arena = Arena::new();
498        let (lhs_nodes, rhs_nodes) = difftastic::ts_to_syntax::prepare_syntax_trees(
499            &arena, &lhs_src, &rhs_src, &lhs_tree, &rhs_tree, &config,
500        );
501
502        // Run diff pipeline
503        let mut change_map = difftastic::changes::ChangeMap::default();
504
505        // Phase 1: Mark unchanged nodes (structural matching)
506        let chunks = difftastic::unchanged::mark_unchanged(&lhs_nodes, &rhs_nodes, &mut change_map);
507
508        // Phase 2: Run Dijkstra graph diff on each changed chunk
509        for (lhs_chunk, rhs_chunk) in &chunks {
510            match (lhs_chunk.first(), rhs_chunk.first()) {
511                (Some(lhs_first), Some(rhs_first)) => {
512                    if difftastic::dijkstra::mark_syntax(
513                        Some(*lhs_first),
514                        Some(*rhs_first),
515                        &mut change_map,
516                        difftastic::dijkstra::DEFAULT_GRAPH_LIMIT,
517                    )
518                    .is_err()
519                    {
520                        for node in lhs_chunk {
521                            difftastic::changes::insert_deep_novel(node, &mut change_map);
522                        }
523                        for node in rhs_chunk {
524                            difftastic::changes::insert_deep_novel(node, &mut change_map);
525                        }
526                    }
527                }
528                (Some(_), None) => {
529                    for node in lhs_chunk {
530                        difftastic::changes::insert_deep_novel(node, &mut change_map);
531                    }
532                }
533                (None, Some(_)) => {
534                    for node in rhs_chunk {
535                        difftastic::changes::insert_deep_novel(node, &mut change_map);
536                    }
537                }
538                (None, None) => {}
539            }
540        }
541
542        // Phase 3: Fix sliders for better alignment
543        difftastic::sliders::fix_all_sliders(&lhs_nodes, &mut change_map);
544        difftastic::sliders::fix_all_sliders(&rhs_nodes, &mut change_map);
545
546        // Convert to DiffReport using L2 expression grouping
547        let fa = self.file_a.display().to_string();
548        let fb = self.file_b.display().to_string();
549        Ok(difftastic::changemap_to_report::changemap_to_l2_report(
550            &lhs_nodes,
551            &rhs_nodes,
552            &change_map,
553            &fa,
554            &fb,
555        ))
556    }
557}
558
559// =============================================================================
560// Tree-sitter Parsing
561// =============================================================================
562
563/// Get text for a node from source
564fn node_text<'a>(node: Node, source: &'a [u8]) -> &'a str {
565    node.utf8_text(source).unwrap_or("")
566}
567
568/// Get the class-like node kinds for each language
569fn get_class_node_kinds(language: Language) -> &'static [&'static str] {
570    match language {
571        Language::Python => &["class_definition"],
572        Language::TypeScript | Language::JavaScript => &["class_declaration", "class"],
573        Language::Go => &["type_declaration"],
574        Language::Rust => &["struct_item", "enum_item", "impl_item"],
575        Language::Java => &[
576            "class_declaration",
577            "interface_declaration",
578            "enum_declaration",
579        ],
580        Language::C => &["struct_specifier", "enum_specifier"],
581        Language::Cpp => &["class_specifier", "struct_specifier", "enum_specifier"],
582        Language::Ruby => &["class", "module"],
583        Language::Php => &["class_declaration", "interface_declaration"],
584        Language::CSharp => &[
585            "class_declaration",
586            "interface_declaration",
587            "struct_declaration",
588        ],
589        Language::Kotlin => &["class_declaration", "object_declaration"],
590        Language::Scala => &["class_definition", "object_definition", "trait_definition"],
591        Language::Swift => &[
592            "class_declaration",
593            "struct_declaration",
594            "protocol_declaration",
595        ],
596        Language::Elixir => &["call"],         // defmodule is a call
597        Language::Lua | Language::Luau => &[], // Lua has no class syntax
598        Language::Ocaml => &["module_definition", "type_definition"],
599    }
600}
601
602/// Get the node kinds that represent class body containers for method extraction
603fn get_class_body_kinds(language: Language) -> &'static [&'static str] {
604    match language {
605        Language::Python => &["block"],
606        Language::TypeScript | Language::JavaScript => &["class_body"],
607        Language::Go => &[], // Go methods are not nested in type declarations
608        Language::Rust => &["declaration_list"], // impl_item body
609        Language::Java => &["class_body"],
610        Language::C | Language::Cpp => &["field_declaration_list"],
611        Language::Ruby => &["body_statement"],
612        Language::Php => &["declaration_list"],
613        Language::CSharp => &["declaration_list"],
614        Language::Kotlin => &["class_body"],
615        Language::Scala => &["template_body"],
616        Language::Swift => &["class_body"],
617        Language::Elixir => &["do_block"],
618        Language::Lua | Language::Luau => &[],
619        Language::Ocaml => &[],
620    }
621}
622
623// =============================================================================
624// Node Extraction
625// =============================================================================
626
627/// Extract all functions, classes, and methods from AST
628fn extract_nodes(root: Node, source: &[u8], lang: Language) -> Vec<ExtractedNode> {
629    let mut nodes = Vec::new();
630    let kinds = NodeKindSets {
631        func: get_function_node_kinds(lang),
632        class: get_class_node_kinds(lang),
633        body: get_class_body_kinds(lang),
634    };
635    extract_nodes_recursive(root, source, &mut nodes, false, lang, &kinds);
636    nodes
637}
638
/// The three per-language node-kind tables consulted while walking a tree.
struct NodeKindSets<'a> {
    /// Kinds that hold the members of a class-like node.
    body: &'a [&'a str],
    /// Kinds treated as class-like definitions.
    class: &'a [&'a str],
    /// Kinds treated as function definitions.
    func: &'a [&'a str],
}
644
645fn extract_nodes_recursive(
646    node: Node,
647    source: &[u8],
648    nodes: &mut Vec<ExtractedNode>,
649    in_class: bool,
650    lang: Language,
651    kinds: &NodeKindSets<'_>,
652) {
653    let kind = node.kind();
654
655    // OCaml-specific: function-kinds are `value_definition` AND
656    // `let_binding`. The tree-sitter shape is:
657    //   value_definition -> let_binding -> pattern: <name>
658    // Plus, `let_binding` ALSO appears nested inside expressions
659    // (`let _ = expr in body`), where it is NOT a function definition.
660    // VAL-018: filter to top-level value_definition only, and require a
661    // parameter (mirrors `extract_ocaml_functions` in
662    // crates/tldr-core/src/ast/extractor.rs:1132). Skip nested
663    // let_bindings inside function bodies and anonymous `_` bindings.
664    if lang == Language::Ocaml && kind == "value_definition" {
665        for child in node.children(&mut node.walk()) {
666            if child.kind() == "let_binding" && ocaml_let_binding_is_function(child) {
667                if let Some(extracted) = extract_function_node(child, source, in_class, lang) {
668                    // Skip anonymous `_` patterns and `()` unit bindings.
669                    if extracted.name != "_" && extracted.name != "()" && !extracted.name.is_empty()
670                    {
671                        nodes.push(extracted);
672                    }
673                }
674            }
675        }
676        // Don't recurse — we've already extracted the function. Inner
677        // let-bindings (e.g. `let _ = helper () in ...`) are body
678        // expressions, not functions.
679        return;
680    }
681    if lang == Language::Ocaml && kind == "let_binding" {
682        // Bare let_binding outside a value_definition: only valid as a
683        // top-level definition without a wrapping value_definition,
684        // which is not the canonical form. Don't extract; recurse normally.
685        // (Tree-sitter usually wraps top-level lets in value_definition.)
686        for child in node.children(&mut node.walk()) {
687            extract_nodes_recursive(child, source, nodes, in_class, lang, kinds);
688        }
689        return;
690    }
691
692    // AGG13-11 (quality-metrics-and-schema-v1): OCaml interface files
693    // (.mli) declare exported functions/values via `val name : type`,
694    // which tree-sitter parses as `value_specification`. Pre-fix the
695    // diff extractor only knew about `value_definition` (the .ml
696    // `let name ... = body` form), so .mli files extracted ZERO
697    // function nodes. That made `tldr diff dag.ml dag.mli` report
698    // `identical: true` (or near-identical) even when the impl was
699    // 102 LOC and the interface was 16 LOC. Treating each `val`
700    // declaration as a function lets the diff pair .ml `let` bindings
701    // against their .mli `val` declarations and surface the body
702    // diff (deleted on the .mli side, present on the .ml side).
703    if lang == Language::Ocaml && kind == "value_specification" {
704        if let Some(extracted) = extract_ocaml_value_spec(node, source) {
705            nodes.push(extracted);
706        }
707        return;
708    }
709
710    // Check if this is a function node
711    if kinds.func.contains(&kind) {
712        if let Some(extracted) = extract_function_node(node, source, in_class, lang) {
713            nodes.push(extracted);
714        }
715    }
716    // Check if this is a class node
717    else if kinds.class.contains(&kind) {
718        if let Some(extracted) = extract_class_node(node, source, lang) {
719            nodes.push(extracted);
720        }
721        // Extract methods inside the class body
722        for child in node.children(&mut node.walk()) {
723            if kinds.body.contains(&child.kind()) {
724                extract_nodes_recursive(child, source, nodes, true, lang, kinds);
725            }
726        }
727        return; // Don't recurse further - we handled the body
728    }
729
730    // Recurse into children
731    for child in node.children(&mut node.walk()) {
732        extract_nodes_recursive(child, source, nodes, in_class, lang, kinds);
733    }
734}
735
736/// True if an OCaml `let_binding` node has at least one `parameter`
737/// child — i.e. it's a function definition rather than a value binding.
738/// Mirrors `ocaml_binding_has_params_simple` in
739/// `crates/tldr-core/src/ast/extractor.rs:1158`.
740fn ocaml_let_binding_is_function(node: Node) -> bool {
741    for child in node.children(&mut node.walk()) {
742        if child.kind() == "parameter" {
743            return true;
744        }
745    }
746    false
747}
748
749/// Extract an OCaml `value_specification` (the `val name : type` form
750/// found in .mli interface files) as an [`ExtractedNode`]. AGG13-11
751/// (quality-metrics-and-schema-v1): without this, .mli files surface
752/// zero function nodes in the diff extractor and any `tldr diff
753/// foo.ml foo.mli` reports identical=true (or near-identical),
754/// missing the actual implementation/interface delta.
755///
756/// Tree-sitter-ocaml shape:
757/// `value_specification` -> "val" `value_name` ":" `typed` (optional `type_constraint`)
758/// We use `value_name` for the extracted node's `name` so it pairs
759/// against the matching `let_binding` (.ml side).
760fn extract_ocaml_value_spec(node: Node, source: &[u8]) -> Option<ExtractedNode> {
761    // Find the value_name child (the actual function/value name).
762    let mut name = None;
763    for child in node.children(&mut node.walk()) {
764        if child.kind() == "value_name" {
765            name = Some(node_text(child, source).to_string());
766            break;
767        }
768    }
769    let name = name?;
770    if name.is_empty() {
771        return None;
772    }
773
774    let line = node.start_position().row as u32 + 1;
775    let end_line = node.end_position().row as u32 + 1;
776    let column = node.start_position().column as u32;
777    let body = node_text(node, source).to_string();
778
779    Some(ExtractedNode::new(
780        name,
781        NodeKind::Function,
782        line,
783        end_line,
784        column,
785        body,
786    ))
787}
788
789fn extract_function_node(
790    node: Node,
791    source: &[u8],
792    is_method: bool,
793    lang: Language,
794) -> Option<ExtractedNode> {
795    // Use language-aware name extraction from function_finder
796    let source_str = std::str::from_utf8(source).unwrap_or("");
797    let func_name = get_function_name(node, lang, source_str)?;
798
799    // Try to extract parameters (varies by language but most use "parameters" or "formal_parameters")
800    let params = node
801        .child_by_field_name("parameters")
802        .or_else(|| node.child_by_field_name("formal_parameters"))
803        .map(|p| node_text(p, source).to_string())
804        .unwrap_or_default();
805
806    let line = node.start_position().row as u32 + 1;
807    let end_line = node.end_position().row as u32 + 1;
808    let column = node.start_position().column as u32;
809    let body = node_text(node, source).to_string();
810
811    let mut extracted =
812        ExtractedNode::new(func_name, NodeKind::Function, line, end_line, column, body)
813            .with_params(params);
814
815    if is_method {
816        extracted = extracted.with_method_kind();
817    }
818
819    Some(extracted)
820}
821
822fn extract_class_node(node: Node, source: &[u8], lang: Language) -> Option<ExtractedNode> {
823    // Get class name - most languages use "name" field
824    let class_name = node
825        .child_by_field_name("name")
826        .map(|n| node_text(n, source).to_string())
827        .or_else(|| {
828            // Fallback: search for first identifier child
829            let mut cursor = node.walk();
830            for child in node.children(&mut cursor) {
831                if child.kind() == "identifier"
832                    || child.kind() == "type_identifier"
833                    || child.kind() == "constant"
834                {
835                    return Some(node_text(child, source).to_string());
836                }
837            }
838            None
839        })?;
840
841    // Skip empty names
842    if class_name.is_empty() {
843        return None;
844    }
845
846    // For Elixir defmodule, filter to only actual module definitions
847    if lang == Language::Elixir && node.kind() == "call" {
848        let first_child = node.child(0)?;
849        let first_text = node_text(first_child, source);
850        if first_text != "defmodule" {
851            return None;
852        }
853        // Module name is in the arguments
854        if let Some(args) = node.child(1) {
855            let name = node_text(args, source).to_string();
856            if !name.is_empty() {
857                let line = node.start_position().row as u32 + 1;
858                let end_line = node.end_position().row as u32 + 1;
859                let column = node.start_position().column as u32;
860                let body = node_text(node, source).to_string();
861                return Some(ExtractedNode::new(
862                    name,
863                    NodeKind::Class,
864                    line,
865                    end_line,
866                    column,
867                    body,
868                ));
869            }
870        }
871        return None;
872    }
873
874    let line = node.start_position().row as u32 + 1;
875    let end_line = node.end_position().row as u32 + 1;
876    let column = node.start_position().column as u32;
877    let body = node_text(node, source).to_string();
878
879    Some(ExtractedNode::new(
880        class_name,
881        NodeKind::Class,
882        line,
883        end_line,
884        column,
885        body,
886    ))
887}
888
889// =============================================================================
890// Change Detection
891// =============================================================================
892
893/// Detect changes between two sets of nodes
894fn detect_changes(
895    nodes_a: &[ExtractedNode],
896    nodes_b: &[ExtractedNode],
897    file_a: &Path,
898    file_b: &Path,
899    semantic_only: bool,
900) -> Vec<ASTChange> {
901    let mut changes = Vec::new();
902
903    // real-repo-fixes-v1 (P9.BUG-R8): build a multi-value index keyed by
904    // node name so overloads (`@overload def locate_app(...)` × N) and
905    // duplicate-named methods across classes (`__init__` in flask's
906    // `ScriptInfo` vs `AppGroup`) pair up by structural identity instead
907    // of collapsing into a single map entry. The previous
908    // `HashMap<&str, &ExtractedNode>` kept only the *last* node per name,
909    // so `tldr diff <file> <file>` falsely reported every overload as an
910    // update and every duplicate-named method as moved.
911    let mut index_b: HashMap<&str, Vec<usize>> = HashMap::new();
912    for (j, n) in nodes_b.iter().enumerate() {
913        index_b.entry(n.name.as_str()).or_default().push(j);
914    }
915
916    // Track which nodes have been matched
917    let mut matched_a: Vec<bool> = vec![false; nodes_a.len()];
918    let mut matched_b: Vec<bool> = vec![false; nodes_b.len()];
919
920    // First pass: exact name matches with stable best-of pairing.
921    //
922    // For each A node, pick the unmatched B node with the same name that
923    // best matches by (kind, body, line) — in that priority. Self-diff
924    // (every A == every B) lands on the line-aligned twin every time, so
925    // `total_changes == 0` and `identical == true`.
926    for (i, node_a) in nodes_a.iter().enumerate() {
927        // Reserved field on `ExtractedNode` — kept because callers may
928        // surface it in future. See struct comment.
929        let _ = node_a.end_line;
930        let candidates = match index_b.get(node_a.name.as_str()) {
931            Some(c) => c,
932            None => continue,
933        };
934
935        let chosen = candidates
936            .iter()
937            .copied()
938            .filter(|&j| !matched_b[j])
939            .min_by_key(|&j| {
940                let n_b = &nodes_b[j];
941                // Lower is better. Priority order: same kind, then exact
942                // body match, then closest line. is_method tie-break
943                // distinguishes `__init__` of class A vs class B in the
944                // common case where each class has its own.
945                let kind_mismatch = (node_a.kind != n_b.kind) as u32;
946                let method_mismatch = (node_a.is_method != n_b.is_method) as u32;
947                let body_mismatch = (node_a.normalized_body != n_b.normalized_body) as u32;
948                let line_diff =
949                    (node_a.line as i64 - n_b.line as i64).unsigned_abs() as u32;
950                (kind_mismatch, method_mismatch, body_mismatch, line_diff)
951            });
952
953        if let Some(j) = chosen {
954            matched_a[i] = true;
955            matched_b[j] = true;
956            let node_b = &nodes_b[j];
957
958            // Check if body changed
959            if node_a.normalized_body != node_b.normalized_body {
960                // It's an update
961                changes.push(ASTChange {
962                    change_type: ChangeType::Update,
963                    node_kind: node_a.kind,
964                    name: Some(node_a.name.clone()),
965                    old_location: Some(Location::with_column(
966                        file_a.display().to_string(),
967                        node_a.line,
968                        node_a.column,
969                    )),
970                    new_location: Some(Location::with_column(
971                        file_b.display().to_string(),
972                        node_b.line,
973                        node_b.column,
974                    )),
975                    old_text: Some(node_a.body.clone()),
976                    new_text: Some(node_b.body.clone()),
977                    similarity: Some(compute_similarity(
978                        &node_a.normalized_body,
979                        &node_b.normalized_body,
980                    )),
981                    children: None,
982                    base_changes: None,
983                });
984            } else if node_a.line != node_b.line && !semantic_only {
985                // Same content but moved - only report if not semantic_only
986                changes.push(ASTChange {
987                    change_type: ChangeType::Move,
988                    node_kind: node_a.kind,
989                    name: Some(node_a.name.clone()),
990                    old_location: Some(Location::with_column(
991                        file_a.display().to_string(),
992                        node_a.line,
993                        node_a.column,
994                    )),
995                    new_location: Some(Location::with_column(
996                        file_b.display().to_string(),
997                        node_b.line,
998                        node_b.column,
999                    )),
1000                    old_text: None,
1001                    new_text: None,
1002                    similarity: Some(1.0),
1003                    children: None,
1004                    base_changes: None,
1005                });
1006            }
1007        }
1008    }
1009
1010    // Collect unmatched nodes
1011    let unmatched_a: Vec<(usize, &ExtractedNode)> = nodes_a
1012        .iter()
1013        .enumerate()
1014        .filter(|(i, _)| !matched_a[*i])
1015        .collect();
1016    let unmatched_b: Vec<(usize, &ExtractedNode)> = nodes_b
1017        .iter()
1018        .enumerate()
1019        .filter(|(i, _)| !matched_b[*i])
1020        .collect();
1021
1022    // Second pass: detect renames (same body, different name)
1023    let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
1024
1025    for (_, node_a) in &unmatched_a {
1026        let mut best_match: Option<(usize, f64)> = None;
1027
1028        for (j, (_, node_b)) in unmatched_b.iter().enumerate() {
1029            if used_b[j] {
1030                continue;
1031            }
1032            if node_a.kind != node_b.kind {
1033                continue;
1034            }
1035
1036            let similarity = compute_similarity(&node_a.normalized_body, &node_b.normalized_body);
1037            if similarity >= RENAME_SIMILARITY_THRESHOLD
1038                && (best_match.is_none() || similarity > best_match.unwrap().1)
1039            {
1040                best_match = Some((j, similarity));
1041            }
1042        }
1043
1044        if let Some((j, similarity)) = best_match {
1045            let (_, node_b) = unmatched_b[j];
1046            used_b[j] = true;
1047
1048            // Mark as renamed
1049            changes.push(ASTChange {
1050                change_type: ChangeType::Rename,
1051                node_kind: node_a.kind,
1052                name: Some(node_a.name.clone()),
1053                old_location: Some(Location::with_column(
1054                    file_a.display().to_string(),
1055                    node_a.line,
1056                    node_a.column,
1057                )),
1058                new_location: Some(Location::with_column(
1059                    file_b.display().to_string(),
1060                    node_b.line,
1061                    node_b.column,
1062                )),
1063                old_text: Some(node_a.name.clone()),
1064                new_text: Some(node_b.name.clone()),
1065                similarity: Some(similarity),
1066                children: None,
1067                base_changes: None,
1068            });
1069        }
1070    }
1071
1072    // Remaining unmatched in A are deletes
1073    for (_, node_a) in &unmatched_a {
1074        // Check if already matched as rename
1075        let is_renamed = changes
1076            .iter()
1077            .any(|c| c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&node_a.name));
1078        if !is_renamed {
1079            changes.push(ASTChange {
1080                change_type: ChangeType::Delete,
1081                node_kind: node_a.kind,
1082                name: Some(node_a.name.clone()),
1083                old_location: Some(Location::with_column(
1084                    file_a.display().to_string(),
1085                    node_a.line,
1086                    node_a.column,
1087                )),
1088                new_location: None,
1089                old_text: None,
1090                new_text: None,
1091                similarity: None,
1092                children: None,
1093                base_changes: None,
1094            });
1095        }
1096    }
1097
1098    // Remaining unmatched in B are inserts
1099    for (j, (_, node_b)) in unmatched_b.iter().enumerate() {
1100        if !used_b[j] {
1101            changes.push(ASTChange {
1102                change_type: ChangeType::Insert,
1103                node_kind: node_b.kind,
1104                name: Some(node_b.name.clone()),
1105                old_location: None,
1106                new_location: Some(Location::with_column(
1107                    file_b.display().to_string(),
1108                    node_b.line,
1109                    node_b.column,
1110                )),
1111                old_text: None,
1112                new_text: None,
1113                similarity: None,
1114                children: None,
1115                base_changes: None,
1116            });
1117        }
1118    }
1119
1120    // Sort changes: deletes, renames, updates, inserts
1121    changes.sort_by_key(|c| match c.change_type {
1122        ChangeType::Delete => 0,
1123        ChangeType::Rename => 1,
1124        ChangeType::Update => 2,
1125        ChangeType::Move => 3,
1126        ChangeType::Insert => 4,
1127        _ => 5,
1128    });
1129
1130    changes
1131}
1132
1133// =============================================================================
1134// Similarity Computation
1135// =============================================================================
1136
1137/// Compute similarity between two strings using Jaccard on lines,
1138/// with a character-level fallback for short/single-line bodies.
1139fn compute_similarity(a: &str, b: &str) -> f64 {
1140    if a == b {
1141        return 1.0;
1142    }
1143    if a.is_empty() || b.is_empty() {
1144        return 0.0;
1145    }
1146
1147    // Jaccard similarity on lines
1148    let lines_a: std::collections::HashSet<&str> = a.lines().collect();
1149    let lines_b: std::collections::HashSet<&str> = b.lines().collect();
1150
1151    let intersection = lines_a.intersection(&lines_b).count();
1152    let union = lines_a.union(&lines_b).count();
1153
1154    let line_sim = if union == 0 {
1155        0.0
1156    } else {
1157        intersection as f64 / union as f64
1158    };
1159
1160    // For short bodies (few lines), also compute character-level similarity
1161    // to avoid 0.0 when a single line was slightly modified
1162    if line_sim == 0.0 && lines_a.len() <= 2 && lines_b.len() <= 2 {
1163        return char_jaccard_similarity(a, b);
1164    }
1165
1166    line_sim
1167}
1168
/// Jaccard similarity over bigrams of the two strings.
///
/// Bigrams are taken over the UTF-8 bytes (every window of two adjacent
/// bytes), and each side is reduced to its set of distinct bigrams. If either
/// string is shorter than two bytes no bigram exists, and the result is `1.0`
/// for equal strings and `0.0` otherwise.
fn char_jaccard_similarity(a: &str, b: &str) -> f64 {
    let (bytes_a, bytes_b) = (a.as_bytes(), b.as_bytes());
    if bytes_a.len() < 2 || bytes_b.len() < 2 {
        return if a == b { 1.0 } else { 0.0 };
    }

    let pairs_a: HashSet<&[u8]> = bytes_a.windows(2).collect();
    let pairs_b: HashSet<&[u8]> = bytes_b.windows(2).collect();

    let shared = pairs_a.intersection(&pairs_b).count();
    let total = pairs_a.union(&pairs_b).count();

    match total {
        0 => 0.0,
        _ => shared as f64 / total as f64,
    }
}
1187
1188// =============================================================================
1189// Text Formatting
1190// =============================================================================
1191
1192/// Format diff report as human-readable text
1193fn format_diff_text(report: &DiffReport) -> String {
1194    let mut out = String::new();
1195
1196    out.push_str("Diff Report\n");
1197    out.push_str("===========\n\n");
1198    out.push_str(&format!("File A: {}\n", report.file_a));
1199    out.push_str(&format!("File B: {}\n", report.file_b));
1200    out.push_str(&format!("Identical: {}\n\n", report.identical));
1201
1202    if report.identical {
1203        out.push_str("No structural changes detected.\n");
1204        return out;
1205    }
1206
1207    out.push_str("Changes:\n");
1208    out.push_str("--------\n");
1209
1210    for change in &report.changes {
1211        let change_type = match change.change_type {
1212            ChangeType::Insert => "+",
1213            ChangeType::Delete => "-",
1214            ChangeType::Update => "~",
1215            ChangeType::Move => ">",
1216            ChangeType::Rename => "R",
1217            ChangeType::Format => "F",
1218            ChangeType::Extract => "E",
1219            ChangeType::Inline => "I",
1220        };
1221
1222        let kind = match change.node_kind {
1223            NodeKind::Function => "function",
1224            NodeKind::Class => "class",
1225            NodeKind::Method => "method",
1226            NodeKind::Field => "field",
1227            NodeKind::Statement => "statement",
1228            NodeKind::Expression => "expression",
1229            NodeKind::Block => "block",
1230        };
1231
1232        let name = change.name.as_deref().unwrap_or("<unknown>");
1233
1234        match change.change_type {
1235            ChangeType::Insert => {
1236                if let Some(ref loc) = change.new_location {
1237                    out.push_str(&format!(
1238                        "  {} {} {} at {}:{}\n",
1239                        change_type, kind, name, loc.file, loc.line
1240                    ));
1241                }
1242            }
1243            ChangeType::Delete => {
1244                if let Some(ref loc) = change.old_location {
1245                    out.push_str(&format!(
1246                        "  {} {} {} at {}:{}\n",
1247                        change_type, kind, name, loc.file, loc.line
1248                    ));
1249                }
1250            }
1251            ChangeType::Update | ChangeType::Move => {
1252                if let (Some(ref old), Some(ref new)) = (&change.old_location, &change.new_location)
1253                {
1254                    out.push_str(&format!(
1255                        "  {} {} {} from {}:{} to {}:{}\n",
1256                        change_type, kind, name, old.file, old.line, new.file, new.line
1257                    ));
1258                }
1259            }
1260            ChangeType::Rename => {
1261                let old_name = change.old_text.as_deref().unwrap_or(name);
1262                let new_name = change.new_text.as_deref().unwrap_or(name);
1263                out.push_str(&format!(
1264                    "  {} {} {} -> {}\n",
1265                    change_type, kind, old_name, new_name
1266                ));
1267            }
1268            _ => {
1269                out.push_str(&format!("  {} {} {}\n", change_type, kind, name));
1270            }
1271        }
1272    }
1273
1274    if let Some(ref summary) = report.summary {
1275        out.push_str("\nSummary:\n");
1276        out.push_str("--------\n");
1277        out.push_str(&format!("  Total changes: {}\n", summary.total_changes));
1278        out.push_str(&format!(
1279            "  Semantic changes: {}\n",
1280            summary.semantic_changes
1281        ));
1282        out.push_str(&format!("  Inserts: {}\n", summary.inserts));
1283        out.push_str(&format!("  Deletes: {}\n", summary.deletes));
1284        out.push_str(&format!("  Updates: {}\n", summary.updates));
1285        out.push_str(&format!("  Renames: {}\n", summary.renames));
1286        out.push_str(&format!("  Moves: {}\n", summary.moves));
1287    }
1288
1289    // L6: File-level structural changes
1290    if let Some(ref file_changes) = report.file_changes {
1291        out.push_str("\nFile-Level Changes:\n");
1292        out.push_str("-------------------\n");
1293        for fc in file_changes {
1294            let change_type = match fc.change_type {
1295                ChangeType::Insert => "+",
1296                ChangeType::Delete => "-",
1297                ChangeType::Update => "~",
1298                _ => "?",
1299            };
1300            out.push_str(&format!("  {} {}\n", change_type, fc.relative_path));
1301            if let Some(ref sigs) = fc.signature_changes {
1302                for sig in sigs {
1303                    out.push_str(&format!("      changed: {}\n", sig));
1304                }
1305            }
1306        }
1307    }
1308
1309    // L7: Module-level changes
1310    if let Some(ref module_changes) = report.module_changes {
1311        out.push_str("\nModule-Level Changes:\n");
1312        out.push_str("---------------------\n");
1313        for mc in module_changes {
1314            let change_type = match mc.change_type {
1315                ChangeType::Insert => "+",
1316                ChangeType::Delete => "-",
1317                ChangeType::Update => "~",
1318                _ => "?",
1319            };
1320            out.push_str(&format!("  {} {}\n", change_type, mc.module_path));
1321            for edge in &mc.imports_added {
1322                let names = if edge.imported_names.is_empty() {
1323                    String::new()
1324                } else {
1325                    format!(" ({})", edge.imported_names.join(", "))
1326                };
1327                out.push_str(&format!("      + import {}{}\n", edge.target_module, names));
1328            }
1329            for edge in &mc.imports_removed {
1330                let names = if edge.imported_names.is_empty() {
1331                    String::new()
1332                } else {
1333                    format!(" ({})", edge.imported_names.join(", "))
1334                };
1335                out.push_str(&format!("      - import {}{}\n", edge.target_module, names));
1336            }
1337        }
1338    }
1339
1340    // L7: Import graph summary
1341    if let Some(ref igs) = report.import_graph_summary {
1342        out.push_str("\nImport Graph Summary:\n");
1343        out.push_str("---------------------\n");
1344        out.push_str(&format!("  Edges in A: {}\n", igs.total_edges_a));
1345        out.push_str(&format!("  Edges in B: {}\n", igs.total_edges_b));
1346        out.push_str(&format!("  Edges added: {}\n", igs.edges_added));
1347        out.push_str(&format!("  Edges removed: {}\n", igs.edges_removed));
1348        out.push_str(&format!(
1349            "  Modules with import changes: {}\n",
1350            igs.modules_with_import_changes
1351        ));
1352    }
1353
1354    // L8: Architecture-level changes
1355    if let Some(ref arch_changes) = report.arch_changes {
1356        out.push_str("\nArchitecture-Level Changes:\n");
1357        out.push_str("---------------------------\n");
1358        for ac in arch_changes {
1359            let change_label = match ac.change_type {
1360                ArchChangeType::LayerMigration => "migration",
1361                ArchChangeType::Added => "added",
1362                ArchChangeType::Removed => "removed",
1363                ArchChangeType::CompositionChanged => "composition changed",
1364                ArchChangeType::CycleIntroduced => "cycle introduced",
1365                ArchChangeType::CycleResolved => "cycle resolved",
1366            };
1367            out.push_str(&format!("  [{}] {}\n", change_label, ac.directory));
1368            if let (Some(ref old), Some(ref new)) = (&ac.old_layer, &ac.new_layer) {
1369                out.push_str(&format!("      {} -> {}\n", old, new));
1370            } else if let Some(ref new) = ac.new_layer {
1371                out.push_str(&format!("      -> {}\n", new));
1372            } else if let Some(ref old) = ac.old_layer {
1373                out.push_str(&format!("      {} ->\n", old));
1374            }
1375            if !ac.migrated_functions.is_empty() {
1376                out.push_str(&format!(
1377                    "      migrated: {}\n",
1378                    ac.migrated_functions.join(", ")
1379                ));
1380            }
1381        }
1382    }
1383
1384    // L8: Architecture diff summary
1385    if let Some(ref arch_summary) = report.arch_summary {
1386        out.push_str("\nArchitecture Summary:\n");
1387        out.push_str("---------------------\n");
1388        out.push_str(&format!(
1389            "  Layer migrations: {}\n",
1390            arch_summary.layer_migrations
1391        ));
1392        out.push_str(&format!(
1393            "  Directories added: {}\n",
1394            arch_summary.directories_added
1395        ));
1396        out.push_str(&format!(
1397            "  Directories removed: {}\n",
1398            arch_summary.directories_removed
1399        ));
1400        out.push_str(&format!(
1401            "  Cycles introduced: {}\n",
1402            arch_summary.cycles_introduced
1403        ));
1404        out.push_str(&format!(
1405            "  Cycles resolved: {}\n",
1406            arch_summary.cycles_resolved
1407        ));
1408        out.push_str(&format!(
1409            "  Stability score: {}\n",
1410            arch_summary.stability_score
1411        ));
1412    }
1413
1414    out
1415}
1416
1417// =============================================================================
1418// Statement-Level Diff (L3) - Zhang-Shasha Tree Edit Distance
1419// =============================================================================
1420
1421/// Statement node kinds per language for tree extraction.
1422fn get_statement_node_kinds(lang: Language) -> &'static [&'static str] {
1423    match lang {
1424        Language::Python => &[
1425            "return_statement",
1426            "if_statement",
1427            "for_statement",
1428            "while_statement",
1429            "expression_statement",
1430            "assert_statement",
1431            "raise_statement",
1432            "try_statement",
1433            "with_statement",
1434            "assignment",
1435            "augmented_assignment",
1436            "delete_statement",
1437            "pass_statement",
1438            "break_statement",
1439            "continue_statement",
1440        ],
1441        Language::TypeScript | Language::JavaScript => &[
1442            "return_statement",
1443            "if_statement",
1444            "for_statement",
1445            "for_in_statement",
1446            "while_statement",
1447            "do_statement",
1448            "expression_statement",
1449            "variable_declaration",
1450            "lexical_declaration",
1451            "throw_statement",
1452            "try_statement",
1453            "switch_statement",
1454            "break_statement",
1455            "continue_statement",
1456        ],
1457        Language::Go => &[
1458            "return_statement",
1459            "if_statement",
1460            "for_statement",
1461            "expression_statement",
1462            "short_var_declaration",
1463            "var_declaration",
1464            "assignment_statement",
1465            "go_statement",
1466            "defer_statement",
1467            "select_statement",
1468            "switch_statement",
1469        ],
1470        Language::Rust => &[
1471            "let_declaration",
1472            "expression_statement",
1473            "return_expression",
1474            "if_expression",
1475            "for_expression",
1476            "while_expression",
1477            "loop_expression",
1478            "match_expression",
1479        ],
1480        Language::Java => &[
1481            "return_statement",
1482            "if_statement",
1483            "for_statement",
1484            "enhanced_for_statement",
1485            "while_statement",
1486            "do_statement",
1487            "expression_statement",
1488            "local_variable_declaration",
1489            "throw_statement",
1490            "try_statement",
1491            "switch_expression",
1492        ],
1493        Language::C | Language::Cpp => &[
1494            "return_statement",
1495            "if_statement",
1496            "for_statement",
1497            "while_statement",
1498            "do_statement",
1499            "expression_statement",
1500            "declaration",
1501            "switch_statement",
1502        ],
1503        Language::Ruby => &[
1504            "return",
1505            "if",
1506            "unless",
1507            "for",
1508            "while",
1509            "until",
1510            "assignment",
1511            "call",
1512            "begin",
1513        ],
1514        Language::Php => &[
1515            "return_statement",
1516            "if_statement",
1517            "for_statement",
1518            "foreach_statement",
1519            "while_statement",
1520            "expression_statement",
1521            "echo_statement",
1522            "throw_expression",
1523            "try_statement",
1524        ],
1525        Language::CSharp => &[
1526            "return_statement",
1527            "if_statement",
1528            "for_statement",
1529            "foreach_statement",
1530            "while_statement",
1531            "expression_statement",
1532            "local_declaration_statement",
1533            "throw_statement",
1534            "try_statement",
1535        ],
1536        Language::Kotlin => &[
1537            "property_declaration",
1538            "assignment",
1539            "if_expression",
1540            "for_statement",
1541            "while_statement",
1542            "do_while_statement",
1543            "return_expression",
1544            "throw_expression",
1545            "try_expression",
1546        ],
1547        Language::Scala => &[
1548            "val_definition",
1549            "var_definition",
1550            "if_expression",
1551            "for_expression",
1552            "while_expression",
1553            "return_expression",
1554            "throw_expression",
1555            "try_expression",
1556            "call_expression",
1557        ],
1558        Language::Swift => &[
1559            "value_binding_pattern",
1560            "if_statement",
1561            "for_in_statement",
1562            "while_statement",
1563            "return_statement",
1564            "throw_statement",
1565            "guard_statement",
1566            "switch_statement",
1567        ],
1568        Language::Elixir => &["call", "if", "case", "cond"],
1569        Language::Lua | Language::Luau => &[
1570            "return_statement",
1571            "if_statement",
1572            "for_statement",
1573            "while_statement",
1574            "variable_declaration",
1575            "assignment_statement",
1576            "function_call",
1577        ],
1578        Language::Ocaml => &[
1579            "let_binding",
1580            "if_expression",
1581            "match_expression",
1582            "application",
1583        ],
1584    }
1585}
1586
/// Ordered, labeled tree node used as input to the Zhang-Shasha tree edit
/// distance algorithm.
#[derive(Clone, Debug)]
struct LabeledTreeNode {
    /// Label of the form "node_kind:significant_text".
    label: String,
    /// Child nodes, in order.
    children: Vec<LabeledTreeNode>,
    /// 1-indexed source line, used to map the node back to a location.
    line: u32,
}
1597
/// A tree node as it appears in the flattened postorder sequence used by the
/// Zhang-Shasha computation.
#[derive(Clone, Debug)]
struct PostorderNode {
    /// Label copied from the originating tree node.
    label: String,
    /// 1-indexed source line copied from the originating tree node.
    line: u32,
    /// Position, within the same postorder array, of this node's leftmost
    /// leaf descendant (a leaf refers to itself).
    leftmost_leaf: usize,
}
1606
/// A single step of the edit script that turns sequence/tree A into B.
///
/// All indices refer to positions in the respective postorder arrays.
#[derive(Clone, Debug)]
enum EditOp {
    /// Remove the node at `index_a` of A.
    Delete { index_a: usize },
    /// Add the node at `index_b` of B.
    Insert { index_b: usize },
    /// Replace the label of `A[index_a]` with the label of `B[index_b]`.
    Relabel { index_a: usize, index_b: usize },
}
1617
1618/// Build a labeled tree from a tree-sitter function body node.
1619///
1620/// Walks the AST and picks out statement-level nodes, building an ordered
1621/// tree where each statement is a node and nested statements (e.g., inside
1622/// if-bodies) become children.
1623fn build_labeled_tree(node: Node, source: &[u8], statement_kinds: &[&str]) -> LabeledTreeNode {
1624    let label = build_node_label(node, source);
1625    let line = node.start_position().row as u32 + 1;
1626
1627    let mut children = Vec::new();
1628    let mut cursor = node.walk();
1629    for child in node.children(&mut cursor) {
1630        if statement_kinds.contains(&child.kind()) {
1631            // This child is a statement node - add it and recurse into its body
1632            children.push(build_labeled_tree(child, source, statement_kinds));
1633        } else {
1634            // Not a statement node - look deeper for nested statements
1635            let nested = collect_nested_statements(child, source, statement_kinds);
1636            children.extend(nested);
1637        }
1638    }
1639
1640    LabeledTreeNode {
1641        label,
1642        children,
1643        line,
1644    }
1645}
1646
1647/// Collect statement nodes from non-statement intermediate nodes.
1648fn collect_nested_statements(
1649    node: Node,
1650    source: &[u8],
1651    statement_kinds: &[&str],
1652) -> Vec<LabeledTreeNode> {
1653    let mut result = Vec::new();
1654    let mut cursor = node.walk();
1655    for child in node.children(&mut cursor) {
1656        if statement_kinds.contains(&child.kind()) {
1657            result.push(build_labeled_tree(child, source, statement_kinds));
1658        } else {
1659            result.extend(collect_nested_statements(child, source, statement_kinds));
1660        }
1661    }
1662    result
1663}
1664
1665/// Build a label string for a tree-sitter node.
1666///
1667/// Format: "node_kind:significant_tokens" where significant tokens
1668/// are identifiers and operators (not whitespace or delimiters).
1669fn build_node_label(node: Node, source: &[u8]) -> String {
1670    let kind = node.kind();
1671    let text = node.utf8_text(source).unwrap_or("");
1672
1673    // Extract significant tokens: identifiers, operators, literals
1674    // We take just the first line for conciseness and strip whitespace
1675    let first_line = text.lines().next().unwrap_or("").trim();
1676
1677    // Truncate to avoid huge labels
1678    let significant = if first_line.len() > 120 {
1679        &first_line[..120]
1680    } else {
1681        first_line
1682    };
1683
1684    format!("{}:{}", kind, significant)
1685}
1686
1687/// Extract statement-level subtree from a function body node.
1688///
1689/// Finds the function body (block node) and builds a labeled tree
1690/// from the statements within it.
1691fn extract_statement_tree(
1692    func_node: Node,
1693    source: &[u8],
1694    lang: Language,
1695    statement_kinds: &[&str],
1696) -> LabeledTreeNode {
1697    // Find the function body node
1698    let body_node = find_function_body(func_node, lang);
1699
1700    match body_node {
1701        Some(body) => {
1702            // Build a root node representing the function body
1703            let mut children = Vec::new();
1704            let mut cursor = body.walk();
1705            for child in body.children(&mut cursor) {
1706                if statement_kinds.contains(&child.kind()) {
1707                    children.push(build_labeled_tree(child, source, statement_kinds));
1708                } else {
1709                    children.extend(collect_nested_statements(child, source, statement_kinds));
1710                }
1711            }
1712
1713            LabeledTreeNode {
1714                label: format!("body:{}", func_node.kind()),
1715                children,
1716                line: body.start_position().row as u32 + 1,
1717            }
1718        }
1719        None => {
1720            // Fallback: treat the entire function node as the body
1721            build_labeled_tree(func_node, source, statement_kinds)
1722        }
1723    }
1724}
1725
1726/// Find the body/block node within a function definition.
1727fn find_function_body(func_node: Node, lang: Language) -> Option<Node> {
1728    // Try common field names
1729    if let Some(body) = func_node.child_by_field_name("body") {
1730        return Some(body);
1731    }
1732    if let Some(body) = func_node.child_by_field_name("block") {
1733        return Some(body);
1734    }
1735
1736    // Language-specific body detection
1737    let body_kinds = match lang {
1738        Language::Python => &["block"][..],
1739        Language::TypeScript | Language::JavaScript => &["statement_block"],
1740        Language::Go => &["block"],
1741        Language::Rust => &["block"],
1742        Language::Java => &["block"],
1743        Language::C | Language::Cpp => &["compound_statement"],
1744        Language::Ruby => &["body_statement"],
1745        Language::Php => &["compound_statement"],
1746        Language::CSharp => &["block"],
1747        Language::Kotlin => &["function_body"],
1748        Language::Scala => &["block", "indented_block"],
1749        Language::Swift => &["function_body"],
1750        Language::Elixir => &["do_block"],
1751        Language::Lua | Language::Luau => &["block"],
1752        Language::Ocaml => &["let_binding"],
1753    };
1754
1755    let mut cursor = func_node.walk();
1756    let found = func_node
1757        .children(&mut cursor)
1758        .find(|&child| body_kinds.contains(&child.kind()));
1759    found
1760}
1761
1762/// Count total nodes in a labeled tree.
1763fn count_tree_nodes(tree: &LabeledTreeNode) -> usize {
1764    1 + tree.children.iter().map(count_tree_nodes).sum::<usize>()
1765}
1766
1767// =============================================================================
1768// Zhang-Shasha Tree Edit Distance
1769// =============================================================================
1770
1771/// Flatten a labeled tree into postorder traversal, computing leftmost leaf descendants.
1772fn flatten_postorder(tree: &LabeledTreeNode) -> Vec<PostorderNode> {
1773    let mut nodes = Vec::new();
1774    flatten_postorder_recursive(tree, &mut nodes);
1775    nodes
1776}
1777
1778fn flatten_postorder_recursive(tree: &LabeledTreeNode, nodes: &mut Vec<PostorderNode>) -> usize {
1779    if tree.children.is_empty() {
1780        // Leaf node: leftmost leaf is itself
1781        let idx = nodes.len();
1782        nodes.push(PostorderNode {
1783            label: tree.label.clone(),
1784            line: tree.line,
1785            leftmost_leaf: idx,
1786        });
1787        return idx;
1788    }
1789
1790    // Process children first (postorder)
1791    let mut first_child_leftmost = usize::MAX;
1792    for (i, child) in tree.children.iter().enumerate() {
1793        let child_leftmost = flatten_postorder_recursive(child, nodes);
1794        if i == 0 {
1795            first_child_leftmost = child_leftmost;
1796        }
1797    }
1798
1799    // Now add this node
1800    nodes.push(PostorderNode {
1801        label: tree.label.clone(),
1802        line: tree.line,
1803        leftmost_leaf: first_child_leftmost,
1804    });
1805
1806    // The leftmost leaf of this node is the leftmost leaf of its first child
1807    first_child_leftmost
1808}
1809
1810/// Compute keyroots from a postorder traversal.
1811///
1812/// A keyroot is a node whose leftmost-leaf is different from its parent's
1813/// leftmost-leaf, OR the root node. In practice, we collect the rightmost
1814/// node at each unique leftmost-leaf value.
1815fn compute_keyroots(nodes: &[PostorderNode]) -> Vec<usize> {
1816    let n = nodes.len();
1817    if n == 0 {
1818        return Vec::new();
1819    }
1820
1821    // For each unique leftmost leaf value, keep the highest index (rightmost occurrence)
1822    let mut lr_map: HashMap<usize, usize> = HashMap::new();
1823    for (i, node) in nodes.iter().enumerate() {
1824        lr_map.insert(node.leftmost_leaf, i);
1825    }
1826
1827    let mut keyroots: Vec<usize> = lr_map.into_values().collect();
1828    keyroots.sort();
1829    keyroots
1830}
1831
1832/// Run the Zhang-Shasha tree edit distance algorithm.
1833///
1834/// Returns the edit operations (edit script).
1835///
1836/// Costs: Delete = 1, Insert = 1, Relabel = 0 (same label) or 1 (different label).
1837fn zhang_shasha(nodes_a: &[PostorderNode], nodes_b: &[PostorderNode]) -> Vec<EditOp> {
1838    let na = nodes_a.len();
1839    let nb = nodes_b.len();
1840
1841    if na == 0 && nb == 0 {
1842        return Vec::new();
1843    }
1844    if na == 0 {
1845        // All inserts
1846        return (0..nb).map(|j| EditOp::Insert { index_b: j }).collect();
1847    }
1848    if nb == 0 {
1849        // All deletes
1850        return (0..na).map(|i| EditOp::Delete { index_a: i }).collect();
1851    }
1852
1853    let keyroots_a = compute_keyroots(nodes_a);
1854    let keyroots_b = compute_keyroots(nodes_b);
1855
1856    // Tree distance matrix (1-indexed, 0 means empty tree)
1857    let mut td = vec![vec![0usize; nb + 1]; na + 1];
1858    // Track operations: 0=relabel/match, 1=delete, 2=insert, 3=tree-match
1859    let mut td_ops = vec![vec![0u8; nb + 1]; na + 1];
1860
1861    for &kr_a in &keyroots_a {
1862        for &kr_b in &keyroots_b {
1863            let la = nodes_a[kr_a].leftmost_leaf;
1864            let lb = nodes_b[kr_b].leftmost_leaf;
1865
1866            let rows = kr_a - la + 2;
1867            let cols = kr_b - lb + 2;
1868            let mut fd = vec![vec![0usize; cols]; rows];
1869
1870            // Base cases
1871            for i in 1..rows {
1872                fd[i][0] = fd[i - 1][0] + 1;
1873            }
1874            for j in 1..cols {
1875                fd[0][j] = fd[0][j - 1] + 1;
1876            }
1877
1878            for i in 1..rows {
1879                for j in 1..cols {
1880                    let idx_a = la + i - 1;
1881                    let idx_b = lb + j - 1;
1882
1883                    let cost_relabel = if nodes_a[idx_a].label == nodes_b[idx_b].label {
1884                        0
1885                    } else {
1886                        1
1887                    };
1888
1889                    if nodes_a[idx_a].leftmost_leaf == la && nodes_b[idx_b].leftmost_leaf == lb {
1890                        let delete = fd[i - 1][j] + 1;
1891                        let insert = fd[i][j - 1] + 1;
1892                        let relabel = fd[i - 1][j - 1] + cost_relabel;
1893
1894                        if relabel <= delete && relabel <= insert {
1895                            fd[i][j] = relabel;
1896                            td[idx_a + 1][idx_b + 1] = relabel;
1897                            td_ops[idx_a + 1][idx_b + 1] = if cost_relabel == 0 { 0 } else { 3 };
1898                        } else if delete <= insert {
1899                            fd[i][j] = delete;
1900                            td[idx_a + 1][idx_b + 1] = delete;
1901                            td_ops[idx_a + 1][idx_b + 1] = 1;
1902                        } else {
1903                            fd[i][j] = insert;
1904                            td[idx_a + 1][idx_b + 1] = insert;
1905                            td_ops[idx_a + 1][idx_b + 1] = 2;
1906                        }
1907                    } else {
1908                        let p = nodes_a[idx_a].leftmost_leaf - la;
1909                        let q = nodes_b[idx_b].leftmost_leaf - lb;
1910
1911                        let delete = fd[i - 1][j] + 1;
1912                        let insert = fd[i][j - 1] + 1;
1913                        let tree_match = fd[p][q] + td[idx_a + 1][idx_b + 1];
1914
1915                        if tree_match <= delete && tree_match <= insert {
1916                            fd[i][j] = tree_match;
1917                        } else if delete <= insert {
1918                            fd[i][j] = delete;
1919                        } else {
1920                            fd[i][j] = insert;
1921                        }
1922                    }
1923                }
1924            }
1925        }
1926    }
1927
1928    // Extract edit script using sequence alignment on postorder nodes
1929    // guided by the tree distance computation
1930    let mut ops = Vec::new();
1931    derive_edit_ops_dp(nodes_a, nodes_b, &mut ops);
1932    ops
1933}
1934
1935/// Derive edit operations using DP on the postorder sequences.
1936///
1937/// This produces the edit script by sequence-aligning the postorder
1938/// traversals, which captures the essential edit operations.
1939fn derive_edit_ops_dp(nodes_a: &[PostorderNode], nodes_b: &[PostorderNode], ops: &mut Vec<EditOp>) {
1940    let na = nodes_a.len();
1941    let nb = nodes_b.len();
1942
1943    let mut dp = vec![vec![0usize; nb + 1]; na + 1];
1944    let mut choice = vec![vec![0u8; nb + 1]; na + 1];
1945
1946    for i in 1..=na {
1947        dp[i][0] = i;
1948        choice[i][0] = 1;
1949    }
1950    for j in 1..=nb {
1951        dp[0][j] = j;
1952        choice[0][j] = 2;
1953    }
1954
1955    for i in 1..=na {
1956        for j in 1..=nb {
1957            let cost = if nodes_a[i - 1].label == nodes_b[j - 1].label {
1958                0
1959            } else {
1960                1
1961            };
1962
1963            let del = dp[i - 1][j] + 1;
1964            let ins = dp[i][j - 1] + 1;
1965            let sub = dp[i - 1][j - 1] + cost;
1966
1967            if sub <= del && sub <= ins {
1968                dp[i][j] = sub;
1969                choice[i][j] = if cost == 0 { 0 } else { 3 };
1970            } else if del <= ins {
1971                dp[i][j] = del;
1972                choice[i][j] = 1;
1973            } else {
1974                dp[i][j] = ins;
1975                choice[i][j] = 2;
1976            }
1977        }
1978    }
1979
1980    // Backtrack
1981    let mut i = na;
1982    let mut j = nb;
1983    let mut rev_ops = Vec::new();
1984
1985    while i > 0 || j > 0 {
1986        if i > 0 && j > 0 && (choice[i][j] == 0 || choice[i][j] == 3) {
1987            if choice[i][j] == 3 {
1988                rev_ops.push(EditOp::Relabel {
1989                    index_a: i - 1,
1990                    index_b: j - 1,
1991                });
1992            }
1993            i -= 1;
1994            j -= 1;
1995        } else if i > 0 && (j == 0 || choice[i][j] == 1) {
1996            rev_ops.push(EditOp::Delete { index_a: i - 1 });
1997            i -= 1;
1998        } else if j > 0 {
1999            rev_ops.push(EditOp::Insert { index_b: j - 1 });
2000            j -= 1;
2001        }
2002    }
2003
2004    rev_ops.reverse();
2005    ops.extend(rev_ops);
2006}
2007
2008/// Convert Zhang-Shasha edit operations into ASTChange records.
2009fn edit_ops_to_ast_changes(
2010    ops: &[EditOp],
2011    nodes_a: &[PostorderNode],
2012    nodes_b: &[PostorderNode],
2013    file_a: &Path,
2014    file_b: &Path,
2015) -> Vec<ASTChange> {
2016    let mut changes = Vec::new();
2017
2018    for op in ops {
2019        match op {
2020            EditOp::Delete { index_a } => {
2021                let node = &nodes_a[*index_a];
2022                let stmt_kind = node.label.split(':').next().unwrap_or("statement");
2023                changes.push(ASTChange {
2024                    change_type: ChangeType::Delete,
2025                    node_kind: NodeKind::Statement,
2026                    name: Some(stmt_kind.to_string()),
2027                    old_location: Some(Location::new(file_a.display().to_string(), node.line)),
2028                    new_location: None,
2029                    old_text: Some(node.label.clone()),
2030                    new_text: None,
2031                    similarity: None,
2032                    children: None,
2033                    base_changes: None,
2034                });
2035            }
2036            EditOp::Insert { index_b } => {
2037                let node = &nodes_b[*index_b];
2038                let stmt_kind = node.label.split(':').next().unwrap_or("statement");
2039                changes.push(ASTChange {
2040                    change_type: ChangeType::Insert,
2041                    node_kind: NodeKind::Statement,
2042                    name: Some(stmt_kind.to_string()),
2043                    old_location: None,
2044                    new_location: Some(Location::new(file_b.display().to_string(), node.line)),
2045                    old_text: None,
2046                    new_text: Some(node.label.clone()),
2047                    similarity: None,
2048                    children: None,
2049                    base_changes: None,
2050                });
2051            }
2052            EditOp::Relabel { index_a, index_b } => {
2053                let node_a = &nodes_a[*index_a];
2054                let node_b = &nodes_b[*index_b];
2055                let stmt_kind = node_a.label.split(':').next().unwrap_or("statement");
2056                changes.push(ASTChange {
2057                    change_type: ChangeType::Update,
2058                    node_kind: NodeKind::Statement,
2059                    name: Some(stmt_kind.to_string()),
2060                    old_location: Some(Location::new(file_a.display().to_string(), node_a.line)),
2061                    new_location: Some(Location::new(file_b.display().to_string(), node_b.line)),
2062                    old_text: Some(node_a.label.clone()),
2063                    new_text: Some(node_b.label.clone()),
2064                    similarity: None,
2065                    children: None,
2066                    base_changes: None,
2067                });
2068            }
2069        }
2070    }
2071
2072    changes
2073}
2074
2075/// Maximum number of statements before falling back to L4-style Jaccard.
2076const STATEMENT_FALLBACK_THRESHOLD: usize = 200;
2077
2078impl DiffArgs {
2079    /// L3 Statement-level diff: Zhang-Shasha tree edit distance within matched functions.
2080    ///
2081    /// Algorithm:
2082    /// 1. Parse both files and extract functions (reusing L4 infrastructure)
2083    /// 2. Match functions by name
2084    /// 3. For each matched pair with different bodies:
2085    ///    a. Extract statement subtrees from tree-sitter AST
2086    ///    b. Build labeled trees from statement nodes
2087    ///    c. Run Zhang-Shasha tree edit distance
2088    ///    d. Convert edit script to ASTChange children
2089    /// 4. For unmatched functions: report as function-level Insert/Delete
2090    fn run_statement_level_diff(&self) -> Result<DiffReport> {
2091        // Detect language
2092        let lang = Language::from_path(&self.file_a).ok_or_else(|| {
2093            let ext = self
2094                .file_a
2095                .extension()
2096                .map(|e| e.to_string_lossy().to_string())
2097                .unwrap_or_else(|| "unknown".to_string());
2098            RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
2099        })?;
2100
2101        // Read file contents
2102        let source_a = fs::read_to_string(&self.file_a)?;
2103        let source_b = fs::read_to_string(&self.file_b)?;
2104
2105        // Parse both files
2106        let pool = ParserPool::new();
2107        let tree_a = pool.parse(&source_a, lang).map_err(|e| {
2108            RemainingError::parse_error(&self.file_a, format!("Failed to parse: {}", e))
2109        })?;
2110        let tree_b = pool.parse(&source_b, lang).map_err(|e| {
2111            RemainingError::parse_error(&self.file_b, format!("Failed to parse: {}", e))
2112        })?;
2113
2114        // Extract function nodes (reuse L4 infrastructure)
2115        let funcs_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
2116        let funcs_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
2117
2118        let statement_kinds = get_statement_node_kinds(lang);
2119
2120        // Build name lookup maps
2121        let map_b: HashMap<&str, (usize, &ExtractedNode)> = funcs_b
2122            .iter()
2123            .enumerate()
2124            .map(|(i, n)| (n.name.as_str(), (i, n)))
2125            .collect();
2126
2127        let mut matched_a: Vec<bool> = vec![false; funcs_a.len()];
2128        let mut matched_b: Vec<bool> = vec![false; funcs_b.len()];
2129        let mut changes = Vec::new();
2130
2131        // Pass 1: Match functions by name and compute statement-level diffs
2132        for (i, func_a) in funcs_a.iter().enumerate() {
2133            if let Some(&(j, func_b)) = map_b.get(func_a.name.as_str()) {
2134                matched_a[i] = true;
2135                matched_b[j] = true;
2136
2137                // Check if bodies differ
2138                if func_a.normalized_body != func_b.normalized_body {
2139                    // Find the function nodes in the parsed trees
2140                    let func_node_a =
2141                        find_function_node_by_line(tree_a.root_node(), func_a.line, lang);
2142                    let func_node_b =
2143                        find_function_node_by_line(tree_b.root_node(), func_b.line, lang);
2144
2145                    let stmt_children = match (func_node_a, func_node_b) {
2146                        (Some(node_a), Some(node_b)) => {
2147                            // Build statement trees
2148                            let tree_a_stmts = extract_statement_tree(
2149                                node_a,
2150                                source_a.as_bytes(),
2151                                lang,
2152                                statement_kinds,
2153                            );
2154                            let tree_b_stmts = extract_statement_tree(
2155                                node_b,
2156                                source_b.as_bytes(),
2157                                lang,
2158                                statement_kinds,
2159                            );
2160
2161                            let count_a = count_tree_nodes(&tree_a_stmts);
2162                            let count_b = count_tree_nodes(&tree_b_stmts);
2163
2164                            // Check fallback threshold
2165                            if count_a > STATEMENT_FALLBACK_THRESHOLD
2166                                || count_b > STATEMENT_FALLBACK_THRESHOLD
2167                            {
2168                                // Fall back to L4-style (no statement children)
2169                                None
2170                            } else {
2171                                // Flatten to postorder and run Zhang-Shasha
2172                                let po_a = flatten_postorder(&tree_a_stmts);
2173                                let po_b = flatten_postorder(&tree_b_stmts);
2174
2175                                let edit_ops = zhang_shasha(&po_a, &po_b);
2176
2177                                if edit_ops.is_empty() {
2178                                    None
2179                                } else {
2180                                    let stmt_changes = edit_ops_to_ast_changes(
2181                                        &edit_ops,
2182                                        &po_a,
2183                                        &po_b,
2184                                        &self.file_a,
2185                                        &self.file_b,
2186                                    );
2187                                    if stmt_changes.is_empty() {
2188                                        None
2189                                    } else {
2190                                        Some(stmt_changes)
2191                                    }
2192                                }
2193                            }
2194                        }
2195                        _ => None,
2196                    };
2197
2198                    changes.push(ASTChange {
2199                        change_type: ChangeType::Update,
2200                        node_kind: func_a.kind,
2201                        name: Some(func_a.name.clone()),
2202                        old_location: Some(Location::with_column(
2203                            self.file_a.display().to_string(),
2204                            func_a.line,
2205                            func_a.column,
2206                        )),
2207                        new_location: Some(Location::with_column(
2208                            self.file_b.display().to_string(),
2209                            func_b.line,
2210                            func_b.column,
2211                        )),
2212                        old_text: Some(func_a.body.clone()),
2213                        new_text: Some(func_b.body.clone()),
2214                        similarity: Some(compute_similarity(
2215                            &func_a.normalized_body,
2216                            &func_b.normalized_body,
2217                        )),
2218                        children: stmt_children,
2219                        base_changes: None,
2220                    });
2221                }
2222            }
2223        }
2224
2225        // Pass 2: Detect renames among unmatched functions
2226        let unmatched_a: Vec<(usize, &ExtractedNode)> = funcs_a
2227            .iter()
2228            .enumerate()
2229            .filter(|(i, _)| !matched_a[*i])
2230            .collect();
2231        let unmatched_b: Vec<(usize, &ExtractedNode)> = funcs_b
2232            .iter()
2233            .enumerate()
2234            .filter(|(i, _)| !matched_b[*i])
2235            .collect();
2236
2237        let mut used_b = vec![false; unmatched_b.len()];
2238
2239        for (_, func_a) in &unmatched_a {
2240            let mut best_match: Option<(usize, f64)> = None;
2241            for (j, (_, func_b)) in unmatched_b.iter().enumerate() {
2242                if used_b[j] || func_a.kind != func_b.kind {
2243                    continue;
2244                }
2245                let sim = compute_similarity(&func_a.normalized_body, &func_b.normalized_body);
2246                if sim >= RENAME_SIMILARITY_THRESHOLD
2247                    && (best_match.is_none() || sim > best_match.unwrap().1)
2248                {
2249                    best_match = Some((j, sim));
2250                }
2251            }
2252
2253            if let Some((j, sim)) = best_match {
2254                let (_, func_b) = unmatched_b[j];
2255                used_b[j] = true;
2256                changes.push(ASTChange {
2257                    change_type: ChangeType::Rename,
2258                    node_kind: func_a.kind,
2259                    name: Some(func_a.name.clone()),
2260                    old_location: Some(Location::with_column(
2261                        self.file_a.display().to_string(),
2262                        func_a.line,
2263                        func_a.column,
2264                    )),
2265                    new_location: Some(Location::with_column(
2266                        self.file_b.display().to_string(),
2267                        func_b.line,
2268                        func_b.column,
2269                    )),
2270                    old_text: Some(func_a.name.clone()),
2271                    new_text: Some(func_b.name.clone()),
2272                    similarity: Some(sim),
2273                    children: None,
2274                    base_changes: None,
2275                });
2276            }
2277        }
2278
2279        // Pass 3: Remaining unmatched in A are Deletes
2280        for (_, func_a) in &unmatched_a {
2281            let is_renamed = changes.iter().any(|c| {
2282                c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&func_a.name)
2283            });
2284            if !is_renamed {
2285                changes.push(ASTChange {
2286                    change_type: ChangeType::Delete,
2287                    node_kind: func_a.kind,
2288                    name: Some(func_a.name.clone()),
2289                    old_location: Some(Location::with_column(
2290                        self.file_a.display().to_string(),
2291                        func_a.line,
2292                        func_a.column,
2293                    )),
2294                    new_location: None,
2295                    old_text: None,
2296                    new_text: None,
2297                    similarity: None,
2298                    children: None,
2299                    base_changes: None,
2300                });
2301            }
2302        }
2303
2304        // Pass 4: Remaining unmatched in B are Inserts
2305        for (j, (_, func_b)) in unmatched_b.iter().enumerate() {
2306            if !used_b[j] {
2307                changes.push(ASTChange {
2308                    change_type: ChangeType::Insert,
2309                    node_kind: func_b.kind,
2310                    name: Some(func_b.name.clone()),
2311                    old_location: None,
2312                    new_location: Some(Location::with_column(
2313                        self.file_b.display().to_string(),
2314                        func_b.line,
2315                        func_b.column,
2316                    )),
2317                    old_text: None,
2318                    new_text: None,
2319                    similarity: None,
2320                    children: None,
2321                    base_changes: None,
2322                });
2323            }
2324        }
2325
2326        // Build summary
2327        let mut summary = DiffSummary::default();
2328        for change in &changes {
2329            summary.total_changes += 1;
2330            if change.change_type != ChangeType::Format {
2331                summary.semantic_changes += 1;
2332            }
2333            match change.change_type {
2334                ChangeType::Insert => summary.inserts += 1,
2335                ChangeType::Delete => summary.deletes += 1,
2336                ChangeType::Update => summary.updates += 1,
2337                ChangeType::Move => summary.moves += 1,
2338                ChangeType::Rename => summary.renames += 1,
2339                ChangeType::Format => summary.formats += 1,
2340                ChangeType::Extract => summary.extracts += 1,
2341                ChangeType::Inline => {}
2342            }
2343        }
2344
2345        // Sort changes
2346        changes.sort_by_key(|c| match c.change_type {
2347            ChangeType::Delete => 0,
2348            ChangeType::Rename => 1,
2349            ChangeType::Update => 2,
2350            ChangeType::Move => 3,
2351            ChangeType::Insert => 4,
2352            _ => 5,
2353        });
2354
2355        Ok(DiffReport {
2356            file_a: self.file_a.display().to_string(),
2357            file_b: self.file_b.display().to_string(),
2358            identical: changes.is_empty(),
2359            changes,
2360            summary: Some(summary),
2361            granularity: DiffGranularity::Statement,
2362            file_changes: None,
2363            module_changes: None,
2364            import_graph_summary: None,
2365            arch_changes: None,
2366            arch_summary: None,
2367        })
2368    }
2369}
2370
2371/// Find a function tree-sitter node by its start line number.
2372fn find_function_node_by_line(root: Node, target_line: u32, lang: Language) -> Option<Node> {
2373    let func_kinds = get_function_node_kinds(lang);
2374    find_function_node_recursive(root, target_line, func_kinds)
2375}
2376
2377fn find_function_node_recursive<'a>(
2378    node: Node<'a>,
2379    target_line: u32,
2380    func_kinds: &[&str],
2381) -> Option<Node<'a>> {
2382    let line = node.start_position().row as u32 + 1;
2383
2384    if func_kinds.contains(&node.kind()) && line == target_line {
2385        return Some(node);
2386    }
2387
2388    let mut cursor = node.walk();
2389    for child in node.children(&mut cursor) {
2390        if let Some(found) = find_function_node_recursive(child, target_line, func_kinds) {
2391            return Some(found);
2392        }
2393    }
2394
2395    None
2396}
2397
2398// =============================================================================
2399// Class-Level Diff (L5)
2400// =============================================================================
2401
/// Information about a class extracted from AST for class-level diffing.
///
/// One value is built per class-like node found in a file. It bundles the
/// node's position, its source text, and the members that the class-level
/// diff compares (methods, fields, base classes).
#[derive(Debug, Clone)]
struct ClassNode {
    /// Class name, read from the node's `name` field or, failing that, from
    /// a fallback identifier child.
    name: String,
    /// Start line of the class node (1-indexed)
    line: u32,
    /// End line of the class node (1-indexed)
    end_line: u32,
    /// Start column of the class node, stored as reported by the parser
    column: u32,
    /// Full source text of the class node
    body: String,
    /// `body` passed through `normalize_body`; used when comparing classes
    normalized_body: String,
    /// Methods found in the class body. For Go, file-level receiver methods
    /// whose receiver type names this class are attached here as well.
    methods: Vec<ExtractedNode>,
    /// Class-level fields (assignments in class body)
    fields: Vec<FieldNode>,
    /// Base classes; only populated for Python at present
    bases: Vec<String>,
}
2424
/// A class-level field (class variable assignment).
///
/// Built from an assignment statement found directly in a class body, e.g.
/// `timeout = 30`.
#[derive(Debug, Clone)]
struct FieldNode {
    /// Field name: the trimmed text of the assignment's left-hand side
    name: String,
    /// Start line of the statement (1-indexed)
    line: u32,
    /// Start column of the statement, stored as reported by the parser
    column: u32,
    /// Full text of the assignment statement
    body: String,
    /// `body` with surrounding whitespace trimmed
    normalized_body: String,
}
2439
2440/// Run a class-level diff between two files.
2441///
2442/// This is the L5 diff algorithm. It extracts classes from both files,
2443/// matches them by name, and then diffs their members (methods, fields, bases).
2444pub fn run_class_diff(file_a: &Path, file_b: &Path, semantic_only: bool) -> Result<DiffReport> {
2445    // Validate files exist
2446    if !file_a.exists() {
2447        return Err(RemainingError::file_not_found(file_a).into());
2448    }
2449    if !file_b.exists() {
2450        return Err(RemainingError::file_not_found(file_b).into());
2451    }
2452
2453    // Detect language from file_a extension
2454    let lang = Language::from_path(file_a).ok_or_else(|| {
2455        let ext = file_a
2456            .extension()
2457            .map(|e| e.to_string_lossy().to_string())
2458            .unwrap_or_else(|| "unknown".to_string());
2459        RemainingError::parse_error(file_a, format!("Unsupported language: .{}", ext))
2460    })?;
2461
2462    // Read file contents
2463    let source_a = fs::read_to_string(file_a)?;
2464    let source_b = fs::read_to_string(file_b)?;
2465
2466    // Parse both files
2467    let pool = ParserPool::new();
2468    let tree_a = pool
2469        .parse(&source_a, lang)
2470        .map_err(|e| RemainingError::parse_error(file_a, format!("Failed to parse file: {}", e)))?;
2471    let tree_b = pool
2472        .parse(&source_b, lang)
2473        .map_err(|e| RemainingError::parse_error(file_b, format!("Failed to parse file: {}", e)))?;
2474
2475    // Extract class information from both files
2476    let classes_a = extract_class_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
2477    let classes_b = extract_class_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
2478
2479    // Detect class-level changes
2480    let changes = detect_class_changes(&classes_a, &classes_b, file_a, file_b, semantic_only);
2481
2482    // Build summary
2483    let mut summary = DiffSummary::default();
2484    for change in &changes {
2485        summary.total_changes += 1;
2486        if change.change_type != ChangeType::Format {
2487            summary.semantic_changes += 1;
2488        }
2489        match change.change_type {
2490            ChangeType::Insert => summary.inserts += 1,
2491            ChangeType::Delete => summary.deletes += 1,
2492            ChangeType::Update => summary.updates += 1,
2493            ChangeType::Move => summary.moves += 1,
2494            ChangeType::Rename => summary.renames += 1,
2495            ChangeType::Format => summary.formats += 1,
2496            ChangeType::Extract => summary.extracts += 1,
2497            ChangeType::Inline => {}
2498        }
2499    }
2500
2501    let report = DiffReport {
2502        file_a: file_a.display().to_string(),
2503        file_b: file_b.display().to_string(),
2504        identical: changes.is_empty(),
2505        changes,
2506        summary: Some(summary),
2507        granularity: DiffGranularity::Class,
2508        file_changes: None,
2509        module_changes: None,
2510        import_graph_summary: None,
2511        arch_changes: None,
2512        arch_summary: None,
2513    };
2514
2515    Ok(report)
2516}
2517
2518/// Run class-level diff across two directories, pairing files by relative path.
2519/// Skips files with unsupported language extensions.
2520fn run_class_diff_directory(dir_a: &Path, dir_b: &Path, semantic_only: bool) -> Result<DiffReport> {
2521    let files_a = collect_source_files(dir_a)?;
2522    let files_b = collect_source_files(dir_b)?;
2523
2524    let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
2525    let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
2526
2527    let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
2528
2529    let mut all_changes = Vec::new();
2530
2531    for rel_path in all_paths {
2532        match (map_a.get(rel_path), map_b.get(rel_path)) {
2533            (Some(path_a), Some(path_b)) => {
2534                // File exists in both -- run class diff, skip on language error
2535                match run_class_diff(path_a, path_b, semantic_only) {
2536                    Ok(sub_report) => all_changes.extend(sub_report.changes),
2537                    Err(_) => continue, // unsupported language, skip
2538                }
2539            }
2540            (None, Some(_)) | (Some(_), None) => {
2541                // Added or removed file -- skip at class level (L6 handles file-level adds/removes)
2542                continue;
2543            }
2544            (None, None) => unreachable!(),
2545        }
2546    }
2547
2548    let mut summary = DiffSummary::default();
2549    for change in &all_changes {
2550        summary.total_changes += 1;
2551        if change.change_type != ChangeType::Format {
2552            summary.semantic_changes += 1;
2553        }
2554        match change.change_type {
2555            ChangeType::Insert => summary.inserts += 1,
2556            ChangeType::Delete => summary.deletes += 1,
2557            ChangeType::Update => summary.updates += 1,
2558            ChangeType::Move => summary.moves += 1,
2559            ChangeType::Rename => summary.renames += 1,
2560            ChangeType::Format => summary.formats += 1,
2561            ChangeType::Extract => summary.extracts += 1,
2562            ChangeType::Inline => {}
2563        }
2564    }
2565
2566    Ok(DiffReport {
2567        file_a: dir_a.display().to_string(),
2568        file_b: dir_b.display().to_string(),
2569        identical: all_changes.is_empty(),
2570        changes: all_changes,
2571        summary: Some(summary),
2572        granularity: DiffGranularity::Class,
2573        file_changes: None,
2574        module_changes: None,
2575        import_graph_summary: None,
2576        arch_changes: None,
2577        arch_summary: None,
2578    })
2579}
2580
2581/// Extract class nodes with their members from the AST.
2582fn extract_class_nodes(root: Node, source: &[u8], lang: Language) -> Vec<ClassNode> {
2583    let mut classes = Vec::new();
2584    let class_kinds = get_class_node_kinds(lang);
2585    let func_kinds = get_function_node_kinds(lang);
2586    let body_kinds = get_class_body_kinds(lang);
2587
2588    extract_class_nodes_recursive(
2589        root,
2590        source,
2591        &mut classes,
2592        lang,
2593        func_kinds,
2594        class_kinds,
2595        body_kinds,
2596    );
2597
2598    // Go: methods are declared at file level with receiver syntax, not inside the struct.
2599    // Scan root-level method_declaration nodes and associate them with their struct.
2600    if lang == Language::Go {
2601        associate_go_receiver_methods(root, source, lang, &mut classes);
2602    }
2603
2604    classes
2605}
2606
2607/// For Go, scan file-level `method_declaration` nodes, parse the receiver type,
2608/// and associate each method with the matching struct's ClassNode.
2609fn associate_go_receiver_methods(
2610    root: Node,
2611    source: &[u8],
2612    lang: Language,
2613    classes: &mut [ClassNode],
2614) {
2615    let source_str = std::str::from_utf8(source).unwrap_or("");
2616    let mut cursor = root.walk();
2617    for child in root.children(&mut cursor) {
2618        if child.kind() != "method_declaration" {
2619            continue;
2620        }
2621        // Extract receiver type name
2622        let receiver_type = match extract_go_receiver_type(child, source) {
2623            Some(name) => name,
2624            None => continue,
2625        };
2626
2627        // Extract method name and build an ExtractedNode
2628        let method_name = match get_function_name(child, lang, source_str) {
2629            Some(name) => name,
2630            None => continue,
2631        };
2632
2633        let params = child
2634            .child_by_field_name("parameters")
2635            .map(|p| node_text(p, source).to_string())
2636            .unwrap_or_default();
2637
2638        let line = child.start_position().row as u32 + 1;
2639        let end_line = child.end_position().row as u32 + 1;
2640        let column = child.start_position().column as u32;
2641        let body = node_text(child, source).to_string();
2642
2643        let extracted =
2644            ExtractedNode::new(method_name, NodeKind::Method, line, end_line, column, body)
2645                .with_params(params)
2646                .with_method_kind();
2647
2648        // Associate with matching struct
2649        for class in classes.iter_mut() {
2650            if class.name == receiver_type {
2651                class.methods.push(extracted);
2652                break;
2653            }
2654        }
2655    }
2656}
2657
2658/// Extract the receiver type name from a Go method_declaration node.
2659///
2660/// Handles both pointer receivers `(f *Foo)` and value receivers `(f Foo)`.
2661/// Returns the bare type name (e.g., "Foo") without the pointer `*`.
2662fn extract_go_receiver_type(method_node: Node, source: &[u8]) -> Option<String> {
2663    // method_declaration -> receiver: parameter_list -> parameter_declaration -> type
2664    let receiver = method_node.child_by_field_name("receiver")?;
2665    let mut recv_cursor = receiver.walk();
2666    for recv_child in receiver.children(&mut recv_cursor) {
2667        if recv_child.kind() == "parameter_declaration" {
2668            if let Some(type_node) = recv_child.child_by_field_name("type") {
2669                return extract_go_type_identifier(type_node, source);
2670            }
2671        }
2672    }
2673    None
2674}
2675
2676/// Recursively extract the type_identifier from a Go type node,
2677/// handling pointer_type wrappers.
2678fn extract_go_type_identifier(type_node: Node, source: &[u8]) -> Option<String> {
2679    match type_node.kind() {
2680        "type_identifier" => Some(node_text(type_node, source).to_string()),
2681        "pointer_type" => {
2682            // pointer_type has a single named child which is the underlying type
2683            let mut cursor = type_node.walk();
2684            for child in type_node.children(&mut cursor) {
2685                if child.is_named() {
2686                    return extract_go_type_identifier(child, source);
2687                }
2688            }
2689            None
2690        }
2691        _ => None,
2692    }
2693}
2694
2695fn extract_class_nodes_recursive(
2696    node: Node,
2697    source: &[u8],
2698    classes: &mut Vec<ClassNode>,
2699    lang: Language,
2700    func_kinds: &[&str],
2701    class_kinds: &[&str],
2702    body_kinds: &[&str],
2703) {
2704    let kind = node.kind();
2705
2706    if class_kinds.contains(&kind) {
2707        if let Some(class_node) = build_class_node(node, source, lang, func_kinds, body_kinds) {
2708            classes.push(class_node);
2709        }
2710        return; // Don't recurse into class children for nested classes at this level
2711    }
2712
2713    for child in node.children(&mut node.walk()) {
2714        extract_class_nodes_recursive(
2715            child,
2716            source,
2717            classes,
2718            lang,
2719            func_kinds,
2720            class_kinds,
2721            body_kinds,
2722        );
2723    }
2724}
2725
2726/// Build a ClassNode from a tree-sitter class node.
2727fn build_class_node(
2728    node: Node,
2729    source: &[u8],
2730    lang: Language,
2731    func_kinds: &[&str],
2732    body_kinds: &[&str],
2733) -> Option<ClassNode> {
2734    // Get class name
2735    let class_name = node
2736        .child_by_field_name("name")
2737        .map(|n| node_text(n, source).to_string())
2738        .or_else(|| {
2739            // Go: type_declaration has no "name" field; the name is in
2740            // the child type_spec node's "name" field.
2741            if lang == Language::Go && node.kind() == "type_declaration" {
2742                let mut cursor = node.walk();
2743                for child in node.children(&mut cursor) {
2744                    if child.kind() == "type_spec" {
2745                        if let Some(name_node) = child.child_by_field_name("name") {
2746                            return Some(node_text(name_node, source).to_string());
2747                        }
2748                    }
2749                }
2750            }
2751            // Fallback: search for first identifier child
2752            let mut cursor = node.walk();
2753            for child in node.children(&mut cursor) {
2754                if child.kind() == "identifier"
2755                    || child.kind() == "type_identifier"
2756                    || child.kind() == "constant"
2757                {
2758                    return Some(node_text(child, source).to_string());
2759                }
2760            }
2761            None
2762        })?;
2763
2764    if class_name.is_empty() {
2765        return None;
2766    }
2767
2768    let line = node.start_position().row as u32 + 1;
2769    let end_line = node.end_position().row as u32 + 1;
2770    let column = node.start_position().column as u32;
2771    let body = node_text(node, source).to_string();
2772    let normalized_body = normalize_body(&body);
2773
2774    // Extract base classes
2775    let bases = extract_bases(node, source, lang);
2776
2777    // Extract methods and fields from class body
2778    let mut methods = Vec::new();
2779    let mut fields = Vec::new();
2780
2781    for child in node.children(&mut node.walk()) {
2782        if body_kinds.contains(&child.kind()) {
2783            extract_class_members(child, source, lang, func_kinds, &mut methods, &mut fields);
2784        }
2785    }
2786
2787    Some(ClassNode {
2788        name: class_name,
2789        line,
2790        end_line,
2791        column,
2792        body,
2793        normalized_body,
2794        methods,
2795        fields,
2796        bases,
2797    })
2798}
2799
2800/// Extract base classes from a class definition node.
2801fn extract_bases(node: Node, source: &[u8], lang: Language) -> Vec<String> {
2802    let mut bases = Vec::new();
2803
2804    match lang {
2805        Language::Python => {
2806            // Python: class Foo(Base1, Base2):
2807            // Look for argument_list or superclasses
2808            if let Some(superclasses) = node.child_by_field_name("superclasses") {
2809                for child in superclasses.children(&mut superclasses.walk()) {
2810                    let text = node_text(child, source).trim().to_string();
2811                    if !text.is_empty() && text != "(" && text != ")" && text != "," {
2812                        bases.push(text);
2813                    }
2814                }
2815            }
2816        }
2817        _ => {
2818            // For other languages, base extraction would be different
2819            // For now, only Python is fully supported for class-level diff
2820        }
2821    }
2822
2823    bases
2824}
2825
2826/// Extract methods and fields from a class body.
2827fn extract_class_members(
2828    body_node: Node,
2829    source: &[u8],
2830    lang: Language,
2831    func_kinds: &[&str],
2832    methods: &mut Vec<ExtractedNode>,
2833    fields: &mut Vec<FieldNode>,
2834) {
2835    for child in body_node.children(&mut body_node.walk()) {
2836        let kind = child.kind();
2837
2838        // Extract methods
2839        if func_kinds.contains(&kind) {
2840            let source_str = std::str::from_utf8(source).unwrap_or("");
2841            if let Some(func_name) = get_function_name(child, lang, source_str) {
2842                let params = child
2843                    .child_by_field_name("parameters")
2844                    .or_else(|| child.child_by_field_name("formal_parameters"))
2845                    .map(|p| node_text(p, source).to_string())
2846                    .unwrap_or_default();
2847
2848                let line = child.start_position().row as u32 + 1;
2849                let end_line = child.end_position().row as u32 + 1;
2850                let column = child.start_position().column as u32;
2851                let body = node_text(child, source).to_string();
2852
2853                let extracted =
2854                    ExtractedNode::new(func_name, NodeKind::Method, line, end_line, column, body)
2855                        .with_params(params)
2856                        .with_method_kind();
2857
2858                methods.push(extracted);
2859            }
2860        }
2861        // Extract fields (Python: expression_statement with assignment)
2862        else if kind == "expression_statement" {
2863            if let Some(field) = extract_field_from_statement(child, source, lang) {
2864                fields.push(field);
2865            }
2866        }
2867    }
2868}
2869
2870/// Extract a field from a statement node (e.g., `timeout = 30`).
2871fn extract_field_from_statement(node: Node, source: &[u8], _lang: Language) -> Option<FieldNode> {
2872    // Look for assignment in this expression_statement
2873    for child in node.children(&mut node.walk()) {
2874        if child.kind() == "assignment" {
2875            // Get the left side (field name)
2876            if let Some(left) = child.child_by_field_name("left") {
2877                let name = node_text(left, source).trim().to_string();
2878                if !name.is_empty() && !name.contains('.') {
2879                    // Skip `self.x = ...` (those are instance vars, not class fields)
2880                    let line = node.start_position().row as u32 + 1;
2881                    let column = node.start_position().column as u32;
2882                    let body = node_text(node, source).to_string();
2883                    let normalized_body = body.trim().to_string();
2884
2885                    return Some(FieldNode {
2886                        name,
2887                        line,
2888                        column,
2889                        body,
2890                        normalized_body,
2891                    });
2892                }
2893            }
2894        }
2895    }
2896    None
2897}
2898
2899/// Detect changes between two sets of class nodes.
2900fn detect_class_changes(
2901    classes_a: &[ClassNode],
2902    classes_b: &[ClassNode],
2903    file_a: &Path,
2904    file_b: &Path,
2905    _semantic_only: bool,
2906) -> Vec<ASTChange> {
2907    let mut changes = Vec::new();
2908
2909    // review-followup-v1 (Concern 1): build a multi-value index keyed by class
2910    // name so duplicate class names (nested Python `Config` inside two
2911    // different parents, Kotlin / C# inner types, namespace-shadowing names)
2912    // pair up by structural identity instead of collapsing into a single
2913    // map entry. The previous `HashMap<&str, &ClassNode>` kept only the
2914    // *last* class per name, so `tldr diff <file> <file>` produced false
2915    // positives for files with duplicate class names. Mirrors the upgrade
2916    // applied to `detect_changes` in real-repo-fixes-v1 (P9.BUG-R8).
2917    let mut index_b: HashMap<&str, Vec<usize>> = HashMap::new();
2918    for (j, c) in classes_b.iter().enumerate() {
2919        index_b.entry(c.name.as_str()).or_default().push(j);
2920    }
2921
2922    // Track which classes have been matched
2923    let mut matched_a: Vec<bool> = vec![false; classes_a.len()];
2924    let mut matched_b: Vec<bool> = vec![false; classes_b.len()];
2925
2926    // First pass: exact name matches with stable best-of pairing.
2927    //
2928    // For each A class, pick the unmatched B class with the same name that
2929    // best matches by (body, line) — in that priority. Self-diff (every
2930    // A == every B) lands on the line-aligned twin every time, so two
2931    // duplicate-named classes pair to themselves and `total_changes == 0`.
2932    // The pairing key uses `normalized_body` and `end_line - line` span
2933    // alongside the start-line distance to break ties between two `Config`
2934    // classes in the same file.
2935    for (i, class_a) in classes_a.iter().enumerate() {
2936        let candidates = match index_b.get(class_a.name.as_str()) {
2937            Some(c) => c,
2938            None => continue,
2939        };
2940
2941        let chosen = candidates
2942            .iter()
2943            .copied()
2944            .filter(|&j| !matched_b[j])
2945            .min_by_key(|&j| {
2946                let c_b = &classes_b[j];
2947                // Lower is better. Priority order: same body shape, then
2948                // closest end-line span, then closest start-line.
2949                let body_mismatch = (class_a.normalized_body != c_b.normalized_body) as u32;
2950                let raw_body_mismatch = (class_a.body != c_b.body) as u32;
2951                let span_a = (class_a.end_line as i64 - class_a.line as i64).unsigned_abs() as u32;
2952                let span_b = (c_b.end_line as i64 - c_b.line as i64).unsigned_abs() as u32;
2953                let span_diff = (span_a as i64 - span_b as i64).unsigned_abs() as u32;
2954                let line_diff = (class_a.line as i64 - c_b.line as i64).unsigned_abs() as u32;
2955                (body_mismatch, raw_body_mismatch, span_diff, line_diff)
2956            });
2957
2958        if let Some(j) = chosen {
2959            matched_a[i] = true;
2960            matched_b[j] = true;
2961            let class_b = &classes_b[j];
2962
2963            // Diff the matched pair
2964            if let Some(change) = diff_class_pair(class_a, class_b, file_a, file_b) {
2965                changes.push(change);
2966            }
2967        }
2968    }
2969
2970    // Collect unmatched classes
2971    let unmatched_a: Vec<(usize, &ClassNode)> = classes_a
2972        .iter()
2973        .enumerate()
2974        .filter(|(i, _)| !matched_a[*i])
2975        .collect();
2976    let unmatched_b: Vec<(usize, &ClassNode)> = classes_b
2977        .iter()
2978        .enumerate()
2979        .filter(|(i, _)| !matched_b[*i])
2980        .collect();
2981
2982    // Second pass: detect renames (same member signatures, different name)
2983    let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
2984
2985    for (_, class_a) in &unmatched_a {
2986        let mut best_match: Option<(usize, f64)> = None;
2987
2988        for (j, (_, class_b)) in unmatched_b.iter().enumerate() {
2989            if used_b[j] {
2990                continue;
2991            }
2992
2993            let similarity = compute_class_similarity(class_a, class_b);
2994            if similarity >= RENAME_SIMILARITY_THRESHOLD
2995                && (best_match.is_none() || similarity > best_match.unwrap().1)
2996            {
2997                best_match = Some((j, similarity));
2998            }
2999        }
3000
3001        if let Some((j, similarity)) = best_match {
3002            let (_, class_b) = unmatched_b[j];
3003            used_b[j] = true;
3004
3005            changes.push(ASTChange {
3006                change_type: ChangeType::Rename,
3007                node_kind: NodeKind::Class,
3008                name: Some(class_a.name.clone()),
3009                old_location: Some(Location::with_column(
3010                    file_a.display().to_string(),
3011                    class_a.line,
3012                    class_a.column,
3013                )),
3014                new_location: Some(Location::with_column(
3015                    file_b.display().to_string(),
3016                    class_b.line,
3017                    class_b.column,
3018                )),
3019                old_text: Some(class_a.name.clone()),
3020                new_text: Some(class_b.name.clone()),
3021                similarity: Some(similarity),
3022                children: None,
3023                base_changes: None,
3024            });
3025        }
3026    }
3027
3028    // Remaining unmatched in A are deletes
3029    for (_, class_a) in &unmatched_a {
3030        let is_renamed = changes
3031            .iter()
3032            .any(|c| c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&class_a.name));
3033        if !is_renamed {
3034            changes.push(ASTChange {
3035                change_type: ChangeType::Delete,
3036                node_kind: NodeKind::Class,
3037                name: Some(class_a.name.clone()),
3038                old_location: Some(Location::with_column(
3039                    file_a.display().to_string(),
3040                    class_a.line,
3041                    class_a.column,
3042                )),
3043                new_location: None,
3044                old_text: None,
3045                new_text: None,
3046                similarity: None,
3047                children: None,
3048                base_changes: None,
3049            });
3050        }
3051    }
3052
3053    // Remaining unmatched in B are inserts
3054    for (j, (_, class_b)) in unmatched_b.iter().enumerate() {
3055        if !used_b[j] {
3056            changes.push(ASTChange {
3057                change_type: ChangeType::Insert,
3058                node_kind: NodeKind::Class,
3059                name: Some(class_b.name.clone()),
3060                old_location: None,
3061                new_location: Some(Location::with_column(
3062                    file_b.display().to_string(),
3063                    class_b.line,
3064                    class_b.column,
3065                )),
3066                old_text: None,
3067                new_text: None,
3068                similarity: None,
3069                children: None,
3070                base_changes: None,
3071            });
3072        }
3073    }
3074
3075    // Sort changes: deletes first, then renames, updates, inserts
3076    changes.sort_by_key(|c| match c.change_type {
3077        ChangeType::Delete => 0,
3078        ChangeType::Rename => 1,
3079        ChangeType::Update => 2,
3080        ChangeType::Move => 3,
3081        ChangeType::Insert => 4,
3082        _ => 5,
3083    });
3084
3085    changes
3086}
3087
3088/// Diff two matched classes and produce an ASTChange if they differ.
3089fn diff_class_pair(
3090    class_a: &ClassNode,
3091    class_b: &ClassNode,
3092    file_a: &Path,
3093    file_b: &Path,
3094) -> Option<ASTChange> {
3095    let mut children = Vec::new();
3096    let mut has_changes = false;
3097
3098    // 1. Diff methods
3099    diff_methods(
3100        &class_a.methods,
3101        &class_b.methods,
3102        file_a,
3103        file_b,
3104        &mut children,
3105    );
3106
3107    // 2. Diff fields
3108    diff_fields(
3109        &class_a.fields,
3110        &class_b.fields,
3111        file_a,
3112        file_b,
3113        &mut children,
3114    );
3115
3116    // 3. Diff base classes
3117    let base_changes = diff_bases(&class_a.bases, &class_b.bases);
3118
3119    if !children.is_empty() {
3120        has_changes = true;
3121    }
3122    if base_changes.is_some() {
3123        has_changes = true;
3124    }
3125
3126    if !has_changes {
3127        return None; // Classes are identical
3128    }
3129
3130    Some(ASTChange {
3131        change_type: ChangeType::Update,
3132        node_kind: NodeKind::Class,
3133        name: Some(class_a.name.clone()),
3134        old_location: Some(Location::with_column(
3135            file_a.display().to_string(),
3136            class_a.line,
3137            class_a.column,
3138        )),
3139        new_location: Some(Location::with_column(
3140            file_b.display().to_string(),
3141            class_b.line,
3142            class_b.column,
3143        )),
3144        old_text: None,
3145        new_text: None,
3146        similarity: None,
3147        children: if children.is_empty() {
3148            None
3149        } else {
3150            Some(children)
3151        },
3152        base_changes,
3153    })
3154}
3155
3156/// Diff methods between two matched classes.
3157fn diff_methods(
3158    methods_a: &[ExtractedNode],
3159    methods_b: &[ExtractedNode],
3160    file_a: &Path,
3161    file_b: &Path,
3162    children: &mut Vec<ASTChange>,
3163) {
3164    let map_b: HashMap<&str, &ExtractedNode> =
3165        methods_b.iter().map(|m| (m.name.as_str(), m)).collect();
3166
3167    let mut matched_a: Vec<bool> = vec![false; methods_a.len()];
3168    let mut matched_b: Vec<bool> = vec![false; methods_b.len()];
3169
3170    // Exact name match
3171    for (i, method_a) in methods_a.iter().enumerate() {
3172        if let Some(&method_b) = map_b.get(method_a.name.as_str()) {
3173            matched_a[i] = true;
3174            if let Some(j) = methods_b.iter().position(|m| m.name == method_a.name) {
3175                matched_b[j] = true;
3176            }
3177
3178            // Check if body changed
3179            if method_a.normalized_body != method_b.normalized_body {
3180                children.push(ASTChange {
3181                    change_type: ChangeType::Update,
3182                    node_kind: NodeKind::Method,
3183                    name: Some(method_a.name.clone()),
3184                    old_location: Some(Location::with_column(
3185                        file_a.display().to_string(),
3186                        method_a.line,
3187                        method_a.column,
3188                    )),
3189                    new_location: Some(Location::with_column(
3190                        file_b.display().to_string(),
3191                        method_b.line,
3192                        method_b.column,
3193                    )),
3194                    old_text: None,
3195                    new_text: None,
3196                    similarity: Some(compute_similarity(
3197                        &method_a.normalized_body,
3198                        &method_b.normalized_body,
3199                    )),
3200                    children: None,
3201                    base_changes: None,
3202                });
3203            }
3204        }
3205    }
3206
3207    // Collect unmatched
3208    let unmatched_a: Vec<&ExtractedNode> = methods_a
3209        .iter()
3210        .enumerate()
3211        .filter(|(i, _)| !matched_a[*i])
3212        .map(|(_, m)| m)
3213        .collect();
3214    let unmatched_b: Vec<&ExtractedNode> = methods_b
3215        .iter()
3216        .enumerate()
3217        .filter(|(i, _)| !matched_b[*i])
3218        .map(|(_, m)| m)
3219        .collect();
3220
3221    // Rename detection among unmatched methods
3222    let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
3223
3224    for method_a in &unmatched_a {
3225        let mut best_match: Option<(usize, f64)> = None;
3226
3227        for (j, method_b) in unmatched_b.iter().enumerate() {
3228            if used_b[j] {
3229                continue;
3230            }
3231            let similarity =
3232                compute_similarity(&method_a.normalized_body, &method_b.normalized_body);
3233            if similarity >= RENAME_SIMILARITY_THRESHOLD
3234                && (best_match.is_none() || similarity > best_match.unwrap().1)
3235            {
3236                best_match = Some((j, similarity));
3237            }
3238        }
3239
3240        if let Some((j, similarity)) = best_match {
3241            let method_b = unmatched_b[j];
3242            used_b[j] = true;
3243
3244            children.push(ASTChange {
3245                change_type: ChangeType::Rename,
3246                node_kind: NodeKind::Method,
3247                name: Some(method_a.name.clone()),
3248                old_location: Some(Location::with_column(
3249                    file_a.display().to_string(),
3250                    method_a.line,
3251                    method_a.column,
3252                )),
3253                new_location: Some(Location::with_column(
3254                    file_b.display().to_string(),
3255                    method_b.line,
3256                    method_b.column,
3257                )),
3258                old_text: Some(method_a.name.clone()),
3259                new_text: Some(method_b.name.clone()),
3260                similarity: Some(similarity),
3261                children: None,
3262                base_changes: None,
3263            });
3264        }
3265    }
3266
3267    // Remaining unmatched in A are deletes
3268    for method_a in &unmatched_a {
3269        let is_renamed = children.iter().any(|c| {
3270            c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&method_a.name)
3271        });
3272        if !is_renamed {
3273            children.push(ASTChange {
3274                change_type: ChangeType::Delete,
3275                node_kind: NodeKind::Method,
3276                name: Some(method_a.name.clone()),
3277                old_location: Some(Location::with_column(
3278                    file_a.display().to_string(),
3279                    method_a.line,
3280                    method_a.column,
3281                )),
3282                new_location: None,
3283                old_text: None,
3284                new_text: None,
3285                similarity: None,
3286                children: None,
3287                base_changes: None,
3288            });
3289        }
3290    }
3291
3292    // Remaining unmatched in B are inserts
3293    for (j, method_b) in unmatched_b.iter().enumerate() {
3294        if !used_b[j] {
3295            children.push(ASTChange {
3296                change_type: ChangeType::Insert,
3297                node_kind: NodeKind::Method,
3298                name: Some(method_b.name.clone()),
3299                old_location: None,
3300                new_location: Some(Location::with_column(
3301                    file_b.display().to_string(),
3302                    method_b.line,
3303                    method_b.column,
3304                )),
3305                old_text: None,
3306                new_text: None,
3307                similarity: None,
3308                children: None,
3309                base_changes: None,
3310            });
3311        }
3312    }
3313}
3314
3315/// Diff fields between two matched classes.
3316fn diff_fields(
3317    fields_a: &[FieldNode],
3318    fields_b: &[FieldNode],
3319    file_a: &Path,
3320    file_b: &Path,
3321    children: &mut Vec<ASTChange>,
3322) {
3323    let map_b: HashMap<&str, &FieldNode> = fields_b.iter().map(|f| (f.name.as_str(), f)).collect();
3324
3325    let mut matched_a: Vec<bool> = vec![false; fields_a.len()];
3326    let mut matched_b: Vec<bool> = vec![false; fields_b.len()];
3327
3328    // Exact name match
3329    for (i, field_a) in fields_a.iter().enumerate() {
3330        if let Some(&field_b) = map_b.get(field_a.name.as_str()) {
3331            matched_a[i] = true;
3332            if let Some(j) = fields_b.iter().position(|f| f.name == field_a.name) {
3333                matched_b[j] = true;
3334            }
3335
3336            // Check if value changed
3337            if field_a.normalized_body != field_b.normalized_body {
3338                children.push(ASTChange {
3339                    change_type: ChangeType::Update,
3340                    node_kind: NodeKind::Field,
3341                    name: Some(field_a.name.clone()),
3342                    old_location: Some(Location::with_column(
3343                        file_a.display().to_string(),
3344                        field_a.line,
3345                        field_a.column,
3346                    )),
3347                    new_location: Some(Location::with_column(
3348                        file_b.display().to_string(),
3349                        field_b.line,
3350                        field_b.column,
3351                    )),
3352                    old_text: Some(field_a.body.trim().to_string()),
3353                    new_text: Some(field_b.body.trim().to_string()),
3354                    similarity: None,
3355                    children: None,
3356                    base_changes: None,
3357                });
3358            }
3359        }
3360    }
3361
3362    // Remaining unmatched in A are deletes
3363    for (i, field_a) in fields_a.iter().enumerate() {
3364        if !matched_a[i] {
3365            children.push(ASTChange {
3366                change_type: ChangeType::Delete,
3367                node_kind: NodeKind::Field,
3368                name: Some(field_a.name.clone()),
3369                old_location: Some(Location::with_column(
3370                    file_a.display().to_string(),
3371                    field_a.line,
3372                    field_a.column,
3373                )),
3374                new_location: None,
3375                old_text: None,
3376                new_text: None,
3377                similarity: None,
3378                children: None,
3379                base_changes: None,
3380            });
3381        }
3382    }
3383
3384    // Remaining unmatched in B are inserts
3385    for (j, field_b) in fields_b.iter().enumerate() {
3386        if !matched_b[j] {
3387            children.push(ASTChange {
3388                change_type: ChangeType::Insert,
3389                node_kind: NodeKind::Field,
3390                name: Some(field_b.name.clone()),
3391                old_location: None,
3392                new_location: Some(Location::with_column(
3393                    file_b.display().to_string(),
3394                    field_b.line,
3395                    field_b.column,
3396                )),
3397                old_text: None,
3398                new_text: None,
3399                similarity: None,
3400                children: None,
3401                base_changes: None,
3402            });
3403        }
3404    }
3405}
3406
3407/// Diff base classes between two matched classes.
3408fn diff_bases(bases_a: &[String], bases_b: &[String]) -> Option<BaseChanges> {
3409    let set_a: std::collections::HashSet<&String> = bases_a.iter().collect();
3410    let set_b: std::collections::HashSet<&String> = bases_b.iter().collect();
3411
3412    let added: Vec<String> = set_b.difference(&set_a).map(|s| (*s).clone()).collect();
3413    let removed: Vec<String> = set_a.difference(&set_b).map(|s| (*s).clone()).collect();
3414
3415    if added.is_empty() && removed.is_empty() {
3416        None
3417    } else {
3418        Some(BaseChanges { added, removed })
3419    }
3420}
3421
3422/// Compute similarity between two classes based on their member signatures.
3423fn compute_class_similarity(class_a: &ClassNode, class_b: &ClassNode) -> f64 {
3424    // Collect method names + normalized bodies
3425    let method_sigs_a: std::collections::HashSet<String> = class_a
3426        .methods
3427        .iter()
3428        .map(|m| format!("{}:{}", m.name, m.normalized_body))
3429        .collect();
3430    let method_sigs_b: std::collections::HashSet<String> = class_b
3431        .methods
3432        .iter()
3433        .map(|m| format!("{}:{}", m.name, m.normalized_body))
3434        .collect();
3435
3436    let field_sigs_a: std::collections::HashSet<String> = class_a
3437        .fields
3438        .iter()
3439        .map(|f| f.normalized_body.clone())
3440        .collect();
3441    let field_sigs_b: std::collections::HashSet<String> = class_b
3442        .fields
3443        .iter()
3444        .map(|f| f.normalized_body.clone())
3445        .collect();
3446
3447    // Combined Jaccard similarity
3448    let all_a: std::collections::HashSet<&String> =
3449        method_sigs_a.iter().chain(field_sigs_a.iter()).collect();
3450    let all_b: std::collections::HashSet<&String> =
3451        method_sigs_b.iter().chain(field_sigs_b.iter()).collect();
3452
3453    if all_a.is_empty() && all_b.is_empty() {
3454        // Both empty classes - consider identical
3455        return 1.0;
3456    }
3457
3458    let intersection = all_a.intersection(&all_b).count();
3459    let union = all_a.union(&all_b).count();
3460
3461    if union == 0 {
3462        0.0
3463    } else {
3464        intersection as f64 / union as f64
3465    }
3466}
3467
3468// =============================================================================
3469// L6: File-Level Diff
3470// =============================================================================
3471
/// File extensions (without the leading dot) that the directory walker
/// treats as source files. Matching is case-sensitive.
const SOURCE_EXTENSIONS: &[&str] = &[
    "py",
    "rs",
    "ts", "tsx", "js", "jsx",
    "go",
    "java",
    "c", "h", "cpp", "hpp", "cc", "cxx",
    "rb",
    "php",
    "cs",
    "kt",
    "scala",
    "swift",
    "ex", "exs",
    "lua",
    "ml", "mli",
    "luau",
];
3477
3478/// Walk a directory and collect source files with their relative paths.
3479fn collect_source_files(root: &Path) -> Result<Vec<(String, PathBuf)>> {
3480    let mut files = Vec::new();
3481    collect_source_files_recursive(root, root, &mut files)?;
3482    files.sort_by(|a, b| a.0.cmp(&b.0));
3483    Ok(files)
3484}
3485
3486fn collect_source_files_recursive(
3487    root: &Path,
3488    current: &Path,
3489    files: &mut Vec<(String, PathBuf)>,
3490) -> Result<()> {
3491    for entry in fs::read_dir(current)? {
3492        let entry = entry?;
3493        let path = entry.path();
3494        if path.is_dir() {
3495            collect_source_files_recursive(root, &path, files)?;
3496        } else if path.is_file() {
3497            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
3498                if SOURCE_EXTENSIONS.contains(&ext) {
3499                    let rel = path
3500                        .strip_prefix(root)
3501                        .unwrap_or(&path)
3502                        .to_string_lossy()
3503                        .replace('\\', "/");
3504                    files.push((rel, path));
3505                }
3506            }
3507        }
3508    }
3509    Ok(())
3510}
3511
3512/// Compute a structural fingerprint for a source file.
3513///
3514/// The fingerprint is a hash of the sorted list of function/class signatures
3515/// extracted via tree-sitter. Two files with the same structural definitions
3516/// (regardless of whitespace/comments) produce the same fingerprint.
3517fn compute_structural_fingerprint(path: &Path) -> Result<(u64, Vec<String>)> {
3518    let lang = match Language::from_path(path) {
3519        Some(l) => l,
3520        None => {
3521            // Fallback: hash the raw content for unsupported languages
3522            let content = fs::read_to_string(path)?;
3523            let mut hasher = std::collections::hash_map::DefaultHasher::new();
3524            content.hash(&mut hasher);
3525            return Ok((hasher.finish(), vec![]));
3526        }
3527    };
3528
3529    let source = fs::read_to_string(path)?;
3530    let pool = ParserPool::new();
3531    let tree = match pool.parse(&source, lang) {
3532        Ok(t) => t,
3533        Err(_) => {
3534            // Parse failure: hash raw content
3535            let mut hasher = std::collections::hash_map::DefaultHasher::new();
3536            source.hash(&mut hasher);
3537            return Ok((hasher.finish(), vec![]));
3538        }
3539    };
3540
3541    let nodes = extract_nodes(tree.root_node(), source.as_bytes(), lang);
3542
3543    // Build sorted list of signatures: "kind:name(params)|body_hash"
3544    // We include a hash of the normalized body so that body-only changes
3545    // (same name/params but different implementation) alter the fingerprint.
3546    let mut signatures: Vec<String> = nodes
3547        .iter()
3548        .map(|n| {
3549            let kind = match n.kind {
3550                NodeKind::Function => "fn",
3551                NodeKind::Class => "class",
3552                NodeKind::Method => "method",
3553                NodeKind::Field => "field",
3554                _ => "other",
3555            };
3556            let sig = if n.params.is_empty() {
3557                format!("{}:{}", kind, n.name)
3558            } else {
3559                format!("{}:{}({})", kind, n.name, n.params)
3560            };
3561            // Append a body hash so body-only changes are detected
3562            let mut body_hasher = std::collections::hash_map::DefaultHasher::new();
3563            n.normalized_body.hash(&mut body_hasher);
3564            format!("{}|{}", sig, body_hasher.finish())
3565        })
3566        .collect();
3567    signatures.sort();
3568
3569    let mut hasher = std::collections::hash_map::DefaultHasher::new();
3570    for sig in &signatures {
3571        sig.hash(&mut hasher);
3572    }
3573    let fingerprint = hasher.finish();
3574
3575    Ok((fingerprint, signatures))
3576}
3577
3578/// Run L6 file-level diff between two directories.
3579fn run_file_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3580    let files_a = collect_source_files(dir_a)?;
3581    let files_b = collect_source_files(dir_b)?;
3582
3583    // Build maps: relative_path -> full_path
3584    let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
3585    let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
3586
3587    let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
3588
3589    let mut file_changes = Vec::new();
3590    let mut has_any_change = false;
3591
3592    for rel_path in all_paths {
3593        match (map_a.get(rel_path), map_b.get(rel_path)) {
3594            (Some(path_a), Some(path_b)) => {
3595                // File exists in both directories
3596                let (fp_a, sigs_a) = compute_structural_fingerprint(path_a)?;
3597                let (fp_b, sigs_b) = compute_structural_fingerprint(path_b)?;
3598
3599                if fp_a == fp_b {
3600                    // Identical structure - skip or include as no-change
3601                    // (tests filter these out anyway)
3602                } else {
3603                    has_any_change = true;
3604                    // Find which signatures differ
3605                    let set_a: HashSet<&String> = sigs_a.iter().collect();
3606                    let set_b: HashSet<&String> = sigs_b.iter().collect();
3607                    let changed: Vec<String> = set_a
3608                        .symmetric_difference(&set_b)
3609                        .map(|s| (*s).clone())
3610                        .collect();
3611
3612                    file_changes.push(FileLevelChange {
3613                        relative_path: rel_path.to_string(),
3614                        change_type: ChangeType::Update,
3615                        old_fingerprint: Some(fp_a),
3616                        new_fingerprint: Some(fp_b),
3617                        signature_changes: if changed.is_empty() {
3618                            None
3619                        } else {
3620                            Some(changed)
3621                        },
3622                    });
3623                }
3624            }
3625            (None, Some(path_b)) => {
3626                // Added file
3627                has_any_change = true;
3628                let (fp_b, _) = compute_structural_fingerprint(path_b)?;
3629                file_changes.push(FileLevelChange {
3630                    relative_path: rel_path.to_string(),
3631                    change_type: ChangeType::Insert,
3632                    old_fingerprint: None,
3633                    new_fingerprint: Some(fp_b),
3634                    signature_changes: None,
3635                });
3636            }
3637            (Some(path_a), None) => {
3638                // Removed file
3639                has_any_change = true;
3640                let (fp_a, _) = compute_structural_fingerprint(path_a)?;
3641                file_changes.push(FileLevelChange {
3642                    relative_path: rel_path.to_string(),
3643                    change_type: ChangeType::Delete,
3644                    old_fingerprint: Some(fp_a),
3645                    new_fingerprint: None,
3646                    signature_changes: None,
3647                });
3648            }
3649            (None, None) => unreachable!(),
3650        }
3651    }
3652
3653    Ok(DiffReport {
3654        file_a: dir_a.display().to_string(),
3655        file_b: dir_b.display().to_string(),
3656        identical: !has_any_change,
3657        changes: Vec::new(),
3658        summary: None,
3659        granularity: DiffGranularity::File,
3660        file_changes: Some(file_changes),
3661        module_changes: None,
3662        import_graph_summary: None,
3663        arch_changes: None,
3664        arch_summary: None,
3665    })
3666}
3667
3668// =============================================================================
3669// L7: Module-Level Diff
3670// =============================================================================
3671
/// One import statement as recorded while building the import graph.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct InternalImportEdge {
    /// File containing the import, as a path relative to the scanned root.
    source_file: String,
    /// Module named by the import statement.
    target_module: String,
    /// Names pulled from the module; empty when none were recorded.
    imported_names: Vec<String>,
}
3679
3680/// Parse Python import statements from a file using regex.
3681///
3682/// Recognizes:
3683/// - `from X import Y, Z`
3684/// - `import X`
3685fn parse_python_imports(source: &str, relative_path: &str) -> Vec<InternalImportEdge> {
3686    let mut edges = Vec::new();
3687
3688    // Match "from X import Y, Z"
3689    let from_re = Regex::new(r"(?m)^(?:\s*)from\s+([\w.]+)\s+import\s+(.+)$").unwrap();
3690    for cap in from_re.captures_iter(source) {
3691        let target = cap[1].to_string();
3692        let names_str = &cap[2];
3693        let names: Vec<String> = names_str
3694            .split(',')
3695            .map(|n| n.trim().to_string())
3696            .filter(|n| !n.is_empty())
3697            .collect();
3698        edges.push(InternalImportEdge {
3699            source_file: relative_path.to_string(),
3700            target_module: target,
3701            imported_names: names,
3702        });
3703    }
3704
3705    // Match "import X" (but not "from X import Y" which is already handled)
3706    let import_re = Regex::new(r"(?m)^(?:\s*)import\s+([\w.]+)$").unwrap();
3707    for cap in import_re.captures_iter(source) {
3708        let target = cap[1].to_string();
3709        edges.push(InternalImportEdge {
3710            source_file: relative_path.to_string(),
3711            target_module: target,
3712            imported_names: vec![],
3713        });
3714    }
3715
3716    edges
3717}
3718
3719/// Parse imports for a single file using CallGraphLanguageSupport.
3720///
3721/// Returns `Some(edges)` if a handler could parse the file, `None` otherwise.
3722/// On handler parse failure for Python files, falls back to regex parsing.
3723fn parse_file_imports(
3724    registry: &LanguageRegistry,
3725    source: &str,
3726    full_path: &Path,
3727    rel_path: &str,
3728) -> Vec<InternalImportEdge> {
3729    let ext = match full_path.extension().and_then(|e| e.to_str()) {
3730        Some(e) => format!(".{}", e),
3731        None => return Vec::new(),
3732    };
3733
3734    let is_python = ext == ".py" || ext == ".pyi";
3735
3736    // Try the language handler from the registry
3737    if let Some(handler) = registry.get_by_extension(&ext) {
3738        if let Ok(import_defs) = handler.parse_imports(source, full_path) {
3739            return import_defs
3740                .into_iter()
3741                .map(|def| InternalImportEdge {
3742                    source_file: rel_path.to_string(),
3743                    target_module: def.module,
3744                    imported_names: def.names,
3745                })
3746                .collect();
3747        }
3748    }
3749
3750    // Fallback: regex-based parsing for Python files only
3751    if is_python {
3752        return parse_python_imports(source, rel_path);
3753    }
3754
3755    Vec::new()
3756}
3757
3758/// Build import graph for all source files in a directory.
3759///
3760/// Uses `CallGraphLanguageSupport::parse_imports()` from tldr-core for
3761/// multi-language support (Python, TypeScript, Go, Rust, Java, C#, etc.).
3762/// Falls back to regex-based `parse_python_imports()` for Python files
3763/// when the core API fails, and skips import parsing for files whose
3764/// language is unsupported or whose handler returns an error.
3765fn build_import_graph(root: &Path) -> Result<Vec<InternalImportEdge>> {
3766    let files = collect_source_files(root)?;
3767    let registry = LanguageRegistry::with_defaults();
3768    let mut all_edges = Vec::new();
3769
3770    for (rel_path, full_path) in &files {
3771        let source = fs::read_to_string(full_path)?;
3772        let edges = parse_file_imports(&registry, &source, full_path, rel_path);
3773        all_edges.extend(edges);
3774    }
3775
3776    Ok(all_edges)
3777}
3778
3779/// Convert an internal edge to the public ImportEdge type.
3780fn to_public_edge(edge: &InternalImportEdge) -> ImportEdge {
3781    ImportEdge {
3782        source_file: edge.source_file.clone(),
3783        target_module: edge.target_module.clone(),
3784        imported_names: edge.imported_names.clone(),
3785    }
3786}
3787
3788/// Create a comparable key for an import edge (for set operations).
3789fn edge_key(edge: &InternalImportEdge) -> String {
3790    format!(
3791        "{}->{}:{}",
3792        edge.source_file,
3793        edge.target_module,
3794        edge.imported_names.join(",")
3795    )
3796}
3797
3798/// Run L7 module-level diff between two directories.
3799fn run_module_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3800    // Build import graphs
3801    let edges_a = build_import_graph(dir_a)?;
3802    let edges_b = build_import_graph(dir_b)?;
3803
3804    // Build edge key sets for comparison
3805    let keys_a: HashSet<String> = edges_a.iter().map(edge_key).collect();
3806    let keys_b: HashSet<String> = edges_b.iter().map(edge_key).collect();
3807
3808    // Edges added (in B but not in A)
3809    let added_keys: HashSet<&String> = keys_b.difference(&keys_a).collect();
3810    let removed_keys: HashSet<&String> = keys_a.difference(&keys_b).collect();
3811
3812    // Get added/removed edges
3813    let added_edges: Vec<&InternalImportEdge> = edges_b
3814        .iter()
3815        .filter(|e| added_keys.contains(&edge_key(e)))
3816        .collect();
3817    let removed_edges: Vec<&InternalImportEdge> = edges_a
3818        .iter()
3819        .filter(|e| removed_keys.contains(&edge_key(e)))
3820        .collect();
3821
3822    // Also run L6 file-level diff for context
3823    let files_a = collect_source_files(dir_a)?;
3824    let files_b = collect_source_files(dir_b)?;
3825    let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(r, p)| (r.as_str(), p)).collect();
3826    let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(r, p)| (r.as_str(), p)).collect();
3827    let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
3828
3829    // Build per-module changes
3830    let mut module_changes: Vec<ModuleLevelChange> = Vec::new();
3831    let mut modules_with_import_changes = 0usize;
3832
3833    for rel_path in &all_paths {
3834        let in_a = map_a.contains_key(rel_path);
3835        let in_b = map_b.contains_key(rel_path);
3836
3837        // Determine module change type
3838        let change_type = if !in_a && in_b {
3839            ChangeType::Insert
3840        } else if in_a && !in_b {
3841            ChangeType::Delete
3842        } else {
3843            ChangeType::Update
3844        };
3845
3846        // Gather import changes for this module
3847        let mod_added: Vec<ImportEdge> = added_edges
3848            .iter()
3849            .filter(|e| e.source_file == *rel_path)
3850            .map(|e| to_public_edge(e))
3851            .collect();
3852        let mod_removed: Vec<ImportEdge> = removed_edges
3853            .iter()
3854            .filter(|e| e.source_file == *rel_path)
3855            .map(|e| to_public_edge(e))
3856            .collect();
3857
3858        // Compute file-level change if both exist
3859        let file_change = if in_a && in_b {
3860            let path_a = map_a[rel_path];
3861            let path_b = map_b[rel_path];
3862            let (fp_a, sigs_a) = compute_structural_fingerprint(path_a)?;
3863            let (fp_b, sigs_b) = compute_structural_fingerprint(path_b)?;
3864            if fp_a != fp_b {
3865                let set_a: HashSet<&String> = sigs_a.iter().collect();
3866                let set_b: HashSet<&String> = sigs_b.iter().collect();
3867                let changed: Vec<String> = set_a
3868                    .symmetric_difference(&set_b)
3869                    .map(|s| (*s).clone())
3870                    .collect();
3871                Some(FileLevelChange {
3872                    relative_path: rel_path.to_string(),
3873                    change_type: ChangeType::Update,
3874                    old_fingerprint: Some(fp_a),
3875                    new_fingerprint: Some(fp_b),
3876                    signature_changes: if changed.is_empty() {
3877                        None
3878                    } else {
3879                        Some(changed)
3880                    },
3881                })
3882            } else {
3883                None
3884            }
3885        } else {
3886            None
3887        };
3888
3889        // Only include modules with actual changes
3890        let has_import_changes = !mod_added.is_empty() || !mod_removed.is_empty();
3891        let has_file_change = file_change.is_some();
3892        let is_new_or_deleted =
3893            change_type == ChangeType::Insert || change_type == ChangeType::Delete;
3894
3895        if has_import_changes || has_file_change || is_new_or_deleted {
3896            if has_import_changes {
3897                modules_with_import_changes += 1;
3898            }
3899
3900            // For new modules, all their imports count as added
3901            let final_added = if change_type == ChangeType::Insert && mod_added.is_empty() {
3902                // Gather all imports for this new file
3903                edges_b
3904                    .iter()
3905                    .filter(|e| e.source_file == *rel_path)
3906                    .map(to_public_edge)
3907                    .collect()
3908            } else {
3909                mod_added
3910            };
3911            // For deleted modules, all their imports count as removed
3912            let final_removed = if change_type == ChangeType::Delete && mod_removed.is_empty() {
3913                edges_a
3914                    .iter()
3915                    .filter(|e| e.source_file == *rel_path)
3916                    .map(to_public_edge)
3917                    .collect()
3918            } else {
3919                mod_removed
3920            };
3921
3922            // Recheck after expanding
3923            let has_expanded_imports = !final_added.is_empty() || !final_removed.is_empty();
3924            if has_expanded_imports && !has_import_changes {
3925                modules_with_import_changes += 1;
3926            }
3927
3928            module_changes.push(ModuleLevelChange {
3929                module_path: rel_path.to_string(),
3930                change_type,
3931                imports_added: final_added,
3932                imports_removed: final_removed,
3933                file_change,
3934            });
3935        }
3936    }
3937
3938    let summary = ImportGraphSummary {
3939        total_edges_a: edges_a.len(),
3940        total_edges_b: edges_b.len(),
3941        edges_added: added_keys.len(),
3942        edges_removed: removed_keys.len(),
3943        modules_with_import_changes,
3944    };
3945
3946    let identical = module_changes.is_empty() && added_keys.is_empty() && removed_keys.is_empty();
3947
3948    Ok(DiffReport {
3949        file_a: dir_a.display().to_string(),
3950        file_b: dir_b.display().to_string(),
3951        identical,
3952        changes: Vec::new(),
3953        summary: None,
3954        granularity: DiffGranularity::Module,
3955        file_changes: None,
3956        module_changes: Some(module_changes),
3957        import_graph_summary: Some(summary),
3958        arch_changes: None,
3959        arch_summary: None,
3960    })
3961}
3962
3963// =============================================================================
3964// L8: Architecture-Level Diff
3965// =============================================================================
3966
/// Map a directory name onto an architectural layer label.
///
/// The comparison ignores case. Names that match none of the known
/// patterns are labelled `"other"`.
fn classify_directory_layer(dir_name: &str) -> String {
    let layer = match dir_name.to_lowercase().as_str() {
        "api" | "routes" | "handlers" | "endpoints" | "views" | "controllers" => "api",
        "core" | "models" | "domain" | "entities" => "core",
        "utils" | "helpers" | "lib" | "common" | "shared" => "utility",
        "middleware" | "interceptors" | "filters" => "middleware",
        "services" | "service" => "service",
        "tests" | "test" | "spec" | "specs" => "test",
        "config" | "settings" | "conf" => "config",
        "db" | "database" | "migrations" | "repositories" | "repo" => "data",
        _ => "other",
    };
    layer.to_string()
}
3982
3983/// Classify a directory using import-based fan-in/fan-out analysis.
3984///
3985/// For directories whose name doesn't match a known pattern ("other"),
3986/// we use the import graph to infer the architectural role:
3987/// - High fan-out + low fan-in  -> "entry" (entry points that depend on many modules)
3988/// - Low fan-out  + high fan-in -> "utility" (leaf modules imported by many)
3989/// - Balanced                   -> "service" (intermediate layer)
3990fn classify_by_import_flow(
3991    dir_name: &str,
3992    edges: &[InternalImportEdge],
3993    all_dirs: &HashSet<String>,
3994) -> String {
3995    // Count fan-out: how many distinct external directories does this dir import from?
3996    let fan_out: usize = edges
3997        .iter()
3998        .filter(|e| {
3999            e.source_file
4000                .split('/')
4001                .next()
4002                .map(|d| d == dir_name)
4003                .unwrap_or(false)
4004        })
4005        .filter(|e| {
4006            // Target module references a different top-level directory
4007            let target_first = e
4008                .target_module
4009                .split('/')
4010                .next()
4011                .or_else(|| e.target_module.split('.').next())
4012                .unwrap_or("");
4013            all_dirs.contains(target_first) && target_first != dir_name
4014        })
4015        .map(|e| e.target_module.clone())
4016        .collect::<HashSet<_>>()
4017        .len();
4018
4019    // Count fan-in: how many edges from OTHER directories target files in this dir?
4020    let fan_in: usize = edges
4021        .iter()
4022        .filter(|e| {
4023            let source_dir = e.source_file.split('/').next().unwrap_or("");
4024            source_dir != dir_name
4025        })
4026        .filter(|e| {
4027            let target_first = e
4028                .target_module
4029                .split('/')
4030                .next()
4031                .or_else(|| e.target_module.split('.').next())
4032                .unwrap_or("");
4033            target_first == dir_name
4034        })
4035        .count();
4036
4037    if fan_in == 0 && fan_out == 0 {
4038        return "other".to_string();
4039    }
4040
4041    // Classify based on ratio
4042    if fan_out > 0 && fan_in == 0 {
4043        "entry".to_string()
4044    } else if fan_in > fan_out * 2 {
4045        "utility".to_string()
4046    } else if fan_out > fan_in * 2 {
4047        "entry".to_string()
4048    } else {
4049        "service".to_string()
4050    }
4051}
4052
4053/// Collect top-level directories containing source files, classifying each
4054/// into an architectural layer.
4055///
4056/// Uses two-pass classification:
4057/// 1. Name-based heuristic (e.g., "api/" -> api, "utils/" -> utility)
4058/// 2. Import-based fan-in/fan-out analysis for "other" directories
4059fn collect_arch_directories(root: &Path) -> Result<HashMap<String, String>> {
4060    let mut dirs: HashMap<String, String> = HashMap::new();
4061    let files = collect_source_files(root)?;
4062
4063    // Pass 1: classify by name
4064    for (rel_path, _) in &files {
4065        if let Some(first_dir) = rel_path.split('/').next() {
4066            if rel_path.contains('/') && !dirs.contains_key(first_dir) {
4067                let layer = classify_directory_layer(first_dir);
4068                dirs.insert(first_dir.to_string(), layer);
4069            }
4070        }
4071    }
4072
4073    // Pass 2: for directories classified as "other", try import-based classification
4074    let other_dirs: Vec<String> = dirs
4075        .iter()
4076        .filter(|(_, layer)| *layer == "other")
4077        .map(|(name, _)| name.clone())
4078        .collect();
4079
4080    if !other_dirs.is_empty() {
4081        // Build import graph to analyze import flow
4082        if let Ok(edges) = build_import_graph(root) {
4083            let all_dir_names: HashSet<String> = dirs.keys().cloned().collect();
4084            for dir_name in &other_dirs {
4085                let inferred = classify_by_import_flow(dir_name, &edges, &all_dir_names);
4086                if inferred != "other" {
4087                    dirs.insert(dir_name.clone(), inferred);
4088                }
4089            }
4090        }
4091    }
4092
4093    Ok(dirs)
4094}
4095
4096/// Run L8 architecture-level diff between two directories.
4097fn run_arch_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
4098    let dirs_a = collect_arch_directories(dir_a)?;
4099    let dirs_b = collect_arch_directories(dir_b)?;
4100
4101    let all_dirs: BTreeSet<&str> = dirs_a
4102        .keys()
4103        .chain(dirs_b.keys())
4104        .map(|s| s.as_str())
4105        .collect();
4106
4107    let mut arch_changes: Vec<ArchLevelChange> = Vec::new();
4108    let mut directories_added = 0usize;
4109    let mut directories_removed = 0usize;
4110    let mut layer_migrations = 0usize;
4111    let mut changed_dirs = 0usize;
4112    let total_dirs = all_dirs.len();
4113
4114    for dir_name in &all_dirs {
4115        let in_a = dirs_a.get(*dir_name);
4116        let in_b = dirs_b.get(*dir_name);
4117
4118        match (in_a, in_b) {
4119            (Some(layer_a), Some(layer_b)) => {
4120                if layer_a != layer_b {
4121                    // Layer migration
4122                    changed_dirs += 1;
4123                    layer_migrations += 1;
4124                    arch_changes.push(ArchLevelChange {
4125                        directory: dir_name.to_string(),
4126                        change_type: ArchChangeType::LayerMigration,
4127                        old_layer: Some(layer_a.clone()),
4128                        new_layer: Some(layer_b.clone()),
4129                        migrated_functions: Vec::new(),
4130                    });
4131                }
4132                // Same layer = no change (stable)
4133            }
4134            (None, Some(layer_b)) => {
4135                // Added directory
4136                changed_dirs += 1;
4137                directories_added += 1;
4138                arch_changes.push(ArchLevelChange {
4139                    directory: dir_name.to_string(),
4140                    change_type: ArchChangeType::Added,
4141                    old_layer: None,
4142                    new_layer: Some(layer_b.clone()),
4143                    migrated_functions: Vec::new(),
4144                });
4145            }
4146            (Some(layer_a), None) => {
4147                // Removed directory
4148                changed_dirs += 1;
4149                directories_removed += 1;
4150                arch_changes.push(ArchLevelChange {
4151                    directory: dir_name.to_string(),
4152                    change_type: ArchChangeType::Removed,
4153                    old_layer: Some(layer_a.clone()),
4154                    new_layer: None,
4155                    migrated_functions: Vec::new(),
4156                });
4157            }
4158            (None, None) => unreachable!(),
4159        }
4160    }
4161
4162    let stability_score = if total_dirs == 0 {
4163        1.0
4164    } else {
4165        1.0 - (changed_dirs as f64 / total_dirs as f64)
4166    };
4167
4168    let summary = ArchDiffSummary {
4169        layer_migrations,
4170        directories_added,
4171        directories_removed,
4172        cycles_introduced: 0,
4173        cycles_resolved: 0,
4174        stability_score,
4175    };
4176
4177    let identical = arch_changes.is_empty();
4178
4179    Ok(DiffReport {
4180        file_a: dir_a.display().to_string(),
4181        file_b: dir_b.display().to_string(),
4182        identical,
4183        changes: Vec::new(),
4184        summary: None,
4185        granularity: DiffGranularity::Architecture,
4186        file_changes: None,
4187        module_changes: None,
4188        import_graph_summary: None,
4189        arch_changes: Some(arch_changes),
4190        arch_summary: Some(summary),
4191    })
4192}
4193
4194// =============================================================================
4195// Tests
4196// =============================================================================
4197
#[cfg(test)]
mod tests {
    use super::*;

    /// "Before" Python fixture: three functions and a one-method class.
    const SAMPLE_A: &str = r#"
def original_function(x):
    return x * 2

def renamed_later(a, b):
    return a + b

def will_be_deleted():
    return "goodbye"

class OriginalClass:
    def method_one(self):
        return 1
"#;

    /// "After" Python fixture: one body edited, one function renamed, one
    /// function replaced by a new one, and one method added to the class.
    const SAMPLE_B: &str = r#"
def original_function(x):
    # Modified implementation
    return x * 3

def better_name(a, b):
    return a + b

def new_function():
    return "hello"

class OriginalClass:
    def method_one(self):
        return 1

    def method_two(self):
        return 2
"#;

    /// Parse Python source for tests using the language-aware ParserPool.
    fn parse_python(source: &str) -> tree_sitter::Tree {
        ParserPool::new().parse(source, Language::Python).unwrap()
    }

    /// Diff `SAMPLE_A` against `SAMPLE_B` (not semantic-only) and return the
    /// names of all detected changes of the requested kind.
    fn sample_change_names(kind: ChangeType) -> Vec<Option<String>> {
        let before = parse_python(SAMPLE_A);
        let after = parse_python(SAMPLE_B);

        let nodes_before =
            extract_nodes(before.root_node(), SAMPLE_A.as_bytes(), Language::Python);
        let nodes_after = extract_nodes(after.root_node(), SAMPLE_B.as_bytes(), Language::Python);

        let path_before = PathBuf::from("a.py");
        let path_after = PathBuf::from("b.py");
        let changes = detect_changes(
            &nodes_before,
            &nodes_after,
            &path_before,
            &path_after,
            false,
        );

        changes
            .iter()
            .filter(|change| change.change_type == kind)
            .map(|change| change.name.as_deref().map(str::to_owned))
            .collect()
    }

    /// Report marked as non-identical for the directory-level rendering tests.
    fn differing_dir_report() -> DiffReport {
        let mut report = DiffReport::new("dir_a/", "dir_b/");
        report.identical = false;
        report
    }

    /// Assert that `text` contains every needle, failing with the paired message.
    fn assert_mentions(text: &str, expectations: &[(&str, &str)]) {
        for &(needle, message) in expectations {
            assert!(text.contains(needle), "{}", message);
        }
    }

    #[test]
    fn test_extract_nodes() {
        let tree = parse_python(SAMPLE_A);
        let nodes = extract_nodes(tree.root_node(), SAMPLE_A.as_bytes(), Language::Python);

        // Three functions, the class, and its method.
        let found = nodes.len();
        assert!(found >= 5, "Expected at least 5 nodes, got {}", found);

        let expected_names = [
            "original_function",
            "renamed_later",
            "will_be_deleted",
            "OriginalClass",
            "method_one",
        ];
        for expected in &expected_names {
            assert!(nodes.iter().any(|node| node.name.as_str() == *expected));
        }
    }

    #[test]
    fn test_detect_update() {
        // The body of original_function differs between the fixtures.
        let updated = sample_change_names(ChangeType::Update);
        assert!(!updated.is_empty(), "Should detect at least one update");
        assert!(
            updated
                .iter()
                .any(|name| name.as_deref() == Some("original_function")),
            "original_function should be marked as updated"
        );
    }

    #[test]
    fn test_detect_insert() {
        // new_function and method_two exist only in SAMPLE_B.
        let inserted = sample_change_names(ChangeType::Insert);
        assert!(!inserted.is_empty(), "Should detect insertions");
    }

    #[test]
    fn test_detect_delete() {
        // will_be_deleted exists only in SAMPLE_A.
        let deleted = sample_change_names(ChangeType::Delete);
        assert!(!deleted.is_empty(), "Should detect deletions");
        assert!(
            deleted
                .iter()
                .any(|name| name.as_deref() == Some("will_be_deleted")),
            "will_be_deleted should be marked as deleted"
        );
    }

    #[test]
    fn test_detect_rename() {
        // renamed_later and better_name share the same body.
        let renamed = sample_change_names(ChangeType::Rename);
        assert!(!renamed.is_empty(), "Should detect renames");
    }

    #[test]
    fn test_identical_files() {
        // Both sides are parsed from the same source text.
        let source = SAMPLE_A;
        let left = parse_python(source);
        let right = parse_python(source);

        let left_nodes = extract_nodes(left.root_node(), source.as_bytes(), Language::Python);
        let right_nodes = extract_nodes(right.root_node(), source.as_bytes(), Language::Python);

        let left_path = PathBuf::from("a.py");
        let right_path = PathBuf::from("b.py");
        // Last argument enables semantic-only mode.
        let changes = detect_changes(&left_nodes, &right_nodes, &left_path, &right_path, true);

        assert!(
            changes.is_empty(),
            "Identical files should have no semantic changes"
        );
    }

    #[test]
    fn test_compute_similarity() {
        // Equal inputs, including two empty strings, score a perfect 1.0.
        assert_eq!(compute_similarity("abc", "abc"), 1.0);
        assert_eq!(compute_similarity("", ""), 1.0);

        // Two of four distinct lines are shared (Jaccard: 2/4 = 0.5).
        let partial = compute_similarity("a\nb\nc", "a\nb\nd");
        assert!(partial >= 0.5);
    }

    #[test]
    fn test_normalize_body() {
        // Input has a signature line, a comment-only line, and a statement
        // with a trailing inline comment.
        let source = "def foo():\n    # pure comment line\n    return 1  # inline comment";
        let result = normalize_body(source);

        assert!(!result.contains('#'), "Comments should be removed");
        assert!(!result.contains("def foo"), "Signature should be skipped");
        assert!(result.contains("return 1"), "Body should remain");
    }

    // =========================================================================
    // format_diff_text: L6-L8 rendering tests
    // =========================================================================

    #[test]
    fn test_format_diff_text_renders_file_changes() {
        let updated = FileLevelChange {
            relative_path: String::from("src/main.py"),
            change_type: ChangeType::Update,
            old_fingerprint: Some(12345),
            new_fingerprint: Some(67890),
            signature_changes: Some(vec![String::from("fn foo()")]),
        };
        let added = FileLevelChange {
            relative_path: String::from("src/new_module.py"),
            change_type: ChangeType::Insert,
            old_fingerprint: None,
            new_fingerprint: Some(11111),
            signature_changes: None,
        };
        let removed = FileLevelChange {
            relative_path: String::from("src/removed.py"),
            change_type: ChangeType::Delete,
            old_fingerprint: Some(99999),
            new_fingerprint: None,
            signature_changes: None,
        };

        let mut report = differing_dir_report();
        report.file_changes = Some(vec![updated, added, removed]);

        let text = format_diff_text(&report);
        assert_mentions(
            &text,
            &[
                ("File-Level Changes", "Should have file-level section header"),
                ("src/main.py", "Should mention updated file"),
                ("src/new_module.py", "Should mention added file"),
                ("src/removed.py", "Should mention removed file"),
            ],
        );
    }

    #[test]
    fn test_format_diff_text_renders_module_changes() {
        let added_import = ImportEdge {
            source_file: String::from("src/utils.py"),
            target_module: String::from("os.path"),
            imported_names: vec![String::from("join")],
        };
        let module_change = ModuleLevelChange {
            module_path: String::from("src/utils.py"),
            change_type: ChangeType::Update,
            imports_added: vec![added_import],
            imports_removed: vec![],
            file_change: None,
        };

        let mut report = differing_dir_report();
        report.module_changes = Some(vec![module_change]);

        let text = format_diff_text(&report);
        assert_mentions(
            &text,
            &[
                (
                    "Module-Level Changes",
                    "Should have module-level section header",
                ),
                ("src/utils.py", "Should mention the module path"),
                ("os.path", "Should mention added import target"),
            ],
        );
    }

    #[test]
    fn test_format_diff_text_renders_import_graph_summary() {
        let mut report = differing_dir_report();
        report.import_graph_summary = Some(ImportGraphSummary {
            total_edges_a: 10,
            total_edges_b: 15,
            edges_added: 7,
            edges_removed: 2,
            modules_with_import_changes: 3,
        });

        let text = format_diff_text(&report);
        assert_mentions(
            &text,
            &[
                ("Import Graph", "Should have import graph section"),
                ("7", "Should show edges added"),
                ("2", "Should show edges removed"),
            ],
        );
    }

    #[test]
    fn test_format_diff_text_renders_arch_changes() {
        let migration = ArchLevelChange {
            directory: String::from("src/api/"),
            change_type: ArchChangeType::LayerMigration,
            old_layer: Some(String::from("presentation")),
            new_layer: Some(String::from("business")),
            migrated_functions: vec![String::from("handle_request")],
        };
        let addition = ArchLevelChange {
            directory: String::from("src/new_service/"),
            change_type: ArchChangeType::Added,
            old_layer: None,
            new_layer: Some(String::from("service")),
            migrated_functions: vec![],
        };

        let mut report = differing_dir_report();
        report.arch_changes = Some(vec![migration, addition]);

        let text = format_diff_text(&report);
        assert_mentions(
            &text,
            &[
                ("Architecture-Level Changes", "Should have arch section header"),
                ("src/api/", "Should mention migrated directory"),
                ("presentation", "Should show old layer"),
                ("business", "Should show new layer"),
                ("src/new_service/", "Should mention added directory"),
            ],
        );
    }

    #[test]
    fn test_format_diff_text_renders_arch_summary() {
        let mut report = differing_dir_report();
        report.arch_summary = Some(ArchDiffSummary {
            layer_migrations: 2,
            directories_added: 1,
            directories_removed: 0,
            cycles_introduced: 1,
            cycles_resolved: 0,
            stability_score: 0.75,
        });

        let text = format_diff_text(&report);
        assert_mentions(
            &text,
            &[
                ("Architecture Summary", "Should have arch summary section"),
                ("0.75", "Should show stability score"),
            ],
        );
    }

    #[test]
    fn test_format_diff_text_identical_skips_higher_levels() {
        // A report flagged identical should render none of its change
        // sections, even if higher-level fields happen to be populated.
        let stray_change = FileLevelChange {
            relative_path: String::from("should_not_appear.py"),
            change_type: ChangeType::Insert,
            old_fingerprint: None,
            new_fingerprint: Some(1),
            signature_changes: None,
        };

        let mut report = DiffReport::new("a.py", "b.py");
        report.identical = true;
        report.file_changes = Some(vec![stray_change]);

        let text = format_diff_text(&report);
        assert!(
            !text.contains("should_not_appear"),
            "Identical report should skip all change sections"
        );
        assert!(
            text.contains("No structural changes"),
            "Should show identical message"
        );
    }
}