Skip to main content

tldr_cli/commands/remaining/
diff.rs

1//! Diff command - AST-aware structural diff
2//!
3//! Compares two source files at the AST level, detecting:
4//! - Insert: new function/class/method
5//! - Delete: removed function/class/method
6//! - Update: modified body
7//! - Move: same content, different location
8//! - Rename: same body, different name
9//!
10//! # Example
11//!
12//! ```bash
13//! tldr diff old.py new.py
14//! tldr diff old.py new.py --semantic-only
15//! tldr diff old.py new.py --format text
16//! ```
17
18use std::collections::{BTreeSet, HashMap, HashSet};
19use std::fs;
20use std::hash::{Hash, Hasher};
21use std::path::{Path, PathBuf};
22
23use anyhow::{bail, Result};
24use clap::Args;
25use regex::Regex;
26use tree_sitter::Node;
27
28use tldr_core::ast::function_finder::{get_function_name, get_function_node_kinds};
29use tldr_core::ast::parser::ParserPool;
30use tldr_core::callgraph::languages::LanguageRegistry;
31use tldr_core::types::Language;
32
33use super::error::RemainingError;
34use super::types::{
35    ASTChange, ArchChangeType, ArchDiffSummary, ArchLevelChange, BaseChanges, ChangeType,
36    DiffGranularity, DiffReport, DiffSummary, FileLevelChange, ImportEdge, ImportGraphSummary,
37    Location, ModuleLevelChange, NodeKind,
38};
39use crate::output::OutputFormat;
40
41// =============================================================================
42// Constants
43// =============================================================================
44
/// Similarity score, on a 0.0-1.0 scale, used as the threshold when
/// detecting renames.
const RENAME_SIMILARITY_THRESHOLD: f64 = 0.8;
47
48// =============================================================================
49// CLI Arguments
50// =============================================================================
51
52/// AST-aware structural diff between two files
53///
54/// Compares two source files at the AST level, detecting structural changes
55/// like inserted, deleted, updated, moved, and renamed functions/classes.
56///
57/// # Example
58///
59/// ```bash
60/// tldr diff old.py new.py
61/// tldr diff old.py new.py --semantic-only
62/// ```
63#[derive(Debug, Args)]
64pub struct DiffArgs {
65    /// First file (or directory for L6/L7/L8) to compare
66    pub file_a: PathBuf,
67
68    /// Second file (or directory for L6/L7/L8) to compare
69    pub file_b: PathBuf,
70
71    /// Diff granularity level
72    #[arg(long, short = 'g', default_value = "function")]
73    pub granularity: DiffGranularity,
74
75    /// Exclude formatting-only changes (comments, whitespace)
76    #[arg(long)]
77    pub semantic_only: bool,
78
79    /// Output file (optional, stdout if not specified)
80    #[arg(long, short = 'O')]
81    pub output: Option<PathBuf>,
82}
83
84// =============================================================================
85// Extracted Function Info
86// =============================================================================
87
88/// Information about an extracted function/class/method
89#[derive(Debug, Clone)]
90struct ExtractedNode {
91    /// Name of the function/class
92    name: String,
93    /// Kind of node
94    kind: NodeKind,
95    /// Line number (1-indexed)
96    line: u32,
97    /// End line number (1-indexed)
98    end_line: u32,
99    /// Column
100    column: u32,
101    /// Full source text (body)
102    body: String,
103    /// Normalized body (whitespace-insensitive)
104    normalized_body: String,
105    /// Parameters (for functions)
106    params: String,
107    /// Whether this is a method (inside a class)
108    is_method: bool,
109}
110
111impl ExtractedNode {
112    fn new(
113        name: impl Into<String>,
114        kind: NodeKind,
115        line: u32,
116        end_line: u32,
117        column: u32,
118        body: impl Into<String>,
119    ) -> Self {
120        let body_str: String = body.into();
121        let normalized = normalize_body(&body_str);
122        Self {
123            name: name.into(),
124            kind,
125            line,
126            end_line,
127            column,
128            body: body_str,
129            normalized_body: normalized,
130            params: String::new(),
131            is_method: false,
132        }
133    }
134
135    fn with_params(mut self, params: impl Into<String>) -> Self {
136        self.params = params.into();
137        self
138    }
139
140    fn with_method_kind(mut self) -> Self {
141        self.is_method = true;
142        if self.kind == NodeKind::Function {
143            self.kind = NodeKind::Method;
144        }
145        self
146    }
147}
148
/// Normalize a definition's source text for comparison.
///
/// The first line (the `def foo():` / `class Bar:` signature) is skipped so
/// that rename detection compares only the body. For every remaining line a
/// trailing `#` comment is removed, surrounding whitespace is trimmed, and
/// lines that end up empty are dropped. The surviving lines are joined with
/// `\n`.
///
/// A `#` only starts a comment when it sits outside a single- or
/// double-quoted string literal. Quote state is tracked per line (with
/// backslash escapes honoured inside a literal), so string literals that span
/// several lines are not recognised.
fn normalize_body(body: &str) -> String {
    /// Returns `line` cut just before the first `#` that is outside a quoted
    /// string, or the whole line when there is no such `#`.
    fn strip_hash_comment(line: &str) -> &str {
        let mut open_quote: Option<char> = None;
        let mut escaped = false;
        for (idx, ch) in line.char_indices() {
            if escaped {
                // Character following a backslash inside a literal: never a
                // delimiter, never a comment marker.
                escaped = false;
                continue;
            }
            match (open_quote, ch) {
                (Some(_), '\\') => escaped = true,
                (Some(quote), _) if ch == quote => open_quote = None,
                (Some(_), _) => {}
                (None, '\'') | (None, '"') => open_quote = Some(ch),
                (None, '#') => return &line[..idx],
                (None, _) => {}
            }
        }
        line
    }

    body.lines()
        .skip(1) // Skip signature line (def foo(): or class Bar:)
        .map(|line| strip_hash_comment(line).trim())
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join("\n")
}
178
179// =============================================================================
180// Implementation
181// =============================================================================
182
183impl DiffArgs {
184    /// Run the diff command and return the structured report.
185    ///
186    /// This is the internal workhorse: it dispatches to the appropriate
187    /// algorithm based on `self.granularity` and returns a `DiffReport`
188    /// without any output formatting.
189    pub fn run_to_report(&self) -> Result<DiffReport> {
190        // Validate paths exist
191        if !self.file_a.exists() {
192            return Err(RemainingError::file_not_found(&self.file_a).into());
193        }
194        if !self.file_b.exists() {
195            return Err(RemainingError::file_not_found(&self.file_b).into());
196        }
197
198        match self.granularity {
199            DiffGranularity::File => {
200                // L6: directory-level structural fingerprint diff
201                if !self.file_a.is_dir() || !self.file_b.is_dir() {
202                    bail!("File-level (L6) diff requires directories, not individual files");
203                }
204                run_file_level_diff(&self.file_a, &self.file_b)
205            }
206            DiffGranularity::Module => {
207                // L7: module-level import graph diff
208                if !self.file_a.is_dir() || !self.file_b.is_dir() {
209                    bail!("Module-level (L7) diff requires directories, not individual files");
210                }
211                run_module_level_diff(&self.file_a, &self.file_b)
212            }
213            DiffGranularity::Architecture => {
214                // L8: architecture-level diff
215                if !self.file_a.is_dir() || !self.file_b.is_dir() {
216                    bail!(
217                        "Architecture-level (L8) diff requires directories, not individual files"
218                    );
219                }
220                run_arch_level_diff(&self.file_a, &self.file_b)
221            }
222            DiffGranularity::Class => {
223                // L5: class-level diff (supports both files and directories)
224                if self.file_a.is_dir() && self.file_b.is_dir() {
225                    run_class_diff_directory(&self.file_a, &self.file_b, self.semantic_only)
226                } else {
227                    run_class_diff(&self.file_a, &self.file_b, self.semantic_only)
228                }
229            }
230            DiffGranularity::Statement => {
231                // L3: statement-level diff (Zhang-Shasha tree edit distance)
232                self.run_statement_level_diff()
233            }
234            DiffGranularity::Token => {
235                // L1: token-level diff using difftastic graph-based algorithm
236                self.run_token_level_diff()
237            }
238            DiffGranularity::Expression => {
239                // L2: expression-level diff (stub -- uses L1 until Phase 6)
240                self.run_expression_level_diff()
241            }
242            _ => {
243                // L4 and below: function-level diff (original behavior)
244                self.run_function_level_diff()
245            }
246        }
247    }
248
249    /// Run the diff command with output formatting.
250    pub fn run(&self, format: OutputFormat) -> Result<()> {
251        let report = self.run_to_report()?;
252
253        // Output
254        match format {
255            OutputFormat::Json => {
256                let json = serde_json::to_string_pretty(&report)?;
257                if let Some(ref output_path) = self.output {
258                    fs::write(output_path, &json)?;
259                } else {
260                    println!("{}", json);
261                }
262            }
263            OutputFormat::Text => {
264                let text = format_diff_text(&report);
265                if let Some(ref output_path) = self.output {
266                    fs::write(output_path, &text)?;
267                } else {
268                    println!("{}", text);
269                }
270            }
271            OutputFormat::Sarif | OutputFormat::Compact | OutputFormat::Dot => {
272                // Other formats not supported for diff, fall back to JSON
273                let json = serde_json::to_string_pretty(&report)?;
274                println!("{}", json);
275            }
276        }
277
278        Ok(())
279    }
280
281    /// Original L4 function-level diff implementation.
282    fn run_function_level_diff(&self) -> Result<DiffReport> {
283        // Detect language from file_a extension
284        let lang = Language::from_path(&self.file_a).ok_or_else(|| {
285            let ext = self
286                .file_a
287                .extension()
288                .map(|e| e.to_string_lossy().to_string())
289                .unwrap_or_else(|| "unknown".to_string());
290            RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
291        })?;
292
293        // Read file contents
294        let source_a = fs::read_to_string(&self.file_a)?;
295        let source_b = fs::read_to_string(&self.file_b)?;
296
297        // Parse both files using language-aware parser
298        let pool = ParserPool::new();
299        let tree_a = pool.parse(&source_a, lang).map_err(|e| {
300            RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
301        })?;
302        let tree_b = pool.parse(&source_b, lang).map_err(|e| {
303            RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
304        })?;
305
306        // Extract nodes from both files
307        let nodes_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
308        let nodes_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
309
310        // Detect changes
311        let changes = detect_changes(
312            &nodes_a,
313            &nodes_b,
314            &self.file_a,
315            &self.file_b,
316            self.semantic_only,
317        );
318
319        // Build summary
320        let mut summary = DiffSummary::default();
321        for change in &changes {
322            summary.total_changes += 1;
323            if change.change_type != ChangeType::Format {
324                summary.semantic_changes += 1;
325            }
326            match change.change_type {
327                ChangeType::Insert => summary.inserts += 1,
328                ChangeType::Delete => summary.deletes += 1,
329                ChangeType::Update => summary.updates += 1,
330                ChangeType::Move => summary.moves += 1,
331                ChangeType::Rename => summary.renames += 1,
332                ChangeType::Format => summary.formats += 1,
333                ChangeType::Extract => summary.extracts += 1,
334                ChangeType::Inline => {}
335            }
336        }
337
338        // Build report
339        let report = DiffReport {
340            file_a: self.file_a.display().to_string(),
341            file_b: self.file_b.display().to_string(),
342            identical: changes.is_empty(),
343            changes,
344            summary: Some(summary),
345            granularity: self.granularity,
346            file_changes: None,
347            module_changes: None,
348            import_graph_summary: None,
349            arch_changes: None,
350            arch_summary: None,
351        };
352
353        Ok(report)
354    }
355
356    /// L1 Token-level diff using difftastic's graph-based algorithm.
357    ///
358    /// Pipeline:
359    /// 1. Read files and detect language
360    /// 2. Parse with tree-sitter
361    /// 3. Convert to difftastic Syntax trees
362    /// 4. Run unchanged marking, Dijkstra graph diff, slider fixup
363    /// 5. Convert ChangeMap to DiffReport via changemap_to_report
364    fn run_token_level_diff(&self) -> Result<DiffReport> {
365        use super::difftastic;
366        use typed_arena::Arena;
367
368        // Detect language from file_a extension
369        let lang = Language::from_path(&self.file_a).ok_or_else(|| {
370            let ext = self
371                .file_a
372                .extension()
373                .map(|e| e.to_string_lossy().to_string())
374                .unwrap_or_else(|| "unknown".to_string());
375            RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
376        })?;
377
378        // Read file contents
379        let lhs_src = fs::read_to_string(&self.file_a)?;
380        let rhs_src = fs::read_to_string(&self.file_b)?;
381
382        // Get language config for difftastic tree-sitter conversion
383        let config = difftastic::lang_config::LangConfig::for_language(lang.as_str());
384
385        // Parse both files using existing tree-sitter infrastructure
386        let pool = ParserPool::new();
387        let lhs_tree = pool.parse(&lhs_src, lang).map_err(|e| {
388            RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
389        })?;
390        let rhs_tree = pool.parse(&rhs_src, lang).map_err(|e| {
391            RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
392        })?;
393
394        // Convert tree-sitter trees to difftastic Syntax trees
395        let arena = Arena::new();
396        let (lhs_nodes, rhs_nodes) = difftastic::ts_to_syntax::prepare_syntax_trees(
397            &arena, &lhs_src, &rhs_src, &lhs_tree, &rhs_tree, &config,
398        );
399
400        // Run diff pipeline
401        let mut change_map = difftastic::changes::ChangeMap::default();
402
403        // Phase 1: Mark unchanged nodes (structural matching)
404        let chunks = difftastic::unchanged::mark_unchanged(&lhs_nodes, &rhs_nodes, &mut change_map);
405
406        // Phase 2: Run Dijkstra graph diff on each changed chunk
407        for (lhs_chunk, rhs_chunk) in &chunks {
408            match (lhs_chunk.first(), rhs_chunk.first()) {
409                (Some(lhs_first), Some(rhs_first)) => {
410                    if difftastic::dijkstra::mark_syntax(
411                        Some(*lhs_first),
412                        Some(*rhs_first),
413                        &mut change_map,
414                        difftastic::dijkstra::DEFAULT_GRAPH_LIMIT,
415                    )
416                    .is_err()
417                    {
418                        // Graph limit exceeded -- mark all nodes as Novel
419                        for node in lhs_chunk {
420                            difftastic::changes::insert_deep_novel(node, &mut change_map);
421                        }
422                        for node in rhs_chunk {
423                            difftastic::changes::insert_deep_novel(node, &mut change_map);
424                        }
425                    }
426                }
427                (Some(_), None) => {
428                    // LHS has nodes, RHS is empty -- all LHS nodes are Novel (deleted)
429                    for node in lhs_chunk {
430                        difftastic::changes::insert_deep_novel(node, &mut change_map);
431                    }
432                }
433                (None, Some(_)) => {
434                    // RHS has nodes, LHS is empty -- all RHS nodes are Novel (inserted)
435                    for node in rhs_chunk {
436                        difftastic::changes::insert_deep_novel(node, &mut change_map);
437                    }
438                }
439                (None, None) => {
440                    // Both sides empty -- nothing to do
441                }
442            }
443        }
444
445        // Phase 3: Fix sliders for better alignment
446        difftastic::sliders::fix_all_sliders(&lhs_nodes, &mut change_map);
447        difftastic::sliders::fix_all_sliders(&rhs_nodes, &mut change_map);
448
449        // Convert to DiffReport
450        let fa = self.file_a.display().to_string();
451        let fb = self.file_b.display().to_string();
452        Ok(difftastic::changemap_to_report::changemap_to_l1_report(
453            &lhs_nodes,
454            &rhs_nodes,
455            &change_map,
456            &fa,
457            &fb,
458        ))
459    }
460
461    /// L2 Expression-level diff using difftastic with expression grouping.
462    ///
463    /// Same diff pipeline as L1 (unchanged marking, Dijkstra, slider fixup)
464    /// but converts the ChangeMap via `changemap_to_l2_report`, which groups
465    /// token changes under their nearest `Syntax::List` parent.
466    fn run_expression_level_diff(&self) -> Result<DiffReport> {
467        use super::difftastic;
468        use typed_arena::Arena;
469
470        // Detect language from file_a extension
471        let lang = Language::from_path(&self.file_a).ok_or_else(|| {
472            let ext = self
473                .file_a
474                .extension()
475                .map(|e| e.to_string_lossy().to_string())
476                .unwrap_or_else(|| "unknown".to_string());
477            RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
478        })?;
479
480        // Read file contents
481        let lhs_src = fs::read_to_string(&self.file_a)?;
482        let rhs_src = fs::read_to_string(&self.file_b)?;
483
484        // Get language config for difftastic tree-sitter conversion
485        let config = difftastic::lang_config::LangConfig::for_language(lang.as_str());
486
487        // Parse both files using existing tree-sitter infrastructure
488        let pool = ParserPool::new();
489        let lhs_tree = pool.parse(&lhs_src, lang).map_err(|e| {
490            RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
491        })?;
492        let rhs_tree = pool.parse(&rhs_src, lang).map_err(|e| {
493            RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
494        })?;
495
496        // Convert tree-sitter trees to difftastic Syntax trees
497        let arena = Arena::new();
498        let (lhs_nodes, rhs_nodes) = difftastic::ts_to_syntax::prepare_syntax_trees(
499            &arena, &lhs_src, &rhs_src, &lhs_tree, &rhs_tree, &config,
500        );
501
502        // Run diff pipeline
503        let mut change_map = difftastic::changes::ChangeMap::default();
504
505        // Phase 1: Mark unchanged nodes (structural matching)
506        let chunks = difftastic::unchanged::mark_unchanged(&lhs_nodes, &rhs_nodes, &mut change_map);
507
508        // Phase 2: Run Dijkstra graph diff on each changed chunk
509        for (lhs_chunk, rhs_chunk) in &chunks {
510            match (lhs_chunk.first(), rhs_chunk.first()) {
511                (Some(lhs_first), Some(rhs_first)) => {
512                    if difftastic::dijkstra::mark_syntax(
513                        Some(*lhs_first),
514                        Some(*rhs_first),
515                        &mut change_map,
516                        difftastic::dijkstra::DEFAULT_GRAPH_LIMIT,
517                    )
518                    .is_err()
519                    {
520                        for node in lhs_chunk {
521                            difftastic::changes::insert_deep_novel(node, &mut change_map);
522                        }
523                        for node in rhs_chunk {
524                            difftastic::changes::insert_deep_novel(node, &mut change_map);
525                        }
526                    }
527                }
528                (Some(_), None) => {
529                    for node in lhs_chunk {
530                        difftastic::changes::insert_deep_novel(node, &mut change_map);
531                    }
532                }
533                (None, Some(_)) => {
534                    for node in rhs_chunk {
535                        difftastic::changes::insert_deep_novel(node, &mut change_map);
536                    }
537                }
538                (None, None) => {}
539            }
540        }
541
542        // Phase 3: Fix sliders for better alignment
543        difftastic::sliders::fix_all_sliders(&lhs_nodes, &mut change_map);
544        difftastic::sliders::fix_all_sliders(&rhs_nodes, &mut change_map);
545
546        // Convert to DiffReport using L2 expression grouping
547        let fa = self.file_a.display().to_string();
548        let fb = self.file_b.display().to_string();
549        Ok(difftastic::changemap_to_report::changemap_to_l2_report(
550            &lhs_nodes,
551            &rhs_nodes,
552            &change_map,
553            &fa,
554            &fb,
555        ))
556    }
557}
558
559// =============================================================================
560// Tree-sitter Parsing
561// =============================================================================
562
563/// Get text for a node from source
564fn node_text<'a>(node: Node, source: &'a [u8]) -> &'a str {
565    node.utf8_text(source).unwrap_or("")
566}
567
568/// Get the class-like node kinds for each language
569fn get_class_node_kinds(language: Language) -> &'static [&'static str] {
570    match language {
571        Language::Python => &["class_definition"],
572        Language::TypeScript | Language::JavaScript => &["class_declaration", "class"],
573        Language::Go => &["type_declaration"],
574        Language::Rust => &["struct_item", "enum_item", "impl_item"],
575        Language::Java => &[
576            "class_declaration",
577            "interface_declaration",
578            "enum_declaration",
579        ],
580        Language::C => &["struct_specifier", "enum_specifier"],
581        Language::Cpp => &["class_specifier", "struct_specifier", "enum_specifier"],
582        Language::Ruby => &["class", "module"],
583        Language::Php => &["class_declaration", "interface_declaration"],
584        Language::CSharp => &[
585            "class_declaration",
586            "interface_declaration",
587            "struct_declaration",
588        ],
589        Language::Kotlin => &["class_declaration", "object_declaration"],
590        Language::Scala => &["class_definition", "object_definition", "trait_definition"],
591        Language::Swift => &[
592            "class_declaration",
593            "struct_declaration",
594            "protocol_declaration",
595        ],
596        Language::Elixir => &["call"],         // defmodule is a call
597        Language::Lua | Language::Luau => &[], // Lua has no class syntax
598        Language::Ocaml => &["module_definition", "type_definition"],
599    }
600}
601
602/// Get the node kinds that represent class body containers for method extraction
603fn get_class_body_kinds(language: Language) -> &'static [&'static str] {
604    match language {
605        Language::Python => &["block"],
606        Language::TypeScript | Language::JavaScript => &["class_body"],
607        Language::Go => &[], // Go methods are not nested in type declarations
608        Language::Rust => &["declaration_list"], // impl_item body
609        Language::Java => &["class_body"],
610        Language::C | Language::Cpp => &["field_declaration_list"],
611        Language::Ruby => &["body_statement"],
612        Language::Php => &["declaration_list"],
613        Language::CSharp => &["declaration_list"],
614        Language::Kotlin => &["class_body"],
615        Language::Scala => &["template_body"],
616        Language::Swift => &["class_body"],
617        Language::Elixir => &["do_block"],
618        Language::Lua | Language::Luau => &[],
619        Language::Ocaml => &[],
620    }
621}
622
623// =============================================================================
624// Node Extraction
625// =============================================================================
626
627/// Extract all functions, classes, and methods from AST
628fn extract_nodes(root: Node, source: &[u8], lang: Language) -> Vec<ExtractedNode> {
629    let mut nodes = Vec::new();
630    let kinds = NodeKindSets {
631        func: get_function_node_kinds(lang),
632        class: get_class_node_kinds(lang),
633        body: get_class_body_kinds(lang),
634    };
635    extract_nodes_recursive(root, source, &mut nodes, false, lang, &kinds);
636    nodes
637}
638
/// The three per-language node-kind tables consulted during extraction.
struct NodeKindSets<'k> {
    /// Kinds treated as function definitions.
    func: &'k [&'k str],
    /// Kinds treated as class-like definitions.
    class: &'k [&'k str],
    /// Kinds of the class-body containers that are searched for methods.
    body: &'k [&'k str],
}
644
645fn extract_nodes_recursive(
646    node: Node,
647    source: &[u8],
648    nodes: &mut Vec<ExtractedNode>,
649    in_class: bool,
650    lang: Language,
651    kinds: &NodeKindSets<'_>,
652) {
653    let kind = node.kind();
654
655    // OCaml-specific: function-kinds are `value_definition` AND
656    // `let_binding`. The tree-sitter shape is:
657    //   value_definition -> let_binding -> pattern: <name>
658    // Plus, `let_binding` ALSO appears nested inside expressions
659    // (`let _ = expr in body`), where it is NOT a function definition.
660    // VAL-018: filter to top-level value_definition only, and require a
661    // parameter (mirrors `extract_ocaml_functions` in
662    // crates/tldr-core/src/ast/extractor.rs:1132). Skip nested
663    // let_bindings inside function bodies and anonymous `_` bindings.
664    if lang == Language::Ocaml && kind == "value_definition" {
665        for child in node.children(&mut node.walk()) {
666            if child.kind() == "let_binding" && ocaml_let_binding_is_function(child) {
667                if let Some(extracted) = extract_function_node(child, source, in_class, lang) {
668                    // Skip anonymous `_` patterns and `()` unit bindings.
669                    if extracted.name != "_" && extracted.name != "()" && !extracted.name.is_empty()
670                    {
671                        nodes.push(extracted);
672                    }
673                }
674            }
675        }
676        // Don't recurse — we've already extracted the function. Inner
677        // let-bindings (e.g. `let _ = helper () in ...`) are body
678        // expressions, not functions.
679        return;
680    }
681    if lang == Language::Ocaml && kind == "let_binding" {
682        // Bare let_binding outside a value_definition: only valid as a
683        // top-level definition without a wrapping value_definition,
684        // which is not the canonical form. Don't extract; recurse normally.
685        // (Tree-sitter usually wraps top-level lets in value_definition.)
686        for child in node.children(&mut node.walk()) {
687            extract_nodes_recursive(child, source, nodes, in_class, lang, kinds);
688        }
689        return;
690    }
691
692    // Check if this is a function node
693    if kinds.func.contains(&kind) {
694        if let Some(extracted) = extract_function_node(node, source, in_class, lang) {
695            nodes.push(extracted);
696        }
697    }
698    // Check if this is a class node
699    else if kinds.class.contains(&kind) {
700        if let Some(extracted) = extract_class_node(node, source, lang) {
701            nodes.push(extracted);
702        }
703        // Extract methods inside the class body
704        for child in node.children(&mut node.walk()) {
705            if kinds.body.contains(&child.kind()) {
706                extract_nodes_recursive(child, source, nodes, true, lang, kinds);
707            }
708        }
709        return; // Don't recurse further - we handled the body
710    }
711
712    // Recurse into children
713    for child in node.children(&mut node.walk()) {
714        extract_nodes_recursive(child, source, nodes, in_class, lang, kinds);
715    }
716}
717
718/// True if an OCaml `let_binding` node has at least one `parameter`
719/// child — i.e. it's a function definition rather than a value binding.
720/// Mirrors `ocaml_binding_has_params_simple` in
721/// `crates/tldr-core/src/ast/extractor.rs:1158`.
722fn ocaml_let_binding_is_function(node: Node) -> bool {
723    for child in node.children(&mut node.walk()) {
724        if child.kind() == "parameter" {
725            return true;
726        }
727    }
728    false
729}
730
731fn extract_function_node(
732    node: Node,
733    source: &[u8],
734    is_method: bool,
735    lang: Language,
736) -> Option<ExtractedNode> {
737    // Use language-aware name extraction from function_finder
738    let source_str = std::str::from_utf8(source).unwrap_or("");
739    let func_name = get_function_name(node, lang, source_str)?;
740
741    // Try to extract parameters (varies by language but most use "parameters" or "formal_parameters")
742    let params = node
743        .child_by_field_name("parameters")
744        .or_else(|| node.child_by_field_name("formal_parameters"))
745        .map(|p| node_text(p, source).to_string())
746        .unwrap_or_default();
747
748    let line = node.start_position().row as u32 + 1;
749    let end_line = node.end_position().row as u32 + 1;
750    let column = node.start_position().column as u32;
751    let body = node_text(node, source).to_string();
752
753    let mut extracted =
754        ExtractedNode::new(func_name, NodeKind::Function, line, end_line, column, body)
755            .with_params(params);
756
757    if is_method {
758        extracted = extracted.with_method_kind();
759    }
760
761    Some(extracted)
762}
763
764fn extract_class_node(node: Node, source: &[u8], lang: Language) -> Option<ExtractedNode> {
765    // Get class name - most languages use "name" field
766    let class_name = node
767        .child_by_field_name("name")
768        .map(|n| node_text(n, source).to_string())
769        .or_else(|| {
770            // Fallback: search for first identifier child
771            let mut cursor = node.walk();
772            for child in node.children(&mut cursor) {
773                if child.kind() == "identifier"
774                    || child.kind() == "type_identifier"
775                    || child.kind() == "constant"
776                {
777                    return Some(node_text(child, source).to_string());
778                }
779            }
780            None
781        })?;
782
783    // Skip empty names
784    if class_name.is_empty() {
785        return None;
786    }
787
788    // For Elixir defmodule, filter to only actual module definitions
789    if lang == Language::Elixir && node.kind() == "call" {
790        let first_child = node.child(0)?;
791        let first_text = node_text(first_child, source);
792        if first_text != "defmodule" {
793            return None;
794        }
795        // Module name is in the arguments
796        if let Some(args) = node.child(1) {
797            let name = node_text(args, source).to_string();
798            if !name.is_empty() {
799                let line = node.start_position().row as u32 + 1;
800                let end_line = node.end_position().row as u32 + 1;
801                let column = node.start_position().column as u32;
802                let body = node_text(node, source).to_string();
803                return Some(ExtractedNode::new(
804                    name,
805                    NodeKind::Class,
806                    line,
807                    end_line,
808                    column,
809                    body,
810                ));
811            }
812        }
813        return None;
814    }
815
816    let line = node.start_position().row as u32 + 1;
817    let end_line = node.end_position().row as u32 + 1;
818    let column = node.start_position().column as u32;
819    let body = node_text(node, source).to_string();
820
821    Some(ExtractedNode::new(
822        class_name,
823        NodeKind::Class,
824        line,
825        end_line,
826        column,
827        body,
828    ))
829}
830
831// =============================================================================
832// Change Detection
833// =============================================================================
834
835/// Detect changes between two sets of nodes
836fn detect_changes(
837    nodes_a: &[ExtractedNode],
838    nodes_b: &[ExtractedNode],
839    file_a: &Path,
840    file_b: &Path,
841    semantic_only: bool,
842) -> Vec<ASTChange> {
843    let mut changes = Vec::new();
844
845    // Build lookup maps by name
846    let _map_a: HashMap<&str, &ExtractedNode> =
847        nodes_a.iter().map(|n| (n.name.as_str(), n)).collect();
848    let map_b: HashMap<&str, &ExtractedNode> =
849        nodes_b.iter().map(|n| (n.name.as_str(), n)).collect();
850
851    // Track which nodes have been matched
852    let mut matched_a: Vec<bool> = vec![false; nodes_a.len()];
853    let mut matched_b: Vec<bool> = vec![false; nodes_b.len()];
854
855    // First pass: exact name matches
856    for (i, node_a) in nodes_a.iter().enumerate() {
857        let _ = node_a.end_line;
858        if let Some(&node_b) = map_b.get(node_a.name.as_str()) {
859            // Same name exists in both files
860            matched_a[i] = true;
861            if let Some(j) = nodes_b.iter().position(|n| n.name == node_a.name) {
862                matched_b[j] = true;
863            }
864
865            // Check if body changed
866            if node_a.normalized_body != node_b.normalized_body {
867                // It's an update
868                changes.push(ASTChange {
869                    change_type: ChangeType::Update,
870                    node_kind: node_a.kind,
871                    name: Some(node_a.name.clone()),
872                    old_location: Some(Location::with_column(
873                        file_a.display().to_string(),
874                        node_a.line,
875                        node_a.column,
876                    )),
877                    new_location: Some(Location::with_column(
878                        file_b.display().to_string(),
879                        node_b.line,
880                        node_b.column,
881                    )),
882                    old_text: Some(node_a.body.clone()),
883                    new_text: Some(node_b.body.clone()),
884                    similarity: Some(compute_similarity(
885                        &node_a.normalized_body,
886                        &node_b.normalized_body,
887                    )),
888                    children: None,
889                    base_changes: None,
890                });
891            } else if node_a.line != node_b.line && !semantic_only {
892                // Same content but moved - only report if not semantic_only
893                changes.push(ASTChange {
894                    change_type: ChangeType::Move,
895                    node_kind: node_a.kind,
896                    name: Some(node_a.name.clone()),
897                    old_location: Some(Location::with_column(
898                        file_a.display().to_string(),
899                        node_a.line,
900                        node_a.column,
901                    )),
902                    new_location: Some(Location::with_column(
903                        file_b.display().to_string(),
904                        node_b.line,
905                        node_b.column,
906                    )),
907                    old_text: None,
908                    new_text: None,
909                    similarity: Some(1.0),
910                    children: None,
911                    base_changes: None,
912                });
913            }
914        }
915    }
916
917    // Collect unmatched nodes
918    let unmatched_a: Vec<(usize, &ExtractedNode)> = nodes_a
919        .iter()
920        .enumerate()
921        .filter(|(i, _)| !matched_a[*i])
922        .collect();
923    let unmatched_b: Vec<(usize, &ExtractedNode)> = nodes_b
924        .iter()
925        .enumerate()
926        .filter(|(i, _)| !matched_b[*i])
927        .collect();
928
929    // Second pass: detect renames (same body, different name)
930    let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
931
932    for (_, node_a) in &unmatched_a {
933        let mut best_match: Option<(usize, f64)> = None;
934
935        for (j, (_, node_b)) in unmatched_b.iter().enumerate() {
936            if used_b[j] {
937                continue;
938            }
939            if node_a.kind != node_b.kind {
940                continue;
941            }
942
943            let similarity = compute_similarity(&node_a.normalized_body, &node_b.normalized_body);
944            if similarity >= RENAME_SIMILARITY_THRESHOLD
945                && (best_match.is_none() || similarity > best_match.unwrap().1)
946            {
947                best_match = Some((j, similarity));
948            }
949        }
950
951        if let Some((j, similarity)) = best_match {
952            let (_, node_b) = unmatched_b[j];
953            used_b[j] = true;
954
955            // Mark as renamed
956            changes.push(ASTChange {
957                change_type: ChangeType::Rename,
958                node_kind: node_a.kind,
959                name: Some(node_a.name.clone()),
960                old_location: Some(Location::with_column(
961                    file_a.display().to_string(),
962                    node_a.line,
963                    node_a.column,
964                )),
965                new_location: Some(Location::with_column(
966                    file_b.display().to_string(),
967                    node_b.line,
968                    node_b.column,
969                )),
970                old_text: Some(node_a.name.clone()),
971                new_text: Some(node_b.name.clone()),
972                similarity: Some(similarity),
973                children: None,
974                base_changes: None,
975            });
976        }
977    }
978
979    // Remaining unmatched in A are deletes
980    for (_, node_a) in &unmatched_a {
981        // Check if already matched as rename
982        let is_renamed = changes
983            .iter()
984            .any(|c| c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&node_a.name));
985        if !is_renamed {
986            changes.push(ASTChange {
987                change_type: ChangeType::Delete,
988                node_kind: node_a.kind,
989                name: Some(node_a.name.clone()),
990                old_location: Some(Location::with_column(
991                    file_a.display().to_string(),
992                    node_a.line,
993                    node_a.column,
994                )),
995                new_location: None,
996                old_text: None,
997                new_text: None,
998                similarity: None,
999                children: None,
1000                base_changes: None,
1001            });
1002        }
1003    }
1004
1005    // Remaining unmatched in B are inserts
1006    for (j, (_, node_b)) in unmatched_b.iter().enumerate() {
1007        if !used_b[j] {
1008            changes.push(ASTChange {
1009                change_type: ChangeType::Insert,
1010                node_kind: node_b.kind,
1011                name: Some(node_b.name.clone()),
1012                old_location: None,
1013                new_location: Some(Location::with_column(
1014                    file_b.display().to_string(),
1015                    node_b.line,
1016                    node_b.column,
1017                )),
1018                old_text: None,
1019                new_text: None,
1020                similarity: None,
1021                children: None,
1022                base_changes: None,
1023            });
1024        }
1025    }
1026
1027    // Sort changes: deletes, renames, updates, inserts
1028    changes.sort_by_key(|c| match c.change_type {
1029        ChangeType::Delete => 0,
1030        ChangeType::Rename => 1,
1031        ChangeType::Update => 2,
1032        ChangeType::Move => 3,
1033        ChangeType::Insert => 4,
1034        _ => 5,
1035    });
1036
1037    changes
1038}
1039
1040// =============================================================================
1041// Similarity Computation
1042// =============================================================================
1043
1044/// Compute similarity between two strings using Jaccard on lines,
1045/// with a character-level fallback for short/single-line bodies.
1046fn compute_similarity(a: &str, b: &str) -> f64 {
1047    if a == b {
1048        return 1.0;
1049    }
1050    if a.is_empty() || b.is_empty() {
1051        return 0.0;
1052    }
1053
1054    // Jaccard similarity on lines
1055    let lines_a: std::collections::HashSet<&str> = a.lines().collect();
1056    let lines_b: std::collections::HashSet<&str> = b.lines().collect();
1057
1058    let intersection = lines_a.intersection(&lines_b).count();
1059    let union = lines_a.union(&lines_b).count();
1060
1061    let line_sim = if union == 0 {
1062        0.0
1063    } else {
1064        intersection as f64 / union as f64
1065    };
1066
1067    // For short bodies (few lines), also compute character-level similarity
1068    // to avoid 0.0 when a single line was slightly modified
1069    if line_sim == 0.0 && lines_a.len() <= 2 && lines_b.len() <= 2 {
1070        return char_jaccard_similarity(a, b);
1071    }
1072
1073    line_sim
1074}
1075
/// Character-level Jaccard similarity over bigrams.
///
/// Each input is split into its set of adjacent character pairs and the
/// result is `|A ∩ B| / |A ∪ B|`. Bigrams are built from Unicode scalar
/// values rather than raw UTF-8 bytes, so two different multi-byte
/// characters never count as partially equal just because their encodings
/// share a prefix byte.
///
/// Inputs shorter than two characters have no bigrams; they score 1.0 when
/// the strings are equal and 0.0 otherwise.
fn char_jaccard_similarity(a: &str, b: &str) -> f64 {
    let chars_a: Vec<char> = a.chars().collect();
    let chars_b: Vec<char> = b.chars().collect();

    if chars_a.len() < 2 || chars_b.len() < 2 {
        return if a == b { 1.0 } else { 0.0 };
    }

    let bigrams_a: std::collections::HashSet<(char, char)> =
        chars_a.windows(2).map(|pair| (pair[0], pair[1])).collect();
    let bigrams_b: std::collections::HashSet<(char, char)> =
        chars_b.windows(2).map(|pair| (pair[0], pair[1])).collect();

    let intersection = bigrams_a.intersection(&bigrams_b).count();
    let union = bigrams_a.union(&bigrams_b).count();

    if union == 0 {
        0.0
    } else {
        intersection as f64 / union as f64
    }
}
1094
1095// =============================================================================
1096// Text Formatting
1097// =============================================================================
1098
1099/// Format diff report as human-readable text
1100fn format_diff_text(report: &DiffReport) -> String {
1101    let mut out = String::new();
1102
1103    out.push_str("Diff Report\n");
1104    out.push_str("===========\n\n");
1105    out.push_str(&format!("File A: {}\n", report.file_a));
1106    out.push_str(&format!("File B: {}\n", report.file_b));
1107    out.push_str(&format!("Identical: {}\n\n", report.identical));
1108
1109    if report.identical {
1110        out.push_str("No structural changes detected.\n");
1111        return out;
1112    }
1113
1114    out.push_str("Changes:\n");
1115    out.push_str("--------\n");
1116
1117    for change in &report.changes {
1118        let change_type = match change.change_type {
1119            ChangeType::Insert => "+",
1120            ChangeType::Delete => "-",
1121            ChangeType::Update => "~",
1122            ChangeType::Move => ">",
1123            ChangeType::Rename => "R",
1124            ChangeType::Format => "F",
1125            ChangeType::Extract => "E",
1126            ChangeType::Inline => "I",
1127        };
1128
1129        let kind = match change.node_kind {
1130            NodeKind::Function => "function",
1131            NodeKind::Class => "class",
1132            NodeKind::Method => "method",
1133            NodeKind::Field => "field",
1134            NodeKind::Statement => "statement",
1135            NodeKind::Expression => "expression",
1136            NodeKind::Block => "block",
1137        };
1138
1139        let name = change.name.as_deref().unwrap_or("<unknown>");
1140
1141        match change.change_type {
1142            ChangeType::Insert => {
1143                if let Some(ref loc) = change.new_location {
1144                    out.push_str(&format!(
1145                        "  {} {} {} at {}:{}\n",
1146                        change_type, kind, name, loc.file, loc.line
1147                    ));
1148                }
1149            }
1150            ChangeType::Delete => {
1151                if let Some(ref loc) = change.old_location {
1152                    out.push_str(&format!(
1153                        "  {} {} {} at {}:{}\n",
1154                        change_type, kind, name, loc.file, loc.line
1155                    ));
1156                }
1157            }
1158            ChangeType::Update | ChangeType::Move => {
1159                if let (Some(ref old), Some(ref new)) = (&change.old_location, &change.new_location)
1160                {
1161                    out.push_str(&format!(
1162                        "  {} {} {} from {}:{} to {}:{}\n",
1163                        change_type, kind, name, old.file, old.line, new.file, new.line
1164                    ));
1165                }
1166            }
1167            ChangeType::Rename => {
1168                let old_name = change.old_text.as_deref().unwrap_or(name);
1169                let new_name = change.new_text.as_deref().unwrap_or(name);
1170                out.push_str(&format!(
1171                    "  {} {} {} -> {}\n",
1172                    change_type, kind, old_name, new_name
1173                ));
1174            }
1175            _ => {
1176                out.push_str(&format!("  {} {} {}\n", change_type, kind, name));
1177            }
1178        }
1179    }
1180
1181    if let Some(ref summary) = report.summary {
1182        out.push_str("\nSummary:\n");
1183        out.push_str("--------\n");
1184        out.push_str(&format!("  Total changes: {}\n", summary.total_changes));
1185        out.push_str(&format!(
1186            "  Semantic changes: {}\n",
1187            summary.semantic_changes
1188        ));
1189        out.push_str(&format!("  Inserts: {}\n", summary.inserts));
1190        out.push_str(&format!("  Deletes: {}\n", summary.deletes));
1191        out.push_str(&format!("  Updates: {}\n", summary.updates));
1192        out.push_str(&format!("  Renames: {}\n", summary.renames));
1193        out.push_str(&format!("  Moves: {}\n", summary.moves));
1194    }
1195
1196    // L6: File-level structural changes
1197    if let Some(ref file_changes) = report.file_changes {
1198        out.push_str("\nFile-Level Changes:\n");
1199        out.push_str("-------------------\n");
1200        for fc in file_changes {
1201            let change_type = match fc.change_type {
1202                ChangeType::Insert => "+",
1203                ChangeType::Delete => "-",
1204                ChangeType::Update => "~",
1205                _ => "?",
1206            };
1207            out.push_str(&format!("  {} {}\n", change_type, fc.relative_path));
1208            if let Some(ref sigs) = fc.signature_changes {
1209                for sig in sigs {
1210                    out.push_str(&format!("      changed: {}\n", sig));
1211                }
1212            }
1213        }
1214    }
1215
1216    // L7: Module-level changes
1217    if let Some(ref module_changes) = report.module_changes {
1218        out.push_str("\nModule-Level Changes:\n");
1219        out.push_str("---------------------\n");
1220        for mc in module_changes {
1221            let change_type = match mc.change_type {
1222                ChangeType::Insert => "+",
1223                ChangeType::Delete => "-",
1224                ChangeType::Update => "~",
1225                _ => "?",
1226            };
1227            out.push_str(&format!("  {} {}\n", change_type, mc.module_path));
1228            for edge in &mc.imports_added {
1229                let names = if edge.imported_names.is_empty() {
1230                    String::new()
1231                } else {
1232                    format!(" ({})", edge.imported_names.join(", "))
1233                };
1234                out.push_str(&format!("      + import {}{}\n", edge.target_module, names));
1235            }
1236            for edge in &mc.imports_removed {
1237                let names = if edge.imported_names.is_empty() {
1238                    String::new()
1239                } else {
1240                    format!(" ({})", edge.imported_names.join(", "))
1241                };
1242                out.push_str(&format!("      - import {}{}\n", edge.target_module, names));
1243            }
1244        }
1245    }
1246
1247    // L7: Import graph summary
1248    if let Some(ref igs) = report.import_graph_summary {
1249        out.push_str("\nImport Graph Summary:\n");
1250        out.push_str("---------------------\n");
1251        out.push_str(&format!("  Edges in A: {}\n", igs.total_edges_a));
1252        out.push_str(&format!("  Edges in B: {}\n", igs.total_edges_b));
1253        out.push_str(&format!("  Edges added: {}\n", igs.edges_added));
1254        out.push_str(&format!("  Edges removed: {}\n", igs.edges_removed));
1255        out.push_str(&format!(
1256            "  Modules with import changes: {}\n",
1257            igs.modules_with_import_changes
1258        ));
1259    }
1260
1261    // L8: Architecture-level changes
1262    if let Some(ref arch_changes) = report.arch_changes {
1263        out.push_str("\nArchitecture-Level Changes:\n");
1264        out.push_str("---------------------------\n");
1265        for ac in arch_changes {
1266            let change_label = match ac.change_type {
1267                ArchChangeType::LayerMigration => "migration",
1268                ArchChangeType::Added => "added",
1269                ArchChangeType::Removed => "removed",
1270                ArchChangeType::CompositionChanged => "composition changed",
1271                ArchChangeType::CycleIntroduced => "cycle introduced",
1272                ArchChangeType::CycleResolved => "cycle resolved",
1273            };
1274            out.push_str(&format!("  [{}] {}\n", change_label, ac.directory));
1275            if let (Some(ref old), Some(ref new)) = (&ac.old_layer, &ac.new_layer) {
1276                out.push_str(&format!("      {} -> {}\n", old, new));
1277            } else if let Some(ref new) = ac.new_layer {
1278                out.push_str(&format!("      -> {}\n", new));
1279            } else if let Some(ref old) = ac.old_layer {
1280                out.push_str(&format!("      {} ->\n", old));
1281            }
1282            if !ac.migrated_functions.is_empty() {
1283                out.push_str(&format!(
1284                    "      migrated: {}\n",
1285                    ac.migrated_functions.join(", ")
1286                ));
1287            }
1288        }
1289    }
1290
1291    // L8: Architecture diff summary
1292    if let Some(ref arch_summary) = report.arch_summary {
1293        out.push_str("\nArchitecture Summary:\n");
1294        out.push_str("---------------------\n");
1295        out.push_str(&format!(
1296            "  Layer migrations: {}\n",
1297            arch_summary.layer_migrations
1298        ));
1299        out.push_str(&format!(
1300            "  Directories added: {}\n",
1301            arch_summary.directories_added
1302        ));
1303        out.push_str(&format!(
1304            "  Directories removed: {}\n",
1305            arch_summary.directories_removed
1306        ));
1307        out.push_str(&format!(
1308            "  Cycles introduced: {}\n",
1309            arch_summary.cycles_introduced
1310        ));
1311        out.push_str(&format!(
1312            "  Cycles resolved: {}\n",
1313            arch_summary.cycles_resolved
1314        ));
1315        out.push_str(&format!(
1316            "  Stability score: {}\n",
1317            arch_summary.stability_score
1318        ));
1319    }
1320
1321    out
1322}
1323
1324// =============================================================================
1325// Statement-Level Diff (L3) - Zhang-Shasha Tree Edit Distance
1326// =============================================================================
1327
1328/// Statement node kinds per language for tree extraction.
1329fn get_statement_node_kinds(lang: Language) -> &'static [&'static str] {
1330    match lang {
1331        Language::Python => &[
1332            "return_statement",
1333            "if_statement",
1334            "for_statement",
1335            "while_statement",
1336            "expression_statement",
1337            "assert_statement",
1338            "raise_statement",
1339            "try_statement",
1340            "with_statement",
1341            "assignment",
1342            "augmented_assignment",
1343            "delete_statement",
1344            "pass_statement",
1345            "break_statement",
1346            "continue_statement",
1347        ],
1348        Language::TypeScript | Language::JavaScript => &[
1349            "return_statement",
1350            "if_statement",
1351            "for_statement",
1352            "for_in_statement",
1353            "while_statement",
1354            "do_statement",
1355            "expression_statement",
1356            "variable_declaration",
1357            "lexical_declaration",
1358            "throw_statement",
1359            "try_statement",
1360            "switch_statement",
1361            "break_statement",
1362            "continue_statement",
1363        ],
1364        Language::Go => &[
1365            "return_statement",
1366            "if_statement",
1367            "for_statement",
1368            "expression_statement",
1369            "short_var_declaration",
1370            "var_declaration",
1371            "assignment_statement",
1372            "go_statement",
1373            "defer_statement",
1374            "select_statement",
1375            "switch_statement",
1376        ],
1377        Language::Rust => &[
1378            "let_declaration",
1379            "expression_statement",
1380            "return_expression",
1381            "if_expression",
1382            "for_expression",
1383            "while_expression",
1384            "loop_expression",
1385            "match_expression",
1386        ],
1387        Language::Java => &[
1388            "return_statement",
1389            "if_statement",
1390            "for_statement",
1391            "enhanced_for_statement",
1392            "while_statement",
1393            "do_statement",
1394            "expression_statement",
1395            "local_variable_declaration",
1396            "throw_statement",
1397            "try_statement",
1398            "switch_expression",
1399        ],
1400        Language::C | Language::Cpp => &[
1401            "return_statement",
1402            "if_statement",
1403            "for_statement",
1404            "while_statement",
1405            "do_statement",
1406            "expression_statement",
1407            "declaration",
1408            "switch_statement",
1409        ],
1410        Language::Ruby => &[
1411            "return",
1412            "if",
1413            "unless",
1414            "for",
1415            "while",
1416            "until",
1417            "assignment",
1418            "call",
1419            "begin",
1420        ],
1421        Language::Php => &[
1422            "return_statement",
1423            "if_statement",
1424            "for_statement",
1425            "foreach_statement",
1426            "while_statement",
1427            "expression_statement",
1428            "echo_statement",
1429            "throw_expression",
1430            "try_statement",
1431        ],
1432        Language::CSharp => &[
1433            "return_statement",
1434            "if_statement",
1435            "for_statement",
1436            "foreach_statement",
1437            "while_statement",
1438            "expression_statement",
1439            "local_declaration_statement",
1440            "throw_statement",
1441            "try_statement",
1442        ],
1443        Language::Kotlin => &[
1444            "property_declaration",
1445            "assignment",
1446            "if_expression",
1447            "for_statement",
1448            "while_statement",
1449            "do_while_statement",
1450            "return_expression",
1451            "throw_expression",
1452            "try_expression",
1453        ],
1454        Language::Scala => &[
1455            "val_definition",
1456            "var_definition",
1457            "if_expression",
1458            "for_expression",
1459            "while_expression",
1460            "return_expression",
1461            "throw_expression",
1462            "try_expression",
1463            "call_expression",
1464        ],
1465        Language::Swift => &[
1466            "value_binding_pattern",
1467            "if_statement",
1468            "for_in_statement",
1469            "while_statement",
1470            "return_statement",
1471            "throw_statement",
1472            "guard_statement",
1473            "switch_statement",
1474        ],
1475        Language::Elixir => &["call", "if", "case", "cond"],
1476        Language::Lua | Language::Luau => &[
1477            "return_statement",
1478            "if_statement",
1479            "for_statement",
1480            "while_statement",
1481            "variable_declaration",
1482            "assignment_statement",
1483            "function_call",
1484        ],
1485        Language::Ocaml => &[
1486            "let_binding",
1487            "if_expression",
1488            "match_expression",
1489            "application",
1490        ],
1491    }
1492}
1493
/// One node of the ordered, labeled tree fed to the Zhang-Shasha tree edit
/// distance algorithm.
#[derive(Clone, Debug)]
struct LabeledTreeNode {
    /// Label compared between trees, of the form `"<node kind>:<text>"`.
    label: String,
    /// Child nodes, in source order.
    children: Vec<LabeledTreeNode>,
    /// 1-indexed source line the node starts on, used to map edits back to
    /// locations.
    line: u32,
}
1504
/// A tree node flattened into postorder, the layout Zhang-Shasha works on.
#[derive(Clone, Debug)]
struct PostorderNode {
    /// Label carried over from the tree node.
    label: String,
    /// Source line carried over from the tree node.
    line: u32,
    /// Postorder index of this node's leftmost leaf descendant.
    leftmost_leaf: usize,
}
1513
/// A single edit operation produced by Zhang-Shasha. Indices refer to
/// positions in the postorder arrays of tree A and tree B.
#[derive(Clone, Debug)]
enum EditOp {
    /// The node at `index_a` in tree A has no counterpart in tree B.
    Delete { index_a: usize },
    /// The node at `index_b` in tree B has no counterpart in tree A.
    Insert { index_b: usize },
    /// Node `index_a` of tree A is relabeled (updated) into node `index_b`
    /// of tree B.
    Relabel { index_a: usize, index_b: usize },
}
1524
1525/// Build a labeled tree from a tree-sitter function body node.
1526///
1527/// Walks the AST and picks out statement-level nodes, building an ordered
1528/// tree where each statement is a node and nested statements (e.g., inside
1529/// if-bodies) become children.
1530fn build_labeled_tree(node: Node, source: &[u8], statement_kinds: &[&str]) -> LabeledTreeNode {
1531    let label = build_node_label(node, source);
1532    let line = node.start_position().row as u32 + 1;
1533
1534    let mut children = Vec::new();
1535    let mut cursor = node.walk();
1536    for child in node.children(&mut cursor) {
1537        if statement_kinds.contains(&child.kind()) {
1538            // This child is a statement node - add it and recurse into its body
1539            children.push(build_labeled_tree(child, source, statement_kinds));
1540        } else {
1541            // Not a statement node - look deeper for nested statements
1542            let nested = collect_nested_statements(child, source, statement_kinds);
1543            children.extend(nested);
1544        }
1545    }
1546
1547    LabeledTreeNode {
1548        label,
1549        children,
1550        line,
1551    }
1552}
1553
1554/// Collect statement nodes from non-statement intermediate nodes.
1555fn collect_nested_statements(
1556    node: Node,
1557    source: &[u8],
1558    statement_kinds: &[&str],
1559) -> Vec<LabeledTreeNode> {
1560    let mut result = Vec::new();
1561    let mut cursor = node.walk();
1562    for child in node.children(&mut cursor) {
1563        if statement_kinds.contains(&child.kind()) {
1564            result.push(build_labeled_tree(child, source, statement_kinds));
1565        } else {
1566            result.extend(collect_nested_statements(child, source, statement_kinds));
1567        }
1568    }
1569    result
1570}
1571
1572/// Build a label string for a tree-sitter node.
1573///
1574/// Format: "node_kind:significant_tokens" where significant tokens
1575/// are identifiers and operators (not whitespace or delimiters).
1576fn build_node_label(node: Node, source: &[u8]) -> String {
1577    let kind = node.kind();
1578    let text = node.utf8_text(source).unwrap_or("");
1579
1580    // Extract significant tokens: identifiers, operators, literals
1581    // We take just the first line for conciseness and strip whitespace
1582    let first_line = text.lines().next().unwrap_or("").trim();
1583
1584    // Truncate to avoid huge labels
1585    let significant = if first_line.len() > 120 {
1586        &first_line[..120]
1587    } else {
1588        first_line
1589    };
1590
1591    format!("{}:{}", kind, significant)
1592}
1593
1594/// Extract statement-level subtree from a function body node.
1595///
1596/// Finds the function body (block node) and builds a labeled tree
1597/// from the statements within it.
1598fn extract_statement_tree(
1599    func_node: Node,
1600    source: &[u8],
1601    lang: Language,
1602    statement_kinds: &[&str],
1603) -> LabeledTreeNode {
1604    // Find the function body node
1605    let body_node = find_function_body(func_node, lang);
1606
1607    match body_node {
1608        Some(body) => {
1609            // Build a root node representing the function body
1610            let mut children = Vec::new();
1611            let mut cursor = body.walk();
1612            for child in body.children(&mut cursor) {
1613                if statement_kinds.contains(&child.kind()) {
1614                    children.push(build_labeled_tree(child, source, statement_kinds));
1615                } else {
1616                    children.extend(collect_nested_statements(child, source, statement_kinds));
1617                }
1618            }
1619
1620            LabeledTreeNode {
1621                label: format!("body:{}", func_node.kind()),
1622                children,
1623                line: body.start_position().row as u32 + 1,
1624            }
1625        }
1626        None => {
1627            // Fallback: treat the entire function node as the body
1628            build_labeled_tree(func_node, source, statement_kinds)
1629        }
1630    }
1631}
1632
1633/// Find the body/block node within a function definition.
1634fn find_function_body(func_node: Node, lang: Language) -> Option<Node> {
1635    // Try common field names
1636    if let Some(body) = func_node.child_by_field_name("body") {
1637        return Some(body);
1638    }
1639    if let Some(body) = func_node.child_by_field_name("block") {
1640        return Some(body);
1641    }
1642
1643    // Language-specific body detection
1644    let body_kinds = match lang {
1645        Language::Python => &["block"][..],
1646        Language::TypeScript | Language::JavaScript => &["statement_block"],
1647        Language::Go => &["block"],
1648        Language::Rust => &["block"],
1649        Language::Java => &["block"],
1650        Language::C | Language::Cpp => &["compound_statement"],
1651        Language::Ruby => &["body_statement"],
1652        Language::Php => &["compound_statement"],
1653        Language::CSharp => &["block"],
1654        Language::Kotlin => &["function_body"],
1655        Language::Scala => &["block", "indented_block"],
1656        Language::Swift => &["function_body"],
1657        Language::Elixir => &["do_block"],
1658        Language::Lua | Language::Luau => &["block"],
1659        Language::Ocaml => &["let_binding"],
1660    };
1661
1662    let mut cursor = func_node.walk();
1663    let found = func_node
1664        .children(&mut cursor)
1665        .find(|&child| body_kinds.contains(&child.kind()));
1666    found
1667}
1668
1669/// Count total nodes in a labeled tree.
1670fn count_tree_nodes(tree: &LabeledTreeNode) -> usize {
1671    1 + tree.children.iter().map(count_tree_nodes).sum::<usize>()
1672}
1673
1674// =============================================================================
1675// Zhang-Shasha Tree Edit Distance
1676// =============================================================================
1677
1678/// Flatten a labeled tree into postorder traversal, computing leftmost leaf descendants.
1679fn flatten_postorder(tree: &LabeledTreeNode) -> Vec<PostorderNode> {
1680    let mut nodes = Vec::new();
1681    flatten_postorder_recursive(tree, &mut nodes);
1682    nodes
1683}
1684
1685fn flatten_postorder_recursive(tree: &LabeledTreeNode, nodes: &mut Vec<PostorderNode>) -> usize {
1686    if tree.children.is_empty() {
1687        // Leaf node: leftmost leaf is itself
1688        let idx = nodes.len();
1689        nodes.push(PostorderNode {
1690            label: tree.label.clone(),
1691            line: tree.line,
1692            leftmost_leaf: idx,
1693        });
1694        return idx;
1695    }
1696
1697    // Process children first (postorder)
1698    let mut first_child_leftmost = usize::MAX;
1699    for (i, child) in tree.children.iter().enumerate() {
1700        let child_leftmost = flatten_postorder_recursive(child, nodes);
1701        if i == 0 {
1702            first_child_leftmost = child_leftmost;
1703        }
1704    }
1705
1706    // Now add this node
1707    nodes.push(PostorderNode {
1708        label: tree.label.clone(),
1709        line: tree.line,
1710        leftmost_leaf: first_child_leftmost,
1711    });
1712
1713    // The leftmost leaf of this node is the leftmost leaf of its first child
1714    first_child_leftmost
1715}
1716
1717/// Compute keyroots from a postorder traversal.
1718///
1719/// A keyroot is a node whose leftmost-leaf is different from its parent's
1720/// leftmost-leaf, OR the root node. In practice, we collect the rightmost
1721/// node at each unique leftmost-leaf value.
1722fn compute_keyroots(nodes: &[PostorderNode]) -> Vec<usize> {
1723    let n = nodes.len();
1724    if n == 0 {
1725        return Vec::new();
1726    }
1727
1728    // For each unique leftmost leaf value, keep the highest index (rightmost occurrence)
1729    let mut lr_map: HashMap<usize, usize> = HashMap::new();
1730    for (i, node) in nodes.iter().enumerate() {
1731        lr_map.insert(node.leftmost_leaf, i);
1732    }
1733
1734    let mut keyroots: Vec<usize> = lr_map.into_values().collect();
1735    keyroots.sort();
1736    keyroots
1737}
1738
1739/// Run the Zhang-Shasha tree edit distance algorithm.
1740///
1741/// Returns the edit operations (edit script).
1742///
1743/// Costs: Delete = 1, Insert = 1, Relabel = 0 (same label) or 1 (different label).
1744fn zhang_shasha(nodes_a: &[PostorderNode], nodes_b: &[PostorderNode]) -> Vec<EditOp> {
1745    let na = nodes_a.len();
1746    let nb = nodes_b.len();
1747
1748    if na == 0 && nb == 0 {
1749        return Vec::new();
1750    }
1751    if na == 0 {
1752        // All inserts
1753        return (0..nb).map(|j| EditOp::Insert { index_b: j }).collect();
1754    }
1755    if nb == 0 {
1756        // All deletes
1757        return (0..na).map(|i| EditOp::Delete { index_a: i }).collect();
1758    }
1759
1760    let keyroots_a = compute_keyroots(nodes_a);
1761    let keyroots_b = compute_keyroots(nodes_b);
1762
1763    // Tree distance matrix (1-indexed, 0 means empty tree)
1764    let mut td = vec![vec![0usize; nb + 1]; na + 1];
1765    // Track operations: 0=relabel/match, 1=delete, 2=insert, 3=tree-match
1766    let mut td_ops = vec![vec![0u8; nb + 1]; na + 1];
1767
1768    for &kr_a in &keyroots_a {
1769        for &kr_b in &keyroots_b {
1770            let la = nodes_a[kr_a].leftmost_leaf;
1771            let lb = nodes_b[kr_b].leftmost_leaf;
1772
1773            let rows = kr_a - la + 2;
1774            let cols = kr_b - lb + 2;
1775            let mut fd = vec![vec![0usize; cols]; rows];
1776
1777            // Base cases
1778            for i in 1..rows {
1779                fd[i][0] = fd[i - 1][0] + 1;
1780            }
1781            for j in 1..cols {
1782                fd[0][j] = fd[0][j - 1] + 1;
1783            }
1784
1785            for i in 1..rows {
1786                for j in 1..cols {
1787                    let idx_a = la + i - 1;
1788                    let idx_b = lb + j - 1;
1789
1790                    let cost_relabel = if nodes_a[idx_a].label == nodes_b[idx_b].label {
1791                        0
1792                    } else {
1793                        1
1794                    };
1795
1796                    if nodes_a[idx_a].leftmost_leaf == la && nodes_b[idx_b].leftmost_leaf == lb {
1797                        let delete = fd[i - 1][j] + 1;
1798                        let insert = fd[i][j - 1] + 1;
1799                        let relabel = fd[i - 1][j - 1] + cost_relabel;
1800
1801                        if relabel <= delete && relabel <= insert {
1802                            fd[i][j] = relabel;
1803                            td[idx_a + 1][idx_b + 1] = relabel;
1804                            td_ops[idx_a + 1][idx_b + 1] = if cost_relabel == 0 { 0 } else { 3 };
1805                        } else if delete <= insert {
1806                            fd[i][j] = delete;
1807                            td[idx_a + 1][idx_b + 1] = delete;
1808                            td_ops[idx_a + 1][idx_b + 1] = 1;
1809                        } else {
1810                            fd[i][j] = insert;
1811                            td[idx_a + 1][idx_b + 1] = insert;
1812                            td_ops[idx_a + 1][idx_b + 1] = 2;
1813                        }
1814                    } else {
1815                        let p = nodes_a[idx_a].leftmost_leaf - la;
1816                        let q = nodes_b[idx_b].leftmost_leaf - lb;
1817
1818                        let delete = fd[i - 1][j] + 1;
1819                        let insert = fd[i][j - 1] + 1;
1820                        let tree_match = fd[p][q] + td[idx_a + 1][idx_b + 1];
1821
1822                        if tree_match <= delete && tree_match <= insert {
1823                            fd[i][j] = tree_match;
1824                        } else if delete <= insert {
1825                            fd[i][j] = delete;
1826                        } else {
1827                            fd[i][j] = insert;
1828                        }
1829                    }
1830                }
1831            }
1832        }
1833    }
1834
1835    // Extract edit script using sequence alignment on postorder nodes
1836    // guided by the tree distance computation
1837    let mut ops = Vec::new();
1838    derive_edit_ops_dp(nodes_a, nodes_b, &mut ops);
1839    ops
1840}
1841
1842/// Derive edit operations using DP on the postorder sequences.
1843///
1844/// This produces the edit script by sequence-aligning the postorder
1845/// traversals, which captures the essential edit operations.
1846fn derive_edit_ops_dp(nodes_a: &[PostorderNode], nodes_b: &[PostorderNode], ops: &mut Vec<EditOp>) {
1847    let na = nodes_a.len();
1848    let nb = nodes_b.len();
1849
1850    let mut dp = vec![vec![0usize; nb + 1]; na + 1];
1851    let mut choice = vec![vec![0u8; nb + 1]; na + 1];
1852
1853    for i in 1..=na {
1854        dp[i][0] = i;
1855        choice[i][0] = 1;
1856    }
1857    for j in 1..=nb {
1858        dp[0][j] = j;
1859        choice[0][j] = 2;
1860    }
1861
1862    for i in 1..=na {
1863        for j in 1..=nb {
1864            let cost = if nodes_a[i - 1].label == nodes_b[j - 1].label {
1865                0
1866            } else {
1867                1
1868            };
1869
1870            let del = dp[i - 1][j] + 1;
1871            let ins = dp[i][j - 1] + 1;
1872            let sub = dp[i - 1][j - 1] + cost;
1873
1874            if sub <= del && sub <= ins {
1875                dp[i][j] = sub;
1876                choice[i][j] = if cost == 0 { 0 } else { 3 };
1877            } else if del <= ins {
1878                dp[i][j] = del;
1879                choice[i][j] = 1;
1880            } else {
1881                dp[i][j] = ins;
1882                choice[i][j] = 2;
1883            }
1884        }
1885    }
1886
1887    // Backtrack
1888    let mut i = na;
1889    let mut j = nb;
1890    let mut rev_ops = Vec::new();
1891
1892    while i > 0 || j > 0 {
1893        if i > 0 && j > 0 && (choice[i][j] == 0 || choice[i][j] == 3) {
1894            if choice[i][j] == 3 {
1895                rev_ops.push(EditOp::Relabel {
1896                    index_a: i - 1,
1897                    index_b: j - 1,
1898                });
1899            }
1900            i -= 1;
1901            j -= 1;
1902        } else if i > 0 && (j == 0 || choice[i][j] == 1) {
1903            rev_ops.push(EditOp::Delete { index_a: i - 1 });
1904            i -= 1;
1905        } else if j > 0 {
1906            rev_ops.push(EditOp::Insert { index_b: j - 1 });
1907            j -= 1;
1908        }
1909    }
1910
1911    rev_ops.reverse();
1912    ops.extend(rev_ops);
1913}
1914
1915/// Convert Zhang-Shasha edit operations into ASTChange records.
1916fn edit_ops_to_ast_changes(
1917    ops: &[EditOp],
1918    nodes_a: &[PostorderNode],
1919    nodes_b: &[PostorderNode],
1920    file_a: &Path,
1921    file_b: &Path,
1922) -> Vec<ASTChange> {
1923    let mut changes = Vec::new();
1924
1925    for op in ops {
1926        match op {
1927            EditOp::Delete { index_a } => {
1928                let node = &nodes_a[*index_a];
1929                let stmt_kind = node.label.split(':').next().unwrap_or("statement");
1930                changes.push(ASTChange {
1931                    change_type: ChangeType::Delete,
1932                    node_kind: NodeKind::Statement,
1933                    name: Some(stmt_kind.to_string()),
1934                    old_location: Some(Location::new(file_a.display().to_string(), node.line)),
1935                    new_location: None,
1936                    old_text: Some(node.label.clone()),
1937                    new_text: None,
1938                    similarity: None,
1939                    children: None,
1940                    base_changes: None,
1941                });
1942            }
1943            EditOp::Insert { index_b } => {
1944                let node = &nodes_b[*index_b];
1945                let stmt_kind = node.label.split(':').next().unwrap_or("statement");
1946                changes.push(ASTChange {
1947                    change_type: ChangeType::Insert,
1948                    node_kind: NodeKind::Statement,
1949                    name: Some(stmt_kind.to_string()),
1950                    old_location: None,
1951                    new_location: Some(Location::new(file_b.display().to_string(), node.line)),
1952                    old_text: None,
1953                    new_text: Some(node.label.clone()),
1954                    similarity: None,
1955                    children: None,
1956                    base_changes: None,
1957                });
1958            }
1959            EditOp::Relabel { index_a, index_b } => {
1960                let node_a = &nodes_a[*index_a];
1961                let node_b = &nodes_b[*index_b];
1962                let stmt_kind = node_a.label.split(':').next().unwrap_or("statement");
1963                changes.push(ASTChange {
1964                    change_type: ChangeType::Update,
1965                    node_kind: NodeKind::Statement,
1966                    name: Some(stmt_kind.to_string()),
1967                    old_location: Some(Location::new(file_a.display().to_string(), node_a.line)),
1968                    new_location: Some(Location::new(file_b.display().to_string(), node_b.line)),
1969                    old_text: Some(node_a.label.clone()),
1970                    new_text: Some(node_b.label.clone()),
1971                    similarity: None,
1972                    children: None,
1973                    base_changes: None,
1974                });
1975            }
1976        }
1977    }
1978
1979    changes
1980}
1981
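/// Maximum number of labeled-tree nodes per function (as counted by
/// `count_tree_nodes`) above which the statement-level edit script is skipped
/// and only the function-level change is reported.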
1983const STATEMENT_FALLBACK_THRESHOLD: usize = 200;
1984
1985impl DiffArgs {
1986    /// L3 Statement-level diff: Zhang-Shasha tree edit distance within matched functions.
1987    ///
1988    /// Algorithm:
1989    /// 1. Parse both files and extract functions (reusing L4 infrastructure)
1990    /// 2. Match functions by name
1991    /// 3. For each matched pair with different bodies:
1992    ///    a. Extract statement subtrees from tree-sitter AST
1993    ///    b. Build labeled trees from statement nodes
1994    ///    c. Run Zhang-Shasha tree edit distance
1995    ///    d. Convert edit script to ASTChange children
1996    /// 4. For unmatched functions: report as function-level Insert/Delete
1997    fn run_statement_level_diff(&self) -> Result<DiffReport> {
1998        // Detect language
1999        let lang = Language::from_path(&self.file_a).ok_or_else(|| {
2000            let ext = self
2001                .file_a
2002                .extension()
2003                .map(|e| e.to_string_lossy().to_string())
2004                .unwrap_or_else(|| "unknown".to_string());
2005            RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
2006        })?;
2007
2008        // Read file contents
2009        let source_a = fs::read_to_string(&self.file_a)?;
2010        let source_b = fs::read_to_string(&self.file_b)?;
2011
2012        // Parse both files
2013        let pool = ParserPool::new();
2014        let tree_a = pool.parse(&source_a, lang).map_err(|e| {
2015            RemainingError::parse_error(&self.file_a, format!("Failed to parse: {}", e))
2016        })?;
2017        let tree_b = pool.parse(&source_b, lang).map_err(|e| {
2018            RemainingError::parse_error(&self.file_b, format!("Failed to parse: {}", e))
2019        })?;
2020
2021        // Extract function nodes (reuse L4 infrastructure)
2022        let funcs_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
2023        let funcs_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
2024
2025        let statement_kinds = get_statement_node_kinds(lang);
2026
2027        // Build name lookup maps
2028        let map_b: HashMap<&str, (usize, &ExtractedNode)> = funcs_b
2029            .iter()
2030            .enumerate()
2031            .map(|(i, n)| (n.name.as_str(), (i, n)))
2032            .collect();
2033
2034        let mut matched_a: Vec<bool> = vec![false; funcs_a.len()];
2035        let mut matched_b: Vec<bool> = vec![false; funcs_b.len()];
2036        let mut changes = Vec::new();
2037
2038        // Pass 1: Match functions by name and compute statement-level diffs
2039        for (i, func_a) in funcs_a.iter().enumerate() {
2040            if let Some(&(j, func_b)) = map_b.get(func_a.name.as_str()) {
2041                matched_a[i] = true;
2042                matched_b[j] = true;
2043
2044                // Check if bodies differ
2045                if func_a.normalized_body != func_b.normalized_body {
2046                    // Find the function nodes in the parsed trees
2047                    let func_node_a =
2048                        find_function_node_by_line(tree_a.root_node(), func_a.line, lang);
2049                    let func_node_b =
2050                        find_function_node_by_line(tree_b.root_node(), func_b.line, lang);
2051
2052                    let stmt_children = match (func_node_a, func_node_b) {
2053                        (Some(node_a), Some(node_b)) => {
2054                            // Build statement trees
2055                            let tree_a_stmts = extract_statement_tree(
2056                                node_a,
2057                                source_a.as_bytes(),
2058                                lang,
2059                                statement_kinds,
2060                            );
2061                            let tree_b_stmts = extract_statement_tree(
2062                                node_b,
2063                                source_b.as_bytes(),
2064                                lang,
2065                                statement_kinds,
2066                            );
2067
2068                            let count_a = count_tree_nodes(&tree_a_stmts);
2069                            let count_b = count_tree_nodes(&tree_b_stmts);
2070
2071                            // Check fallback threshold
2072                            if count_a > STATEMENT_FALLBACK_THRESHOLD
2073                                || count_b > STATEMENT_FALLBACK_THRESHOLD
2074                            {
2075                                // Fall back to L4-style (no statement children)
2076                                None
2077                            } else {
2078                                // Flatten to postorder and run Zhang-Shasha
2079                                let po_a = flatten_postorder(&tree_a_stmts);
2080                                let po_b = flatten_postorder(&tree_b_stmts);
2081
2082                                let edit_ops = zhang_shasha(&po_a, &po_b);
2083
2084                                if edit_ops.is_empty() {
2085                                    None
2086                                } else {
2087                                    let stmt_changes = edit_ops_to_ast_changes(
2088                                        &edit_ops,
2089                                        &po_a,
2090                                        &po_b,
2091                                        &self.file_a,
2092                                        &self.file_b,
2093                                    );
2094                                    if stmt_changes.is_empty() {
2095                                        None
2096                                    } else {
2097                                        Some(stmt_changes)
2098                                    }
2099                                }
2100                            }
2101                        }
2102                        _ => None,
2103                    };
2104
2105                    changes.push(ASTChange {
2106                        change_type: ChangeType::Update,
2107                        node_kind: func_a.kind,
2108                        name: Some(func_a.name.clone()),
2109                        old_location: Some(Location::with_column(
2110                            self.file_a.display().to_string(),
2111                            func_a.line,
2112                            func_a.column,
2113                        )),
2114                        new_location: Some(Location::with_column(
2115                            self.file_b.display().to_string(),
2116                            func_b.line,
2117                            func_b.column,
2118                        )),
2119                        old_text: Some(func_a.body.clone()),
2120                        new_text: Some(func_b.body.clone()),
2121                        similarity: Some(compute_similarity(
2122                            &func_a.normalized_body,
2123                            &func_b.normalized_body,
2124                        )),
2125                        children: stmt_children,
2126                        base_changes: None,
2127                    });
2128                }
2129            }
2130        }
2131
2132        // Pass 2: Detect renames among unmatched functions
2133        let unmatched_a: Vec<(usize, &ExtractedNode)> = funcs_a
2134            .iter()
2135            .enumerate()
2136            .filter(|(i, _)| !matched_a[*i])
2137            .collect();
2138        let unmatched_b: Vec<(usize, &ExtractedNode)> = funcs_b
2139            .iter()
2140            .enumerate()
2141            .filter(|(i, _)| !matched_b[*i])
2142            .collect();
2143
2144        let mut used_b = vec![false; unmatched_b.len()];
2145
2146        for (_, func_a) in &unmatched_a {
2147            let mut best_match: Option<(usize, f64)> = None;
2148            for (j, (_, func_b)) in unmatched_b.iter().enumerate() {
2149                if used_b[j] || func_a.kind != func_b.kind {
2150                    continue;
2151                }
2152                let sim = compute_similarity(&func_a.normalized_body, &func_b.normalized_body);
2153                if sim >= RENAME_SIMILARITY_THRESHOLD
2154                    && (best_match.is_none() || sim > best_match.unwrap().1)
2155                {
2156                    best_match = Some((j, sim));
2157                }
2158            }
2159
2160            if let Some((j, sim)) = best_match {
2161                let (_, func_b) = unmatched_b[j];
2162                used_b[j] = true;
2163                changes.push(ASTChange {
2164                    change_type: ChangeType::Rename,
2165                    node_kind: func_a.kind,
2166                    name: Some(func_a.name.clone()),
2167                    old_location: Some(Location::with_column(
2168                        self.file_a.display().to_string(),
2169                        func_a.line,
2170                        func_a.column,
2171                    )),
2172                    new_location: Some(Location::with_column(
2173                        self.file_b.display().to_string(),
2174                        func_b.line,
2175                        func_b.column,
2176                    )),
2177                    old_text: Some(func_a.name.clone()),
2178                    new_text: Some(func_b.name.clone()),
2179                    similarity: Some(sim),
2180                    children: None,
2181                    base_changes: None,
2182                });
2183            }
2184        }
2185
2186        // Pass 3: Remaining unmatched in A are Deletes
2187        for (_, func_a) in &unmatched_a {
2188            let is_renamed = changes.iter().any(|c| {
2189                c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&func_a.name)
2190            });
2191            if !is_renamed {
2192                changes.push(ASTChange {
2193                    change_type: ChangeType::Delete,
2194                    node_kind: func_a.kind,
2195                    name: Some(func_a.name.clone()),
2196                    old_location: Some(Location::with_column(
2197                        self.file_a.display().to_string(),
2198                        func_a.line,
2199                        func_a.column,
2200                    )),
2201                    new_location: None,
2202                    old_text: None,
2203                    new_text: None,
2204                    similarity: None,
2205                    children: None,
2206                    base_changes: None,
2207                });
2208            }
2209        }
2210
2211        // Pass 4: Remaining unmatched in B are Inserts
2212        for (j, (_, func_b)) in unmatched_b.iter().enumerate() {
2213            if !used_b[j] {
2214                changes.push(ASTChange {
2215                    change_type: ChangeType::Insert,
2216                    node_kind: func_b.kind,
2217                    name: Some(func_b.name.clone()),
2218                    old_location: None,
2219                    new_location: Some(Location::with_column(
2220                        self.file_b.display().to_string(),
2221                        func_b.line,
2222                        func_b.column,
2223                    )),
2224                    old_text: None,
2225                    new_text: None,
2226                    similarity: None,
2227                    children: None,
2228                    base_changes: None,
2229                });
2230            }
2231        }
2232
2233        // Build summary
2234        let mut summary = DiffSummary::default();
2235        for change in &changes {
2236            summary.total_changes += 1;
2237            if change.change_type != ChangeType::Format {
2238                summary.semantic_changes += 1;
2239            }
2240            match change.change_type {
2241                ChangeType::Insert => summary.inserts += 1,
2242                ChangeType::Delete => summary.deletes += 1,
2243                ChangeType::Update => summary.updates += 1,
2244                ChangeType::Move => summary.moves += 1,
2245                ChangeType::Rename => summary.renames += 1,
2246                ChangeType::Format => summary.formats += 1,
2247                ChangeType::Extract => summary.extracts += 1,
2248                ChangeType::Inline => {}
2249            }
2250        }
2251
2252        // Sort changes
2253        changes.sort_by_key(|c| match c.change_type {
2254            ChangeType::Delete => 0,
2255            ChangeType::Rename => 1,
2256            ChangeType::Update => 2,
2257            ChangeType::Move => 3,
2258            ChangeType::Insert => 4,
2259            _ => 5,
2260        });
2261
2262        Ok(DiffReport {
2263            file_a: self.file_a.display().to_string(),
2264            file_b: self.file_b.display().to_string(),
2265            identical: changes.is_empty(),
2266            changes,
2267            summary: Some(summary),
2268            granularity: DiffGranularity::Statement,
2269            file_changes: None,
2270            module_changes: None,
2271            import_graph_summary: None,
2272            arch_changes: None,
2273            arch_summary: None,
2274        })
2275    }
2276}
2277
2278/// Find a function tree-sitter node by its start line number.
2279fn find_function_node_by_line(root: Node, target_line: u32, lang: Language) -> Option<Node> {
2280    let func_kinds = get_function_node_kinds(lang);
2281    find_function_node_recursive(root, target_line, func_kinds)
2282}
2283
2284fn find_function_node_recursive<'a>(
2285    node: Node<'a>,
2286    target_line: u32,
2287    func_kinds: &[&str],
2288) -> Option<Node<'a>> {
2289    let line = node.start_position().row as u32 + 1;
2290
2291    if func_kinds.contains(&node.kind()) && line == target_line {
2292        return Some(node);
2293    }
2294
2295    let mut cursor = node.walk();
2296    for child in node.children(&mut cursor) {
2297        if let Some(found) = find_function_node_recursive(child, target_line, func_kinds) {
2298            return Some(found);
2299        }
2300    }
2301
2302    None
2303}
2304
2305// =============================================================================
2306// Class-Level Diff (L5)
2307// =============================================================================
2308
/// Information about a class extracted from the AST for class-level (L5) diffing.
///
/// NOTE(review): presumably produced by `extract_class_nodes` and compared by
/// `detect_class_changes` - confirm against those functions.
#[derive(Debug, Clone)]
struct ClassNode {
    /// Class name
    name: String,
    /// Line number where the class starts (1-indexed)
    line: u32,
    /// Line number where the class ends (1-indexed)
    end_line: u32,
    /// Column where the class starts
    column: u32,
    /// Full source text of the class
    body: String,
    /// Normalized form of `body`, used for comparison
    normalized_body: String,
    /// Methods defined within this class
    methods: Vec<ExtractedNode>,
    /// Class-level fields (assignments in the class body)
    fields: Vec<FieldNode>,
    /// Names of the base classes
    bases: Vec<String>,
}
2331
/// A class-level field (class variable assignment), as stored in `ClassNode::fields`.
#[derive(Debug, Clone)]
struct FieldNode {
    /// Field name
    name: String,
    /// Line number of the assignment
    line: u32,
    /// Column of the assignment
    column: u32,
    /// Full source text of the assignment
    body: String,
    /// Normalized form of `body`, used for comparison
    normalized_body: String,
}
2346
2347/// Run a class-level diff between two files.
2348///
2349/// This is the L5 diff algorithm. It extracts classes from both files,
2350/// matches them by name, and then diffs their members (methods, fields, bases).
2351pub fn run_class_diff(file_a: &Path, file_b: &Path, semantic_only: bool) -> Result<DiffReport> {
2352    // Validate files exist
2353    if !file_a.exists() {
2354        return Err(RemainingError::file_not_found(file_a).into());
2355    }
2356    if !file_b.exists() {
2357        return Err(RemainingError::file_not_found(file_b).into());
2358    }
2359
2360    // Detect language from file_a extension
2361    let lang = Language::from_path(file_a).ok_or_else(|| {
2362        let ext = file_a
2363            .extension()
2364            .map(|e| e.to_string_lossy().to_string())
2365            .unwrap_or_else(|| "unknown".to_string());
2366        RemainingError::parse_error(file_a, format!("Unsupported language: .{}", ext))
2367    })?;
2368
2369    // Read file contents
2370    let source_a = fs::read_to_string(file_a)?;
2371    let source_b = fs::read_to_string(file_b)?;
2372
2373    // Parse both files
2374    let pool = ParserPool::new();
2375    let tree_a = pool
2376        .parse(&source_a, lang)
2377        .map_err(|e| RemainingError::parse_error(file_a, format!("Failed to parse file: {}", e)))?;
2378    let tree_b = pool
2379        .parse(&source_b, lang)
2380        .map_err(|e| RemainingError::parse_error(file_b, format!("Failed to parse file: {}", e)))?;
2381
2382    // Extract class information from both files
2383    let classes_a = extract_class_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
2384    let classes_b = extract_class_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
2385
2386    // Detect class-level changes
2387    let changes = detect_class_changes(&classes_a, &classes_b, file_a, file_b, semantic_only);
2388
2389    // Build summary
2390    let mut summary = DiffSummary::default();
2391    for change in &changes {
2392        summary.total_changes += 1;
2393        if change.change_type != ChangeType::Format {
2394            summary.semantic_changes += 1;
2395        }
2396        match change.change_type {
2397            ChangeType::Insert => summary.inserts += 1,
2398            ChangeType::Delete => summary.deletes += 1,
2399            ChangeType::Update => summary.updates += 1,
2400            ChangeType::Move => summary.moves += 1,
2401            ChangeType::Rename => summary.renames += 1,
2402            ChangeType::Format => summary.formats += 1,
2403            ChangeType::Extract => summary.extracts += 1,
2404            ChangeType::Inline => {}
2405        }
2406    }
2407
2408    let report = DiffReport {
2409        file_a: file_a.display().to_string(),
2410        file_b: file_b.display().to_string(),
2411        identical: changes.is_empty(),
2412        changes,
2413        summary: Some(summary),
2414        granularity: DiffGranularity::Class,
2415        file_changes: None,
2416        module_changes: None,
2417        import_graph_summary: None,
2418        arch_changes: None,
2419        arch_summary: None,
2420    };
2421
2422    Ok(report)
2423}
2424
2425/// Run class-level diff across two directories, pairing files by relative path.
2426/// Skips files with unsupported language extensions.
2427fn run_class_diff_directory(dir_a: &Path, dir_b: &Path, semantic_only: bool) -> Result<DiffReport> {
2428    let files_a = collect_source_files(dir_a)?;
2429    let files_b = collect_source_files(dir_b)?;
2430
2431    let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
2432    let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
2433
2434    let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
2435
2436    let mut all_changes = Vec::new();
2437
2438    for rel_path in all_paths {
2439        match (map_a.get(rel_path), map_b.get(rel_path)) {
2440            (Some(path_a), Some(path_b)) => {
2441                // File exists in both -- run class diff, skip on language error
2442                match run_class_diff(path_a, path_b, semantic_only) {
2443                    Ok(sub_report) => all_changes.extend(sub_report.changes),
2444                    Err(_) => continue, // unsupported language, skip
2445                }
2446            }
2447            (None, Some(_)) | (Some(_), None) => {
2448                // Added or removed file -- skip at class level (L6 handles file-level adds/removes)
2449                continue;
2450            }
2451            (None, None) => unreachable!(),
2452        }
2453    }
2454
2455    let mut summary = DiffSummary::default();
2456    for change in &all_changes {
2457        summary.total_changes += 1;
2458        if change.change_type != ChangeType::Format {
2459            summary.semantic_changes += 1;
2460        }
2461        match change.change_type {
2462            ChangeType::Insert => summary.inserts += 1,
2463            ChangeType::Delete => summary.deletes += 1,
2464            ChangeType::Update => summary.updates += 1,
2465            ChangeType::Move => summary.moves += 1,
2466            ChangeType::Rename => summary.renames += 1,
2467            ChangeType::Format => summary.formats += 1,
2468            ChangeType::Extract => summary.extracts += 1,
2469            ChangeType::Inline => {}
2470        }
2471    }
2472
2473    Ok(DiffReport {
2474        file_a: dir_a.display().to_string(),
2475        file_b: dir_b.display().to_string(),
2476        identical: all_changes.is_empty(),
2477        changes: all_changes,
2478        summary: Some(summary),
2479        granularity: DiffGranularity::Class,
2480        file_changes: None,
2481        module_changes: None,
2482        import_graph_summary: None,
2483        arch_changes: None,
2484        arch_summary: None,
2485    })
2486}
2487
2488/// Extract class nodes with their members from the AST.
2489fn extract_class_nodes(root: Node, source: &[u8], lang: Language) -> Vec<ClassNode> {
2490    let mut classes = Vec::new();
2491    let class_kinds = get_class_node_kinds(lang);
2492    let func_kinds = get_function_node_kinds(lang);
2493    let body_kinds = get_class_body_kinds(lang);
2494
2495    extract_class_nodes_recursive(
2496        root,
2497        source,
2498        &mut classes,
2499        lang,
2500        func_kinds,
2501        class_kinds,
2502        body_kinds,
2503    );
2504
2505    // Go: methods are declared at file level with receiver syntax, not inside the struct.
2506    // Scan root-level method_declaration nodes and associate them with their struct.
2507    if lang == Language::Go {
2508        associate_go_receiver_methods(root, source, lang, &mut classes);
2509    }
2510
2511    classes
2512}
2513
2514/// For Go, scan file-level `method_declaration` nodes, parse the receiver type,
2515/// and associate each method with the matching struct's ClassNode.
2516fn associate_go_receiver_methods(
2517    root: Node,
2518    source: &[u8],
2519    lang: Language,
2520    classes: &mut [ClassNode],
2521) {
2522    let source_str = std::str::from_utf8(source).unwrap_or("");
2523    let mut cursor = root.walk();
2524    for child in root.children(&mut cursor) {
2525        if child.kind() != "method_declaration" {
2526            continue;
2527        }
2528        // Extract receiver type name
2529        let receiver_type = match extract_go_receiver_type(child, source) {
2530            Some(name) => name,
2531            None => continue,
2532        };
2533
2534        // Extract method name and build an ExtractedNode
2535        let method_name = match get_function_name(child, lang, source_str) {
2536            Some(name) => name,
2537            None => continue,
2538        };
2539
2540        let params = child
2541            .child_by_field_name("parameters")
2542            .map(|p| node_text(p, source).to_string())
2543            .unwrap_or_default();
2544
2545        let line = child.start_position().row as u32 + 1;
2546        let end_line = child.end_position().row as u32 + 1;
2547        let column = child.start_position().column as u32;
2548        let body = node_text(child, source).to_string();
2549
2550        let extracted =
2551            ExtractedNode::new(method_name, NodeKind::Method, line, end_line, column, body)
2552                .with_params(params)
2553                .with_method_kind();
2554
2555        // Associate with matching struct
2556        for class in classes.iter_mut() {
2557            if class.name == receiver_type {
2558                class.methods.push(extracted);
2559                break;
2560            }
2561        }
2562    }
2563}
2564
2565/// Extract the receiver type name from a Go method_declaration node.
2566///
2567/// Handles both pointer receivers `(f *Foo)` and value receivers `(f Foo)`.
2568/// Returns the bare type name (e.g., "Foo") without the pointer `*`.
2569fn extract_go_receiver_type(method_node: Node, source: &[u8]) -> Option<String> {
2570    // method_declaration -> receiver: parameter_list -> parameter_declaration -> type
2571    let receiver = method_node.child_by_field_name("receiver")?;
2572    let mut recv_cursor = receiver.walk();
2573    for recv_child in receiver.children(&mut recv_cursor) {
2574        if recv_child.kind() == "parameter_declaration" {
2575            if let Some(type_node) = recv_child.child_by_field_name("type") {
2576                return extract_go_type_identifier(type_node, source);
2577            }
2578        }
2579    }
2580    None
2581}
2582
2583/// Recursively extract the type_identifier from a Go type node,
2584/// handling pointer_type wrappers.
2585fn extract_go_type_identifier(type_node: Node, source: &[u8]) -> Option<String> {
2586    match type_node.kind() {
2587        "type_identifier" => Some(node_text(type_node, source).to_string()),
2588        "pointer_type" => {
2589            // pointer_type has a single named child which is the underlying type
2590            let mut cursor = type_node.walk();
2591            for child in type_node.children(&mut cursor) {
2592                if child.is_named() {
2593                    return extract_go_type_identifier(child, source);
2594                }
2595            }
2596            None
2597        }
2598        _ => None,
2599    }
2600}
2601
2602fn extract_class_nodes_recursive(
2603    node: Node,
2604    source: &[u8],
2605    classes: &mut Vec<ClassNode>,
2606    lang: Language,
2607    func_kinds: &[&str],
2608    class_kinds: &[&str],
2609    body_kinds: &[&str],
2610) {
2611    let kind = node.kind();
2612
2613    if class_kinds.contains(&kind) {
2614        if let Some(class_node) = build_class_node(node, source, lang, func_kinds, body_kinds) {
2615            classes.push(class_node);
2616        }
2617        return; // Don't recurse into class children for nested classes at this level
2618    }
2619
2620    for child in node.children(&mut node.walk()) {
2621        extract_class_nodes_recursive(
2622            child,
2623            source,
2624            classes,
2625            lang,
2626            func_kinds,
2627            class_kinds,
2628            body_kinds,
2629        );
2630    }
2631}
2632
2633/// Build a ClassNode from a tree-sitter class node.
2634fn build_class_node(
2635    node: Node,
2636    source: &[u8],
2637    lang: Language,
2638    func_kinds: &[&str],
2639    body_kinds: &[&str],
2640) -> Option<ClassNode> {
2641    // Get class name
2642    let class_name = node
2643        .child_by_field_name("name")
2644        .map(|n| node_text(n, source).to_string())
2645        .or_else(|| {
2646            // Go: type_declaration has no "name" field; the name is in
2647            // the child type_spec node's "name" field.
2648            if lang == Language::Go && node.kind() == "type_declaration" {
2649                let mut cursor = node.walk();
2650                for child in node.children(&mut cursor) {
2651                    if child.kind() == "type_spec" {
2652                        if let Some(name_node) = child.child_by_field_name("name") {
2653                            return Some(node_text(name_node, source).to_string());
2654                        }
2655                    }
2656                }
2657            }
2658            // Fallback: search for first identifier child
2659            let mut cursor = node.walk();
2660            for child in node.children(&mut cursor) {
2661                if child.kind() == "identifier"
2662                    || child.kind() == "type_identifier"
2663                    || child.kind() == "constant"
2664                {
2665                    return Some(node_text(child, source).to_string());
2666                }
2667            }
2668            None
2669        })?;
2670
2671    if class_name.is_empty() {
2672        return None;
2673    }
2674
2675    let line = node.start_position().row as u32 + 1;
2676    let end_line = node.end_position().row as u32 + 1;
2677    let column = node.start_position().column as u32;
2678    let body = node_text(node, source).to_string();
2679    let normalized_body = normalize_body(&body);
2680
2681    // Extract base classes
2682    let bases = extract_bases(node, source, lang);
2683
2684    // Extract methods and fields from class body
2685    let mut methods = Vec::new();
2686    let mut fields = Vec::new();
2687
2688    for child in node.children(&mut node.walk()) {
2689        if body_kinds.contains(&child.kind()) {
2690            extract_class_members(child, source, lang, func_kinds, &mut methods, &mut fields);
2691        }
2692    }
2693
2694    Some(ClassNode {
2695        name: class_name,
2696        line,
2697        end_line,
2698        column,
2699        body,
2700        normalized_body,
2701        methods,
2702        fields,
2703        bases,
2704    })
2705}
2706
2707/// Extract base classes from a class definition node.
2708fn extract_bases(node: Node, source: &[u8], lang: Language) -> Vec<String> {
2709    let mut bases = Vec::new();
2710
2711    match lang {
2712        Language::Python => {
2713            // Python: class Foo(Base1, Base2):
2714            // Look for argument_list or superclasses
2715            if let Some(superclasses) = node.child_by_field_name("superclasses") {
2716                for child in superclasses.children(&mut superclasses.walk()) {
2717                    let text = node_text(child, source).trim().to_string();
2718                    if !text.is_empty() && text != "(" && text != ")" && text != "," {
2719                        bases.push(text);
2720                    }
2721                }
2722            }
2723        }
2724        _ => {
2725            // For other languages, base extraction would be different
2726            // For now, only Python is fully supported for class-level diff
2727        }
2728    }
2729
2730    bases
2731}
2732
2733/// Extract methods and fields from a class body.
2734fn extract_class_members(
2735    body_node: Node,
2736    source: &[u8],
2737    lang: Language,
2738    func_kinds: &[&str],
2739    methods: &mut Vec<ExtractedNode>,
2740    fields: &mut Vec<FieldNode>,
2741) {
2742    for child in body_node.children(&mut body_node.walk()) {
2743        let kind = child.kind();
2744
2745        // Extract methods
2746        if func_kinds.contains(&kind) {
2747            let source_str = std::str::from_utf8(source).unwrap_or("");
2748            if let Some(func_name) = get_function_name(child, lang, source_str) {
2749                let params = child
2750                    .child_by_field_name("parameters")
2751                    .or_else(|| child.child_by_field_name("formal_parameters"))
2752                    .map(|p| node_text(p, source).to_string())
2753                    .unwrap_or_default();
2754
2755                let line = child.start_position().row as u32 + 1;
2756                let end_line = child.end_position().row as u32 + 1;
2757                let column = child.start_position().column as u32;
2758                let body = node_text(child, source).to_string();
2759
2760                let extracted =
2761                    ExtractedNode::new(func_name, NodeKind::Method, line, end_line, column, body)
2762                        .with_params(params)
2763                        .with_method_kind();
2764
2765                methods.push(extracted);
2766            }
2767        }
2768        // Extract fields (Python: expression_statement with assignment)
2769        else if kind == "expression_statement" {
2770            if let Some(field) = extract_field_from_statement(child, source, lang) {
2771                fields.push(field);
2772            }
2773        }
2774    }
2775}
2776
2777/// Extract a field from a statement node (e.g., `timeout = 30`).
2778fn extract_field_from_statement(node: Node, source: &[u8], _lang: Language) -> Option<FieldNode> {
2779    // Look for assignment in this expression_statement
2780    for child in node.children(&mut node.walk()) {
2781        if child.kind() == "assignment" {
2782            // Get the left side (field name)
2783            if let Some(left) = child.child_by_field_name("left") {
2784                let name = node_text(left, source).trim().to_string();
2785                if !name.is_empty() && !name.contains('.') {
2786                    // Skip `self.x = ...` (those are instance vars, not class fields)
2787                    let line = node.start_position().row as u32 + 1;
2788                    let column = node.start_position().column as u32;
2789                    let body = node_text(node, source).to_string();
2790                    let normalized_body = body.trim().to_string();
2791
2792                    return Some(FieldNode {
2793                        name,
2794                        line,
2795                        column,
2796                        body,
2797                        normalized_body,
2798                    });
2799                }
2800            }
2801        }
2802    }
2803    None
2804}
2805
2806/// Detect changes between two sets of class nodes.
2807fn detect_class_changes(
2808    classes_a: &[ClassNode],
2809    classes_b: &[ClassNode],
2810    file_a: &Path,
2811    file_b: &Path,
2812    _semantic_only: bool,
2813) -> Vec<ASTChange> {
2814    let mut changes = Vec::new();
2815
2816    // Build lookup maps by name
2817    let map_b: HashMap<&str, &ClassNode> = classes_b.iter().map(|c| (c.name.as_str(), c)).collect();
2818
2819    // Track which classes have been matched
2820    let mut matched_a: Vec<bool> = vec![false; classes_a.len()];
2821    let mut matched_b: Vec<bool> = vec![false; classes_b.len()];
2822
2823    // First pass: exact name matches
2824    for (i, class_a) in classes_a.iter().enumerate() {
2825        let _ = class_a.end_line;
2826        let _ = &class_a.body;
2827        let _ = &class_a.normalized_body;
2828        if let Some(&class_b) = map_b.get(class_a.name.as_str()) {
2829            matched_a[i] = true;
2830            if let Some(j) = classes_b.iter().position(|c| c.name == class_a.name) {
2831                matched_b[j] = true;
2832            }
2833
2834            // Diff the matched pair
2835            if let Some(change) = diff_class_pair(class_a, class_b, file_a, file_b) {
2836                changes.push(change);
2837            }
2838        }
2839    }
2840
2841    // Collect unmatched classes
2842    let unmatched_a: Vec<(usize, &ClassNode)> = classes_a
2843        .iter()
2844        .enumerate()
2845        .filter(|(i, _)| !matched_a[*i])
2846        .collect();
2847    let unmatched_b: Vec<(usize, &ClassNode)> = classes_b
2848        .iter()
2849        .enumerate()
2850        .filter(|(i, _)| !matched_b[*i])
2851        .collect();
2852
2853    // Second pass: detect renames (same member signatures, different name)
2854    let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
2855
2856    for (_, class_a) in &unmatched_a {
2857        let mut best_match: Option<(usize, f64)> = None;
2858
2859        for (j, (_, class_b)) in unmatched_b.iter().enumerate() {
2860            if used_b[j] {
2861                continue;
2862            }
2863
2864            let similarity = compute_class_similarity(class_a, class_b);
2865            if similarity >= RENAME_SIMILARITY_THRESHOLD
2866                && (best_match.is_none() || similarity > best_match.unwrap().1)
2867            {
2868                best_match = Some((j, similarity));
2869            }
2870        }
2871
2872        if let Some((j, similarity)) = best_match {
2873            let (_, class_b) = unmatched_b[j];
2874            used_b[j] = true;
2875
2876            changes.push(ASTChange {
2877                change_type: ChangeType::Rename,
2878                node_kind: NodeKind::Class,
2879                name: Some(class_a.name.clone()),
2880                old_location: Some(Location::with_column(
2881                    file_a.display().to_string(),
2882                    class_a.line,
2883                    class_a.column,
2884                )),
2885                new_location: Some(Location::with_column(
2886                    file_b.display().to_string(),
2887                    class_b.line,
2888                    class_b.column,
2889                )),
2890                old_text: Some(class_a.name.clone()),
2891                new_text: Some(class_b.name.clone()),
2892                similarity: Some(similarity),
2893                children: None,
2894                base_changes: None,
2895            });
2896        }
2897    }
2898
2899    // Remaining unmatched in A are deletes
2900    for (_, class_a) in &unmatched_a {
2901        let is_renamed = changes
2902            .iter()
2903            .any(|c| c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&class_a.name));
2904        if !is_renamed {
2905            changes.push(ASTChange {
2906                change_type: ChangeType::Delete,
2907                node_kind: NodeKind::Class,
2908                name: Some(class_a.name.clone()),
2909                old_location: Some(Location::with_column(
2910                    file_a.display().to_string(),
2911                    class_a.line,
2912                    class_a.column,
2913                )),
2914                new_location: None,
2915                old_text: None,
2916                new_text: None,
2917                similarity: None,
2918                children: None,
2919                base_changes: None,
2920            });
2921        }
2922    }
2923
2924    // Remaining unmatched in B are inserts
2925    for (j, (_, class_b)) in unmatched_b.iter().enumerate() {
2926        if !used_b[j] {
2927            changes.push(ASTChange {
2928                change_type: ChangeType::Insert,
2929                node_kind: NodeKind::Class,
2930                name: Some(class_b.name.clone()),
2931                old_location: None,
2932                new_location: Some(Location::with_column(
2933                    file_b.display().to_string(),
2934                    class_b.line,
2935                    class_b.column,
2936                )),
2937                old_text: None,
2938                new_text: None,
2939                similarity: None,
2940                children: None,
2941                base_changes: None,
2942            });
2943        }
2944    }
2945
2946    // Sort changes: deletes first, then renames, updates, inserts
2947    changes.sort_by_key(|c| match c.change_type {
2948        ChangeType::Delete => 0,
2949        ChangeType::Rename => 1,
2950        ChangeType::Update => 2,
2951        ChangeType::Move => 3,
2952        ChangeType::Insert => 4,
2953        _ => 5,
2954    });
2955
2956    changes
2957}
2958
2959/// Diff two matched classes and produce an ASTChange if they differ.
2960fn diff_class_pair(
2961    class_a: &ClassNode,
2962    class_b: &ClassNode,
2963    file_a: &Path,
2964    file_b: &Path,
2965) -> Option<ASTChange> {
2966    let mut children = Vec::new();
2967    let mut has_changes = false;
2968
2969    // 1. Diff methods
2970    diff_methods(
2971        &class_a.methods,
2972        &class_b.methods,
2973        file_a,
2974        file_b,
2975        &mut children,
2976    );
2977
2978    // 2. Diff fields
2979    diff_fields(
2980        &class_a.fields,
2981        &class_b.fields,
2982        file_a,
2983        file_b,
2984        &mut children,
2985    );
2986
2987    // 3. Diff base classes
2988    let base_changes = diff_bases(&class_a.bases, &class_b.bases);
2989
2990    if !children.is_empty() {
2991        has_changes = true;
2992    }
2993    if base_changes.is_some() {
2994        has_changes = true;
2995    }
2996
2997    if !has_changes {
2998        return None; // Classes are identical
2999    }
3000
3001    Some(ASTChange {
3002        change_type: ChangeType::Update,
3003        node_kind: NodeKind::Class,
3004        name: Some(class_a.name.clone()),
3005        old_location: Some(Location::with_column(
3006            file_a.display().to_string(),
3007            class_a.line,
3008            class_a.column,
3009        )),
3010        new_location: Some(Location::with_column(
3011            file_b.display().to_string(),
3012            class_b.line,
3013            class_b.column,
3014        )),
3015        old_text: None,
3016        new_text: None,
3017        similarity: None,
3018        children: if children.is_empty() {
3019            None
3020        } else {
3021            Some(children)
3022        },
3023        base_changes,
3024    })
3025}
3026
3027/// Diff methods between two matched classes.
3028fn diff_methods(
3029    methods_a: &[ExtractedNode],
3030    methods_b: &[ExtractedNode],
3031    file_a: &Path,
3032    file_b: &Path,
3033    children: &mut Vec<ASTChange>,
3034) {
3035    let map_b: HashMap<&str, &ExtractedNode> =
3036        methods_b.iter().map(|m| (m.name.as_str(), m)).collect();
3037
3038    let mut matched_a: Vec<bool> = vec![false; methods_a.len()];
3039    let mut matched_b: Vec<bool> = vec![false; methods_b.len()];
3040
3041    // Exact name match
3042    for (i, method_a) in methods_a.iter().enumerate() {
3043        if let Some(&method_b) = map_b.get(method_a.name.as_str()) {
3044            matched_a[i] = true;
3045            if let Some(j) = methods_b.iter().position(|m| m.name == method_a.name) {
3046                matched_b[j] = true;
3047            }
3048
3049            // Check if body changed
3050            if method_a.normalized_body != method_b.normalized_body {
3051                children.push(ASTChange {
3052                    change_type: ChangeType::Update,
3053                    node_kind: NodeKind::Method,
3054                    name: Some(method_a.name.clone()),
3055                    old_location: Some(Location::with_column(
3056                        file_a.display().to_string(),
3057                        method_a.line,
3058                        method_a.column,
3059                    )),
3060                    new_location: Some(Location::with_column(
3061                        file_b.display().to_string(),
3062                        method_b.line,
3063                        method_b.column,
3064                    )),
3065                    old_text: None,
3066                    new_text: None,
3067                    similarity: Some(compute_similarity(
3068                        &method_a.normalized_body,
3069                        &method_b.normalized_body,
3070                    )),
3071                    children: None,
3072                    base_changes: None,
3073                });
3074            }
3075        }
3076    }
3077
3078    // Collect unmatched
3079    let unmatched_a: Vec<&ExtractedNode> = methods_a
3080        .iter()
3081        .enumerate()
3082        .filter(|(i, _)| !matched_a[*i])
3083        .map(|(_, m)| m)
3084        .collect();
3085    let unmatched_b: Vec<&ExtractedNode> = methods_b
3086        .iter()
3087        .enumerate()
3088        .filter(|(i, _)| !matched_b[*i])
3089        .map(|(_, m)| m)
3090        .collect();
3091
3092    // Rename detection among unmatched methods
3093    let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
3094
3095    for method_a in &unmatched_a {
3096        let mut best_match: Option<(usize, f64)> = None;
3097
3098        for (j, method_b) in unmatched_b.iter().enumerate() {
3099            if used_b[j] {
3100                continue;
3101            }
3102            let similarity =
3103                compute_similarity(&method_a.normalized_body, &method_b.normalized_body);
3104            if similarity >= RENAME_SIMILARITY_THRESHOLD
3105                && (best_match.is_none() || similarity > best_match.unwrap().1)
3106            {
3107                best_match = Some((j, similarity));
3108            }
3109        }
3110
3111        if let Some((j, similarity)) = best_match {
3112            let method_b = unmatched_b[j];
3113            used_b[j] = true;
3114
3115            children.push(ASTChange {
3116                change_type: ChangeType::Rename,
3117                node_kind: NodeKind::Method,
3118                name: Some(method_a.name.clone()),
3119                old_location: Some(Location::with_column(
3120                    file_a.display().to_string(),
3121                    method_a.line,
3122                    method_a.column,
3123                )),
3124                new_location: Some(Location::with_column(
3125                    file_b.display().to_string(),
3126                    method_b.line,
3127                    method_b.column,
3128                )),
3129                old_text: Some(method_a.name.clone()),
3130                new_text: Some(method_b.name.clone()),
3131                similarity: Some(similarity),
3132                children: None,
3133                base_changes: None,
3134            });
3135        }
3136    }
3137
3138    // Remaining unmatched in A are deletes
3139    for method_a in &unmatched_a {
3140        let is_renamed = children.iter().any(|c| {
3141            c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&method_a.name)
3142        });
3143        if !is_renamed {
3144            children.push(ASTChange {
3145                change_type: ChangeType::Delete,
3146                node_kind: NodeKind::Method,
3147                name: Some(method_a.name.clone()),
3148                old_location: Some(Location::with_column(
3149                    file_a.display().to_string(),
3150                    method_a.line,
3151                    method_a.column,
3152                )),
3153                new_location: None,
3154                old_text: None,
3155                new_text: None,
3156                similarity: None,
3157                children: None,
3158                base_changes: None,
3159            });
3160        }
3161    }
3162
3163    // Remaining unmatched in B are inserts
3164    for (j, method_b) in unmatched_b.iter().enumerate() {
3165        if !used_b[j] {
3166            children.push(ASTChange {
3167                change_type: ChangeType::Insert,
3168                node_kind: NodeKind::Method,
3169                name: Some(method_b.name.clone()),
3170                old_location: None,
3171                new_location: Some(Location::with_column(
3172                    file_b.display().to_string(),
3173                    method_b.line,
3174                    method_b.column,
3175                )),
3176                old_text: None,
3177                new_text: None,
3178                similarity: None,
3179                children: None,
3180                base_changes: None,
3181            });
3182        }
3183    }
3184}
3185
3186/// Diff fields between two matched classes.
3187fn diff_fields(
3188    fields_a: &[FieldNode],
3189    fields_b: &[FieldNode],
3190    file_a: &Path,
3191    file_b: &Path,
3192    children: &mut Vec<ASTChange>,
3193) {
3194    let map_b: HashMap<&str, &FieldNode> = fields_b.iter().map(|f| (f.name.as_str(), f)).collect();
3195
3196    let mut matched_a: Vec<bool> = vec![false; fields_a.len()];
3197    let mut matched_b: Vec<bool> = vec![false; fields_b.len()];
3198
3199    // Exact name match
3200    for (i, field_a) in fields_a.iter().enumerate() {
3201        if let Some(&field_b) = map_b.get(field_a.name.as_str()) {
3202            matched_a[i] = true;
3203            if let Some(j) = fields_b.iter().position(|f| f.name == field_a.name) {
3204                matched_b[j] = true;
3205            }
3206
3207            // Check if value changed
3208            if field_a.normalized_body != field_b.normalized_body {
3209                children.push(ASTChange {
3210                    change_type: ChangeType::Update,
3211                    node_kind: NodeKind::Field,
3212                    name: Some(field_a.name.clone()),
3213                    old_location: Some(Location::with_column(
3214                        file_a.display().to_string(),
3215                        field_a.line,
3216                        field_a.column,
3217                    )),
3218                    new_location: Some(Location::with_column(
3219                        file_b.display().to_string(),
3220                        field_b.line,
3221                        field_b.column,
3222                    )),
3223                    old_text: Some(field_a.body.trim().to_string()),
3224                    new_text: Some(field_b.body.trim().to_string()),
3225                    similarity: None,
3226                    children: None,
3227                    base_changes: None,
3228                });
3229            }
3230        }
3231    }
3232
3233    // Remaining unmatched in A are deletes
3234    for (i, field_a) in fields_a.iter().enumerate() {
3235        if !matched_a[i] {
3236            children.push(ASTChange {
3237                change_type: ChangeType::Delete,
3238                node_kind: NodeKind::Field,
3239                name: Some(field_a.name.clone()),
3240                old_location: Some(Location::with_column(
3241                    file_a.display().to_string(),
3242                    field_a.line,
3243                    field_a.column,
3244                )),
3245                new_location: None,
3246                old_text: None,
3247                new_text: None,
3248                similarity: None,
3249                children: None,
3250                base_changes: None,
3251            });
3252        }
3253    }
3254
3255    // Remaining unmatched in B are inserts
3256    for (j, field_b) in fields_b.iter().enumerate() {
3257        if !matched_b[j] {
3258            children.push(ASTChange {
3259                change_type: ChangeType::Insert,
3260                node_kind: NodeKind::Field,
3261                name: Some(field_b.name.clone()),
3262                old_location: None,
3263                new_location: Some(Location::with_column(
3264                    file_b.display().to_string(),
3265                    field_b.line,
3266                    field_b.column,
3267                )),
3268                old_text: None,
3269                new_text: None,
3270                similarity: None,
3271                children: None,
3272                base_changes: None,
3273            });
3274        }
3275    }
3276}
3277
3278/// Diff base classes between two matched classes.
3279fn diff_bases(bases_a: &[String], bases_b: &[String]) -> Option<BaseChanges> {
3280    let set_a: std::collections::HashSet<&String> = bases_a.iter().collect();
3281    let set_b: std::collections::HashSet<&String> = bases_b.iter().collect();
3282
3283    let added: Vec<String> = set_b.difference(&set_a).map(|s| (*s).clone()).collect();
3284    let removed: Vec<String> = set_a.difference(&set_b).map(|s| (*s).clone()).collect();
3285
3286    if added.is_empty() && removed.is_empty() {
3287        None
3288    } else {
3289        Some(BaseChanges { added, removed })
3290    }
3291}
3292
3293/// Compute similarity between two classes based on their member signatures.
3294fn compute_class_similarity(class_a: &ClassNode, class_b: &ClassNode) -> f64 {
3295    // Collect method names + normalized bodies
3296    let method_sigs_a: std::collections::HashSet<String> = class_a
3297        .methods
3298        .iter()
3299        .map(|m| format!("{}:{}", m.name, m.normalized_body))
3300        .collect();
3301    let method_sigs_b: std::collections::HashSet<String> = class_b
3302        .methods
3303        .iter()
3304        .map(|m| format!("{}:{}", m.name, m.normalized_body))
3305        .collect();
3306
3307    let field_sigs_a: std::collections::HashSet<String> = class_a
3308        .fields
3309        .iter()
3310        .map(|f| f.normalized_body.clone())
3311        .collect();
3312    let field_sigs_b: std::collections::HashSet<String> = class_b
3313        .fields
3314        .iter()
3315        .map(|f| f.normalized_body.clone())
3316        .collect();
3317
3318    // Combined Jaccard similarity
3319    let all_a: std::collections::HashSet<&String> =
3320        method_sigs_a.iter().chain(field_sigs_a.iter()).collect();
3321    let all_b: std::collections::HashSet<&String> =
3322        method_sigs_b.iter().chain(field_sigs_b.iter()).collect();
3323
3324    if all_a.is_empty() && all_b.is_empty() {
3325        // Both empty classes - consider identical
3326        return 1.0;
3327    }
3328
3329    let intersection = all_a.intersection(&all_b).count();
3330    let union = all_a.union(&all_b).count();
3331
3332    if union == 0 {
3333        0.0
3334    } else {
3335        intersection as f64 / union as f64
3336    }
3337}
3338
3339// =============================================================================
3340// L6: File-Level Diff
3341// =============================================================================
3342
/// File extensions (without the leading dot) that the directory walker
/// treats as source files. Matching is exact and case-sensitive.
const SOURCE_EXTENSIONS: &[&str] = &[
    "py", "rs", // Python, Rust
    "ts", "tsx", "js", "jsx", // TypeScript / JavaScript
    "go", "java", // Go, Java
    "c", "h", "cpp", "hpp", "cc", "cxx", // C / C++
    "rb", "php", "cs", "kt", "scala", "swift", // Ruby, PHP, C#, Kotlin, Scala, Swift
    "ex", "exs", // Elixir
    "lua", // Lua
    "ml", "mli", // OCaml
    "luau", // Luau
];
3348
3349/// Walk a directory and collect source files with their relative paths.
3350fn collect_source_files(root: &Path) -> Result<Vec<(String, PathBuf)>> {
3351    let mut files = Vec::new();
3352    collect_source_files_recursive(root, root, &mut files)?;
3353    files.sort_by(|a, b| a.0.cmp(&b.0));
3354    Ok(files)
3355}
3356
3357fn collect_source_files_recursive(
3358    root: &Path,
3359    current: &Path,
3360    files: &mut Vec<(String, PathBuf)>,
3361) -> Result<()> {
3362    for entry in fs::read_dir(current)? {
3363        let entry = entry?;
3364        let path = entry.path();
3365        if path.is_dir() {
3366            collect_source_files_recursive(root, &path, files)?;
3367        } else if path.is_file() {
3368            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
3369                if SOURCE_EXTENSIONS.contains(&ext) {
3370                    let rel = path
3371                        .strip_prefix(root)
3372                        .unwrap_or(&path)
3373                        .to_string_lossy()
3374                        .replace('\\', "/");
3375                    files.push((rel, path));
3376                }
3377            }
3378        }
3379    }
3380    Ok(())
3381}
3382
3383/// Compute a structural fingerprint for a source file.
3384///
3385/// The fingerprint is a hash of the sorted list of function/class signatures
3386/// extracted via tree-sitter. Two files with the same structural definitions
3387/// (regardless of whitespace/comments) produce the same fingerprint.
3388fn compute_structural_fingerprint(path: &Path) -> Result<(u64, Vec<String>)> {
3389    let lang = match Language::from_path(path) {
3390        Some(l) => l,
3391        None => {
3392            // Fallback: hash the raw content for unsupported languages
3393            let content = fs::read_to_string(path)?;
3394            let mut hasher = std::collections::hash_map::DefaultHasher::new();
3395            content.hash(&mut hasher);
3396            return Ok((hasher.finish(), vec![]));
3397        }
3398    };
3399
3400    let source = fs::read_to_string(path)?;
3401    let pool = ParserPool::new();
3402    let tree = match pool.parse(&source, lang) {
3403        Ok(t) => t,
3404        Err(_) => {
3405            // Parse failure: hash raw content
3406            let mut hasher = std::collections::hash_map::DefaultHasher::new();
3407            source.hash(&mut hasher);
3408            return Ok((hasher.finish(), vec![]));
3409        }
3410    };
3411
3412    let nodes = extract_nodes(tree.root_node(), source.as_bytes(), lang);
3413
3414    // Build sorted list of signatures: "kind:name(params)|body_hash"
3415    // We include a hash of the normalized body so that body-only changes
3416    // (same name/params but different implementation) alter the fingerprint.
3417    let mut signatures: Vec<String> = nodes
3418        .iter()
3419        .map(|n| {
3420            let kind = match n.kind {
3421                NodeKind::Function => "fn",
3422                NodeKind::Class => "class",
3423                NodeKind::Method => "method",
3424                NodeKind::Field => "field",
3425                _ => "other",
3426            };
3427            let sig = if n.params.is_empty() {
3428                format!("{}:{}", kind, n.name)
3429            } else {
3430                format!("{}:{}({})", kind, n.name, n.params)
3431            };
3432            // Append a body hash so body-only changes are detected
3433            let mut body_hasher = std::collections::hash_map::DefaultHasher::new();
3434            n.normalized_body.hash(&mut body_hasher);
3435            format!("{}|{}", sig, body_hasher.finish())
3436        })
3437        .collect();
3438    signatures.sort();
3439
3440    let mut hasher = std::collections::hash_map::DefaultHasher::new();
3441    for sig in &signatures {
3442        sig.hash(&mut hasher);
3443    }
3444    let fingerprint = hasher.finish();
3445
3446    Ok((fingerprint, signatures))
3447}
3448
3449/// Run L6 file-level diff between two directories.
3450fn run_file_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3451    let files_a = collect_source_files(dir_a)?;
3452    let files_b = collect_source_files(dir_b)?;
3453
3454    // Build maps: relative_path -> full_path
3455    let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
3456    let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
3457
3458    let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
3459
3460    let mut file_changes = Vec::new();
3461    let mut has_any_change = false;
3462
3463    for rel_path in all_paths {
3464        match (map_a.get(rel_path), map_b.get(rel_path)) {
3465            (Some(path_a), Some(path_b)) => {
3466                // File exists in both directories
3467                let (fp_a, sigs_a) = compute_structural_fingerprint(path_a)?;
3468                let (fp_b, sigs_b) = compute_structural_fingerprint(path_b)?;
3469
3470                if fp_a == fp_b {
3471                    // Identical structure - skip or include as no-change
3472                    // (tests filter these out anyway)
3473                } else {
3474                    has_any_change = true;
3475                    // Find which signatures differ
3476                    let set_a: HashSet<&String> = sigs_a.iter().collect();
3477                    let set_b: HashSet<&String> = sigs_b.iter().collect();
3478                    let changed: Vec<String> = set_a
3479                        .symmetric_difference(&set_b)
3480                        .map(|s| (*s).clone())
3481                        .collect();
3482
3483                    file_changes.push(FileLevelChange {
3484                        relative_path: rel_path.to_string(),
3485                        change_type: ChangeType::Update,
3486                        old_fingerprint: Some(fp_a),
3487                        new_fingerprint: Some(fp_b),
3488                        signature_changes: if changed.is_empty() {
3489                            None
3490                        } else {
3491                            Some(changed)
3492                        },
3493                    });
3494                }
3495            }
3496            (None, Some(path_b)) => {
3497                // Added file
3498                has_any_change = true;
3499                let (fp_b, _) = compute_structural_fingerprint(path_b)?;
3500                file_changes.push(FileLevelChange {
3501                    relative_path: rel_path.to_string(),
3502                    change_type: ChangeType::Insert,
3503                    old_fingerprint: None,
3504                    new_fingerprint: Some(fp_b),
3505                    signature_changes: None,
3506                });
3507            }
3508            (Some(path_a), None) => {
3509                // Removed file
3510                has_any_change = true;
3511                let (fp_a, _) = compute_structural_fingerprint(path_a)?;
3512                file_changes.push(FileLevelChange {
3513                    relative_path: rel_path.to_string(),
3514                    change_type: ChangeType::Delete,
3515                    old_fingerprint: Some(fp_a),
3516                    new_fingerprint: None,
3517                    signature_changes: None,
3518                });
3519            }
3520            (None, None) => unreachable!(),
3521        }
3522    }
3523
3524    Ok(DiffReport {
3525        file_a: dir_a.display().to_string(),
3526        file_b: dir_b.display().to_string(),
3527        identical: !has_any_change,
3528        changes: Vec::new(),
3529        summary: None,
3530        granularity: DiffGranularity::File,
3531        file_changes: Some(file_changes),
3532        module_changes: None,
3533        import_graph_summary: None,
3534        arch_changes: None,
3535        arch_summary: None,
3536    })
3537}
3538
3539// =============================================================================
3540// L7: Module-Level Diff
3541// =============================================================================
3542
/// One import statement, as recorded while building the import graph.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct InternalImportEdge {
    /// Path of the importing file, relative to the directory being scanned.
    source_file: String,
    /// Module named by the import statement.
    target_module: String,
    /// Individual names pulled from `target_module`; empty for a plain
    /// whole-module import.
    imported_names: Vec<String>,
}
3550
3551/// Parse Python import statements from a file using regex.
3552///
3553/// Recognizes:
3554/// - `from X import Y, Z`
3555/// - `import X`
3556fn parse_python_imports(source: &str, relative_path: &str) -> Vec<InternalImportEdge> {
3557    let mut edges = Vec::new();
3558
3559    // Match "from X import Y, Z"
3560    let from_re = Regex::new(r"(?m)^(?:\s*)from\s+([\w.]+)\s+import\s+(.+)$").unwrap();
3561    for cap in from_re.captures_iter(source) {
3562        let target = cap[1].to_string();
3563        let names_str = &cap[2];
3564        let names: Vec<String> = names_str
3565            .split(',')
3566            .map(|n| n.trim().to_string())
3567            .filter(|n| !n.is_empty())
3568            .collect();
3569        edges.push(InternalImportEdge {
3570            source_file: relative_path.to_string(),
3571            target_module: target,
3572            imported_names: names,
3573        });
3574    }
3575
3576    // Match "import X" (but not "from X import Y" which is already handled)
3577    let import_re = Regex::new(r"(?m)^(?:\s*)import\s+([\w.]+)$").unwrap();
3578    for cap in import_re.captures_iter(source) {
3579        let target = cap[1].to_string();
3580        edges.push(InternalImportEdge {
3581            source_file: relative_path.to_string(),
3582            target_module: target,
3583            imported_names: vec![],
3584        });
3585    }
3586
3587    edges
3588}
3589
3590/// Parse imports for a single file using CallGraphLanguageSupport.
3591///
3592/// Returns `Some(edges)` if a handler could parse the file, `None` otherwise.
3593/// On handler parse failure for Python files, falls back to regex parsing.
3594fn parse_file_imports(
3595    registry: &LanguageRegistry,
3596    source: &str,
3597    full_path: &Path,
3598    rel_path: &str,
3599) -> Vec<InternalImportEdge> {
3600    let ext = match full_path.extension().and_then(|e| e.to_str()) {
3601        Some(e) => format!(".{}", e),
3602        None => return Vec::new(),
3603    };
3604
3605    let is_python = ext == ".py" || ext == ".pyi";
3606
3607    // Try the language handler from the registry
3608    if let Some(handler) = registry.get_by_extension(&ext) {
3609        if let Ok(import_defs) = handler.parse_imports(source, full_path) {
3610            return import_defs
3611                .into_iter()
3612                .map(|def| InternalImportEdge {
3613                    source_file: rel_path.to_string(),
3614                    target_module: def.module,
3615                    imported_names: def.names,
3616                })
3617                .collect();
3618        }
3619    }
3620
3621    // Fallback: regex-based parsing for Python files only
3622    if is_python {
3623        return parse_python_imports(source, rel_path);
3624    }
3625
3626    Vec::new()
3627}
3628
3629/// Build import graph for all source files in a directory.
3630///
3631/// Uses `CallGraphLanguageSupport::parse_imports()` from tldr-core for
3632/// multi-language support (Python, TypeScript, Go, Rust, Java, C#, etc.).
3633/// Falls back to regex-based `parse_python_imports()` for Python files
3634/// when the core API fails, and skips import parsing for files whose
3635/// language is unsupported or whose handler returns an error.
3636fn build_import_graph(root: &Path) -> Result<Vec<InternalImportEdge>> {
3637    let files = collect_source_files(root)?;
3638    let registry = LanguageRegistry::with_defaults();
3639    let mut all_edges = Vec::new();
3640
3641    for (rel_path, full_path) in &files {
3642        let source = fs::read_to_string(full_path)?;
3643        let edges = parse_file_imports(&registry, &source, full_path, rel_path);
3644        all_edges.extend(edges);
3645    }
3646
3647    Ok(all_edges)
3648}
3649
3650/// Convert an internal edge to the public ImportEdge type.
3651fn to_public_edge(edge: &InternalImportEdge) -> ImportEdge {
3652    ImportEdge {
3653        source_file: edge.source_file.clone(),
3654        target_module: edge.target_module.clone(),
3655        imported_names: edge.imported_names.clone(),
3656    }
3657}
3658
3659/// Create a comparable key for an import edge (for set operations).
3660fn edge_key(edge: &InternalImportEdge) -> String {
3661    format!(
3662        "{}->{}:{}",
3663        edge.source_file,
3664        edge.target_module,
3665        edge.imported_names.join(",")
3666    )
3667}
3668
3669/// Run L7 module-level diff between two directories.
3670fn run_module_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3671    // Build import graphs
3672    let edges_a = build_import_graph(dir_a)?;
3673    let edges_b = build_import_graph(dir_b)?;
3674
3675    // Build edge key sets for comparison
3676    let keys_a: HashSet<String> = edges_a.iter().map(edge_key).collect();
3677    let keys_b: HashSet<String> = edges_b.iter().map(edge_key).collect();
3678
3679    // Edges added (in B but not in A)
3680    let added_keys: HashSet<&String> = keys_b.difference(&keys_a).collect();
3681    let removed_keys: HashSet<&String> = keys_a.difference(&keys_b).collect();
3682
3683    // Get added/removed edges
3684    let added_edges: Vec<&InternalImportEdge> = edges_b
3685        .iter()
3686        .filter(|e| added_keys.contains(&edge_key(e)))
3687        .collect();
3688    let removed_edges: Vec<&InternalImportEdge> = edges_a
3689        .iter()
3690        .filter(|e| removed_keys.contains(&edge_key(e)))
3691        .collect();
3692
3693    // Also run L6 file-level diff for context
3694    let files_a = collect_source_files(dir_a)?;
3695    let files_b = collect_source_files(dir_b)?;
3696    let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(r, p)| (r.as_str(), p)).collect();
3697    let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(r, p)| (r.as_str(), p)).collect();
3698    let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
3699
3700    // Build per-module changes
3701    let mut module_changes: Vec<ModuleLevelChange> = Vec::new();
3702    let mut modules_with_import_changes = 0usize;
3703
3704    for rel_path in &all_paths {
3705        let in_a = map_a.contains_key(rel_path);
3706        let in_b = map_b.contains_key(rel_path);
3707
3708        // Determine module change type
3709        let change_type = if !in_a && in_b {
3710            ChangeType::Insert
3711        } else if in_a && !in_b {
3712            ChangeType::Delete
3713        } else {
3714            ChangeType::Update
3715        };
3716
3717        // Gather import changes for this module
3718        let mod_added: Vec<ImportEdge> = added_edges
3719            .iter()
3720            .filter(|e| e.source_file == *rel_path)
3721            .map(|e| to_public_edge(e))
3722            .collect();
3723        let mod_removed: Vec<ImportEdge> = removed_edges
3724            .iter()
3725            .filter(|e| e.source_file == *rel_path)
3726            .map(|e| to_public_edge(e))
3727            .collect();
3728
3729        // Compute file-level change if both exist
3730        let file_change = if in_a && in_b {
3731            let path_a = map_a[rel_path];
3732            let path_b = map_b[rel_path];
3733            let (fp_a, sigs_a) = compute_structural_fingerprint(path_a)?;
3734            let (fp_b, sigs_b) = compute_structural_fingerprint(path_b)?;
3735            if fp_a != fp_b {
3736                let set_a: HashSet<&String> = sigs_a.iter().collect();
3737                let set_b: HashSet<&String> = sigs_b.iter().collect();
3738                let changed: Vec<String> = set_a
3739                    .symmetric_difference(&set_b)
3740                    .map(|s| (*s).clone())
3741                    .collect();
3742                Some(FileLevelChange {
3743                    relative_path: rel_path.to_string(),
3744                    change_type: ChangeType::Update,
3745                    old_fingerprint: Some(fp_a),
3746                    new_fingerprint: Some(fp_b),
3747                    signature_changes: if changed.is_empty() {
3748                        None
3749                    } else {
3750                        Some(changed)
3751                    },
3752                })
3753            } else {
3754                None
3755            }
3756        } else {
3757            None
3758        };
3759
3760        // Only include modules with actual changes
3761        let has_import_changes = !mod_added.is_empty() || !mod_removed.is_empty();
3762        let has_file_change = file_change.is_some();
3763        let is_new_or_deleted =
3764            change_type == ChangeType::Insert || change_type == ChangeType::Delete;
3765
3766        if has_import_changes || has_file_change || is_new_or_deleted {
3767            if has_import_changes {
3768                modules_with_import_changes += 1;
3769            }
3770
3771            // For new modules, all their imports count as added
3772            let final_added = if change_type == ChangeType::Insert && mod_added.is_empty() {
3773                // Gather all imports for this new file
3774                edges_b
3775                    .iter()
3776                    .filter(|e| e.source_file == *rel_path)
3777                    .map(to_public_edge)
3778                    .collect()
3779            } else {
3780                mod_added
3781            };
3782            // For deleted modules, all their imports count as removed
3783            let final_removed = if change_type == ChangeType::Delete && mod_removed.is_empty() {
3784                edges_a
3785                    .iter()
3786                    .filter(|e| e.source_file == *rel_path)
3787                    .map(to_public_edge)
3788                    .collect()
3789            } else {
3790                mod_removed
3791            };
3792
3793            // Recheck after expanding
3794            let has_expanded_imports = !final_added.is_empty() || !final_removed.is_empty();
3795            if has_expanded_imports && !has_import_changes {
3796                modules_with_import_changes += 1;
3797            }
3798
3799            module_changes.push(ModuleLevelChange {
3800                module_path: rel_path.to_string(),
3801                change_type,
3802                imports_added: final_added,
3803                imports_removed: final_removed,
3804                file_change,
3805            });
3806        }
3807    }
3808
3809    let summary = ImportGraphSummary {
3810        total_edges_a: edges_a.len(),
3811        total_edges_b: edges_b.len(),
3812        edges_added: added_keys.len(),
3813        edges_removed: removed_keys.len(),
3814        modules_with_import_changes,
3815    };
3816
3817    let identical = module_changes.is_empty() && added_keys.is_empty() && removed_keys.is_empty();
3818
3819    Ok(DiffReport {
3820        file_a: dir_a.display().to_string(),
3821        file_b: dir_b.display().to_string(),
3822        identical,
3823        changes: Vec::new(),
3824        summary: None,
3825        granularity: DiffGranularity::Module,
3826        file_changes: None,
3827        module_changes: Some(module_changes),
3828        import_graph_summary: Some(summary),
3829        arch_changes: None,
3830        arch_summary: None,
3831    })
3832}
3833
3834// =============================================================================
3835// L8: Architecture-Level Diff
3836// =============================================================================
3837
/// Map a directory name onto an architectural layer by name alone.
///
/// The comparison is case-insensitive. Names that match none of the known
/// conventions are classified as `"other"`.
fn classify_directory_layer(dir_name: &str) -> String {
    let layer = match dir_name.to_lowercase().as_str() {
        "api" | "routes" | "handlers" | "endpoints" | "views" | "controllers" => "api",
        "core" | "models" | "domain" | "entities" => "core",
        "utils" | "helpers" | "lib" | "common" | "shared" => "utility",
        "middleware" | "interceptors" | "filters" => "middleware",
        "services" | "service" => "service",
        "tests" | "test" | "spec" | "specs" => "test",
        "config" | "settings" | "conf" => "config",
        "db" | "database" | "migrations" | "repositories" | "repo" => "data",
        _ => "other",
    };
    layer.to_string()
}
3853
3854/// Classify a directory using import-based fan-in/fan-out analysis.
3855///
3856/// For directories whose name doesn't match a known pattern ("other"),
3857/// we use the import graph to infer the architectural role:
3858/// - High fan-out + low fan-in  -> "entry" (entry points that depend on many modules)
3859/// - Low fan-out  + high fan-in -> "utility" (leaf modules imported by many)
3860/// - Balanced                   -> "service" (intermediate layer)
3861fn classify_by_import_flow(
3862    dir_name: &str,
3863    edges: &[InternalImportEdge],
3864    all_dirs: &HashSet<String>,
3865) -> String {
3866    // Count fan-out: how many distinct external directories does this dir import from?
3867    let fan_out: usize = edges
3868        .iter()
3869        .filter(|e| {
3870            e.source_file
3871                .split('/')
3872                .next()
3873                .map(|d| d == dir_name)
3874                .unwrap_or(false)
3875        })
3876        .filter(|e| {
3877            // Target module references a different top-level directory
3878            let target_first = e
3879                .target_module
3880                .split('/')
3881                .next()
3882                .or_else(|| e.target_module.split('.').next())
3883                .unwrap_or("");
3884            all_dirs.contains(target_first) && target_first != dir_name
3885        })
3886        .map(|e| e.target_module.clone())
3887        .collect::<HashSet<_>>()
3888        .len();
3889
3890    // Count fan-in: how many edges from OTHER directories target files in this dir?
3891    let fan_in: usize = edges
3892        .iter()
3893        .filter(|e| {
3894            let source_dir = e.source_file.split('/').next().unwrap_or("");
3895            source_dir != dir_name
3896        })
3897        .filter(|e| {
3898            let target_first = e
3899                .target_module
3900                .split('/')
3901                .next()
3902                .or_else(|| e.target_module.split('.').next())
3903                .unwrap_or("");
3904            target_first == dir_name
3905        })
3906        .count();
3907
3908    if fan_in == 0 && fan_out == 0 {
3909        return "other".to_string();
3910    }
3911
3912    // Classify based on ratio
3913    if fan_out > 0 && fan_in == 0 {
3914        "entry".to_string()
3915    } else if fan_in > fan_out * 2 {
3916        "utility".to_string()
3917    } else if fan_out > fan_in * 2 {
3918        "entry".to_string()
3919    } else {
3920        "service".to_string()
3921    }
3922}
3923
3924/// Collect top-level directories containing source files, classifying each
3925/// into an architectural layer.
3926///
3927/// Uses two-pass classification:
3928/// 1. Name-based heuristic (e.g., "api/" -> api, "utils/" -> utility)
3929/// 2. Import-based fan-in/fan-out analysis for "other" directories
3930fn collect_arch_directories(root: &Path) -> Result<HashMap<String, String>> {
3931    let mut dirs: HashMap<String, String> = HashMap::new();
3932    let files = collect_source_files(root)?;
3933
3934    // Pass 1: classify by name
3935    for (rel_path, _) in &files {
3936        if let Some(first_dir) = rel_path.split('/').next() {
3937            if rel_path.contains('/') && !dirs.contains_key(first_dir) {
3938                let layer = classify_directory_layer(first_dir);
3939                dirs.insert(first_dir.to_string(), layer);
3940            }
3941        }
3942    }
3943
3944    // Pass 2: for directories classified as "other", try import-based classification
3945    let other_dirs: Vec<String> = dirs
3946        .iter()
3947        .filter(|(_, layer)| *layer == "other")
3948        .map(|(name, _)| name.clone())
3949        .collect();
3950
3951    if !other_dirs.is_empty() {
3952        // Build import graph to analyze import flow
3953        if let Ok(edges) = build_import_graph(root) {
3954            let all_dir_names: HashSet<String> = dirs.keys().cloned().collect();
3955            for dir_name in &other_dirs {
3956                let inferred = classify_by_import_flow(dir_name, &edges, &all_dir_names);
3957                if inferred != "other" {
3958                    dirs.insert(dir_name.clone(), inferred);
3959                }
3960            }
3961        }
3962    }
3963
3964    Ok(dirs)
3965}
3966
3967/// Run L8 architecture-level diff between two directories.
3968fn run_arch_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3969    let dirs_a = collect_arch_directories(dir_a)?;
3970    let dirs_b = collect_arch_directories(dir_b)?;
3971
3972    let all_dirs: BTreeSet<&str> = dirs_a
3973        .keys()
3974        .chain(dirs_b.keys())
3975        .map(|s| s.as_str())
3976        .collect();
3977
3978    let mut arch_changes: Vec<ArchLevelChange> = Vec::new();
3979    let mut directories_added = 0usize;
3980    let mut directories_removed = 0usize;
3981    let mut layer_migrations = 0usize;
3982    let mut changed_dirs = 0usize;
3983    let total_dirs = all_dirs.len();
3984
3985    for dir_name in &all_dirs {
3986        let in_a = dirs_a.get(*dir_name);
3987        let in_b = dirs_b.get(*dir_name);
3988
3989        match (in_a, in_b) {
3990            (Some(layer_a), Some(layer_b)) => {
3991                if layer_a != layer_b {
3992                    // Layer migration
3993                    changed_dirs += 1;
3994                    layer_migrations += 1;
3995                    arch_changes.push(ArchLevelChange {
3996                        directory: dir_name.to_string(),
3997                        change_type: ArchChangeType::LayerMigration,
3998                        old_layer: Some(layer_a.clone()),
3999                        new_layer: Some(layer_b.clone()),
4000                        migrated_functions: Vec::new(),
4001                    });
4002                }
4003                // Same layer = no change (stable)
4004            }
4005            (None, Some(layer_b)) => {
4006                // Added directory
4007                changed_dirs += 1;
4008                directories_added += 1;
4009                arch_changes.push(ArchLevelChange {
4010                    directory: dir_name.to_string(),
4011                    change_type: ArchChangeType::Added,
4012                    old_layer: None,
4013                    new_layer: Some(layer_b.clone()),
4014                    migrated_functions: Vec::new(),
4015                });
4016            }
4017            (Some(layer_a), None) => {
4018                // Removed directory
4019                changed_dirs += 1;
4020                directories_removed += 1;
4021                arch_changes.push(ArchLevelChange {
4022                    directory: dir_name.to_string(),
4023                    change_type: ArchChangeType::Removed,
4024                    old_layer: Some(layer_a.clone()),
4025                    new_layer: None,
4026                    migrated_functions: Vec::new(),
4027                });
4028            }
4029            (None, None) => unreachable!(),
4030        }
4031    }
4032
4033    let stability_score = if total_dirs == 0 {
4034        1.0
4035    } else {
4036        1.0 - (changed_dirs as f64 / total_dirs as f64)
4037    };
4038
4039    let summary = ArchDiffSummary {
4040        layer_migrations,
4041        directories_added,
4042        directories_removed,
4043        cycles_introduced: 0,
4044        cycles_resolved: 0,
4045        stability_score,
4046    };
4047
4048    let identical = arch_changes.is_empty();
4049
4050    Ok(DiffReport {
4051        file_a: dir_a.display().to_string(),
4052        file_b: dir_b.display().to_string(),
4053        identical,
4054        changes: Vec::new(),
4055        summary: None,
4056        granularity: DiffGranularity::Architecture,
4057        file_changes: None,
4058        module_changes: None,
4059        import_graph_summary: None,
4060        arch_changes: Some(arch_changes),
4061        arch_summary: Some(summary),
4062    })
4063}
4064
4065// =============================================================================
4066// Tests
4067// =============================================================================
4068
#[cfg(test)]
mod tests {
    //! Unit tests for node extraction, change detection, the similarity and
    //! body-normalization helpers, and the text rendering of higher-level
    //! (file / module / architecture) reports.

    use super::*;

    /// Baseline Python fixture: three top-level functions and one class with
    /// a single method.
    const SAMPLE_A: &str = r#"
def original_function(x):
    return x * 2

def renamed_later(a, b):
    return a + b

def will_be_deleted():
    return "goodbye"

class OriginalClass:
    def method_one(self):
        return 1
"#;

    /// Modified Python fixture derived from `SAMPLE_A`:
    /// - `original_function` has a different body,
    /// - `renamed_later` reappears as `better_name` with the same body,
    /// - `will_be_deleted` is gone and `new_function` is new,
    /// - `OriginalClass` gains `method_two`.
    const SAMPLE_B: &str = r#"
def original_function(x):
    # Modified implementation
    return x * 3

def better_name(a, b):
    return a + b

def new_function():
    return "hello"

class OriginalClass:
    def method_one(self):
        return 1

    def method_two(self):
        return 2
"#;

    // =========================================================================
    // Shared helpers
    // =========================================================================

    /// Parse Python source for tests using the language-aware ParserPool.
    ///
    /// Panics if the pool fails to produce a tree, which fails the calling test.
    fn parse_python(source: &str) -> tree_sitter::Tree {
        let pool = ParserPool::new();
        pool.parse(source, Language::Python).unwrap()
    }

    /// Runs the diff pipeline used by the detection tests on two Python sources.
    ///
    /// Each source is parsed, its nodes are extracted, and both node lists are
    /// passed to `detect_changes` under the placeholder paths `a.py` and `b.py`.
    /// `semantic_only` is forwarded unchanged.
    fn diff_python(source_a: &str, source_b: &str, semantic_only: bool) -> Vec<ASTChange> {
        let tree_a = parse_python(source_a);
        let tree_b = parse_python(source_b);

        let nodes_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), Language::Python);
        let nodes_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), Language::Python);

        let file_a = PathBuf::from("a.py");
        let file_b = PathBuf::from("b.py");
        detect_changes(&nodes_a, &nodes_b, &file_a, &file_b, semantic_only)
    }

    /// Returns references to the changes whose `change_type` equals `wanted`.
    fn changes_of_type(changes: &[ASTChange], wanted: ChangeType) -> Vec<&ASTChange> {
        changes.iter().filter(|c| c.change_type == wanted).collect()
    }

    /// Reports whether any of `changes` carries exactly the given `name`.
    fn mentions(changes: &[&ASTChange], name: &str) -> bool {
        changes.iter().any(|c| c.name.as_deref() == Some(name))
    }

    /// Builds a `dir_a/` vs `dir_b/` report flagged as non-identical.
    ///
    /// The rendering tests need `identical == false`; the identical case is
    /// covered separately by `test_format_diff_text_identical_skips_higher_levels`.
    fn changed_dir_report() -> DiffReport {
        let mut report = DiffReport::new("dir_a/", "dir_b/");
        report.identical = false;
        report
    }

    // =========================================================================
    // Node extraction and change detection
    // =========================================================================

    #[test]
    fn test_extract_nodes() {
        let tree = parse_python(SAMPLE_A);
        let nodes = extract_nodes(tree.root_node(), SAMPLE_A.as_bytes(), Language::Python);

        // Should find: original_function, renamed_later, will_be_deleted, OriginalClass, method_one
        assert!(
            nodes.len() >= 5,
            "Expected at least 5 nodes, got {}",
            nodes.len()
        );

        for expected in [
            "original_function",
            "renamed_later",
            "will_be_deleted",
            "OriginalClass",
            "method_one",
        ] {
            assert!(
                nodes.iter().any(|n| n.name.as_str() == expected),
                "Expected extracted node named {}",
                expected
            );
        }
    }

    #[test]
    fn test_detect_update() {
        let changes = diff_python(SAMPLE_A, SAMPLE_B, false);

        // original_function should be detected as Update
        let updates = changes_of_type(&changes, ChangeType::Update);
        assert!(!updates.is_empty(), "Should detect at least one update");
        assert!(
            mentions(&updates, "original_function"),
            "original_function should be marked as updated"
        );
    }

    #[test]
    fn test_detect_insert() {
        let changes = diff_python(SAMPLE_A, SAMPLE_B, false);

        // new_function and method_two should be detected as Insert.
        let inserts = changes_of_type(&changes, ChangeType::Insert);
        assert!(!inserts.is_empty(), "Should detect insertions");
        // Only the top-level function is pinned by name; how the nested
        // method_two is reported is not asserted here.
        assert!(
            mentions(&inserts, "new_function"),
            "new_function should be marked as inserted"
        );
    }

    #[test]
    fn test_detect_delete() {
        let changes = diff_python(SAMPLE_A, SAMPLE_B, false);

        // will_be_deleted should be detected as Delete
        let deletes = changes_of_type(&changes, ChangeType::Delete);
        assert!(!deletes.is_empty(), "Should detect deletions");
        assert!(
            mentions(&deletes, "will_be_deleted"),
            "will_be_deleted should be marked as deleted"
        );
    }

    #[test]
    fn test_detect_rename() {
        let changes = diff_python(SAMPLE_A, SAMPLE_B, false);

        // renamed_later -> better_name should be detected as Rename
        let renames = changes_of_type(&changes, ChangeType::Rename);
        assert!(!renames.is_empty(), "Should detect renames");
    }

    #[test]
    fn test_identical_files() {
        // Same content on both sides, compared in semantic-only mode.
        let changes = diff_python(SAMPLE_A, SAMPLE_A, true);

        assert!(
            changes.is_empty(),
            "Identical files should have no semantic changes"
        );
    }

    // =========================================================================
    // Similarity and normalization helpers
    // =========================================================================

    #[test]
    fn test_compute_similarity() {
        assert_eq!(compute_similarity("abc", "abc"), 1.0);
        assert_eq!(compute_similarity("", ""), 1.0); // two empty strings are equal
        assert!(compute_similarity("a\nb\nc", "a\nb\nd") >= 0.5); // Jaccard: 2/4 = 0.5
    }

    #[test]
    fn test_normalize_body() {
        // Test that normalize_body skips the signature line and strips comments
        let body = "def foo():\n    # pure comment line\n    return 1  # inline comment";
        let normalized = normalize_body(body);
        // Should skip "def foo():" (first line), filter "# pure comment line" (comment-only)
        // and strip "# inline comment" from the return line
        assert!(!normalized.contains('#'), "Comments should be removed");
        assert!(
            !normalized.contains("def foo"),
            "Signature should be skipped"
        );
        assert!(normalized.contains("return 1"), "Body should remain");
    }

    // =========================================================================
    // format_diff_text: L6-L8 rendering tests
    // =========================================================================

    #[test]
    fn test_format_diff_text_renders_file_changes() {
        let mut report = changed_dir_report();
        // One entry per change type: update, insert, delete.
        report.file_changes = Some(vec![
            FileLevelChange {
                relative_path: "src/main.py".to_string(),
                change_type: ChangeType::Update,
                old_fingerprint: Some(12345),
                new_fingerprint: Some(67890),
                signature_changes: Some(vec!["fn foo()".to_string()]),
            },
            FileLevelChange {
                relative_path: "src/new_module.py".to_string(),
                change_type: ChangeType::Insert,
                old_fingerprint: None,
                new_fingerprint: Some(11111),
                signature_changes: None,
            },
            FileLevelChange {
                relative_path: "src/removed.py".to_string(),
                change_type: ChangeType::Delete,
                old_fingerprint: Some(99999),
                new_fingerprint: None,
                signature_changes: None,
            },
        ]);

        let text = format_diff_text(&report);
        assert!(
            text.contains("File-Level Changes"),
            "Should have file-level section header"
        );
        assert!(text.contains("src/main.py"), "Should mention updated file");
        assert!(
            text.contains("src/new_module.py"),
            "Should mention added file"
        );
        assert!(
            text.contains("src/removed.py"),
            "Should mention removed file"
        );
    }

    #[test]
    fn test_format_diff_text_renders_module_changes() {
        let mut report = changed_dir_report();
        report.module_changes = Some(vec![ModuleLevelChange {
            module_path: "src/utils.py".to_string(),
            change_type: ChangeType::Update,
            imports_added: vec![ImportEdge {
                source_file: "src/utils.py".to_string(),
                target_module: "os.path".to_string(),
                imported_names: vec!["join".to_string()],
            }],
            imports_removed: vec![],
            file_change: None,
        }]);

        let text = format_diff_text(&report);
        assert!(
            text.contains("Module-Level Changes"),
            "Should have module-level section header"
        );
        assert!(
            text.contains("src/utils.py"),
            "Should mention the module path"
        );
        assert!(
            text.contains("os.path"),
            "Should mention added import target"
        );
    }

    #[test]
    fn test_format_diff_text_renders_import_graph_summary() {
        let mut report = changed_dir_report();
        report.import_graph_summary = Some(ImportGraphSummary {
            total_edges_a: 10,
            total_edges_b: 15,
            edges_added: 7,
            edges_removed: 2,
            modules_with_import_changes: 3,
        });

        let text = format_diff_text(&report);
        assert!(
            text.contains("Import Graph"),
            "Should have import graph section"
        );
        // NOTE(review): these are loose single-digit substring checks; the other
        // counts in this fixture (10, 15, 3) were chosen so they contain neither
        // digit. Tighten once the rendered label format is pinned down.
        assert!(text.contains("7"), "Should show edges added");
        assert!(text.contains("2"), "Should show edges removed");
    }

    #[test]
    fn test_format_diff_text_renders_arch_changes() {
        let mut report = changed_dir_report();
        // One layer migration and one added directory.
        report.arch_changes = Some(vec![
            ArchLevelChange {
                directory: "src/api/".to_string(),
                change_type: ArchChangeType::LayerMigration,
                old_layer: Some("presentation".to_string()),
                new_layer: Some("business".to_string()),
                migrated_functions: vec!["handle_request".to_string()],
            },
            ArchLevelChange {
                directory: "src/new_service/".to_string(),
                change_type: ArchChangeType::Added,
                old_layer: None,
                new_layer: Some("service".to_string()),
                migrated_functions: vec![],
            },
        ]);

        let text = format_diff_text(&report);
        assert!(
            text.contains("Architecture-Level Changes"),
            "Should have arch section header"
        );
        assert!(
            text.contains("src/api/"),
            "Should mention migrated directory"
        );
        assert!(text.contains("presentation"), "Should show old layer");
        assert!(text.contains("business"), "Should show new layer");
        assert!(
            text.contains("src/new_service/"),
            "Should mention added directory"
        );
    }

    #[test]
    fn test_format_diff_text_renders_arch_summary() {
        let mut report = changed_dir_report();
        report.arch_summary = Some(ArchDiffSummary {
            layer_migrations: 2,
            directories_added: 1,
            directories_removed: 0,
            cycles_introduced: 1,
            cycles_resolved: 0,
            stability_score: 0.75,
        });

        let text = format_diff_text(&report);
        assert!(
            text.contains("Architecture Summary"),
            "Should have arch summary section"
        );
        assert!(text.contains("0.75"), "Should show stability score");
    }

    #[test]
    fn test_format_diff_text_identical_skips_higher_levels() {
        // When identical, format_diff_text returns early, so even if higher-level
        // fields were somehow set, they should not appear.
        let mut report = DiffReport::new("a.py", "b.py");
        report.identical = true;
        report.file_changes = Some(vec![FileLevelChange {
            relative_path: "should_not_appear.py".to_string(),
            change_type: ChangeType::Insert,
            old_fingerprint: None,
            new_fingerprint: Some(1),
            signature_changes: None,
        }]);

        let text = format_diff_text(&report);
        assert!(
            !text.contains("should_not_appear"),
            "Identical report should skip all change sections"
        );
        assert!(
            text.contains("No structural changes"),
            "Should show identical message"
        );
    }
}