Skip to main content

tldr_cli/commands/remaining/
diff.rs

//! Diff command - AST-aware structural diff
//!
//! Compares two source files at the AST level, detecting:
//! - Insert: new function/class/method
//! - Delete: removed function/class/method
//! - Update: modified body
//! - Move: same content, different location
//! - Rename: same body, different name
//!
//! # Example
//!
//! ```bash
//! tldr diff old.py new.py
//! tldr diff old.py new.py --semantic-only
//! tldr diff old.py new.py --format text
//! ```
17
18use std::collections::{BTreeSet, HashMap, HashSet};
19use std::fs;
20use std::hash::{Hash, Hasher};
21use std::path::{Path, PathBuf};
22
23use anyhow::{bail, Result};
24use clap::Args;
25use regex::Regex;
26use tree_sitter::Node;
27
28use tldr_core::ast::function_finder::{get_function_name, get_function_node_kinds};
29use tldr_core::ast::parser::ParserPool;
30use tldr_core::callgraph::languages::LanguageRegistry;
31use tldr_core::types::Language;
32
33use super::error::RemainingError;
34use super::types::{
35    ASTChange, ArchChangeType, ArchDiffSummary, ArchLevelChange, BaseChanges, ChangeType,
36    DiffGranularity, DiffReport, DiffSummary, FileLevelChange, ImportEdge, ImportGraphSummary,
37    Location, ModuleLevelChange, NodeKind,
38};
39use crate::output::OutputFormat;
40
41// =============================================================================
42// Constants
43// =============================================================================
44
/// Similarity threshold for detecting renames (0.0-1.0)
// NOTE(review): the code that compares against this threshold is not visible in
// this part of the file; presumably a pair of nodes scoring at or above it is
// reported as a rename -- confirm against the rename-detection pass.
const RENAME_SIMILARITY_THRESHOLD: f64 = 0.8;
47
48// =============================================================================
49// CLI Arguments
50// =============================================================================
51
// NOTE: this struct derives `clap::Args`, so the `///` comments on the struct
// and its fields are also used by clap as help text. Rewording them changes
// the CLI's `--help` output.
/// AST-aware structural diff between two files
///
/// Compares two source files at the AST level, detecting structural changes
/// like inserted, deleted, updated, moved, and renamed functions/classes.
///
/// # Example
///
/// ```bash
/// tldr diff old.py new.py
/// tldr diff old.py new.py --semantic-only
/// ```
#[derive(Debug, Args)]
pub struct DiffArgs {
    // Must exist on disk; `run_to_report` returns a file-not-found error
    // otherwise. File/Module/Architecture granularity additionally require
    // both paths to be directories.
    /// First file (or directory for L6/L7/L8) to compare
    pub file_a: PathBuf,

    // Same existence/directory requirements as `file_a`.
    /// Second file (or directory for L6/L7/L8) to compare
    pub file_b: PathBuf,

    // Selects which diff algorithm `run_to_report` dispatches to.
    /// Diff granularity level
    #[arg(long, short = 'g', default_value = "function")]
    pub granularity: DiffGranularity,

    // Forwarded to the function-level and class-level diffs; the token- and
    // expression-level paths do not consult it.
    /// Exclude formatting-only changes (comments, whitespace)
    #[arg(long)]
    pub semantic_only: bool,

    // When set, `run` writes the rendered report to this path instead of
    // printing it.
    /// Output file (optional, stdout if not specified)
    #[arg(long, short = 'O')]
    pub output: Option<PathBuf>,
}
83
84// =============================================================================
85// Extracted Function Info
86// =============================================================================
87
88/// Information about an extracted function/class/method
89#[derive(Debug, Clone)]
90struct ExtractedNode {
91    /// Name of the function/class
92    name: String,
93    /// Kind of node
94    kind: NodeKind,
95    /// Line number (1-indexed)
96    line: u32,
97    /// End line number (1-indexed)
98    end_line: u32,
99    /// Column
100    column: u32,
101    /// Full source text (body)
102    body: String,
103    /// Normalized body (whitespace-insensitive)
104    normalized_body: String,
105    /// Parameters (for functions)
106    params: String,
107    /// Whether this is a method (inside a class)
108    is_method: bool,
109}
110
111impl ExtractedNode {
112    fn new(
113        name: impl Into<String>,
114        kind: NodeKind,
115        line: u32,
116        end_line: u32,
117        column: u32,
118        body: impl Into<String>,
119    ) -> Self {
120        let body_str: String = body.into();
121        let normalized = normalize_body(&body_str);
122        Self {
123            name: name.into(),
124            kind,
125            line,
126            end_line,
127            column,
128            body: body_str,
129            normalized_body: normalized,
130            params: String::new(),
131            is_method: false,
132        }
133    }
134
135    fn with_params(mut self, params: impl Into<String>) -> Self {
136        self.params = params.into();
137        self
138    }
139
140    fn with_method_kind(mut self) -> Self {
141        self.is_method = true;
142        if self.kind == NodeKind::Function {
143            self.kind = NodeKind::Method;
144        }
145        self
146    }
147}
148
/// Normalize a definition's source text for whitespace- and comment-insensitive
/// comparison.
///
/// The first line (the signature, e.g. `def foo():` or `class Bar:`) is
/// skipped so that rename detection compares body content only. On each
/// remaining line a trailing `#` comment is removed, surrounding whitespace is
/// trimmed, and lines that end up empty are dropped. The surviving lines are
/// joined with `\n`.
fn normalize_body(body: &str) -> String {
    body.lines()
        .skip(1) // Skip signature line (def foo(): or class Bar:)
        .map(|line| strip_hash_comment(line).trim())
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join("\n")
}

/// Returns `line` truncated at the first `#` that is not inside a quoted
/// string, or the whole line when there is no such `#`.
///
/// Quote state is tracked per quote character, so an apostrophe inside a
/// double-quoted string (or vice versa) does not confuse the scan, and a
/// backslash inside a string escapes the character that follows it. Counting
/// quotes in front of only the first `#` (the previous approach) left the
/// comment in place for lines such as `x = "a#b"  # note`.
fn strip_hash_comment(line: &str) -> &str {
    let mut open_quote: Option<char> = None;
    let mut escaped = false;

    for (idx, ch) in line.char_indices() {
        if escaped {
            // This character is consumed by the preceding backslash.
            escaped = false;
            continue;
        }
        match (open_quote, ch) {
            (Some(_), '\\') => escaped = true,
            (Some(quote), c) if c == quote => open_quote = None,
            (None, '\'') | (None, '"') => open_quote = Some(ch),
            (None, '#') => return &line[..idx],
            _ => {}
        }
    }

    line
}
178
179// =============================================================================
180// Implementation
181// =============================================================================
182
183impl DiffArgs {
184    /// Run the diff command and return the structured report.
185    ///
186    /// This is the internal workhorse: it dispatches to the appropriate
187    /// algorithm based on `self.granularity` and returns a `DiffReport`
188    /// without any output formatting.
189    pub fn run_to_report(&self) -> Result<DiffReport> {
190        // Validate paths exist
191        if !self.file_a.exists() {
192            return Err(RemainingError::file_not_found(&self.file_a).into());
193        }
194        if !self.file_b.exists() {
195            return Err(RemainingError::file_not_found(&self.file_b).into());
196        }
197
198        match self.granularity {
199            DiffGranularity::File => {
200                // L6: directory-level structural fingerprint diff
201                if !self.file_a.is_dir() || !self.file_b.is_dir() {
202                    bail!("File-level (L6) diff requires directories, not individual files");
203                }
204                run_file_level_diff(&self.file_a, &self.file_b)
205            }
206            DiffGranularity::Module => {
207                // L7: module-level import graph diff
208                if !self.file_a.is_dir() || !self.file_b.is_dir() {
209                    bail!("Module-level (L7) diff requires directories, not individual files");
210                }
211                run_module_level_diff(&self.file_a, &self.file_b)
212            }
213            DiffGranularity::Architecture => {
214                // L8: architecture-level diff
215                if !self.file_a.is_dir() || !self.file_b.is_dir() {
216                    bail!(
217                        "Architecture-level (L8) diff requires directories, not individual files"
218                    );
219                }
220                run_arch_level_diff(&self.file_a, &self.file_b)
221            }
222            DiffGranularity::Class => {
223                // L5: class-level diff (supports both files and directories)
224                if self.file_a.is_dir() && self.file_b.is_dir() {
225                    run_class_diff_directory(&self.file_a, &self.file_b, self.semantic_only)
226                } else {
227                    run_class_diff(&self.file_a, &self.file_b, self.semantic_only)
228                }
229            }
230            DiffGranularity::Statement => {
231                // L3: statement-level diff (Zhang-Shasha tree edit distance)
232                self.run_statement_level_diff()
233            }
234            DiffGranularity::Token => {
235                // L1: token-level diff using difftastic graph-based algorithm
236                self.run_token_level_diff()
237            }
238            DiffGranularity::Expression => {
239                // L2: expression-level diff (stub -- uses L1 until Phase 6)
240                self.run_expression_level_diff()
241            }
242            _ => {
243                // L4 and below: function-level diff (original behavior)
244                self.run_function_level_diff()
245            }
246        }
247    }
248
249    /// Run the diff command with output formatting.
250    pub fn run(&self, format: OutputFormat) -> Result<()> {
251        let report = self.run_to_report()?;
252
253        // Output
254        match format {
255            OutputFormat::Json => {
256                let json = serde_json::to_string_pretty(&report)?;
257                if let Some(ref output_path) = self.output {
258                    fs::write(output_path, &json)?;
259                } else {
260                    println!("{}", json);
261                }
262            }
263            OutputFormat::Text => {
264                let text = format_diff_text(&report);
265                if let Some(ref output_path) = self.output {
266                    fs::write(output_path, &text)?;
267                } else {
268                    println!("{}", text);
269                }
270            }
271            OutputFormat::Sarif | OutputFormat::Compact | OutputFormat::Dot => {
272                // Other formats not supported for diff, fall back to JSON
273                let json = serde_json::to_string_pretty(&report)?;
274                println!("{}", json);
275            }
276        }
277
278        Ok(())
279    }
280
281    /// Original L4 function-level diff implementation.
282    fn run_function_level_diff(&self) -> Result<DiffReport> {
283        // Detect language from file_a extension
284        let lang = Language::from_path(&self.file_a).ok_or_else(|| {
285            let ext = self
286                .file_a
287                .extension()
288                .map(|e| e.to_string_lossy().to_string())
289                .unwrap_or_else(|| "unknown".to_string());
290            RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
291        })?;
292
293        // Read file contents
294        let source_a = fs::read_to_string(&self.file_a)?;
295        let source_b = fs::read_to_string(&self.file_b)?;
296
297        // Parse both files using language-aware parser
298        let pool = ParserPool::new();
299        let tree_a = pool.parse(&source_a, lang).map_err(|e| {
300            RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
301        })?;
302        let tree_b = pool.parse(&source_b, lang).map_err(|e| {
303            RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
304        })?;
305
306        // Extract nodes from both files
307        let nodes_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
308        let nodes_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
309
310        // Detect changes
311        let changes = detect_changes(
312            &nodes_a,
313            &nodes_b,
314            &self.file_a,
315            &self.file_b,
316            self.semantic_only,
317        );
318
319        // Build summary
320        let mut summary = DiffSummary::default();
321        for change in &changes {
322            summary.total_changes += 1;
323            if change.change_type != ChangeType::Format {
324                summary.semantic_changes += 1;
325            }
326            match change.change_type {
327                ChangeType::Insert => summary.inserts += 1,
328                ChangeType::Delete => summary.deletes += 1,
329                ChangeType::Update => summary.updates += 1,
330                ChangeType::Move => summary.moves += 1,
331                ChangeType::Rename => summary.renames += 1,
332                ChangeType::Format => summary.formats += 1,
333                ChangeType::Extract => summary.extracts += 1,
334                ChangeType::Inline => {}
335            }
336        }
337
338        // Build report
339        let report = DiffReport {
340            file_a: self.file_a.display().to_string(),
341            file_b: self.file_b.display().to_string(),
342            identical: changes.is_empty(),
343            changes,
344            summary: Some(summary),
345            granularity: self.granularity,
346            file_changes: None,
347            module_changes: None,
348            import_graph_summary: None,
349            arch_changes: None,
350            arch_summary: None,
351        };
352
353        Ok(report)
354    }
355
356    /// L1 Token-level diff using difftastic's graph-based algorithm.
357    ///
358    /// Pipeline:
359    /// 1. Read files and detect language
360    /// 2. Parse with tree-sitter
361    /// 3. Convert to difftastic Syntax trees
362    /// 4. Run unchanged marking, Dijkstra graph diff, slider fixup
363    /// 5. Convert ChangeMap to DiffReport via changemap_to_report
364    fn run_token_level_diff(&self) -> Result<DiffReport> {
365        use super::difftastic;
366        use typed_arena::Arena;
367
368        // Detect language from file_a extension
369        let lang = Language::from_path(&self.file_a).ok_or_else(|| {
370            let ext = self
371                .file_a
372                .extension()
373                .map(|e| e.to_string_lossy().to_string())
374                .unwrap_or_else(|| "unknown".to_string());
375            RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
376        })?;
377
378        // Read file contents
379        let lhs_src = fs::read_to_string(&self.file_a)?;
380        let rhs_src = fs::read_to_string(&self.file_b)?;
381
382        // Get language config for difftastic tree-sitter conversion
383        let config = difftastic::lang_config::LangConfig::for_language(lang.as_str());
384
385        // Parse both files using existing tree-sitter infrastructure
386        let pool = ParserPool::new();
387        let lhs_tree = pool.parse(&lhs_src, lang).map_err(|e| {
388            RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
389        })?;
390        let rhs_tree = pool.parse(&rhs_src, lang).map_err(|e| {
391            RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
392        })?;
393
394        // Convert tree-sitter trees to difftastic Syntax trees
395        let arena = Arena::new();
396        let (lhs_nodes, rhs_nodes) = difftastic::ts_to_syntax::prepare_syntax_trees(
397            &arena, &lhs_src, &rhs_src, &lhs_tree, &rhs_tree, &config,
398        );
399
400        // Run diff pipeline
401        let mut change_map = difftastic::changes::ChangeMap::default();
402
403        // Phase 1: Mark unchanged nodes (structural matching)
404        let chunks = difftastic::unchanged::mark_unchanged(&lhs_nodes, &rhs_nodes, &mut change_map);
405
406        // Phase 2: Run Dijkstra graph diff on each changed chunk
407        for (lhs_chunk, rhs_chunk) in &chunks {
408            match (lhs_chunk.first(), rhs_chunk.first()) {
409                (Some(lhs_first), Some(rhs_first)) => {
410                    if difftastic::dijkstra::mark_syntax(
411                        Some(*lhs_first),
412                        Some(*rhs_first),
413                        &mut change_map,
414                        difftastic::dijkstra::DEFAULT_GRAPH_LIMIT,
415                    )
416                    .is_err()
417                    {
418                        // Graph limit exceeded -- mark all nodes as Novel
419                        for node in lhs_chunk {
420                            difftastic::changes::insert_deep_novel(node, &mut change_map);
421                        }
422                        for node in rhs_chunk {
423                            difftastic::changes::insert_deep_novel(node, &mut change_map);
424                        }
425                    }
426                }
427                (Some(_), None) => {
428                    // LHS has nodes, RHS is empty -- all LHS nodes are Novel (deleted)
429                    for node in lhs_chunk {
430                        difftastic::changes::insert_deep_novel(node, &mut change_map);
431                    }
432                }
433                (None, Some(_)) => {
434                    // RHS has nodes, LHS is empty -- all RHS nodes are Novel (inserted)
435                    for node in rhs_chunk {
436                        difftastic::changes::insert_deep_novel(node, &mut change_map);
437                    }
438                }
439                (None, None) => {
440                    // Both sides empty -- nothing to do
441                }
442            }
443        }
444
445        // Phase 3: Fix sliders for better alignment
446        difftastic::sliders::fix_all_sliders(&lhs_nodes, &mut change_map);
447        difftastic::sliders::fix_all_sliders(&rhs_nodes, &mut change_map);
448
449        // Convert to DiffReport
450        let fa = self.file_a.display().to_string();
451        let fb = self.file_b.display().to_string();
452        Ok(difftastic::changemap_to_report::changemap_to_l1_report(
453            &lhs_nodes,
454            &rhs_nodes,
455            &change_map,
456            &fa,
457            &fb,
458        ))
459    }
460
461    /// L2 Expression-level diff using difftastic with expression grouping.
462    ///
463    /// Same diff pipeline as L1 (unchanged marking, Dijkstra, slider fixup)
464    /// but converts the ChangeMap via `changemap_to_l2_report`, which groups
465    /// token changes under their nearest `Syntax::List` parent.
466    fn run_expression_level_diff(&self) -> Result<DiffReport> {
467        use super::difftastic;
468        use typed_arena::Arena;
469
470        // Detect language from file_a extension
471        let lang = Language::from_path(&self.file_a).ok_or_else(|| {
472            let ext = self
473                .file_a
474                .extension()
475                .map(|e| e.to_string_lossy().to_string())
476                .unwrap_or_else(|| "unknown".to_string());
477            RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
478        })?;
479
480        // Read file contents
481        let lhs_src = fs::read_to_string(&self.file_a)?;
482        let rhs_src = fs::read_to_string(&self.file_b)?;
483
484        // Get language config for difftastic tree-sitter conversion
485        let config = difftastic::lang_config::LangConfig::for_language(lang.as_str());
486
487        // Parse both files using existing tree-sitter infrastructure
488        let pool = ParserPool::new();
489        let lhs_tree = pool.parse(&lhs_src, lang).map_err(|e| {
490            RemainingError::parse_error(&self.file_a, format!("Failed to parse file: {}", e))
491        })?;
492        let rhs_tree = pool.parse(&rhs_src, lang).map_err(|e| {
493            RemainingError::parse_error(&self.file_b, format!("Failed to parse file: {}", e))
494        })?;
495
496        // Convert tree-sitter trees to difftastic Syntax trees
497        let arena = Arena::new();
498        let (lhs_nodes, rhs_nodes) = difftastic::ts_to_syntax::prepare_syntax_trees(
499            &arena, &lhs_src, &rhs_src, &lhs_tree, &rhs_tree, &config,
500        );
501
502        // Run diff pipeline
503        let mut change_map = difftastic::changes::ChangeMap::default();
504
505        // Phase 1: Mark unchanged nodes (structural matching)
506        let chunks = difftastic::unchanged::mark_unchanged(&lhs_nodes, &rhs_nodes, &mut change_map);
507
508        // Phase 2: Run Dijkstra graph diff on each changed chunk
509        for (lhs_chunk, rhs_chunk) in &chunks {
510            match (lhs_chunk.first(), rhs_chunk.first()) {
511                (Some(lhs_first), Some(rhs_first)) => {
512                    if difftastic::dijkstra::mark_syntax(
513                        Some(*lhs_first),
514                        Some(*rhs_first),
515                        &mut change_map,
516                        difftastic::dijkstra::DEFAULT_GRAPH_LIMIT,
517                    )
518                    .is_err()
519                    {
520                        for node in lhs_chunk {
521                            difftastic::changes::insert_deep_novel(node, &mut change_map);
522                        }
523                        for node in rhs_chunk {
524                            difftastic::changes::insert_deep_novel(node, &mut change_map);
525                        }
526                    }
527                }
528                (Some(_), None) => {
529                    for node in lhs_chunk {
530                        difftastic::changes::insert_deep_novel(node, &mut change_map);
531                    }
532                }
533                (None, Some(_)) => {
534                    for node in rhs_chunk {
535                        difftastic::changes::insert_deep_novel(node, &mut change_map);
536                    }
537                }
538                (None, None) => {}
539            }
540        }
541
542        // Phase 3: Fix sliders for better alignment
543        difftastic::sliders::fix_all_sliders(&lhs_nodes, &mut change_map);
544        difftastic::sliders::fix_all_sliders(&rhs_nodes, &mut change_map);
545
546        // Convert to DiffReport using L2 expression grouping
547        let fa = self.file_a.display().to_string();
548        let fb = self.file_b.display().to_string();
549        Ok(difftastic::changemap_to_report::changemap_to_l2_report(
550            &lhs_nodes,
551            &rhs_nodes,
552            &change_map,
553            &fa,
554            &fb,
555        ))
556    }
557}
558
559// =============================================================================
560// Tree-sitter Parsing
561// =============================================================================
562
563/// Get text for a node from source
564fn node_text<'a>(node: Node, source: &'a [u8]) -> &'a str {
565    node.utf8_text(source).unwrap_or("")
566}
567
568/// Get the class-like node kinds for each language
569fn get_class_node_kinds(language: Language) -> &'static [&'static str] {
570    match language {
571        Language::Python => &["class_definition"],
572        Language::TypeScript | Language::JavaScript => &["class_declaration", "class"],
573        Language::Go => &["type_declaration"],
574        Language::Rust => &["struct_item", "enum_item", "impl_item"],
575        Language::Java => &[
576            "class_declaration",
577            "interface_declaration",
578            "enum_declaration",
579        ],
580        Language::C => &["struct_specifier", "enum_specifier"],
581        Language::Cpp => &["class_specifier", "struct_specifier", "enum_specifier"],
582        Language::Ruby => &["class", "module"],
583        Language::Php => &["class_declaration", "interface_declaration"],
584        Language::CSharp => &[
585            "class_declaration",
586            "interface_declaration",
587            "struct_declaration",
588        ],
589        Language::Kotlin => &["class_declaration", "object_declaration"],
590        Language::Scala => &["class_definition", "object_definition", "trait_definition"],
591        Language::Swift => &[
592            "class_declaration",
593            "struct_declaration",
594            "protocol_declaration",
595        ],
596        Language::Elixir => &["call"],         // defmodule is a call
597        Language::Lua | Language::Luau => &[], // Lua has no class syntax
598        Language::Ocaml => &["module_definition", "type_definition"],
599    }
600}
601
602/// Get the node kinds that represent class body containers for method extraction
603fn get_class_body_kinds(language: Language) -> &'static [&'static str] {
604    match language {
605        Language::Python => &["block"],
606        Language::TypeScript | Language::JavaScript => &["class_body"],
607        Language::Go => &[], // Go methods are not nested in type declarations
608        Language::Rust => &["declaration_list"], // impl_item body
609        Language::Java => &["class_body"],
610        Language::C | Language::Cpp => &["field_declaration_list"],
611        Language::Ruby => &["body_statement"],
612        Language::Php => &["declaration_list"],
613        Language::CSharp => &["declaration_list"],
614        Language::Kotlin => &["class_body"],
615        Language::Scala => &["template_body"],
616        Language::Swift => &["class_body"],
617        Language::Elixir => &["do_block"],
618        Language::Lua | Language::Luau => &[],
619        Language::Ocaml => &[],
620    }
621}
622
623// =============================================================================
624// Node Extraction
625// =============================================================================
626
627/// Extract all functions, classes, and methods from AST
628fn extract_nodes(root: Node, source: &[u8], lang: Language) -> Vec<ExtractedNode> {
629    let mut nodes = Vec::new();
630    let kinds = NodeKindSets {
631        func: get_function_node_kinds(lang),
632        class: get_class_node_kinds(lang),
633        body: get_class_body_kinds(lang),
634    };
635    extract_nodes_recursive(
636        root,
637        source,
638        &mut nodes,
639        false,
640        lang,
641        &kinds,
642    );
643    nodes
644}
645
/// Bundles the per-language node-kind lists consulted while walking the AST,
/// so `extract_nodes` resolves them once and passes them down by reference.
struct NodeKindSets<'a> {
    /// Kinds treated as function definitions (from `get_function_node_kinds`).
    func: &'a [&'a str],
    /// Kinds treated as class-like definitions (from `get_class_node_kinds`).
    class: &'a [&'a str],
    /// Kinds of class-body containers searched for methods (from `get_class_body_kinds`).
    body: &'a [&'a str],
}
651
652fn extract_nodes_recursive(
653    node: Node,
654    source: &[u8],
655    nodes: &mut Vec<ExtractedNode>,
656    in_class: bool,
657    lang: Language,
658    kinds: &NodeKindSets<'_>,
659) {
660    let kind = node.kind();
661
662    // Check if this is a function node
663    if kinds.func.contains(&kind) {
664        if let Some(extracted) = extract_function_node(node, source, in_class, lang) {
665            nodes.push(extracted);
666        }
667    }
668    // Check if this is a class node
669    else if kinds.class.contains(&kind) {
670        if let Some(extracted) = extract_class_node(node, source, lang) {
671            nodes.push(extracted);
672        }
673        // Extract methods inside the class body
674        for child in node.children(&mut node.walk()) {
675            if kinds.body.contains(&child.kind()) {
676                extract_nodes_recursive(child, source, nodes, true, lang, kinds);
677            }
678        }
679        return; // Don't recurse further - we handled the body
680    }
681
682    // Recurse into children
683    for child in node.children(&mut node.walk()) {
684        extract_nodes_recursive(child, source, nodes, in_class, lang, kinds);
685    }
686}
687
688fn extract_function_node(
689    node: Node,
690    source: &[u8],
691    is_method: bool,
692    lang: Language,
693) -> Option<ExtractedNode> {
694    // Use language-aware name extraction from function_finder
695    let source_str = std::str::from_utf8(source).unwrap_or("");
696    let func_name = get_function_name(node, lang, source_str)?;
697
698    // Try to extract parameters (varies by language but most use "parameters" or "formal_parameters")
699    let params = node
700        .child_by_field_name("parameters")
701        .or_else(|| node.child_by_field_name("formal_parameters"))
702        .map(|p| node_text(p, source).to_string())
703        .unwrap_or_default();
704
705    let line = node.start_position().row as u32 + 1;
706    let end_line = node.end_position().row as u32 + 1;
707    let column = node.start_position().column as u32;
708    let body = node_text(node, source).to_string();
709
710    let mut extracted =
711        ExtractedNode::new(func_name, NodeKind::Function, line, end_line, column, body)
712            .with_params(params);
713
714    if is_method {
715        extracted = extracted.with_method_kind();
716    }
717
718    Some(extracted)
719}
720
721fn extract_class_node(node: Node, source: &[u8], lang: Language) -> Option<ExtractedNode> {
722    // Get class name - most languages use "name" field
723    let class_name = node
724        .child_by_field_name("name")
725        .map(|n| node_text(n, source).to_string())
726        .or_else(|| {
727            // Fallback: search for first identifier child
728            let mut cursor = node.walk();
729            for child in node.children(&mut cursor) {
730                if child.kind() == "identifier"
731                    || child.kind() == "type_identifier"
732                    || child.kind() == "constant"
733                {
734                    return Some(node_text(child, source).to_string());
735                }
736            }
737            None
738        })?;
739
740    // Skip empty names
741    if class_name.is_empty() {
742        return None;
743    }
744
745    // For Elixir defmodule, filter to only actual module definitions
746    if lang == Language::Elixir && node.kind() == "call" {
747        let first_child = node.child(0)?;
748        let first_text = node_text(first_child, source);
749        if first_text != "defmodule" {
750            return None;
751        }
752        // Module name is in the arguments
753        if let Some(args) = node.child(1) {
754            let name = node_text(args, source).to_string();
755            if !name.is_empty() {
756                let line = node.start_position().row as u32 + 1;
757                let end_line = node.end_position().row as u32 + 1;
758                let column = node.start_position().column as u32;
759                let body = node_text(node, source).to_string();
760                return Some(ExtractedNode::new(
761                    name,
762                    NodeKind::Class,
763                    line,
764                    end_line,
765                    column,
766                    body,
767                ));
768            }
769        }
770        return None;
771    }
772
773    let line = node.start_position().row as u32 + 1;
774    let end_line = node.end_position().row as u32 + 1;
775    let column = node.start_position().column as u32;
776    let body = node_text(node, source).to_string();
777
778    Some(ExtractedNode::new(
779        class_name,
780        NodeKind::Class,
781        line,
782        end_line,
783        column,
784        body,
785    ))
786}
787
788// =============================================================================
789// Change Detection
790// =============================================================================
791
792/// Detect changes between two sets of nodes
793fn detect_changes(
794    nodes_a: &[ExtractedNode],
795    nodes_b: &[ExtractedNode],
796    file_a: &Path,
797    file_b: &Path,
798    semantic_only: bool,
799) -> Vec<ASTChange> {
800    let mut changes = Vec::new();
801
802    // Build lookup maps by name
803    let _map_a: HashMap<&str, &ExtractedNode> =
804        nodes_a.iter().map(|n| (n.name.as_str(), n)).collect();
805    let map_b: HashMap<&str, &ExtractedNode> =
806        nodes_b.iter().map(|n| (n.name.as_str(), n)).collect();
807
808    // Track which nodes have been matched
809    let mut matched_a: Vec<bool> = vec![false; nodes_a.len()];
810    let mut matched_b: Vec<bool> = vec![false; nodes_b.len()];
811
812    // First pass: exact name matches
813    for (i, node_a) in nodes_a.iter().enumerate() {
814        let _ = node_a.end_line;
815        if let Some(&node_b) = map_b.get(node_a.name.as_str()) {
816            // Same name exists in both files
817            matched_a[i] = true;
818            if let Some(j) = nodes_b.iter().position(|n| n.name == node_a.name) {
819                matched_b[j] = true;
820            }
821
822            // Check if body changed
823            if node_a.normalized_body != node_b.normalized_body {
824                // It's an update
825                changes.push(ASTChange {
826                    change_type: ChangeType::Update,
827                    node_kind: node_a.kind,
828                    name: Some(node_a.name.clone()),
829                    old_location: Some(Location::with_column(
830                        file_a.display().to_string(),
831                        node_a.line,
832                        node_a.column,
833                    )),
834                    new_location: Some(Location::with_column(
835                        file_b.display().to_string(),
836                        node_b.line,
837                        node_b.column,
838                    )),
839                    old_text: Some(node_a.body.clone()),
840                    new_text: Some(node_b.body.clone()),
841                    similarity: Some(compute_similarity(
842                        &node_a.normalized_body,
843                        &node_b.normalized_body,
844                    )),
845                    children: None,
846                    base_changes: None,
847                });
848            } else if node_a.line != node_b.line && !semantic_only {
849                // Same content but moved - only report if not semantic_only
850                changes.push(ASTChange {
851                    change_type: ChangeType::Move,
852                    node_kind: node_a.kind,
853                    name: Some(node_a.name.clone()),
854                    old_location: Some(Location::with_column(
855                        file_a.display().to_string(),
856                        node_a.line,
857                        node_a.column,
858                    )),
859                    new_location: Some(Location::with_column(
860                        file_b.display().to_string(),
861                        node_b.line,
862                        node_b.column,
863                    )),
864                    old_text: None,
865                    new_text: None,
866                    similarity: Some(1.0),
867                    children: None,
868                    base_changes: None,
869                });
870            }
871        }
872    }
873
874    // Collect unmatched nodes
875    let unmatched_a: Vec<(usize, &ExtractedNode)> = nodes_a
876        .iter()
877        .enumerate()
878        .filter(|(i, _)| !matched_a[*i])
879        .collect();
880    let unmatched_b: Vec<(usize, &ExtractedNode)> = nodes_b
881        .iter()
882        .enumerate()
883        .filter(|(i, _)| !matched_b[*i])
884        .collect();
885
886    // Second pass: detect renames (same body, different name)
887    let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
888
889    for (_, node_a) in &unmatched_a {
890        let mut best_match: Option<(usize, f64)> = None;
891
892        for (j, (_, node_b)) in unmatched_b.iter().enumerate() {
893            if used_b[j] {
894                continue;
895            }
896            if node_a.kind != node_b.kind {
897                continue;
898            }
899
900            let similarity = compute_similarity(&node_a.normalized_body, &node_b.normalized_body);
901            if similarity >= RENAME_SIMILARITY_THRESHOLD
902                && (best_match.is_none() || similarity > best_match.unwrap().1)
903            {
904                best_match = Some((j, similarity));
905            }
906        }
907
908        if let Some((j, similarity)) = best_match {
909            let (_, node_b) = unmatched_b[j];
910            used_b[j] = true;
911
912            // Mark as renamed
913            changes.push(ASTChange {
914                change_type: ChangeType::Rename,
915                node_kind: node_a.kind,
916                name: Some(node_a.name.clone()),
917                old_location: Some(Location::with_column(
918                    file_a.display().to_string(),
919                    node_a.line,
920                    node_a.column,
921                )),
922                new_location: Some(Location::with_column(
923                    file_b.display().to_string(),
924                    node_b.line,
925                    node_b.column,
926                )),
927                old_text: Some(node_a.name.clone()),
928                new_text: Some(node_b.name.clone()),
929                similarity: Some(similarity),
930                children: None,
931                base_changes: None,
932            });
933        }
934    }
935
936    // Remaining unmatched in A are deletes
937    for (_, node_a) in &unmatched_a {
938        // Check if already matched as rename
939        let is_renamed = changes
940            .iter()
941            .any(|c| c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&node_a.name));
942        if !is_renamed {
943            changes.push(ASTChange {
944                change_type: ChangeType::Delete,
945                node_kind: node_a.kind,
946                name: Some(node_a.name.clone()),
947                old_location: Some(Location::with_column(
948                    file_a.display().to_string(),
949                    node_a.line,
950                    node_a.column,
951                )),
952                new_location: None,
953                old_text: None,
954                new_text: None,
955                similarity: None,
956                children: None,
957                base_changes: None,
958            });
959        }
960    }
961
962    // Remaining unmatched in B are inserts
963    for (j, (_, node_b)) in unmatched_b.iter().enumerate() {
964        if !used_b[j] {
965            changes.push(ASTChange {
966                change_type: ChangeType::Insert,
967                node_kind: node_b.kind,
968                name: Some(node_b.name.clone()),
969                old_location: None,
970                new_location: Some(Location::with_column(
971                    file_b.display().to_string(),
972                    node_b.line,
973                    node_b.column,
974                )),
975                old_text: None,
976                new_text: None,
977                similarity: None,
978                children: None,
979                base_changes: None,
980            });
981        }
982    }
983
984    // Sort changes: deletes, renames, updates, inserts
985    changes.sort_by_key(|c| match c.change_type {
986        ChangeType::Delete => 0,
987        ChangeType::Rename => 1,
988        ChangeType::Update => 2,
989        ChangeType::Move => 3,
990        ChangeType::Insert => 4,
991        _ => 5,
992    });
993
994    changes
995}
996
997// =============================================================================
998// Similarity Computation
999// =============================================================================
1000
1001/// Compute similarity between two strings using Jaccard on lines,
1002/// with a character-level fallback for short/single-line bodies.
1003fn compute_similarity(a: &str, b: &str) -> f64 {
1004    if a == b {
1005        return 1.0;
1006    }
1007    if a.is_empty() || b.is_empty() {
1008        return 0.0;
1009    }
1010
1011    // Jaccard similarity on lines
1012    let lines_a: std::collections::HashSet<&str> = a.lines().collect();
1013    let lines_b: std::collections::HashSet<&str> = b.lines().collect();
1014
1015    let intersection = lines_a.intersection(&lines_b).count();
1016    let union = lines_a.union(&lines_b).count();
1017
1018    let line_sim = if union == 0 {
1019        0.0
1020    } else {
1021        intersection as f64 / union as f64
1022    };
1023
1024    // For short bodies (few lines), also compute character-level similarity
1025    // to avoid 0.0 when a single line was slightly modified
1026    if line_sim == 0.0 && lines_a.len() <= 2 && lines_b.len() <= 2 {
1027        return char_jaccard_similarity(a, b);
1028    }
1029
1030    line_sim
1031}
1032
/// Jaccard similarity over bigrams of the UTF-8 bytes of `a` and `b`.
///
/// Inputs shorter than two bytes have no bigram; they score `1.0` when the
/// strings are equal and `0.0` otherwise. For longer inputs the score is the
/// number of distinct byte pairs common to both strings divided by the number
/// of distinct byte pairs occurring in either.
fn char_jaccard_similarity(a: &str, b: &str) -> f64 {
    let (bytes_a, bytes_b) = (a.as_bytes(), b.as_bytes());
    if bytes_a.len() < 2 || bytes_b.len() < 2 {
        return if a == b { 1.0 } else { 0.0 };
    }

    let grams_a: std::collections::HashSet<&[u8]> = bytes_a.windows(2).collect();
    let grams_b: std::collections::HashSet<&[u8]> = bytes_b.windows(2).collect();

    // |A ∩ B| and |A ∪ B| = |A| + |B| - |A ∩ B|
    let shared = grams_a
        .iter()
        .filter(|gram| grams_b.contains(*gram))
        .count();
    let total = grams_a.len() + grams_b.len() - shared;

    match total {
        0 => 0.0,
        _ => shared as f64 / total as f64,
    }
}
1051
1052// =============================================================================
1053// Text Formatting
1054// =============================================================================
1055
1056/// Format diff report as human-readable text
1057fn format_diff_text(report: &DiffReport) -> String {
1058    let mut out = String::new();
1059
1060    out.push_str("Diff Report\n");
1061    out.push_str("===========\n\n");
1062    out.push_str(&format!("File A: {}\n", report.file_a));
1063    out.push_str(&format!("File B: {}\n", report.file_b));
1064    out.push_str(&format!("Identical: {}\n\n", report.identical));
1065
1066    if report.identical {
1067        out.push_str("No structural changes detected.\n");
1068        return out;
1069    }
1070
1071    out.push_str("Changes:\n");
1072    out.push_str("--------\n");
1073
1074    for change in &report.changes {
1075        let change_type = match change.change_type {
1076            ChangeType::Insert => "+",
1077            ChangeType::Delete => "-",
1078            ChangeType::Update => "~",
1079            ChangeType::Move => ">",
1080            ChangeType::Rename => "R",
1081            ChangeType::Format => "F",
1082            ChangeType::Extract => "E",
1083            ChangeType::Inline => "I",
1084        };
1085
1086        let kind = match change.node_kind {
1087            NodeKind::Function => "function",
1088            NodeKind::Class => "class",
1089            NodeKind::Method => "method",
1090            NodeKind::Field => "field",
1091            NodeKind::Statement => "statement",
1092            NodeKind::Expression => "expression",
1093            NodeKind::Block => "block",
1094        };
1095
1096        let name = change.name.as_deref().unwrap_or("<unknown>");
1097
1098        match change.change_type {
1099            ChangeType::Insert => {
1100                if let Some(ref loc) = change.new_location {
1101                    out.push_str(&format!(
1102                        "  {} {} {} at {}:{}\n",
1103                        change_type, kind, name, loc.file, loc.line
1104                    ));
1105                }
1106            }
1107            ChangeType::Delete => {
1108                if let Some(ref loc) = change.old_location {
1109                    out.push_str(&format!(
1110                        "  {} {} {} at {}:{}\n",
1111                        change_type, kind, name, loc.file, loc.line
1112                    ));
1113                }
1114            }
1115            ChangeType::Update | ChangeType::Move => {
1116                if let (Some(ref old), Some(ref new)) = (&change.old_location, &change.new_location)
1117                {
1118                    out.push_str(&format!(
1119                        "  {} {} {} from {}:{} to {}:{}\n",
1120                        change_type, kind, name, old.file, old.line, new.file, new.line
1121                    ));
1122                }
1123            }
1124            ChangeType::Rename => {
1125                let old_name = change.old_text.as_deref().unwrap_or(name);
1126                let new_name = change.new_text.as_deref().unwrap_or(name);
1127                out.push_str(&format!(
1128                    "  {} {} {} -> {}\n",
1129                    change_type, kind, old_name, new_name
1130                ));
1131            }
1132            _ => {
1133                out.push_str(&format!("  {} {} {}\n", change_type, kind, name));
1134            }
1135        }
1136    }
1137
1138    if let Some(ref summary) = report.summary {
1139        out.push_str("\nSummary:\n");
1140        out.push_str("--------\n");
1141        out.push_str(&format!("  Total changes: {}\n", summary.total_changes));
1142        out.push_str(&format!(
1143            "  Semantic changes: {}\n",
1144            summary.semantic_changes
1145        ));
1146        out.push_str(&format!("  Inserts: {}\n", summary.inserts));
1147        out.push_str(&format!("  Deletes: {}\n", summary.deletes));
1148        out.push_str(&format!("  Updates: {}\n", summary.updates));
1149        out.push_str(&format!("  Renames: {}\n", summary.renames));
1150        out.push_str(&format!("  Moves: {}\n", summary.moves));
1151    }
1152
1153    // L6: File-level structural changes
1154    if let Some(ref file_changes) = report.file_changes {
1155        out.push_str("\nFile-Level Changes:\n");
1156        out.push_str("-------------------\n");
1157        for fc in file_changes {
1158            let change_type = match fc.change_type {
1159                ChangeType::Insert => "+",
1160                ChangeType::Delete => "-",
1161                ChangeType::Update => "~",
1162                _ => "?",
1163            };
1164            out.push_str(&format!("  {} {}\n", change_type, fc.relative_path));
1165            if let Some(ref sigs) = fc.signature_changes {
1166                for sig in sigs {
1167                    out.push_str(&format!("      changed: {}\n", sig));
1168                }
1169            }
1170        }
1171    }
1172
1173    // L7: Module-level changes
1174    if let Some(ref module_changes) = report.module_changes {
1175        out.push_str("\nModule-Level Changes:\n");
1176        out.push_str("---------------------\n");
1177        for mc in module_changes {
1178            let change_type = match mc.change_type {
1179                ChangeType::Insert => "+",
1180                ChangeType::Delete => "-",
1181                ChangeType::Update => "~",
1182                _ => "?",
1183            };
1184            out.push_str(&format!("  {} {}\n", change_type, mc.module_path));
1185            for edge in &mc.imports_added {
1186                let names = if edge.imported_names.is_empty() {
1187                    String::new()
1188                } else {
1189                    format!(" ({})", edge.imported_names.join(", "))
1190                };
1191                out.push_str(&format!("      + import {}{}\n", edge.target_module, names));
1192            }
1193            for edge in &mc.imports_removed {
1194                let names = if edge.imported_names.is_empty() {
1195                    String::new()
1196                } else {
1197                    format!(" ({})", edge.imported_names.join(", "))
1198                };
1199                out.push_str(&format!("      - import {}{}\n", edge.target_module, names));
1200            }
1201        }
1202    }
1203
1204    // L7: Import graph summary
1205    if let Some(ref igs) = report.import_graph_summary {
1206        out.push_str("\nImport Graph Summary:\n");
1207        out.push_str("---------------------\n");
1208        out.push_str(&format!("  Edges in A: {}\n", igs.total_edges_a));
1209        out.push_str(&format!("  Edges in B: {}\n", igs.total_edges_b));
1210        out.push_str(&format!("  Edges added: {}\n", igs.edges_added));
1211        out.push_str(&format!("  Edges removed: {}\n", igs.edges_removed));
1212        out.push_str(&format!(
1213            "  Modules with import changes: {}\n",
1214            igs.modules_with_import_changes
1215        ));
1216    }
1217
1218    // L8: Architecture-level changes
1219    if let Some(ref arch_changes) = report.arch_changes {
1220        out.push_str("\nArchitecture-Level Changes:\n");
1221        out.push_str("---------------------------\n");
1222        for ac in arch_changes {
1223            let change_label = match ac.change_type {
1224                ArchChangeType::LayerMigration => "migration",
1225                ArchChangeType::Added => "added",
1226                ArchChangeType::Removed => "removed",
1227                ArchChangeType::CompositionChanged => "composition changed",
1228                ArchChangeType::CycleIntroduced => "cycle introduced",
1229                ArchChangeType::CycleResolved => "cycle resolved",
1230            };
1231            out.push_str(&format!("  [{}] {}\n", change_label, ac.directory));
1232            if let (Some(ref old), Some(ref new)) = (&ac.old_layer, &ac.new_layer) {
1233                out.push_str(&format!("      {} -> {}\n", old, new));
1234            } else if let Some(ref new) = ac.new_layer {
1235                out.push_str(&format!("      -> {}\n", new));
1236            } else if let Some(ref old) = ac.old_layer {
1237                out.push_str(&format!("      {} ->\n", old));
1238            }
1239            if !ac.migrated_functions.is_empty() {
1240                out.push_str(&format!(
1241                    "      migrated: {}\n",
1242                    ac.migrated_functions.join(", ")
1243                ));
1244            }
1245        }
1246    }
1247
1248    // L8: Architecture diff summary
1249    if let Some(ref arch_summary) = report.arch_summary {
1250        out.push_str("\nArchitecture Summary:\n");
1251        out.push_str("---------------------\n");
1252        out.push_str(&format!(
1253            "  Layer migrations: {}\n",
1254            arch_summary.layer_migrations
1255        ));
1256        out.push_str(&format!(
1257            "  Directories added: {}\n",
1258            arch_summary.directories_added
1259        ));
1260        out.push_str(&format!(
1261            "  Directories removed: {}\n",
1262            arch_summary.directories_removed
1263        ));
1264        out.push_str(&format!(
1265            "  Cycles introduced: {}\n",
1266            arch_summary.cycles_introduced
1267        ));
1268        out.push_str(&format!(
1269            "  Cycles resolved: {}\n",
1270            arch_summary.cycles_resolved
1271        ));
1272        out.push_str(&format!(
1273            "  Stability score: {}\n",
1274            arch_summary.stability_score
1275        ));
1276    }
1277
1278    out
1279}
1280
1281// =============================================================================
1282// Statement-Level Diff (L3) - Zhang-Shasha Tree Edit Distance
1283// =============================================================================
1284
1285/// Statement node kinds per language for tree extraction.
1286fn get_statement_node_kinds(lang: Language) -> &'static [&'static str] {
1287    match lang {
1288        Language::Python => &[
1289            "return_statement",
1290            "if_statement",
1291            "for_statement",
1292            "while_statement",
1293            "expression_statement",
1294            "assert_statement",
1295            "raise_statement",
1296            "try_statement",
1297            "with_statement",
1298            "assignment",
1299            "augmented_assignment",
1300            "delete_statement",
1301            "pass_statement",
1302            "break_statement",
1303            "continue_statement",
1304        ],
1305        Language::TypeScript | Language::JavaScript => &[
1306            "return_statement",
1307            "if_statement",
1308            "for_statement",
1309            "for_in_statement",
1310            "while_statement",
1311            "do_statement",
1312            "expression_statement",
1313            "variable_declaration",
1314            "lexical_declaration",
1315            "throw_statement",
1316            "try_statement",
1317            "switch_statement",
1318            "break_statement",
1319            "continue_statement",
1320        ],
1321        Language::Go => &[
1322            "return_statement",
1323            "if_statement",
1324            "for_statement",
1325            "expression_statement",
1326            "short_var_declaration",
1327            "var_declaration",
1328            "assignment_statement",
1329            "go_statement",
1330            "defer_statement",
1331            "select_statement",
1332            "switch_statement",
1333        ],
1334        Language::Rust => &[
1335            "let_declaration",
1336            "expression_statement",
1337            "return_expression",
1338            "if_expression",
1339            "for_expression",
1340            "while_expression",
1341            "loop_expression",
1342            "match_expression",
1343        ],
1344        Language::Java => &[
1345            "return_statement",
1346            "if_statement",
1347            "for_statement",
1348            "enhanced_for_statement",
1349            "while_statement",
1350            "do_statement",
1351            "expression_statement",
1352            "local_variable_declaration",
1353            "throw_statement",
1354            "try_statement",
1355            "switch_expression",
1356        ],
1357        Language::C | Language::Cpp => &[
1358            "return_statement",
1359            "if_statement",
1360            "for_statement",
1361            "while_statement",
1362            "do_statement",
1363            "expression_statement",
1364            "declaration",
1365            "switch_statement",
1366        ],
1367        Language::Ruby => &[
1368            "return",
1369            "if",
1370            "unless",
1371            "for",
1372            "while",
1373            "until",
1374            "assignment",
1375            "call",
1376            "begin",
1377        ],
1378        Language::Php => &[
1379            "return_statement",
1380            "if_statement",
1381            "for_statement",
1382            "foreach_statement",
1383            "while_statement",
1384            "expression_statement",
1385            "echo_statement",
1386            "throw_expression",
1387            "try_statement",
1388        ],
1389        Language::CSharp => &[
1390            "return_statement",
1391            "if_statement",
1392            "for_statement",
1393            "foreach_statement",
1394            "while_statement",
1395            "expression_statement",
1396            "local_declaration_statement",
1397            "throw_statement",
1398            "try_statement",
1399        ],
1400        Language::Kotlin => &[
1401            "property_declaration",
1402            "assignment",
1403            "if_expression",
1404            "for_statement",
1405            "while_statement",
1406            "do_while_statement",
1407            "return_expression",
1408            "throw_expression",
1409            "try_expression",
1410        ],
1411        Language::Scala => &[
1412            "val_definition",
1413            "var_definition",
1414            "if_expression",
1415            "for_expression",
1416            "while_expression",
1417            "return_expression",
1418            "throw_expression",
1419            "try_expression",
1420            "call_expression",
1421        ],
1422        Language::Swift => &[
1423            "value_binding_pattern",
1424            "if_statement",
1425            "for_in_statement",
1426            "while_statement",
1427            "return_statement",
1428            "throw_statement",
1429            "guard_statement",
1430            "switch_statement",
1431        ],
1432        Language::Elixir => &["call", "if", "case", "cond"],
1433        Language::Lua | Language::Luau => &[
1434            "return_statement",
1435            "if_statement",
1436            "for_statement",
1437            "while_statement",
1438            "variable_declaration",
1439            "assignment_statement",
1440            "function_call",
1441        ],
1442        Language::Ocaml => &[
1443            "let_binding",
1444            "if_expression",
1445            "match_expression",
1446            "application",
1447        ],
1448    }
1449}
1450
/// One node of the ordered, labeled tree that serves as input to the
/// Zhang-Shasha tree edit distance computation.
#[derive(Debug, Clone)]
struct LabeledTreeNode {
    /// Text identifying the node, in the form "node_kind:significant_text"
    label: String,
    /// Child nodes; their order is significant
    children: Vec<LabeledTreeNode>,
    /// 1-indexed source line, used to map the node back to a location
    line: u32,
}
1461
/// A tree node after flattening into postorder, as consumed by Zhang-Shasha.
#[derive(Debug, Clone)]
struct PostorderNode {
    /// Label copied from the tree node this entry was flattened from
    label: String,
    /// Source line copied from the tree node this entry was flattened from
    line: u32,
    /// Postorder-array index of this node's leftmost leaf descendant
    leftmost_leaf: usize,
}
1470
/// A single edit operation produced by the Zhang-Shasha computation.
///
/// Every index refers to a position in the postorder array of the named tree.
#[derive(Debug, Clone)]
enum EditOp {
    /// The node at `index_a` of tree A is deleted
    Delete { index_a: usize },
    /// The node at `index_b` of tree B is inserted
    Insert { index_b: usize },
    /// The node at `index_a` of tree A is relabeled (updated) to the node at
    /// `index_b` of tree B
    Relabel { index_a: usize, index_b: usize },
}
1481
1482/// Build a labeled tree from a tree-sitter function body node.
1483///
1484/// Walks the AST and picks out statement-level nodes, building an ordered
1485/// tree where each statement is a node and nested statements (e.g., inside
1486/// if-bodies) become children.
1487fn build_labeled_tree(node: Node, source: &[u8], statement_kinds: &[&str]) -> LabeledTreeNode {
1488    let label = build_node_label(node, source);
1489    let line = node.start_position().row as u32 + 1;
1490
1491    let mut children = Vec::new();
1492    let mut cursor = node.walk();
1493    for child in node.children(&mut cursor) {
1494        if statement_kinds.contains(&child.kind()) {
1495            // This child is a statement node - add it and recurse into its body
1496            children.push(build_labeled_tree(child, source, statement_kinds));
1497        } else {
1498            // Not a statement node - look deeper for nested statements
1499            let nested = collect_nested_statements(child, source, statement_kinds);
1500            children.extend(nested);
1501        }
1502    }
1503
1504    LabeledTreeNode {
1505        label,
1506        children,
1507        line,
1508    }
1509}
1510
1511/// Collect statement nodes from non-statement intermediate nodes.
1512fn collect_nested_statements(
1513    node: Node,
1514    source: &[u8],
1515    statement_kinds: &[&str],
1516) -> Vec<LabeledTreeNode> {
1517    let mut result = Vec::new();
1518    let mut cursor = node.walk();
1519    for child in node.children(&mut cursor) {
1520        if statement_kinds.contains(&child.kind()) {
1521            result.push(build_labeled_tree(child, source, statement_kinds));
1522        } else {
1523            result.extend(collect_nested_statements(child, source, statement_kinds));
1524        }
1525    }
1526    result
1527}
1528
1529/// Build a label string for a tree-sitter node.
1530///
1531/// Format: "node_kind:significant_tokens" where significant tokens
1532/// are identifiers and operators (not whitespace or delimiters).
1533fn build_node_label(node: Node, source: &[u8]) -> String {
1534    let kind = node.kind();
1535    let text = node.utf8_text(source).unwrap_or("");
1536
1537    // Extract significant tokens: identifiers, operators, literals
1538    // We take just the first line for conciseness and strip whitespace
1539    let first_line = text.lines().next().unwrap_or("").trim();
1540
1541    // Truncate to avoid huge labels
1542    let significant = if first_line.len() > 120 {
1543        &first_line[..120]
1544    } else {
1545        first_line
1546    };
1547
1548    format!("{}:{}", kind, significant)
1549}
1550
1551/// Extract statement-level subtree from a function body node.
1552///
1553/// Finds the function body (block node) and builds a labeled tree
1554/// from the statements within it.
1555fn extract_statement_tree(
1556    func_node: Node,
1557    source: &[u8],
1558    lang: Language,
1559    statement_kinds: &[&str],
1560) -> LabeledTreeNode {
1561    // Find the function body node
1562    let body_node = find_function_body(func_node, lang);
1563
1564    match body_node {
1565        Some(body) => {
1566            // Build a root node representing the function body
1567            let mut children = Vec::new();
1568            let mut cursor = body.walk();
1569            for child in body.children(&mut cursor) {
1570                if statement_kinds.contains(&child.kind()) {
1571                    children.push(build_labeled_tree(child, source, statement_kinds));
1572                } else {
1573                    children.extend(collect_nested_statements(child, source, statement_kinds));
1574                }
1575            }
1576
1577            LabeledTreeNode {
1578                label: format!("body:{}", func_node.kind()),
1579                children,
1580                line: body.start_position().row as u32 + 1,
1581            }
1582        }
1583        None => {
1584            // Fallback: treat the entire function node as the body
1585            build_labeled_tree(func_node, source, statement_kinds)
1586        }
1587    }
1588}
1589
1590/// Find the body/block node within a function definition.
1591fn find_function_body(func_node: Node, lang: Language) -> Option<Node> {
1592    // Try common field names
1593    if let Some(body) = func_node.child_by_field_name("body") {
1594        return Some(body);
1595    }
1596    if let Some(body) = func_node.child_by_field_name("block") {
1597        return Some(body);
1598    }
1599
1600    // Language-specific body detection
1601    let body_kinds = match lang {
1602        Language::Python => &["block"][..],
1603        Language::TypeScript | Language::JavaScript => &["statement_block"],
1604        Language::Go => &["block"],
1605        Language::Rust => &["block"],
1606        Language::Java => &["block"],
1607        Language::C | Language::Cpp => &["compound_statement"],
1608        Language::Ruby => &["body_statement"],
1609        Language::Php => &["compound_statement"],
1610        Language::CSharp => &["block"],
1611        Language::Kotlin => &["function_body"],
1612        Language::Scala => &["block", "indented_block"],
1613        Language::Swift => &["function_body"],
1614        Language::Elixir => &["do_block"],
1615        Language::Lua | Language::Luau => &["block"],
1616        Language::Ocaml => &["let_binding"],
1617    };
1618
1619    let mut cursor = func_node.walk();
1620    let found = func_node
1621        .children(&mut cursor)
1622        .find(|&child| body_kinds.contains(&child.kind()));
1623    found
1624}
1625
1626/// Count total nodes in a labeled tree.
1627fn count_tree_nodes(tree: &LabeledTreeNode) -> usize {
1628    1 + tree
1629        .children
1630        .iter()
1631        .map(count_tree_nodes)
1632        .sum::<usize>()
1633}
1634
1635// =============================================================================
1636// Zhang-Shasha Tree Edit Distance
1637// =============================================================================
1638
1639/// Flatten a labeled tree into postorder traversal, computing leftmost leaf descendants.
1640fn flatten_postorder(tree: &LabeledTreeNode) -> Vec<PostorderNode> {
1641    let mut nodes = Vec::new();
1642    flatten_postorder_recursive(tree, &mut nodes);
1643    nodes
1644}
1645
1646fn flatten_postorder_recursive(tree: &LabeledTreeNode, nodes: &mut Vec<PostorderNode>) -> usize {
1647    if tree.children.is_empty() {
1648        // Leaf node: leftmost leaf is itself
1649        let idx = nodes.len();
1650        nodes.push(PostorderNode {
1651            label: tree.label.clone(),
1652            line: tree.line,
1653            leftmost_leaf: idx,
1654        });
1655        return idx;
1656    }
1657
1658    // Process children first (postorder)
1659    let mut first_child_leftmost = usize::MAX;
1660    for (i, child) in tree.children.iter().enumerate() {
1661        let child_leftmost = flatten_postorder_recursive(child, nodes);
1662        if i == 0 {
1663            first_child_leftmost = child_leftmost;
1664        }
1665    }
1666
1667    // Now add this node
1668    nodes.push(PostorderNode {
1669        label: tree.label.clone(),
1670        line: tree.line,
1671        leftmost_leaf: first_child_leftmost,
1672    });
1673
1674    // The leftmost leaf of this node is the leftmost leaf of its first child
1675    first_child_leftmost
1676}
1677
1678/// Compute keyroots from a postorder traversal.
1679///
1680/// A keyroot is a node whose leftmost-leaf is different from its parent's
1681/// leftmost-leaf, OR the root node. In practice, we collect the rightmost
1682/// node at each unique leftmost-leaf value.
1683fn compute_keyroots(nodes: &[PostorderNode]) -> Vec<usize> {
1684    let n = nodes.len();
1685    if n == 0 {
1686        return Vec::new();
1687    }
1688
1689    // For each unique leftmost leaf value, keep the highest index (rightmost occurrence)
1690    let mut lr_map: HashMap<usize, usize> = HashMap::new();
1691    for (i, node) in nodes.iter().enumerate() {
1692        lr_map.insert(node.leftmost_leaf, i);
1693    }
1694
1695    let mut keyroots: Vec<usize> = lr_map.into_values().collect();
1696    keyroots.sort();
1697    keyroots
1698}
1699
1700/// Run the Zhang-Shasha tree edit distance algorithm.
1701///
1702/// Returns the edit operations (edit script).
1703///
1704/// Costs: Delete = 1, Insert = 1, Relabel = 0 (same label) or 1 (different label).
1705fn zhang_shasha(nodes_a: &[PostorderNode], nodes_b: &[PostorderNode]) -> Vec<EditOp> {
1706    let na = nodes_a.len();
1707    let nb = nodes_b.len();
1708
1709    if na == 0 && nb == 0 {
1710        return Vec::new();
1711    }
1712    if na == 0 {
1713        // All inserts
1714        return (0..nb).map(|j| EditOp::Insert { index_b: j }).collect();
1715    }
1716    if nb == 0 {
1717        // All deletes
1718        return (0..na).map(|i| EditOp::Delete { index_a: i }).collect();
1719    }
1720
1721    let keyroots_a = compute_keyroots(nodes_a);
1722    let keyroots_b = compute_keyroots(nodes_b);
1723
1724    // Tree distance matrix (1-indexed, 0 means empty tree)
1725    let mut td = vec![vec![0usize; nb + 1]; na + 1];
1726    // Track operations: 0=relabel/match, 1=delete, 2=insert, 3=tree-match
1727    let mut td_ops = vec![vec![0u8; nb + 1]; na + 1];
1728
1729    for &kr_a in &keyroots_a {
1730        for &kr_b in &keyroots_b {
1731            let la = nodes_a[kr_a].leftmost_leaf;
1732            let lb = nodes_b[kr_b].leftmost_leaf;
1733
1734            let rows = kr_a - la + 2;
1735            let cols = kr_b - lb + 2;
1736            let mut fd = vec![vec![0usize; cols]; rows];
1737
1738            // Base cases
1739            for i in 1..rows {
1740                fd[i][0] = fd[i - 1][0] + 1;
1741            }
1742            for j in 1..cols {
1743                fd[0][j] = fd[0][j - 1] + 1;
1744            }
1745
1746            for i in 1..rows {
1747                for j in 1..cols {
1748                    let idx_a = la + i - 1;
1749                    let idx_b = lb + j - 1;
1750
1751                    let cost_relabel = if nodes_a[idx_a].label == nodes_b[idx_b].label {
1752                        0
1753                    } else {
1754                        1
1755                    };
1756
1757                    if nodes_a[idx_a].leftmost_leaf == la && nodes_b[idx_b].leftmost_leaf == lb {
1758                        let delete = fd[i - 1][j] + 1;
1759                        let insert = fd[i][j - 1] + 1;
1760                        let relabel = fd[i - 1][j - 1] + cost_relabel;
1761
1762                        if relabel <= delete && relabel <= insert {
1763                            fd[i][j] = relabel;
1764                            td[idx_a + 1][idx_b + 1] = relabel;
1765                            td_ops[idx_a + 1][idx_b + 1] = if cost_relabel == 0 { 0 } else { 3 };
1766                        } else if delete <= insert {
1767                            fd[i][j] = delete;
1768                            td[idx_a + 1][idx_b + 1] = delete;
1769                            td_ops[idx_a + 1][idx_b + 1] = 1;
1770                        } else {
1771                            fd[i][j] = insert;
1772                            td[idx_a + 1][idx_b + 1] = insert;
1773                            td_ops[idx_a + 1][idx_b + 1] = 2;
1774                        }
1775                    } else {
1776                        let p = nodes_a[idx_a].leftmost_leaf - la;
1777                        let q = nodes_b[idx_b].leftmost_leaf - lb;
1778
1779                        let delete = fd[i - 1][j] + 1;
1780                        let insert = fd[i][j - 1] + 1;
1781                        let tree_match = fd[p][q] + td[idx_a + 1][idx_b + 1];
1782
1783                        if tree_match <= delete && tree_match <= insert {
1784                            fd[i][j] = tree_match;
1785                        } else if delete <= insert {
1786                            fd[i][j] = delete;
1787                        } else {
1788                            fd[i][j] = insert;
1789                        }
1790                    }
1791                }
1792            }
1793        }
1794    }
1795
1796    // Extract edit script using sequence alignment on postorder nodes
1797    // guided by the tree distance computation
1798    let mut ops = Vec::new();
1799    derive_edit_ops_dp(nodes_a, nodes_b, &mut ops);
1800    ops
1801}
1802
1803/// Derive edit operations using DP on the postorder sequences.
1804///
1805/// This produces the edit script by sequence-aligning the postorder
1806/// traversals, which captures the essential edit operations.
1807fn derive_edit_ops_dp(nodes_a: &[PostorderNode], nodes_b: &[PostorderNode], ops: &mut Vec<EditOp>) {
1808    let na = nodes_a.len();
1809    let nb = nodes_b.len();
1810
1811    let mut dp = vec![vec![0usize; nb + 1]; na + 1];
1812    let mut choice = vec![vec![0u8; nb + 1]; na + 1];
1813
1814    for i in 1..=na {
1815        dp[i][0] = i;
1816        choice[i][0] = 1;
1817    }
1818    for j in 1..=nb {
1819        dp[0][j] = j;
1820        choice[0][j] = 2;
1821    }
1822
1823    for i in 1..=na {
1824        for j in 1..=nb {
1825            let cost = if nodes_a[i - 1].label == nodes_b[j - 1].label {
1826                0
1827            } else {
1828                1
1829            };
1830
1831            let del = dp[i - 1][j] + 1;
1832            let ins = dp[i][j - 1] + 1;
1833            let sub = dp[i - 1][j - 1] + cost;
1834
1835            if sub <= del && sub <= ins {
1836                dp[i][j] = sub;
1837                choice[i][j] = if cost == 0 { 0 } else { 3 };
1838            } else if del <= ins {
1839                dp[i][j] = del;
1840                choice[i][j] = 1;
1841            } else {
1842                dp[i][j] = ins;
1843                choice[i][j] = 2;
1844            }
1845        }
1846    }
1847
1848    // Backtrack
1849    let mut i = na;
1850    let mut j = nb;
1851    let mut rev_ops = Vec::new();
1852
1853    while i > 0 || j > 0 {
1854        if i > 0 && j > 0 && (choice[i][j] == 0 || choice[i][j] == 3) {
1855            if choice[i][j] == 3 {
1856                rev_ops.push(EditOp::Relabel {
1857                    index_a: i - 1,
1858                    index_b: j - 1,
1859                });
1860            }
1861            i -= 1;
1862            j -= 1;
1863        } else if i > 0 && (j == 0 || choice[i][j] == 1) {
1864            rev_ops.push(EditOp::Delete { index_a: i - 1 });
1865            i -= 1;
1866        } else if j > 0 {
1867            rev_ops.push(EditOp::Insert { index_b: j - 1 });
1868            j -= 1;
1869        }
1870    }
1871
1872    rev_ops.reverse();
1873    ops.extend(rev_ops);
1874}
1875
1876/// Convert Zhang-Shasha edit operations into ASTChange records.
1877fn edit_ops_to_ast_changes(
1878    ops: &[EditOp],
1879    nodes_a: &[PostorderNode],
1880    nodes_b: &[PostorderNode],
1881    file_a: &Path,
1882    file_b: &Path,
1883) -> Vec<ASTChange> {
1884    let mut changes = Vec::new();
1885
1886    for op in ops {
1887        match op {
1888            EditOp::Delete { index_a } => {
1889                let node = &nodes_a[*index_a];
1890                let stmt_kind = node.label.split(':').next().unwrap_or("statement");
1891                changes.push(ASTChange {
1892                    change_type: ChangeType::Delete,
1893                    node_kind: NodeKind::Statement,
1894                    name: Some(stmt_kind.to_string()),
1895                    old_location: Some(Location::new(file_a.display().to_string(), node.line)),
1896                    new_location: None,
1897                    old_text: Some(node.label.clone()),
1898                    new_text: None,
1899                    similarity: None,
1900                    children: None,
1901                    base_changes: None,
1902                });
1903            }
1904            EditOp::Insert { index_b } => {
1905                let node = &nodes_b[*index_b];
1906                let stmt_kind = node.label.split(':').next().unwrap_or("statement");
1907                changes.push(ASTChange {
1908                    change_type: ChangeType::Insert,
1909                    node_kind: NodeKind::Statement,
1910                    name: Some(stmt_kind.to_string()),
1911                    old_location: None,
1912                    new_location: Some(Location::new(file_b.display().to_string(), node.line)),
1913                    old_text: None,
1914                    new_text: Some(node.label.clone()),
1915                    similarity: None,
1916                    children: None,
1917                    base_changes: None,
1918                });
1919            }
1920            EditOp::Relabel { index_a, index_b } => {
1921                let node_a = &nodes_a[*index_a];
1922                let node_b = &nodes_b[*index_b];
1923                let stmt_kind = node_a.label.split(':').next().unwrap_or("statement");
1924                changes.push(ASTChange {
1925                    change_type: ChangeType::Update,
1926                    node_kind: NodeKind::Statement,
1927                    name: Some(stmt_kind.to_string()),
1928                    old_location: Some(Location::new(file_a.display().to_string(), node_a.line)),
1929                    new_location: Some(Location::new(file_b.display().to_string(), node_b.line)),
1930                    old_text: Some(node_a.label.clone()),
1931                    new_text: Some(node_b.label.clone()),
1932                    similarity: None,
1933                    children: None,
1934                    base_changes: None,
1935                });
1936            }
1937        }
1938    }
1939
1940    changes
1941}
1942
1943/// Maximum labeled-tree node count (as reported by `count_tree_nodes`) allowed
/// on either side of a matched function pair. When either count exceeds this,
/// `run_statement_level_diff` skips the Zhang-Shasha pass and emits the
/// function-level `Update` without statement children (L4-style output).
1944const STATEMENT_FALLBACK_THRESHOLD: usize = 200;
1945
1946impl DiffArgs {
1947    /// L3 Statement-level diff: Zhang-Shasha tree edit distance within matched functions.
1948    ///
1949    /// Algorithm:
1950    /// 1. Parse both files and extract functions (reusing L4 infrastructure)
1951    /// 2. Match functions by name
1952    /// 3. For each matched pair with different bodies:
1953    ///    a. Extract statement subtrees from tree-sitter AST
1954    ///    b. Build labeled trees from statement nodes
1955    ///    c. Run Zhang-Shasha tree edit distance
1956    ///    d. Convert edit script to ASTChange children
1957    /// 4. For unmatched functions: report as function-level Insert/Delete
1958    fn run_statement_level_diff(&self) -> Result<DiffReport> {
1959        // Detect language
1960        let lang = Language::from_path(&self.file_a).ok_or_else(|| {
1961            let ext = self
1962                .file_a
1963                .extension()
1964                .map(|e| e.to_string_lossy().to_string())
1965                .unwrap_or_else(|| "unknown".to_string());
1966            RemainingError::parse_error(&self.file_a, format!("Unsupported language: .{}", ext))
1967        })?;
1968
1969        // Read file contents
1970        let source_a = fs::read_to_string(&self.file_a)?;
1971        let source_b = fs::read_to_string(&self.file_b)?;
1972
1973        // Parse both files
1974        let pool = ParserPool::new();
1975        let tree_a = pool.parse(&source_a, lang).map_err(|e| {
1976            RemainingError::parse_error(&self.file_a, format!("Failed to parse: {}", e))
1977        })?;
1978        let tree_b = pool.parse(&source_b, lang).map_err(|e| {
1979            RemainingError::parse_error(&self.file_b, format!("Failed to parse: {}", e))
1980        })?;
1981
1982        // Extract function nodes (reuse L4 infrastructure)
1983        let funcs_a = extract_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
1984        let funcs_b = extract_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
1985
1986        let statement_kinds = get_statement_node_kinds(lang);
1987
1988        // Build name lookup maps
1989        let map_b: HashMap<&str, (usize, &ExtractedNode)> = funcs_b
1990            .iter()
1991            .enumerate()
1992            .map(|(i, n)| (n.name.as_str(), (i, n)))
1993            .collect();
1994
1995        let mut matched_a: Vec<bool> = vec![false; funcs_a.len()];
1996        let mut matched_b: Vec<bool> = vec![false; funcs_b.len()];
1997        let mut changes = Vec::new();
1998
1999        // Pass 1: Match functions by name and compute statement-level diffs
2000        for (i, func_a) in funcs_a.iter().enumerate() {
2001            if let Some(&(j, func_b)) = map_b.get(func_a.name.as_str()) {
2002                matched_a[i] = true;
2003                matched_b[j] = true;
2004
2005                // Check if bodies differ
2006                if func_a.normalized_body != func_b.normalized_body {
2007                    // Find the function nodes in the parsed trees
2008                    let func_node_a =
2009                        find_function_node_by_line(tree_a.root_node(), func_a.line, lang);
2010                    let func_node_b =
2011                        find_function_node_by_line(tree_b.root_node(), func_b.line, lang);
2012
2013                    let stmt_children = match (func_node_a, func_node_b) {
2014                        (Some(node_a), Some(node_b)) => {
2015                            // Build statement trees
2016                            let tree_a_stmts = extract_statement_tree(
2017                                node_a,
2018                                source_a.as_bytes(),
2019                                lang,
2020                                statement_kinds,
2021                            );
2022                            let tree_b_stmts = extract_statement_tree(
2023                                node_b,
2024                                source_b.as_bytes(),
2025                                lang,
2026                                statement_kinds,
2027                            );
2028
2029                            let count_a = count_tree_nodes(&tree_a_stmts);
2030                            let count_b = count_tree_nodes(&tree_b_stmts);
2031
2032                            // Check fallback threshold
2033                            if count_a > STATEMENT_FALLBACK_THRESHOLD
2034                                || count_b > STATEMENT_FALLBACK_THRESHOLD
2035                            {
2036                                // Fall back to L4-style (no statement children)
2037                                None
2038                            } else {
2039                                // Flatten to postorder and run Zhang-Shasha
2040                                let po_a = flatten_postorder(&tree_a_stmts);
2041                                let po_b = flatten_postorder(&tree_b_stmts);
2042
2043                                let edit_ops = zhang_shasha(&po_a, &po_b);
2044
2045                                if edit_ops.is_empty() {
2046                                    None
2047                                } else {
2048                                    let stmt_changes = edit_ops_to_ast_changes(
2049                                        &edit_ops,
2050                                        &po_a,
2051                                        &po_b,
2052                                        &self.file_a,
2053                                        &self.file_b,
2054                                    );
2055                                    if stmt_changes.is_empty() {
2056                                        None
2057                                    } else {
2058                                        Some(stmt_changes)
2059                                    }
2060                                }
2061                            }
2062                        }
2063                        _ => None,
2064                    };
2065
2066                    changes.push(ASTChange {
2067                        change_type: ChangeType::Update,
2068                        node_kind: func_a.kind,
2069                        name: Some(func_a.name.clone()),
2070                        old_location: Some(Location::with_column(
2071                            self.file_a.display().to_string(),
2072                            func_a.line,
2073                            func_a.column,
2074                        )),
2075                        new_location: Some(Location::with_column(
2076                            self.file_b.display().to_string(),
2077                            func_b.line,
2078                            func_b.column,
2079                        )),
2080                        old_text: Some(func_a.body.clone()),
2081                        new_text: Some(func_b.body.clone()),
2082                        similarity: Some(compute_similarity(
2083                            &func_a.normalized_body,
2084                            &func_b.normalized_body,
2085                        )),
2086                        children: stmt_children,
2087                        base_changes: None,
2088                    });
2089                }
2090            }
2091        }
2092
2093        // Pass 2: Detect renames among unmatched functions
2094        let unmatched_a: Vec<(usize, &ExtractedNode)> = funcs_a
2095            .iter()
2096            .enumerate()
2097            .filter(|(i, _)| !matched_a[*i])
2098            .collect();
2099        let unmatched_b: Vec<(usize, &ExtractedNode)> = funcs_b
2100            .iter()
2101            .enumerate()
2102            .filter(|(i, _)| !matched_b[*i])
2103            .collect();
2104
2105        let mut used_b = vec![false; unmatched_b.len()];
2106
2107        for (_, func_a) in &unmatched_a {
2108            let mut best_match: Option<(usize, f64)> = None;
2109            for (j, (_, func_b)) in unmatched_b.iter().enumerate() {
2110                if used_b[j] || func_a.kind != func_b.kind {
2111                    continue;
2112                }
2113                let sim = compute_similarity(&func_a.normalized_body, &func_b.normalized_body);
2114                if sim >= RENAME_SIMILARITY_THRESHOLD
2115                    && (best_match.is_none() || sim > best_match.unwrap().1)
2116                {
2117                    best_match = Some((j, sim));
2118                }
2119            }
2120
2121            if let Some((j, sim)) = best_match {
2122                let (_, func_b) = unmatched_b[j];
2123                used_b[j] = true;
2124                changes.push(ASTChange {
2125                    change_type: ChangeType::Rename,
2126                    node_kind: func_a.kind,
2127                    name: Some(func_a.name.clone()),
2128                    old_location: Some(Location::with_column(
2129                        self.file_a.display().to_string(),
2130                        func_a.line,
2131                        func_a.column,
2132                    )),
2133                    new_location: Some(Location::with_column(
2134                        self.file_b.display().to_string(),
2135                        func_b.line,
2136                        func_b.column,
2137                    )),
2138                    old_text: Some(func_a.name.clone()),
2139                    new_text: Some(func_b.name.clone()),
2140                    similarity: Some(sim),
2141                    children: None,
2142                    base_changes: None,
2143                });
2144            }
2145        }
2146
2147        // Pass 3: Remaining unmatched in A are Deletes
2148        for (_, func_a) in &unmatched_a {
2149            let is_renamed = changes.iter().any(|c| {
2150                c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&func_a.name)
2151            });
2152            if !is_renamed {
2153                changes.push(ASTChange {
2154                    change_type: ChangeType::Delete,
2155                    node_kind: func_a.kind,
2156                    name: Some(func_a.name.clone()),
2157                    old_location: Some(Location::with_column(
2158                        self.file_a.display().to_string(),
2159                        func_a.line,
2160                        func_a.column,
2161                    )),
2162                    new_location: None,
2163                    old_text: None,
2164                    new_text: None,
2165                    similarity: None,
2166                    children: None,
2167                    base_changes: None,
2168                });
2169            }
2170        }
2171
2172        // Pass 4: Remaining unmatched in B are Inserts
2173        for (j, (_, func_b)) in unmatched_b.iter().enumerate() {
2174            if !used_b[j] {
2175                changes.push(ASTChange {
2176                    change_type: ChangeType::Insert,
2177                    node_kind: func_b.kind,
2178                    name: Some(func_b.name.clone()),
2179                    old_location: None,
2180                    new_location: Some(Location::with_column(
2181                        self.file_b.display().to_string(),
2182                        func_b.line,
2183                        func_b.column,
2184                    )),
2185                    old_text: None,
2186                    new_text: None,
2187                    similarity: None,
2188                    children: None,
2189                    base_changes: None,
2190                });
2191            }
2192        }
2193
2194        // Build summary
2195        let mut summary = DiffSummary::default();
2196        for change in &changes {
2197            summary.total_changes += 1;
2198            if change.change_type != ChangeType::Format {
2199                summary.semantic_changes += 1;
2200            }
2201            match change.change_type {
2202                ChangeType::Insert => summary.inserts += 1,
2203                ChangeType::Delete => summary.deletes += 1,
2204                ChangeType::Update => summary.updates += 1,
2205                ChangeType::Move => summary.moves += 1,
2206                ChangeType::Rename => summary.renames += 1,
2207                ChangeType::Format => summary.formats += 1,
2208                ChangeType::Extract => summary.extracts += 1,
2209                ChangeType::Inline => {}
2210            }
2211        }
2212
2213        // Sort changes
2214        changes.sort_by_key(|c| match c.change_type {
2215            ChangeType::Delete => 0,
2216            ChangeType::Rename => 1,
2217            ChangeType::Update => 2,
2218            ChangeType::Move => 3,
2219            ChangeType::Insert => 4,
2220            _ => 5,
2221        });
2222
2223        Ok(DiffReport {
2224            file_a: self.file_a.display().to_string(),
2225            file_b: self.file_b.display().to_string(),
2226            identical: changes.is_empty(),
2227            changes,
2228            summary: Some(summary),
2229            granularity: DiffGranularity::Statement,
2230            file_changes: None,
2231            module_changes: None,
2232            import_graph_summary: None,
2233            arch_changes: None,
2234            arch_summary: None,
2235        })
2236    }
2237}
2238
2239/// Find a function tree-sitter node by its start line number.
2240fn find_function_node_by_line(root: Node, target_line: u32, lang: Language) -> Option<Node> {
2241    let func_kinds = get_function_node_kinds(lang);
2242    find_function_node_recursive(root, target_line, func_kinds)
2243}
2244
2245fn find_function_node_recursive<'a>(
2246    node: Node<'a>,
2247    target_line: u32,
2248    func_kinds: &[&str],
2249) -> Option<Node<'a>> {
2250    let line = node.start_position().row as u32 + 1;
2251
2252    if func_kinds.contains(&node.kind()) && line == target_line {
2253        return Some(node);
2254    }
2255
2256    let mut cursor = node.walk();
2257    for child in node.children(&mut cursor) {
2258        if let Some(found) = find_function_node_recursive(child, target_line, func_kinds) {
2259            return Some(found);
2260        }
2261    }
2262
2263    None
2264}
2265
2266// =============================================================================
2267// Class-Level Diff (L5)
2268// =============================================================================
2269
2270/// Information about a class extracted from AST for class-level diffing.
///
/// Values of this type are produced by `extract_class_nodes` and handed to
/// `detect_class_changes` by `run_class_diff`.
2271#[derive(Debug, Clone)]
2272struct ClassNode {
2273    /// Class name
2274    name: String,
2275    /// Line number (1-indexed) on which the class starts
2276    line: u32,
2277    /// End line number (1-indexed)
2278    end_line: u32,
2279    /// Start column (NOTE(review): indexing base is set by the extractor,
    /// which is not visible here - confirm before relying on it)
2280    column: u32,
2281    /// Full source text of the class
2282    body: String,
2283    /// Normalized body for comparison
2284    normalized_body: String,
2285    /// Methods within this class
2286    methods: Vec<ExtractedNode>,
2287    /// Class-level fields (assignments in class body)
2288    fields: Vec<FieldNode>,
2289    /// Base classes, by name
2290    bases: Vec<String>,
2291}
2292
2293/// A class-level field (class variable assignment).
///
/// Stored in `ClassNode::fields`.
2294#[derive(Debug, Clone)]
2295struct FieldNode {
2296    /// Field name
2297    name: String,
2298    /// Line number of the assignment (NOTE(review): presumably 1-indexed
    /// like `ClassNode::line` - confirm in the extractor)
2299    line: u32,
2300    /// Start column of the assignment
2301    column: u32,
2302    /// Full text of the assignment
2303    body: String,
2304    /// Normalized body, used for comparison
2305    normalized_body: String,
2306}
2307
2308/// Run a class-level diff between two files.
2309///
2310/// This is the L5 diff algorithm. It extracts classes from both files,
2311/// matches them by name, and then diffs their members (methods, fields, bases).
2312pub fn run_class_diff(
2313    file_a: &Path,
2314    file_b: &Path,
2315    semantic_only: bool,
2316) -> Result<DiffReport> {
2317    // Validate files exist
2318    if !file_a.exists() {
2319        return Err(RemainingError::file_not_found(file_a).into());
2320    }
2321    if !file_b.exists() {
2322        return Err(RemainingError::file_not_found(file_b).into());
2323    }
2324
2325    // Detect language from file_a extension
2326    let lang = Language::from_path(file_a).ok_or_else(|| {
2327        let ext = file_a
2328            .extension()
2329            .map(|e| e.to_string_lossy().to_string())
2330            .unwrap_or_else(|| "unknown".to_string());
2331        RemainingError::parse_error(file_a, format!("Unsupported language: .{}", ext))
2332    })?;
2333
2334    // Read file contents
2335    let source_a = fs::read_to_string(file_a)?;
2336    let source_b = fs::read_to_string(file_b)?;
2337
2338    // Parse both files
2339    let pool = ParserPool::new();
2340    let tree_a = pool
2341        .parse(&source_a, lang)
2342        .map_err(|e| RemainingError::parse_error(file_a, format!("Failed to parse file: {}", e)))?;
2343    let tree_b = pool
2344        .parse(&source_b, lang)
2345        .map_err(|e| RemainingError::parse_error(file_b, format!("Failed to parse file: {}", e)))?;
2346
2347    // Extract class information from both files
2348    let classes_a = extract_class_nodes(tree_a.root_node(), source_a.as_bytes(), lang);
2349    let classes_b = extract_class_nodes(tree_b.root_node(), source_b.as_bytes(), lang);
2350
2351    // Detect class-level changes
2352    let changes = detect_class_changes(&classes_a, &classes_b, file_a, file_b, semantic_only);
2353
2354    // Build summary
2355    let mut summary = DiffSummary::default();
2356    for change in &changes {
2357        summary.total_changes += 1;
2358        if change.change_type != ChangeType::Format {
2359            summary.semantic_changes += 1;
2360        }
2361        match change.change_type {
2362            ChangeType::Insert => summary.inserts += 1,
2363            ChangeType::Delete => summary.deletes += 1,
2364            ChangeType::Update => summary.updates += 1,
2365            ChangeType::Move => summary.moves += 1,
2366            ChangeType::Rename => summary.renames += 1,
2367            ChangeType::Format => summary.formats += 1,
2368            ChangeType::Extract => summary.extracts += 1,
2369            ChangeType::Inline => {}
2370        }
2371    }
2372
2373    let report = DiffReport {
2374        file_a: file_a.display().to_string(),
2375        file_b: file_b.display().to_string(),
2376        identical: changes.is_empty(),
2377        changes,
2378        summary: Some(summary),
2379        granularity: DiffGranularity::Class,
2380        file_changes: None,
2381        module_changes: None,
2382        import_graph_summary: None,
2383        arch_changes: None,
2384        arch_summary: None,
2385    };
2386
2387    Ok(report)
2388}
2389
2390/// Run class-level diff across two directories, pairing files by relative path.
2391/// Skips files with unsupported language extensions.
2392fn run_class_diff_directory(dir_a: &Path, dir_b: &Path, semantic_only: bool) -> Result<DiffReport> {
2393    let files_a = collect_source_files(dir_a)?;
2394    let files_b = collect_source_files(dir_b)?;
2395
2396    let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
2397    let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
2398
2399    let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
2400
2401    let mut all_changes = Vec::new();
2402
2403    for rel_path in all_paths {
2404        match (map_a.get(rel_path), map_b.get(rel_path)) {
2405            (Some(path_a), Some(path_b)) => {
2406                // File exists in both -- run class diff, skip on language error
2407                match run_class_diff(path_a, path_b, semantic_only) {
2408                    Ok(sub_report) => all_changes.extend(sub_report.changes),
2409                    Err(_) => continue, // unsupported language, skip
2410                }
2411            }
2412            (None, Some(_)) | (Some(_), None) => {
2413                // Added or removed file -- skip at class level (L6 handles file-level adds/removes)
2414                continue;
2415            }
2416            (None, None) => unreachable!(),
2417        }
2418    }
2419
2420    let mut summary = DiffSummary::default();
2421    for change in &all_changes {
2422        summary.total_changes += 1;
2423        if change.change_type != ChangeType::Format {
2424            summary.semantic_changes += 1;
2425        }
2426        match change.change_type {
2427            ChangeType::Insert => summary.inserts += 1,
2428            ChangeType::Delete => summary.deletes += 1,
2429            ChangeType::Update => summary.updates += 1,
2430            ChangeType::Move => summary.moves += 1,
2431            ChangeType::Rename => summary.renames += 1,
2432            ChangeType::Format => summary.formats += 1,
2433            ChangeType::Extract => summary.extracts += 1,
2434            ChangeType::Inline => {}
2435        }
2436    }
2437
2438    Ok(DiffReport {
2439        file_a: dir_a.display().to_string(),
2440        file_b: dir_b.display().to_string(),
2441        identical: all_changes.is_empty(),
2442        changes: all_changes,
2443        summary: Some(summary),
2444        granularity: DiffGranularity::Class,
2445        file_changes: None,
2446        module_changes: None,
2447        import_graph_summary: None,
2448        arch_changes: None,
2449        arch_summary: None,
2450    })
2451}
2452
2453/// Extract class nodes with their members from the AST.
2454fn extract_class_nodes(root: Node, source: &[u8], lang: Language) -> Vec<ClassNode> {
2455    let mut classes = Vec::new();
2456    let class_kinds = get_class_node_kinds(lang);
2457    let func_kinds = get_function_node_kinds(lang);
2458    let body_kinds = get_class_body_kinds(lang);
2459
2460    extract_class_nodes_recursive(
2461        root,
2462        source,
2463        &mut classes,
2464        lang,
2465        func_kinds,
2466        class_kinds,
2467        body_kinds,
2468    );
2469
2470    // Go: methods are declared at file level with receiver syntax, not inside the struct.
2471    // Scan root-level method_declaration nodes and associate them with their struct.
2472    if lang == Language::Go {
2473        associate_go_receiver_methods(root, source, lang, &mut classes);
2474    }
2475
2476    classes
2477}
2478
2479/// For Go, scan file-level `method_declaration` nodes, parse the receiver type,
2480/// and associate each method with the matching struct's ClassNode.
2481fn associate_go_receiver_methods(
2482    root: Node,
2483    source: &[u8],
2484    lang: Language,
2485    classes: &mut [ClassNode],
2486) {
2487    let source_str = std::str::from_utf8(source).unwrap_or("");
2488    let mut cursor = root.walk();
2489    for child in root.children(&mut cursor) {
2490        if child.kind() != "method_declaration" {
2491            continue;
2492        }
2493        // Extract receiver type name
2494        let receiver_type = match extract_go_receiver_type(child, source) {
2495            Some(name) => name,
2496            None => continue,
2497        };
2498
2499        // Extract method name and build an ExtractedNode
2500        let method_name = match get_function_name(child, lang, source_str) {
2501            Some(name) => name,
2502            None => continue,
2503        };
2504
2505        let params = child
2506            .child_by_field_name("parameters")
2507            .map(|p| node_text(p, source).to_string())
2508            .unwrap_or_default();
2509
2510        let line = child.start_position().row as u32 + 1;
2511        let end_line = child.end_position().row as u32 + 1;
2512        let column = child.start_position().column as u32;
2513        let body = node_text(child, source).to_string();
2514
2515        let extracted =
2516            ExtractedNode::new(method_name, NodeKind::Method, line, end_line, column, body)
2517                .with_params(params)
2518                .with_method_kind();
2519
2520        // Associate with matching struct
2521        for class in classes.iter_mut() {
2522            if class.name == receiver_type {
2523                class.methods.push(extracted);
2524                break;
2525            }
2526        }
2527    }
2528}
2529
2530/// Extract the receiver type name from a Go method_declaration node.
2531///
2532/// Handles both pointer receivers `(f *Foo)` and value receivers `(f Foo)`.
2533/// Returns the bare type name (e.g., "Foo") without the pointer `*`.
2534fn extract_go_receiver_type(method_node: Node, source: &[u8]) -> Option<String> {
2535    // method_declaration -> receiver: parameter_list -> parameter_declaration -> type
2536    let receiver = method_node.child_by_field_name("receiver")?;
2537    let mut recv_cursor = receiver.walk();
2538    for recv_child in receiver.children(&mut recv_cursor) {
2539        if recv_child.kind() == "parameter_declaration" {
2540            if let Some(type_node) = recv_child.child_by_field_name("type") {
2541                return extract_go_type_identifier(type_node, source);
2542            }
2543        }
2544    }
2545    None
2546}
2547
2548/// Recursively extract the type_identifier from a Go type node,
2549/// handling pointer_type wrappers.
2550fn extract_go_type_identifier(type_node: Node, source: &[u8]) -> Option<String> {
2551    match type_node.kind() {
2552        "type_identifier" => Some(node_text(type_node, source).to_string()),
2553        "pointer_type" => {
2554            // pointer_type has a single named child which is the underlying type
2555            let mut cursor = type_node.walk();
2556            for child in type_node.children(&mut cursor) {
2557                if child.is_named() {
2558                    return extract_go_type_identifier(child, source);
2559                }
2560            }
2561            None
2562        }
2563        _ => None,
2564    }
2565}
2566
2567fn extract_class_nodes_recursive(
2568    node: Node,
2569    source: &[u8],
2570    classes: &mut Vec<ClassNode>,
2571    lang: Language,
2572    func_kinds: &[&str],
2573    class_kinds: &[&str],
2574    body_kinds: &[&str],
2575) {
2576    let kind = node.kind();
2577
2578    if class_kinds.contains(&kind) {
2579        if let Some(class_node) = build_class_node(node, source, lang, func_kinds, body_kinds) {
2580            classes.push(class_node);
2581        }
2582        return; // Don't recurse into class children for nested classes at this level
2583    }
2584
2585    for child in node.children(&mut node.walk()) {
2586        extract_class_nodes_recursive(
2587            child,
2588            source,
2589            classes,
2590            lang,
2591            func_kinds,
2592            class_kinds,
2593            body_kinds,
2594        );
2595    }
2596}
2597
2598/// Build a ClassNode from a tree-sitter class node.
2599fn build_class_node(
2600    node: Node,
2601    source: &[u8],
2602    lang: Language,
2603    func_kinds: &[&str],
2604    body_kinds: &[&str],
2605) -> Option<ClassNode> {
2606    // Get class name
2607    let class_name = node
2608        .child_by_field_name("name")
2609        .map(|n| node_text(n, source).to_string())
2610        .or_else(|| {
2611            // Go: type_declaration has no "name" field; the name is in
2612            // the child type_spec node's "name" field.
2613            if lang == Language::Go && node.kind() == "type_declaration" {
2614                let mut cursor = node.walk();
2615                for child in node.children(&mut cursor) {
2616                    if child.kind() == "type_spec" {
2617                        if let Some(name_node) = child.child_by_field_name("name") {
2618                            return Some(node_text(name_node, source).to_string());
2619                        }
2620                    }
2621                }
2622            }
2623            // Fallback: search for first identifier child
2624            let mut cursor = node.walk();
2625            for child in node.children(&mut cursor) {
2626                if child.kind() == "identifier"
2627                    || child.kind() == "type_identifier"
2628                    || child.kind() == "constant"
2629                {
2630                    return Some(node_text(child, source).to_string());
2631                }
2632            }
2633            None
2634        })?;
2635
2636    if class_name.is_empty() {
2637        return None;
2638    }
2639
2640    let line = node.start_position().row as u32 + 1;
2641    let end_line = node.end_position().row as u32 + 1;
2642    let column = node.start_position().column as u32;
2643    let body = node_text(node, source).to_string();
2644    let normalized_body = normalize_body(&body);
2645
2646    // Extract base classes
2647    let bases = extract_bases(node, source, lang);
2648
2649    // Extract methods and fields from class body
2650    let mut methods = Vec::new();
2651    let mut fields = Vec::new();
2652
2653    for child in node.children(&mut node.walk()) {
2654        if body_kinds.contains(&child.kind()) {
2655            extract_class_members(child, source, lang, func_kinds, &mut methods, &mut fields);
2656        }
2657    }
2658
2659    Some(ClassNode {
2660        name: class_name,
2661        line,
2662        end_line,
2663        column,
2664        body,
2665        normalized_body,
2666        methods,
2667        fields,
2668        bases,
2669    })
2670}
2671
2672/// Extract base classes from a class definition node.
2673fn extract_bases(node: Node, source: &[u8], lang: Language) -> Vec<String> {
2674    let mut bases = Vec::new();
2675
2676    match lang {
2677        Language::Python => {
2678            // Python: class Foo(Base1, Base2):
2679            // Look for argument_list or superclasses
2680            if let Some(superclasses) = node.child_by_field_name("superclasses") {
2681                for child in superclasses.children(&mut superclasses.walk()) {
2682                    let text = node_text(child, source).trim().to_string();
2683                    if !text.is_empty() && text != "(" && text != ")" && text != "," {
2684                        bases.push(text);
2685                    }
2686                }
2687            }
2688        }
2689        _ => {
2690            // For other languages, base extraction would be different
2691            // For now, only Python is fully supported for class-level diff
2692        }
2693    }
2694
2695    bases
2696}
2697
2698/// Extract methods and fields from a class body.
2699fn extract_class_members(
2700    body_node: Node,
2701    source: &[u8],
2702    lang: Language,
2703    func_kinds: &[&str],
2704    methods: &mut Vec<ExtractedNode>,
2705    fields: &mut Vec<FieldNode>,
2706) {
2707    for child in body_node.children(&mut body_node.walk()) {
2708        let kind = child.kind();
2709
2710        // Extract methods
2711        if func_kinds.contains(&kind) {
2712            let source_str = std::str::from_utf8(source).unwrap_or("");
2713            if let Some(func_name) = get_function_name(child, lang, source_str) {
2714                let params = child
2715                    .child_by_field_name("parameters")
2716                    .or_else(|| child.child_by_field_name("formal_parameters"))
2717                    .map(|p| node_text(p, source).to_string())
2718                    .unwrap_or_default();
2719
2720                let line = child.start_position().row as u32 + 1;
2721                let end_line = child.end_position().row as u32 + 1;
2722                let column = child.start_position().column as u32;
2723                let body = node_text(child, source).to_string();
2724
2725                let extracted =
2726                    ExtractedNode::new(func_name, NodeKind::Method, line, end_line, column, body)
2727                        .with_params(params)
2728                        .with_method_kind();
2729
2730                methods.push(extracted);
2731            }
2732        }
2733        // Extract fields (Python: expression_statement with assignment)
2734        else if kind == "expression_statement" {
2735            if let Some(field) = extract_field_from_statement(child, source, lang) {
2736                fields.push(field);
2737            }
2738        }
2739    }
2740}
2741
2742/// Extract a field from a statement node (e.g., `timeout = 30`).
2743fn extract_field_from_statement(node: Node, source: &[u8], _lang: Language) -> Option<FieldNode> {
2744    // Look for assignment in this expression_statement
2745    for child in node.children(&mut node.walk()) {
2746        if child.kind() == "assignment" {
2747            // Get the left side (field name)
2748            if let Some(left) = child.child_by_field_name("left") {
2749                let name = node_text(left, source).trim().to_string();
2750                if !name.is_empty() && !name.contains('.') {
2751                    // Skip `self.x = ...` (those are instance vars, not class fields)
2752                    let line = node.start_position().row as u32 + 1;
2753                    let column = node.start_position().column as u32;
2754                    let body = node_text(node, source).to_string();
2755                    let normalized_body = body.trim().to_string();
2756
2757                    return Some(FieldNode {
2758                        name,
2759                        line,
2760                        column,
2761                        body,
2762                        normalized_body,
2763                    });
2764                }
2765            }
2766        }
2767    }
2768    None
2769}
2770
2771/// Detect changes between two sets of class nodes.
2772fn detect_class_changes(
2773    classes_a: &[ClassNode],
2774    classes_b: &[ClassNode],
2775    file_a: &Path,
2776    file_b: &Path,
2777    _semantic_only: bool,
2778) -> Vec<ASTChange> {
2779    let mut changes = Vec::new();
2780
2781    // Build lookup maps by name
2782    let map_b: HashMap<&str, &ClassNode> = classes_b.iter().map(|c| (c.name.as_str(), c)).collect();
2783
2784    // Track which classes have been matched
2785    let mut matched_a: Vec<bool> = vec![false; classes_a.len()];
2786    let mut matched_b: Vec<bool> = vec![false; classes_b.len()];
2787
2788    // First pass: exact name matches
2789    for (i, class_a) in classes_a.iter().enumerate() {
2790        let _ = class_a.end_line;
2791        let _ = &class_a.body;
2792        let _ = &class_a.normalized_body;
2793        if let Some(&class_b) = map_b.get(class_a.name.as_str()) {
2794            matched_a[i] = true;
2795            if let Some(j) = classes_b.iter().position(|c| c.name == class_a.name) {
2796                matched_b[j] = true;
2797            }
2798
2799            // Diff the matched pair
2800            if let Some(change) = diff_class_pair(class_a, class_b, file_a, file_b) {
2801                changes.push(change);
2802            }
2803        }
2804    }
2805
2806    // Collect unmatched classes
2807    let unmatched_a: Vec<(usize, &ClassNode)> = classes_a
2808        .iter()
2809        .enumerate()
2810        .filter(|(i, _)| !matched_a[*i])
2811        .collect();
2812    let unmatched_b: Vec<(usize, &ClassNode)> = classes_b
2813        .iter()
2814        .enumerate()
2815        .filter(|(i, _)| !matched_b[*i])
2816        .collect();
2817
2818    // Second pass: detect renames (same member signatures, different name)
2819    let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
2820
2821    for (_, class_a) in &unmatched_a {
2822        let mut best_match: Option<(usize, f64)> = None;
2823
2824        for (j, (_, class_b)) in unmatched_b.iter().enumerate() {
2825            if used_b[j] {
2826                continue;
2827            }
2828
2829            let similarity = compute_class_similarity(class_a, class_b);
2830            if similarity >= RENAME_SIMILARITY_THRESHOLD
2831                && (best_match.is_none() || similarity > best_match.unwrap().1)
2832            {
2833                best_match = Some((j, similarity));
2834            }
2835        }
2836
2837        if let Some((j, similarity)) = best_match {
2838            let (_, class_b) = unmatched_b[j];
2839            used_b[j] = true;
2840
2841            changes.push(ASTChange {
2842                change_type: ChangeType::Rename,
2843                node_kind: NodeKind::Class,
2844                name: Some(class_a.name.clone()),
2845                old_location: Some(Location::with_column(
2846                    file_a.display().to_string(),
2847                    class_a.line,
2848                    class_a.column,
2849                )),
2850                new_location: Some(Location::with_column(
2851                    file_b.display().to_string(),
2852                    class_b.line,
2853                    class_b.column,
2854                )),
2855                old_text: Some(class_a.name.clone()),
2856                new_text: Some(class_b.name.clone()),
2857                similarity: Some(similarity),
2858                children: None,
2859                base_changes: None,
2860            });
2861        }
2862    }
2863
2864    // Remaining unmatched in A are deletes
2865    for (_, class_a) in &unmatched_a {
2866        let is_renamed = changes
2867            .iter()
2868            .any(|c| c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&class_a.name));
2869        if !is_renamed {
2870            changes.push(ASTChange {
2871                change_type: ChangeType::Delete,
2872                node_kind: NodeKind::Class,
2873                name: Some(class_a.name.clone()),
2874                old_location: Some(Location::with_column(
2875                    file_a.display().to_string(),
2876                    class_a.line,
2877                    class_a.column,
2878                )),
2879                new_location: None,
2880                old_text: None,
2881                new_text: None,
2882                similarity: None,
2883                children: None,
2884                base_changes: None,
2885            });
2886        }
2887    }
2888
2889    // Remaining unmatched in B are inserts
2890    for (j, (_, class_b)) in unmatched_b.iter().enumerate() {
2891        if !used_b[j] {
2892            changes.push(ASTChange {
2893                change_type: ChangeType::Insert,
2894                node_kind: NodeKind::Class,
2895                name: Some(class_b.name.clone()),
2896                old_location: None,
2897                new_location: Some(Location::with_column(
2898                    file_b.display().to_string(),
2899                    class_b.line,
2900                    class_b.column,
2901                )),
2902                old_text: None,
2903                new_text: None,
2904                similarity: None,
2905                children: None,
2906                base_changes: None,
2907            });
2908        }
2909    }
2910
2911    // Sort changes: deletes first, then renames, updates, inserts
2912    changes.sort_by_key(|c| match c.change_type {
2913        ChangeType::Delete => 0,
2914        ChangeType::Rename => 1,
2915        ChangeType::Update => 2,
2916        ChangeType::Move => 3,
2917        ChangeType::Insert => 4,
2918        _ => 5,
2919    });
2920
2921    changes
2922}
2923
2924/// Diff two matched classes and produce an ASTChange if they differ.
2925fn diff_class_pair(
2926    class_a: &ClassNode,
2927    class_b: &ClassNode,
2928    file_a: &Path,
2929    file_b: &Path,
2930) -> Option<ASTChange> {
2931    let mut children = Vec::new();
2932    let mut has_changes = false;
2933
2934    // 1. Diff methods
2935    diff_methods(
2936        &class_a.methods,
2937        &class_b.methods,
2938        file_a,
2939        file_b,
2940        &mut children,
2941    );
2942
2943    // 2. Diff fields
2944    diff_fields(
2945        &class_a.fields,
2946        &class_b.fields,
2947        file_a,
2948        file_b,
2949        &mut children,
2950    );
2951
2952    // 3. Diff base classes
2953    let base_changes = diff_bases(&class_a.bases, &class_b.bases);
2954
2955    if !children.is_empty() {
2956        has_changes = true;
2957    }
2958    if base_changes.is_some() {
2959        has_changes = true;
2960    }
2961
2962    if !has_changes {
2963        return None; // Classes are identical
2964    }
2965
2966    Some(ASTChange {
2967        change_type: ChangeType::Update,
2968        node_kind: NodeKind::Class,
2969        name: Some(class_a.name.clone()),
2970        old_location: Some(Location::with_column(
2971            file_a.display().to_string(),
2972            class_a.line,
2973            class_a.column,
2974        )),
2975        new_location: Some(Location::with_column(
2976            file_b.display().to_string(),
2977            class_b.line,
2978            class_b.column,
2979        )),
2980        old_text: None,
2981        new_text: None,
2982        similarity: None,
2983        children: if children.is_empty() {
2984            None
2985        } else {
2986            Some(children)
2987        },
2988        base_changes,
2989    })
2990}
2991
2992/// Diff methods between two matched classes.
2993fn diff_methods(
2994    methods_a: &[ExtractedNode],
2995    methods_b: &[ExtractedNode],
2996    file_a: &Path,
2997    file_b: &Path,
2998    children: &mut Vec<ASTChange>,
2999) {
3000    let map_b: HashMap<&str, &ExtractedNode> =
3001        methods_b.iter().map(|m| (m.name.as_str(), m)).collect();
3002
3003    let mut matched_a: Vec<bool> = vec![false; methods_a.len()];
3004    let mut matched_b: Vec<bool> = vec![false; methods_b.len()];
3005
3006    // Exact name match
3007    for (i, method_a) in methods_a.iter().enumerate() {
3008        if let Some(&method_b) = map_b.get(method_a.name.as_str()) {
3009            matched_a[i] = true;
3010            if let Some(j) = methods_b.iter().position(|m| m.name == method_a.name) {
3011                matched_b[j] = true;
3012            }
3013
3014            // Check if body changed
3015            if method_a.normalized_body != method_b.normalized_body {
3016                children.push(ASTChange {
3017                    change_type: ChangeType::Update,
3018                    node_kind: NodeKind::Method,
3019                    name: Some(method_a.name.clone()),
3020                    old_location: Some(Location::with_column(
3021                        file_a.display().to_string(),
3022                        method_a.line,
3023                        method_a.column,
3024                    )),
3025                    new_location: Some(Location::with_column(
3026                        file_b.display().to_string(),
3027                        method_b.line,
3028                        method_b.column,
3029                    )),
3030                    old_text: None,
3031                    new_text: None,
3032                    similarity: Some(compute_similarity(
3033                        &method_a.normalized_body,
3034                        &method_b.normalized_body,
3035                    )),
3036                    children: None,
3037                    base_changes: None,
3038                });
3039            }
3040        }
3041    }
3042
3043    // Collect unmatched
3044    let unmatched_a: Vec<&ExtractedNode> = methods_a
3045        .iter()
3046        .enumerate()
3047        .filter(|(i, _)| !matched_a[*i])
3048        .map(|(_, m)| m)
3049        .collect();
3050    let unmatched_b: Vec<&ExtractedNode> = methods_b
3051        .iter()
3052        .enumerate()
3053        .filter(|(i, _)| !matched_b[*i])
3054        .map(|(_, m)| m)
3055        .collect();
3056
3057    // Rename detection among unmatched methods
3058    let mut used_b: Vec<bool> = vec![false; unmatched_b.len()];
3059
3060    for method_a in &unmatched_a {
3061        let mut best_match: Option<(usize, f64)> = None;
3062
3063        for (j, method_b) in unmatched_b.iter().enumerate() {
3064            if used_b[j] {
3065                continue;
3066            }
3067            let similarity =
3068                compute_similarity(&method_a.normalized_body, &method_b.normalized_body);
3069            if similarity >= RENAME_SIMILARITY_THRESHOLD
3070                && (best_match.is_none() || similarity > best_match.unwrap().1)
3071            {
3072                best_match = Some((j, similarity));
3073            }
3074        }
3075
3076        if let Some((j, similarity)) = best_match {
3077            let method_b = unmatched_b[j];
3078            used_b[j] = true;
3079
3080            children.push(ASTChange {
3081                change_type: ChangeType::Rename,
3082                node_kind: NodeKind::Method,
3083                name: Some(method_a.name.clone()),
3084                old_location: Some(Location::with_column(
3085                    file_a.display().to_string(),
3086                    method_a.line,
3087                    method_a.column,
3088                )),
3089                new_location: Some(Location::with_column(
3090                    file_b.display().to_string(),
3091                    method_b.line,
3092                    method_b.column,
3093                )),
3094                old_text: Some(method_a.name.clone()),
3095                new_text: Some(method_b.name.clone()),
3096                similarity: Some(similarity),
3097                children: None,
3098                base_changes: None,
3099            });
3100        }
3101    }
3102
3103    // Remaining unmatched in A are deletes
3104    for method_a in &unmatched_a {
3105        let is_renamed = children.iter().any(|c| {
3106            c.change_type == ChangeType::Rename && c.name.as_ref() == Some(&method_a.name)
3107        });
3108        if !is_renamed {
3109            children.push(ASTChange {
3110                change_type: ChangeType::Delete,
3111                node_kind: NodeKind::Method,
3112                name: Some(method_a.name.clone()),
3113                old_location: Some(Location::with_column(
3114                    file_a.display().to_string(),
3115                    method_a.line,
3116                    method_a.column,
3117                )),
3118                new_location: None,
3119                old_text: None,
3120                new_text: None,
3121                similarity: None,
3122                children: None,
3123                base_changes: None,
3124            });
3125        }
3126    }
3127
3128    // Remaining unmatched in B are inserts
3129    for (j, method_b) in unmatched_b.iter().enumerate() {
3130        if !used_b[j] {
3131            children.push(ASTChange {
3132                change_type: ChangeType::Insert,
3133                node_kind: NodeKind::Method,
3134                name: Some(method_b.name.clone()),
3135                old_location: None,
3136                new_location: Some(Location::with_column(
3137                    file_b.display().to_string(),
3138                    method_b.line,
3139                    method_b.column,
3140                )),
3141                old_text: None,
3142                new_text: None,
3143                similarity: None,
3144                children: None,
3145                base_changes: None,
3146            });
3147        }
3148    }
3149}
3150
3151/// Diff fields between two matched classes.
3152fn diff_fields(
3153    fields_a: &[FieldNode],
3154    fields_b: &[FieldNode],
3155    file_a: &Path,
3156    file_b: &Path,
3157    children: &mut Vec<ASTChange>,
3158) {
3159    let map_b: HashMap<&str, &FieldNode> = fields_b.iter().map(|f| (f.name.as_str(), f)).collect();
3160
3161    let mut matched_a: Vec<bool> = vec![false; fields_a.len()];
3162    let mut matched_b: Vec<bool> = vec![false; fields_b.len()];
3163
3164    // Exact name match
3165    for (i, field_a) in fields_a.iter().enumerate() {
3166        if let Some(&field_b) = map_b.get(field_a.name.as_str()) {
3167            matched_a[i] = true;
3168            if let Some(j) = fields_b.iter().position(|f| f.name == field_a.name) {
3169                matched_b[j] = true;
3170            }
3171
3172            // Check if value changed
3173            if field_a.normalized_body != field_b.normalized_body {
3174                children.push(ASTChange {
3175                    change_type: ChangeType::Update,
3176                    node_kind: NodeKind::Field,
3177                    name: Some(field_a.name.clone()),
3178                    old_location: Some(Location::with_column(
3179                        file_a.display().to_string(),
3180                        field_a.line,
3181                        field_a.column,
3182                    )),
3183                    new_location: Some(Location::with_column(
3184                        file_b.display().to_string(),
3185                        field_b.line,
3186                        field_b.column,
3187                    )),
3188                    old_text: Some(field_a.body.trim().to_string()),
3189                    new_text: Some(field_b.body.trim().to_string()),
3190                    similarity: None,
3191                    children: None,
3192                    base_changes: None,
3193                });
3194            }
3195        }
3196    }
3197
3198    // Remaining unmatched in A are deletes
3199    for (i, field_a) in fields_a.iter().enumerate() {
3200        if !matched_a[i] {
3201            children.push(ASTChange {
3202                change_type: ChangeType::Delete,
3203                node_kind: NodeKind::Field,
3204                name: Some(field_a.name.clone()),
3205                old_location: Some(Location::with_column(
3206                    file_a.display().to_string(),
3207                    field_a.line,
3208                    field_a.column,
3209                )),
3210                new_location: None,
3211                old_text: None,
3212                new_text: None,
3213                similarity: None,
3214                children: None,
3215                base_changes: None,
3216            });
3217        }
3218    }
3219
3220    // Remaining unmatched in B are inserts
3221    for (j, field_b) in fields_b.iter().enumerate() {
3222        if !matched_b[j] {
3223            children.push(ASTChange {
3224                change_type: ChangeType::Insert,
3225                node_kind: NodeKind::Field,
3226                name: Some(field_b.name.clone()),
3227                old_location: None,
3228                new_location: Some(Location::with_column(
3229                    file_b.display().to_string(),
3230                    field_b.line,
3231                    field_b.column,
3232                )),
3233                old_text: None,
3234                new_text: None,
3235                similarity: None,
3236                children: None,
3237                base_changes: None,
3238            });
3239        }
3240    }
3241}
3242
3243/// Diff base classes between two matched classes.
3244fn diff_bases(bases_a: &[String], bases_b: &[String]) -> Option<BaseChanges> {
3245    let set_a: std::collections::HashSet<&String> = bases_a.iter().collect();
3246    let set_b: std::collections::HashSet<&String> = bases_b.iter().collect();
3247
3248    let added: Vec<String> = set_b.difference(&set_a).map(|s| (*s).clone()).collect();
3249    let removed: Vec<String> = set_a.difference(&set_b).map(|s| (*s).clone()).collect();
3250
3251    if added.is_empty() && removed.is_empty() {
3252        None
3253    } else {
3254        Some(BaseChanges { added, removed })
3255    }
3256}
3257
3258/// Compute similarity between two classes based on their member signatures.
3259fn compute_class_similarity(class_a: &ClassNode, class_b: &ClassNode) -> f64 {
3260    // Collect method names + normalized bodies
3261    let method_sigs_a: std::collections::HashSet<String> = class_a
3262        .methods
3263        .iter()
3264        .map(|m| format!("{}:{}", m.name, m.normalized_body))
3265        .collect();
3266    let method_sigs_b: std::collections::HashSet<String> = class_b
3267        .methods
3268        .iter()
3269        .map(|m| format!("{}:{}", m.name, m.normalized_body))
3270        .collect();
3271
3272    let field_sigs_a: std::collections::HashSet<String> = class_a
3273        .fields
3274        .iter()
3275        .map(|f| f.normalized_body.clone())
3276        .collect();
3277    let field_sigs_b: std::collections::HashSet<String> = class_b
3278        .fields
3279        .iter()
3280        .map(|f| f.normalized_body.clone())
3281        .collect();
3282
3283    // Combined Jaccard similarity
3284    let all_a: std::collections::HashSet<&String> =
3285        method_sigs_a.iter().chain(field_sigs_a.iter()).collect();
3286    let all_b: std::collections::HashSet<&String> =
3287        method_sigs_b.iter().chain(field_sigs_b.iter()).collect();
3288
3289    if all_a.is_empty() && all_b.is_empty() {
3290        // Both empty classes - consider identical
3291        return 1.0;
3292    }
3293
3294    let intersection = all_a.intersection(&all_b).count();
3295    let union = all_a.union(&all_b).count();
3296
3297    if union == 0 {
3298        0.0
3299    } else {
3300        intersection as f64 / union as f64
3301    }
3302}
3303
3304// =============================================================================
3305// L6: File-Level Diff
3306// =============================================================================
3307
/// File extensions (without the leading dot) that the directory walker
/// accepts as source code; files with any other extension are ignored.
const SOURCE_EXTENSIONS: &[&str] = &[
    // Python, Rust
    "py", "rs",
    // TypeScript / JavaScript
    "ts", "tsx", "js", "jsx",
    // Go, Java
    "go", "java",
    // C / C++
    "c", "h", "cpp", "hpp", "cc", "cxx",
    // Ruby, PHP, C#, Kotlin, Scala, Swift
    "rb", "php", "cs", "kt", "scala", "swift",
    // Elixir
    "ex", "exs",
    // Lua, OCaml, Luau
    "lua", "ml", "mli", "luau",
];
3313
3314/// Walk a directory and collect source files with their relative paths.
3315fn collect_source_files(root: &Path) -> Result<Vec<(String, PathBuf)>> {
3316    let mut files = Vec::new();
3317    collect_source_files_recursive(root, root, &mut files)?;
3318    files.sort_by(|a, b| a.0.cmp(&b.0));
3319    Ok(files)
3320}
3321
3322fn collect_source_files_recursive(
3323    root: &Path,
3324    current: &Path,
3325    files: &mut Vec<(String, PathBuf)>,
3326) -> Result<()> {
3327    for entry in fs::read_dir(current)? {
3328        let entry = entry?;
3329        let path = entry.path();
3330        if path.is_dir() {
3331            collect_source_files_recursive(root, &path, files)?;
3332        } else if path.is_file() {
3333            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
3334                if SOURCE_EXTENSIONS.contains(&ext) {
3335                    let rel = path
3336                        .strip_prefix(root)
3337                        .unwrap_or(&path)
3338                        .to_string_lossy()
3339                        .replace('\\', "/");
3340                    files.push((rel, path));
3341                }
3342            }
3343        }
3344    }
3345    Ok(())
3346}
3347
3348/// Compute a structural fingerprint for a source file.
3349///
3350/// The fingerprint is a hash of the sorted list of function/class signatures
3351/// extracted via tree-sitter. Two files with the same structural definitions
3352/// (regardless of whitespace/comments) produce the same fingerprint.
3353fn compute_structural_fingerprint(path: &Path) -> Result<(u64, Vec<String>)> {
3354    let lang = match Language::from_path(path) {
3355        Some(l) => l,
3356        None => {
3357            // Fallback: hash the raw content for unsupported languages
3358            let content = fs::read_to_string(path)?;
3359            let mut hasher = std::collections::hash_map::DefaultHasher::new();
3360            content.hash(&mut hasher);
3361            return Ok((hasher.finish(), vec![]));
3362        }
3363    };
3364
3365    let source = fs::read_to_string(path)?;
3366    let pool = ParserPool::new();
3367    let tree = match pool.parse(&source, lang) {
3368        Ok(t) => t,
3369        Err(_) => {
3370            // Parse failure: hash raw content
3371            let mut hasher = std::collections::hash_map::DefaultHasher::new();
3372            source.hash(&mut hasher);
3373            return Ok((hasher.finish(), vec![]));
3374        }
3375    };
3376
3377    let nodes = extract_nodes(tree.root_node(), source.as_bytes(), lang);
3378
3379    // Build sorted list of signatures: "kind:name(params)|body_hash"
3380    // We include a hash of the normalized body so that body-only changes
3381    // (same name/params but different implementation) alter the fingerprint.
3382    let mut signatures: Vec<String> = nodes
3383        .iter()
3384        .map(|n| {
3385            let kind = match n.kind {
3386                NodeKind::Function => "fn",
3387                NodeKind::Class => "class",
3388                NodeKind::Method => "method",
3389                NodeKind::Field => "field",
3390                _ => "other",
3391            };
3392            let sig = if n.params.is_empty() {
3393                format!("{}:{}", kind, n.name)
3394            } else {
3395                format!("{}:{}({})", kind, n.name, n.params)
3396            };
3397            // Append a body hash so body-only changes are detected
3398            let mut body_hasher = std::collections::hash_map::DefaultHasher::new();
3399            n.normalized_body.hash(&mut body_hasher);
3400            format!("{}|{}", sig, body_hasher.finish())
3401        })
3402        .collect();
3403    signatures.sort();
3404
3405    let mut hasher = std::collections::hash_map::DefaultHasher::new();
3406    for sig in &signatures {
3407        sig.hash(&mut hasher);
3408    }
3409    let fingerprint = hasher.finish();
3410
3411    Ok((fingerprint, signatures))
3412}
3413
3414/// Run L6 file-level diff between two directories.
3415fn run_file_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3416    let files_a = collect_source_files(dir_a)?;
3417    let files_b = collect_source_files(dir_b)?;
3418
3419    // Build maps: relative_path -> full_path
3420    let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
3421    let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(rel, p)| (rel.as_str(), p)).collect();
3422
3423    let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
3424
3425    let mut file_changes = Vec::new();
3426    let mut has_any_change = false;
3427
3428    for rel_path in all_paths {
3429        match (map_a.get(rel_path), map_b.get(rel_path)) {
3430            (Some(path_a), Some(path_b)) => {
3431                // File exists in both directories
3432                let (fp_a, sigs_a) = compute_structural_fingerprint(path_a)?;
3433                let (fp_b, sigs_b) = compute_structural_fingerprint(path_b)?;
3434
3435                if fp_a == fp_b {
3436                    // Identical structure - skip or include as no-change
3437                    // (tests filter these out anyway)
3438                } else {
3439                    has_any_change = true;
3440                    // Find which signatures differ
3441                    let set_a: HashSet<&String> = sigs_a.iter().collect();
3442                    let set_b: HashSet<&String> = sigs_b.iter().collect();
3443                    let changed: Vec<String> = set_a
3444                        .symmetric_difference(&set_b)
3445                        .map(|s| (*s).clone())
3446                        .collect();
3447
3448                    file_changes.push(FileLevelChange {
3449                        relative_path: rel_path.to_string(),
3450                        change_type: ChangeType::Update,
3451                        old_fingerprint: Some(fp_a),
3452                        new_fingerprint: Some(fp_b),
3453                        signature_changes: if changed.is_empty() {
3454                            None
3455                        } else {
3456                            Some(changed)
3457                        },
3458                    });
3459                }
3460            }
3461            (None, Some(path_b)) => {
3462                // Added file
3463                has_any_change = true;
3464                let (fp_b, _) = compute_structural_fingerprint(path_b)?;
3465                file_changes.push(FileLevelChange {
3466                    relative_path: rel_path.to_string(),
3467                    change_type: ChangeType::Insert,
3468                    old_fingerprint: None,
3469                    new_fingerprint: Some(fp_b),
3470                    signature_changes: None,
3471                });
3472            }
3473            (Some(path_a), None) => {
3474                // Removed file
3475                has_any_change = true;
3476                let (fp_a, _) = compute_structural_fingerprint(path_a)?;
3477                file_changes.push(FileLevelChange {
3478                    relative_path: rel_path.to_string(),
3479                    change_type: ChangeType::Delete,
3480                    old_fingerprint: Some(fp_a),
3481                    new_fingerprint: None,
3482                    signature_changes: None,
3483                });
3484            }
3485            (None, None) => unreachable!(),
3486        }
3487    }
3488
3489    Ok(DiffReport {
3490        file_a: dir_a.display().to_string(),
3491        file_b: dir_b.display().to_string(),
3492        identical: !has_any_change,
3493        changes: Vec::new(),
3494        summary: None,
3495        granularity: DiffGranularity::File,
3496        file_changes: Some(file_changes),
3497        module_changes: None,
3498        import_graph_summary: None,
3499        arch_changes: None,
3500        arch_summary: None,
3501    })
3502}
3503
3504// =============================================================================
3505// L7: Module-Level Diff
3506// =============================================================================
3507
/// An import edge used internally during graph building.
///
/// One edge records that `source_file` imports `target_module`. The derived
/// `Eq`/`Hash` implementations allow edges to be compared and stored in
/// hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct InternalImportEdge {
    /// Path of the importing file, relative to the scanned root directory.
    source_file: String,
    /// Module named by the import, as reported by whichever parser produced
    /// the edge.
    target_module: String,
    /// Names imported from the target module; empty for a plain `import X`.
    imported_names: Vec<String>,
}
3515
3516/// Parse Python import statements from a file using regex.
3517///
3518/// Recognizes:
3519/// - `from X import Y, Z`
3520/// - `import X`
3521fn parse_python_imports(source: &str, relative_path: &str) -> Vec<InternalImportEdge> {
3522    let mut edges = Vec::new();
3523
3524    // Match "from X import Y, Z"
3525    let from_re = Regex::new(r"(?m)^(?:\s*)from\s+([\w.]+)\s+import\s+(.+)$").unwrap();
3526    for cap in from_re.captures_iter(source) {
3527        let target = cap[1].to_string();
3528        let names_str = &cap[2];
3529        let names: Vec<String> = names_str
3530            .split(',')
3531            .map(|n| n.trim().to_string())
3532            .filter(|n| !n.is_empty())
3533            .collect();
3534        edges.push(InternalImportEdge {
3535            source_file: relative_path.to_string(),
3536            target_module: target,
3537            imported_names: names,
3538        });
3539    }
3540
3541    // Match "import X" (but not "from X import Y" which is already handled)
3542    let import_re = Regex::new(r"(?m)^(?:\s*)import\s+([\w.]+)$").unwrap();
3543    for cap in import_re.captures_iter(source) {
3544        let target = cap[1].to_string();
3545        edges.push(InternalImportEdge {
3546            source_file: relative_path.to_string(),
3547            target_module: target,
3548            imported_names: vec![],
3549        });
3550    }
3551
3552    edges
3553}
3554
3555/// Parse imports for a single file using CallGraphLanguageSupport.
3556///
3557/// Returns `Some(edges)` if a handler could parse the file, `None` otherwise.
3558/// On handler parse failure for Python files, falls back to regex parsing.
3559fn parse_file_imports(
3560    registry: &LanguageRegistry,
3561    source: &str,
3562    full_path: &Path,
3563    rel_path: &str,
3564) -> Vec<InternalImportEdge> {
3565    let ext = match full_path.extension().and_then(|e| e.to_str()) {
3566        Some(e) => format!(".{}", e),
3567        None => return Vec::new(),
3568    };
3569
3570    let is_python = ext == ".py" || ext == ".pyi";
3571
3572    // Try the language handler from the registry
3573    if let Some(handler) = registry.get_by_extension(&ext) {
3574        if let Ok(import_defs) = handler.parse_imports(source, full_path) {
3575            return import_defs
3576                .into_iter()
3577                .map(|def| InternalImportEdge {
3578                    source_file: rel_path.to_string(),
3579                    target_module: def.module,
3580                    imported_names: def.names,
3581                })
3582                .collect();
3583        }
3584    }
3585
3586    // Fallback: regex-based parsing for Python files only
3587    if is_python {
3588        return parse_python_imports(source, rel_path);
3589    }
3590
3591    Vec::new()
3592}
3593
3594/// Build import graph for all source files in a directory.
3595///
3596/// Uses `CallGraphLanguageSupport::parse_imports()` from tldr-core for
3597/// multi-language support (Python, TypeScript, Go, Rust, Java, C#, etc.).
3598/// Falls back to regex-based `parse_python_imports()` for Python files
3599/// when the core API fails, and skips import parsing for files whose
3600/// language is unsupported or whose handler returns an error.
3601fn build_import_graph(root: &Path) -> Result<Vec<InternalImportEdge>> {
3602    let files = collect_source_files(root)?;
3603    let registry = LanguageRegistry::with_defaults();
3604    let mut all_edges = Vec::new();
3605
3606    for (rel_path, full_path) in &files {
3607        let source = fs::read_to_string(full_path)?;
3608        let edges = parse_file_imports(&registry, &source, full_path, rel_path);
3609        all_edges.extend(edges);
3610    }
3611
3612    Ok(all_edges)
3613}
3614
3615/// Convert an internal edge to the public ImportEdge type.
3616fn to_public_edge(edge: &InternalImportEdge) -> ImportEdge {
3617    ImportEdge {
3618        source_file: edge.source_file.clone(),
3619        target_module: edge.target_module.clone(),
3620        imported_names: edge.imported_names.clone(),
3621    }
3622}
3623
3624/// Create a comparable key for an import edge (for set operations).
3625fn edge_key(edge: &InternalImportEdge) -> String {
3626    format!(
3627        "{}->{}:{}",
3628        edge.source_file,
3629        edge.target_module,
3630        edge.imported_names.join(",")
3631    )
3632}
3633
3634/// Run L7 module-level diff between two directories.
3635fn run_module_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3636    // Build import graphs
3637    let edges_a = build_import_graph(dir_a)?;
3638    let edges_b = build_import_graph(dir_b)?;
3639
3640    // Build edge key sets for comparison
3641    let keys_a: HashSet<String> = edges_a.iter().map(edge_key).collect();
3642    let keys_b: HashSet<String> = edges_b.iter().map(edge_key).collect();
3643
3644    // Edges added (in B but not in A)
3645    let added_keys: HashSet<&String> = keys_b.difference(&keys_a).collect();
3646    let removed_keys: HashSet<&String> = keys_a.difference(&keys_b).collect();
3647
3648    // Get added/removed edges
3649    let added_edges: Vec<&InternalImportEdge> = edges_b
3650        .iter()
3651        .filter(|e| added_keys.contains(&edge_key(e)))
3652        .collect();
3653    let removed_edges: Vec<&InternalImportEdge> = edges_a
3654        .iter()
3655        .filter(|e| removed_keys.contains(&edge_key(e)))
3656        .collect();
3657
3658    // Also run L6 file-level diff for context
3659    let files_a = collect_source_files(dir_a)?;
3660    let files_b = collect_source_files(dir_b)?;
3661    let map_a: HashMap<&str, &PathBuf> = files_a.iter().map(|(r, p)| (r.as_str(), p)).collect();
3662    let map_b: HashMap<&str, &PathBuf> = files_b.iter().map(|(r, p)| (r.as_str(), p)).collect();
3663    let all_paths: BTreeSet<&str> = map_a.keys().chain(map_b.keys()).copied().collect();
3664
3665    // Build per-module changes
3666    let mut module_changes: Vec<ModuleLevelChange> = Vec::new();
3667    let mut modules_with_import_changes = 0usize;
3668
3669    for rel_path in &all_paths {
3670        let in_a = map_a.contains_key(rel_path);
3671        let in_b = map_b.contains_key(rel_path);
3672
3673        // Determine module change type
3674        let change_type = if !in_a && in_b {
3675            ChangeType::Insert
3676        } else if in_a && !in_b {
3677            ChangeType::Delete
3678        } else {
3679            ChangeType::Update
3680        };
3681
3682        // Gather import changes for this module
3683        let mod_added: Vec<ImportEdge> = added_edges
3684            .iter()
3685            .filter(|e| e.source_file == *rel_path)
3686            .map(|e| to_public_edge(e))
3687            .collect();
3688        let mod_removed: Vec<ImportEdge> = removed_edges
3689            .iter()
3690            .filter(|e| e.source_file == *rel_path)
3691            .map(|e| to_public_edge(e))
3692            .collect();
3693
3694        // Compute file-level change if both exist
3695        let file_change = if in_a && in_b {
3696            let path_a = map_a[rel_path];
3697            let path_b = map_b[rel_path];
3698            let (fp_a, sigs_a) = compute_structural_fingerprint(path_a)?;
3699            let (fp_b, sigs_b) = compute_structural_fingerprint(path_b)?;
3700            if fp_a != fp_b {
3701                let set_a: HashSet<&String> = sigs_a.iter().collect();
3702                let set_b: HashSet<&String> = sigs_b.iter().collect();
3703                let changed: Vec<String> = set_a
3704                    .symmetric_difference(&set_b)
3705                    .map(|s| (*s).clone())
3706                    .collect();
3707                Some(FileLevelChange {
3708                    relative_path: rel_path.to_string(),
3709                    change_type: ChangeType::Update,
3710                    old_fingerprint: Some(fp_a),
3711                    new_fingerprint: Some(fp_b),
3712                    signature_changes: if changed.is_empty() {
3713                        None
3714                    } else {
3715                        Some(changed)
3716                    },
3717                })
3718            } else {
3719                None
3720            }
3721        } else {
3722            None
3723        };
3724
3725        // Only include modules with actual changes
3726        let has_import_changes = !mod_added.is_empty() || !mod_removed.is_empty();
3727        let has_file_change = file_change.is_some();
3728        let is_new_or_deleted =
3729            change_type == ChangeType::Insert || change_type == ChangeType::Delete;
3730
3731        if has_import_changes || has_file_change || is_new_or_deleted {
3732            if has_import_changes {
3733                modules_with_import_changes += 1;
3734            }
3735
3736            // For new modules, all their imports count as added
3737            let final_added = if change_type == ChangeType::Insert && mod_added.is_empty() {
3738                // Gather all imports for this new file
3739                edges_b
3740                    .iter()
3741                    .filter(|e| e.source_file == *rel_path)
3742                    .map(to_public_edge)
3743                    .collect()
3744            } else {
3745                mod_added
3746            };
3747            // For deleted modules, all their imports count as removed
3748            let final_removed = if change_type == ChangeType::Delete && mod_removed.is_empty() {
3749                edges_a
3750                    .iter()
3751                    .filter(|e| e.source_file == *rel_path)
3752                    .map(to_public_edge)
3753                    .collect()
3754            } else {
3755                mod_removed
3756            };
3757
3758            // Recheck after expanding
3759            let has_expanded_imports = !final_added.is_empty() || !final_removed.is_empty();
3760            if has_expanded_imports && !has_import_changes {
3761                modules_with_import_changes += 1;
3762            }
3763
3764            module_changes.push(ModuleLevelChange {
3765                module_path: rel_path.to_string(),
3766                change_type,
3767                imports_added: final_added,
3768                imports_removed: final_removed,
3769                file_change,
3770            });
3771        }
3772    }
3773
3774    let summary = ImportGraphSummary {
3775        total_edges_a: edges_a.len(),
3776        total_edges_b: edges_b.len(),
3777        edges_added: added_keys.len(),
3778        edges_removed: removed_keys.len(),
3779        modules_with_import_changes,
3780    };
3781
3782    let identical = module_changes.is_empty() && added_keys.is_empty() && removed_keys.is_empty();
3783
3784    Ok(DiffReport {
3785        file_a: dir_a.display().to_string(),
3786        file_b: dir_b.display().to_string(),
3787        identical,
3788        changes: Vec::new(),
3789        summary: None,
3790        granularity: DiffGranularity::Module,
3791        file_changes: None,
3792        module_changes: Some(module_changes),
3793        import_graph_summary: Some(summary),
3794        arch_changes: None,
3795        arch_summary: None,
3796    })
3797}
3798
3799// =============================================================================
3800// L8: Architecture-Level Diff
3801// =============================================================================
3802
/// Map a directory name to an architectural layer label.
///
/// The comparison is case-insensitive. Names that match none of the known
/// patterns are labelled `"other"`.
fn classify_directory_layer(dir_name: &str) -> String {
    let layer = match dir_name.to_lowercase().as_str() {
        "api" | "routes" | "handlers" | "endpoints" | "views" | "controllers" => "api",
        "core" | "models" | "domain" | "entities" => "core",
        "utils" | "helpers" | "lib" | "common" | "shared" => "utility",
        "middleware" | "interceptors" | "filters" => "middleware",
        "services" | "service" => "service",
        "tests" | "test" | "spec" | "specs" => "test",
        "config" | "settings" | "conf" => "config",
        "db" | "database" | "migrations" | "repositories" | "repo" => "data",
        _ => "other",
    };
    layer.to_string()
}
3818
3819/// Classify a directory using import-based fan-in/fan-out analysis.
3820///
3821/// For directories whose name doesn't match a known pattern ("other"),
3822/// we use the import graph to infer the architectural role:
3823/// - High fan-out + low fan-in  -> "entry" (entry points that depend on many modules)
3824/// - Low fan-out  + high fan-in -> "utility" (leaf modules imported by many)
3825/// - Balanced                   -> "service" (intermediate layer)
3826fn classify_by_import_flow(
3827    dir_name: &str,
3828    edges: &[InternalImportEdge],
3829    all_dirs: &HashSet<String>,
3830) -> String {
3831    // Count fan-out: how many distinct external directories does this dir import from?
3832    let fan_out: usize = edges
3833        .iter()
3834        .filter(|e| {
3835            e.source_file
3836                .split('/')
3837                .next()
3838                .map(|d| d == dir_name)
3839                .unwrap_or(false)
3840        })
3841        .filter(|e| {
3842            // Target module references a different top-level directory
3843            let target_first = e
3844                .target_module
3845                .split('/')
3846                .next()
3847                .or_else(|| e.target_module.split('.').next())
3848                .unwrap_or("");
3849            all_dirs.contains(target_first) && target_first != dir_name
3850        })
3851        .map(|e| e.target_module.clone())
3852        .collect::<HashSet<_>>()
3853        .len();
3854
3855    // Count fan-in: how many edges from OTHER directories target files in this dir?
3856    let fan_in: usize = edges
3857        .iter()
3858        .filter(|e| {
3859            let source_dir = e.source_file.split('/').next().unwrap_or("");
3860            source_dir != dir_name
3861        })
3862        .filter(|e| {
3863            let target_first = e
3864                .target_module
3865                .split('/')
3866                .next()
3867                .or_else(|| e.target_module.split('.').next())
3868                .unwrap_or("");
3869            target_first == dir_name
3870        })
3871        .count();
3872
3873    if fan_in == 0 && fan_out == 0 {
3874        return "other".to_string();
3875    }
3876
3877    // Classify based on ratio
3878    if fan_out > 0 && fan_in == 0 {
3879        "entry".to_string()
3880    } else if fan_in > fan_out * 2 {
3881        "utility".to_string()
3882    } else if fan_out > fan_in * 2 {
3883        "entry".to_string()
3884    } else {
3885        "service".to_string()
3886    }
3887}
3888
3889/// Collect top-level directories containing source files, classifying each
3890/// into an architectural layer.
3891///
3892/// Uses two-pass classification:
3893/// 1. Name-based heuristic (e.g., "api/" -> api, "utils/" -> utility)
3894/// 2. Import-based fan-in/fan-out analysis for "other" directories
3895fn collect_arch_directories(root: &Path) -> Result<HashMap<String, String>> {
3896    let mut dirs: HashMap<String, String> = HashMap::new();
3897    let files = collect_source_files(root)?;
3898
3899    // Pass 1: classify by name
3900    for (rel_path, _) in &files {
3901        if let Some(first_dir) = rel_path.split('/').next() {
3902            if rel_path.contains('/') && !dirs.contains_key(first_dir) {
3903                let layer = classify_directory_layer(first_dir);
3904                dirs.insert(first_dir.to_string(), layer);
3905            }
3906        }
3907    }
3908
3909    // Pass 2: for directories classified as "other", try import-based classification
3910    let other_dirs: Vec<String> = dirs
3911        .iter()
3912        .filter(|(_, layer)| *layer == "other")
3913        .map(|(name, _)| name.clone())
3914        .collect();
3915
3916    if !other_dirs.is_empty() {
3917        // Build import graph to analyze import flow
3918        if let Ok(edges) = build_import_graph(root) {
3919            let all_dir_names: HashSet<String> = dirs.keys().cloned().collect();
3920            for dir_name in &other_dirs {
3921                let inferred = classify_by_import_flow(dir_name, &edges, &all_dir_names);
3922                if inferred != "other" {
3923                    dirs.insert(dir_name.clone(), inferred);
3924                }
3925            }
3926        }
3927    }
3928
3929    Ok(dirs)
3930}
3931
3932/// Run L8 architecture-level diff between two directories.
3933fn run_arch_level_diff(dir_a: &Path, dir_b: &Path) -> Result<DiffReport> {
3934    let dirs_a = collect_arch_directories(dir_a)?;
3935    let dirs_b = collect_arch_directories(dir_b)?;
3936
3937    let all_dirs: BTreeSet<&str> = dirs_a
3938        .keys()
3939        .chain(dirs_b.keys())
3940        .map(|s| s.as_str())
3941        .collect();
3942
3943    let mut arch_changes: Vec<ArchLevelChange> = Vec::new();
3944    let mut directories_added = 0usize;
3945    let mut directories_removed = 0usize;
3946    let mut layer_migrations = 0usize;
3947    let mut changed_dirs = 0usize;
3948    let total_dirs = all_dirs.len();
3949
3950    for dir_name in &all_dirs {
3951        let in_a = dirs_a.get(*dir_name);
3952        let in_b = dirs_b.get(*dir_name);
3953
3954        match (in_a, in_b) {
3955            (Some(layer_a), Some(layer_b)) => {
3956                if layer_a != layer_b {
3957                    // Layer migration
3958                    changed_dirs += 1;
3959                    layer_migrations += 1;
3960                    arch_changes.push(ArchLevelChange {
3961                        directory: dir_name.to_string(),
3962                        change_type: ArchChangeType::LayerMigration,
3963                        old_layer: Some(layer_a.clone()),
3964                        new_layer: Some(layer_b.clone()),
3965                        migrated_functions: Vec::new(),
3966                    });
3967                }
3968                // Same layer = no change (stable)
3969            }
3970            (None, Some(layer_b)) => {
3971                // Added directory
3972                changed_dirs += 1;
3973                directories_added += 1;
3974                arch_changes.push(ArchLevelChange {
3975                    directory: dir_name.to_string(),
3976                    change_type: ArchChangeType::Added,
3977                    old_layer: None,
3978                    new_layer: Some(layer_b.clone()),
3979                    migrated_functions: Vec::new(),
3980                });
3981            }
3982            (Some(layer_a), None) => {
3983                // Removed directory
3984                changed_dirs += 1;
3985                directories_removed += 1;
3986                arch_changes.push(ArchLevelChange {
3987                    directory: dir_name.to_string(),
3988                    change_type: ArchChangeType::Removed,
3989                    old_layer: Some(layer_a.clone()),
3990                    new_layer: None,
3991                    migrated_functions: Vec::new(),
3992                });
3993            }
3994            (None, None) => unreachable!(),
3995        }
3996    }
3997
3998    let stability_score = if total_dirs == 0 {
3999        1.0
4000    } else {
4001        1.0 - (changed_dirs as f64 / total_dirs as f64)
4002    };
4003
4004    let summary = ArchDiffSummary {
4005        layer_migrations,
4006        directories_added,
4007        directories_removed,
4008        cycles_introduced: 0,
4009        cycles_resolved: 0,
4010        stability_score,
4011    };
4012
4013    let identical = arch_changes.is_empty();
4014
4015    Ok(DiffReport {
4016        file_a: dir_a.display().to_string(),
4017        file_b: dir_b.display().to_string(),
4018        identical,
4019        changes: Vec::new(),
4020        summary: None,
4021        granularity: DiffGranularity::Architecture,
4022        file_changes: None,
4023        module_changes: None,
4024        import_graph_summary: None,
4025        arch_changes: Some(arch_changes),
4026        arch_summary: Some(summary),
4027    })
4028}
4029
4030// =============================================================================
4031// Tests
4032// =============================================================================
4033
4034#[cfg(test)]
4035mod tests {
4036    use super::*;
4037
4038    const SAMPLE_A: &str = r#"
4039def original_function(x):
4040    return x * 2
4041
4042def renamed_later(a, b):
4043    return a + b
4044
4045def will_be_deleted():
4046    return "goodbye"
4047
4048class OriginalClass:
4049    def method_one(self):
4050        return 1
4051"#;
4052
4053    const SAMPLE_B: &str = r#"
4054def original_function(x):
4055    # Modified implementation
4056    return x * 3
4057
4058def better_name(a, b):
4059    return a + b
4060
4061def new_function():
4062    return "hello"
4063
4064class OriginalClass:
4065    def method_one(self):
4066        return 1
4067
4068    def method_two(self):
4069        return 2
4070"#;
4071
4072    /// Parse Python source for tests using the language-aware ParserPool
4073    fn parse_python(source: &str) -> tree_sitter::Tree {
4074        let pool = ParserPool::new();
4075        pool.parse(source, Language::Python).unwrap()
4076    }
4077
4078    #[test]
4079    fn test_extract_nodes() {
4080        let tree = parse_python(SAMPLE_A);
4081        let nodes = extract_nodes(tree.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4082
4083        // Should find: original_function, renamed_later, will_be_deleted, OriginalClass, method_one
4084        assert!(
4085            nodes.len() >= 5,
4086            "Expected at least 5 nodes, got {}",
4087            nodes.len()
4088        );
4089
4090        let names: Vec<&str> = nodes.iter().map(|n| n.name.as_str()).collect();
4091        assert!(names.contains(&"original_function"));
4092        assert!(names.contains(&"renamed_later"));
4093        assert!(names.contains(&"will_be_deleted"));
4094        assert!(names.contains(&"OriginalClass"));
4095        assert!(names.contains(&"method_one"));
4096    }
4097
4098    #[test]
4099    fn test_detect_update() {
4100        let tree_a = parse_python(SAMPLE_A);
4101        let tree_b = parse_python(SAMPLE_B);
4102
4103        let nodes_a = extract_nodes(tree_a.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4104        let nodes_b = extract_nodes(tree_b.root_node(), SAMPLE_B.as_bytes(), Language::Python);
4105
4106        let file_a = PathBuf::from("a.py");
4107        let file_b = PathBuf::from("b.py");
4108        let changes = detect_changes(&nodes_a, &nodes_b, &file_a, &file_b, false);
4109
4110        // original_function should be detected as Update
4111        let updates: Vec<_> = changes
4112            .iter()
4113            .filter(|c| c.change_type == ChangeType::Update)
4114            .collect();
4115        assert!(!updates.is_empty(), "Should detect at least one update");
4116        assert!(
4117            updates
4118                .iter()
4119                .any(|c| c.name.as_deref() == Some("original_function")),
4120            "original_function should be marked as updated"
4121        );
4122    }
4123
4124    #[test]
4125    fn test_detect_insert() {
4126        let tree_a = parse_python(SAMPLE_A);
4127        let tree_b = parse_python(SAMPLE_B);
4128
4129        let nodes_a = extract_nodes(tree_a.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4130        let nodes_b = extract_nodes(tree_b.root_node(), SAMPLE_B.as_bytes(), Language::Python);
4131
4132        let file_a = PathBuf::from("a.py");
4133        let file_b = PathBuf::from("b.py");
4134        let changes = detect_changes(&nodes_a, &nodes_b, &file_a, &file_b, false);
4135
4136        // new_function and method_two should be detected as Insert
4137        let inserts: Vec<_> = changes
4138            .iter()
4139            .filter(|c| c.change_type == ChangeType::Insert)
4140            .collect();
4141        assert!(!inserts.is_empty(), "Should detect insertions");
4142    }
4143
4144    #[test]
4145    fn test_detect_delete() {
4146        let tree_a = parse_python(SAMPLE_A);
4147        let tree_b = parse_python(SAMPLE_B);
4148
4149        let nodes_a = extract_nodes(tree_a.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4150        let nodes_b = extract_nodes(tree_b.root_node(), SAMPLE_B.as_bytes(), Language::Python);
4151
4152        let file_a = PathBuf::from("a.py");
4153        let file_b = PathBuf::from("b.py");
4154        let changes = detect_changes(&nodes_a, &nodes_b, &file_a, &file_b, false);
4155
4156        // will_be_deleted should be detected as Delete
4157        let deletes: Vec<_> = changes
4158            .iter()
4159            .filter(|c| c.change_type == ChangeType::Delete)
4160            .collect();
4161        assert!(!deletes.is_empty(), "Should detect deletions");
4162        assert!(
4163            deletes
4164                .iter()
4165                .any(|c| c.name.as_deref() == Some("will_be_deleted")),
4166            "will_be_deleted should be marked as deleted"
4167        );
4168    }
4169
4170    #[test]
4171    fn test_detect_rename() {
4172        let tree_a = parse_python(SAMPLE_A);
4173        let tree_b = parse_python(SAMPLE_B);
4174
4175        let nodes_a = extract_nodes(tree_a.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4176        let nodes_b = extract_nodes(tree_b.root_node(), SAMPLE_B.as_bytes(), Language::Python);
4177
4178        let file_a = PathBuf::from("a.py");
4179        let file_b = PathBuf::from("b.py");
4180        let changes = detect_changes(&nodes_a, &nodes_b, &file_a, &file_b, false);
4181
4182        // renamed_later -> better_name should be detected as Rename
4183        let renames: Vec<_> = changes
4184            .iter()
4185            .filter(|c| c.change_type == ChangeType::Rename)
4186            .collect();
4187        assert!(!renames.is_empty(), "Should detect renames");
4188    }
4189
4190    #[test]
4191    fn test_identical_files() {
4192        let tree_a = parse_python(SAMPLE_A);
4193        let tree_b = parse_python(SAMPLE_A); // Same content
4194
4195        let nodes_a = extract_nodes(tree_a.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4196        let nodes_b = extract_nodes(tree_b.root_node(), SAMPLE_A.as_bytes(), Language::Python);
4197
4198        let file_a = PathBuf::from("a.py");
4199        let file_b = PathBuf::from("b.py");
4200        let changes = detect_changes(&nodes_a, &nodes_b, &file_a, &file_b, true); // semantic_only
4201
4202        assert!(
4203            changes.is_empty(),
4204            "Identical files should have no semantic changes"
4205        );
4206    }
4207
4208    #[test]
4209    fn test_compute_similarity() {
4210        assert_eq!(compute_similarity("abc", "abc"), 1.0);
4211        assert_eq!(compute_similarity("", ""), 1.0); // two empty strings are equal
4212        assert!(compute_similarity("a\nb\nc", "a\nb\nd") >= 0.5); // Jaccard: 2/4 = 0.5
4213    }
4214
4215    #[test]
4216    fn test_normalize_body() {
4217        // Test that normalize_body skips the signature line and strips comments
4218        let body = "def foo():\n    # pure comment line\n    return 1  # inline comment";
4219        let normalized = normalize_body(body);
4220        // Should skip "def foo():" (first line), filter "# pure comment line" (comment-only)
4221        // and strip "# inline comment" from the return line
4222        assert!(!normalized.contains('#'), "Comments should be removed");
4223        assert!(
4224            !normalized.contains("def foo"),
4225            "Signature should be skipped"
4226        );
4227        assert!(normalized.contains("return 1"), "Body should remain");
4228    }
4229
4230    // =========================================================================
4231    // format_diff_text: L6-L8 rendering tests
4232    // =========================================================================
4233
4234    #[test]
4235    fn test_format_diff_text_renders_file_changes() {
4236        let mut report = DiffReport::new("dir_a/", "dir_b/");
4237        report.identical = false;
4238        report.file_changes = Some(vec![
4239            FileLevelChange {
4240                relative_path: "src/main.py".to_string(),
4241                change_type: ChangeType::Update,
4242                old_fingerprint: Some(12345),
4243                new_fingerprint: Some(67890),
4244                signature_changes: Some(vec!["fn foo()".to_string()]),
4245            },
4246            FileLevelChange {
4247                relative_path: "src/new_module.py".to_string(),
4248                change_type: ChangeType::Insert,
4249                old_fingerprint: None,
4250                new_fingerprint: Some(11111),
4251                signature_changes: None,
4252            },
4253            FileLevelChange {
4254                relative_path: "src/removed.py".to_string(),
4255                change_type: ChangeType::Delete,
4256                old_fingerprint: Some(99999),
4257                new_fingerprint: None,
4258                signature_changes: None,
4259            },
4260        ]);
4261
4262        let text = format_diff_text(&report);
4263        assert!(
4264            text.contains("File-Level Changes"),
4265            "Should have file-level section header"
4266        );
4267        assert!(text.contains("src/main.py"), "Should mention updated file");
4268        assert!(
4269            text.contains("src/new_module.py"),
4270            "Should mention added file"
4271        );
4272        assert!(
4273            text.contains("src/removed.py"),
4274            "Should mention removed file"
4275        );
4276    }
4277
4278    #[test]
4279    fn test_format_diff_text_renders_module_changes() {
4280        let mut report = DiffReport::new("dir_a/", "dir_b/");
4281        report.identical = false;
4282        report.module_changes = Some(vec![ModuleLevelChange {
4283            module_path: "src/utils.py".to_string(),
4284            change_type: ChangeType::Update,
4285            imports_added: vec![ImportEdge {
4286                source_file: "src/utils.py".to_string(),
4287                target_module: "os.path".to_string(),
4288                imported_names: vec!["join".to_string()],
4289            }],
4290            imports_removed: vec![],
4291            file_change: None,
4292        }]);
4293
4294        let text = format_diff_text(&report);
4295        assert!(
4296            text.contains("Module-Level Changes"),
4297            "Should have module-level section header"
4298        );
4299        assert!(
4300            text.contains("src/utils.py"),
4301            "Should mention the module path"
4302        );
4303        assert!(
4304            text.contains("os.path"),
4305            "Should mention added import target"
4306        );
4307    }
4308
4309    #[test]
4310    fn test_format_diff_text_renders_import_graph_summary() {
4311        let mut report = DiffReport::new("dir_a/", "dir_b/");
4312        report.identical = false;
4313        report.import_graph_summary = Some(ImportGraphSummary {
4314            total_edges_a: 10,
4315            total_edges_b: 15,
4316            edges_added: 7,
4317            edges_removed: 2,
4318            modules_with_import_changes: 3,
4319        });
4320
4321        let text = format_diff_text(&report);
4322        assert!(
4323            text.contains("Import Graph"),
4324            "Should have import graph section"
4325        );
4326        assert!(text.contains("7"), "Should show edges added");
4327        assert!(text.contains("2"), "Should show edges removed");
4328    }
4329
4330    #[test]
4331    fn test_format_diff_text_renders_arch_changes() {
4332        let mut report = DiffReport::new("dir_a/", "dir_b/");
4333        report.identical = false;
4334        report.arch_changes = Some(vec![
4335            ArchLevelChange {
4336                directory: "src/api/".to_string(),
4337                change_type: ArchChangeType::LayerMigration,
4338                old_layer: Some("presentation".to_string()),
4339                new_layer: Some("business".to_string()),
4340                migrated_functions: vec!["handle_request".to_string()],
4341            },
4342            ArchLevelChange {
4343                directory: "src/new_service/".to_string(),
4344                change_type: ArchChangeType::Added,
4345                old_layer: None,
4346                new_layer: Some("service".to_string()),
4347                migrated_functions: vec![],
4348            },
4349        ]);
4350
4351        let text = format_diff_text(&report);
4352        assert!(
4353            text.contains("Architecture-Level Changes"),
4354            "Should have arch section header"
4355        );
4356        assert!(
4357            text.contains("src/api/"),
4358            "Should mention migrated directory"
4359        );
4360        assert!(text.contains("presentation"), "Should show old layer");
4361        assert!(text.contains("business"), "Should show new layer");
4362        assert!(
4363            text.contains("src/new_service/"),
4364            "Should mention added directory"
4365        );
4366    }
4367
4368    #[test]
4369    fn test_format_diff_text_renders_arch_summary() {
4370        let mut report = DiffReport::new("dir_a/", "dir_b/");
4371        report.identical = false;
4372        report.arch_summary = Some(ArchDiffSummary {
4373            layer_migrations: 2,
4374            directories_added: 1,
4375            directories_removed: 0,
4376            cycles_introduced: 1,
4377            cycles_resolved: 0,
4378            stability_score: 0.75,
4379        });
4380
4381        let text = format_diff_text(&report);
4382        assert!(
4383            text.contains("Architecture Summary"),
4384            "Should have arch summary section"
4385        );
4386        assert!(text.contains("0.75"), "Should show stability score");
4387    }
4388
4389    #[test]
4390    fn test_format_diff_text_identical_skips_higher_levels() {
4391        // When identical, format_diff_text returns early, so even if higher-level
4392        // fields were somehow set, they should not appear.
4393        let mut report = DiffReport::new("a.py", "b.py");
4394        report.identical = true;
4395        report.file_changes = Some(vec![FileLevelChange {
4396            relative_path: "should_not_appear.py".to_string(),
4397            change_type: ChangeType::Insert,
4398            old_fingerprint: None,
4399            new_fingerprint: Some(1),
4400            signature_changes: None,
4401        }]);
4402
4403        let text = format_diff_text(&report);
4404        assert!(
4405            !text.contains("should_not_appear"),
4406            "Identical report should skip all change sections"
4407        );
4408        assert!(
4409            text.contains("No structural changes"),
4410            "Should show identical message"
4411        );
4412    }
4413}