tldr_cli/commands/bugbot/l2/engines/
tldr_differential.rs

1//! TldrDifferentialEngine -- L2 engine that invokes the `tldr` CLI binary.
2//!
3//! Replaces the bespoke DeltaEngine by running `tldr` subcommands (complexity,
4//! cognitive, contracts, smells, calls, deps, coupling, cohesion, dead) on
5//! baseline and current file revisions, diffing the JSON outputs to detect
6//! regressions.
7//!
8//! # Finding Types
9//!
10//! | ID | Finding Type | Category | Source command |
11//! |----|-------------|----------|---------------|
12//! | 1 | `complexity-increase` | LOCAL | `tldr complexity` |
13//! | 2 | `cognitive-increase` | LOCAL | `tldr cognitive` |
14//! | 3 | `contract-removed` | LOCAL | `tldr contracts` |
15//! | 4 | `smell-introduced` | LOCAL | `tldr smells` |
16//! | 5 | `call-graph-change` | FLOW | `tldr calls` |
17//! | 6 | `dependency-change` | FLOW | derived from `tldr calls` |
18//! | 7 | `coupling-increase` | FLOW | `tldr coupling` |
19//! | 8 | `cohesion-decrease` | FLOW | `tldr cohesion` |
20//! | 9 | `dead-code-introduced` | FLOW | `tldr dead` |
21//! | 10 | `downstream-impact` | IMPACT | derived from `tldr calls` |
22//! | 11 | `breaking-change-risk` | IMPACT | derived from `tldr calls` |
23//!
24//! # Architecture
25//!
26//! For LOCAL commands: writes baseline/current source to temp files, runs
27//! `tldr <command> <tmpfile> --format json`, parses JSON, diffs metrics per
28//! function, and emits findings for regressions.
29//!
30//! For FLOW commands: `tldr calls` is run once for the current project by the
31//! `analyze()` entry point, and the resulting JSON is cached and passed to
32//! `analyze_flow_commands`, `analyze_downstream_impact`, and
33//! `analyze_function_impact`. The deps, downstream-impact, and
34//! breaking-change-risk findings are all derived in-memory from the cached
35//! call graph, eliminating separate `tldr deps`, `tldr whatbreaks`, and
36//! redundant `tldr calls` subprocess calls. Only baseline `tldr calls`,
37//! baseline/current `tldr cohesion`, and `tldr dead` still require
38//! subprocess execution. The `dead` command uses count-only analysis
39//! (no baseline worktree needed).
40
41use std::collections::hash_map::DefaultHasher;
42use std::collections::{BTreeMap, BTreeSet, HashMap};
43use std::hash::{Hash, Hasher};
44use std::path::{Path, PathBuf};
45use std::process::Command;
46use std::time::{Duration, Instant};
47
48use tempfile::TempDir;
49
50use super::super::context::L2Context;
51use super::super::types::{AnalyzerStatus, L2AnalyzerOutput};
52use super::super::L2Engine;
53use crate::commands::bugbot::dead::is_test_function;
54use crate::commands::bugbot::types::BugbotFinding;
55
/// Scope at which a tldr subcommand operates.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum TldrCategory {
    /// Per-file: the command is run against individual temp files.
    Local,
    /// Project-wide: the command is run against the project root directory.
    Flow,
}
64
65/// Configuration for a single tldr subcommand.
66#[derive(Debug, Clone)]
67struct TldrCommand {
68    /// Human-readable name (also used in finding_type).
69    name: &'static str,
70    /// CLI arguments passed to `tldr` (e.g., `["complexity"]`).
71    args: &'static [&'static str],
72    /// Whether this command operates per-file or project-wide.
73    category: TldrCategory,
74}
75
76/// All tldr commands that this engine runs.
77const TLDR_COMMANDS: &[TldrCommand] = &[
78    // LOCAL (per-file, parse per-function from output):
79    TldrCommand { name: "complexity", args: &["complexity"], category: TldrCategory::Local },
80    TldrCommand { name: "cognitive", args: &["cognitive"], category: TldrCategory::Local },
81    TldrCommand { name: "contracts", args: &["contracts"], category: TldrCategory::Local },
82    TldrCommand { name: "smells", args: &["smells"], category: TldrCategory::Local },
83    // FLOW (project-wide, run on project root):
84    TldrCommand { name: "calls", args: &["calls"], category: TldrCategory::Flow },
85    TldrCommand { name: "deps", args: &["deps"], category: TldrCategory::Flow },
86    TldrCommand { name: "coupling", args: &["coupling"], category: TldrCategory::Flow },
87    TldrCommand { name: "cohesion", args: &["cohesion"], category: TldrCategory::Flow },
88    TldrCommand { name: "dead", args: &["dead"], category: TldrCategory::Flow },
89];
90
/// Every `finding_type` string that TldrDifferentialEngine can emit,
/// listed in the same order as the module-level finding table.
const FINDING_TYPES: &[&str] = &[
    // LOCAL findings
    "complexity-increase",
    "cognitive-increase",
    "contract-removed",
    "smell-introduced",
    // FLOW findings
    "call-graph-change",
    "dependency-change",
    "coupling-increase",
    "cohesion-decrease",
    "dead-code-introduced",
    // IMPACT findings
    "downstream-impact",
    "breaking-change-risk",
];
105
/// Upper bound, in bytes, on the stdout kept from a single tldr subprocess (10 MiB).
const MAX_OUTPUT_BYTES: usize = 10 << 20;
108
/// L2 engine that shells out to the `tldr` CLI binary for differential analysis.
///
/// tldr subcommands are run against the baseline and current revisions of a
/// file, their JSON metrics are diffed, and regressions become findings.
/// The `analyze()` entry point runs `tldr calls` once for the current
/// project and hands the cached call-graph JSON to the flow, downstream, and
/// function-impact analysis methods; deps, downstream-impact, and
/// breaking-change-risk findings are derived in-memory from that graph.
/// Only baseline calls, cohesion, and dead-code analysis need their own
/// subprocess calls. Every subprocess is subject to a configurable timeout.
pub struct TldrDifferentialEngine {
    /// Per-command timeout, in seconds.
    timeout_secs: u64,
}
123
124impl TldrDifferentialEngine {
125    /// Create a new TldrDifferentialEngine with the default 30-second timeout.
126    pub fn new() -> Self {
127        Self { timeout_secs: 30 }
128    }
129
130    /// Create a new TldrDifferentialEngine with a custom timeout.
131    pub fn with_timeout(timeout_secs: u64) -> Self {
132        Self { timeout_secs }
133    }
134
135    /// Run a tldr subcommand and parse its JSON output.
136    ///
137    /// Spawns `tldr` with the given arguments as a subprocess, captures
138    /// stdout, and parses as JSON. Returns `Err` on spawn failure, timeout,
139    /// or JSON parse failure. Truncates output to `MAX_OUTPUT_BYTES`.
140    ///
141    /// The caller is responsible for building the full argument list including
142    /// `--format json`.
143    fn run_tldr_command(
144        &self,
145        args: &[&str],
146        target: &Path,
147    ) -> Result<serde_json::Value, String> {
148        let target_str = target.to_string_lossy().to_string();
149        let mut full_args: Vec<String> = args.iter().map(|a| a.to_string()).collect();
150        full_args.push(target_str);
151        full_args.push("--format".to_string());
152        full_args.push("json".to_string());
153        self.run_tldr_raw(&full_args)
154    }
155
156    /// Run a tldr subcommand that requires per-function invocation.
157    ///
158    /// Spawns `tldr <command> <file> <function> --format json`. Used for
159    /// `complexity` and `contracts` which require a function name argument.
160    fn run_tldr_per_function(
161        &self,
162        command: &str,
163        file: &Path,
164        function_name: &str,
165    ) -> Result<serde_json::Value, String> {
166        let file_str = file.to_string_lossy().to_string();
167        let args = vec![
168            command.to_string(),
169            file_str,
170            function_name.to_string(),
171            "--format".to_string(),
172            "json".to_string(),
173        ];
174        self.run_tldr_raw(&args)
175    }
176
177    /// Run a tldr flow command with language filtering and gitignore respect.
178    ///
179    /// Unlike `run_tldr_command`, this method appends `--lang <language>` to
180    /// restrict analysis to the relevant language, and `--respect-ignore` (for
181    /// commands that support it) to skip files matched by `.gitignore`. This
182    /// prevents flow commands from scanning thousands of irrelevant files
183    /// (markdown, test fixtures, corpus data) and timing out.
184    fn run_tldr_flow_command(
185        &self,
186        cmd_name: &str,
187        args: &[&str],
188        target: &Path,
189        language: &str,
190    ) -> Result<serde_json::Value, String> {
191        let target_str = target.to_string_lossy().to_string();
192        let mut full_args: Vec<String> = args.iter().map(|a| a.to_string()).collect();
193        full_args.push(target_str);
194        full_args.push("--lang".to_string());
195        full_args.push(language.to_string());
196        // Only pass --respect-ignore for commands that support it.
197        // Currently only `calls` supports this flag.
198        if cmd_name == "calls" {
199            full_args.push("--respect-ignore".to_string());
200        }
201        full_args.push("--format".to_string());
202        full_args.push("json".to_string());
203        self.run_tldr_raw(&full_args)
204    }
205
206    /// Low-level: spawn `tldr` with the given arguments, capture stdout, parse as JSON.
207    fn run_tldr_raw(
208        &self,
209        args: &[String],
210    ) -> Result<serde_json::Value, String> {
211        let child = Command::new("tldr")
212            .args(args)
213            .stdout(std::process::Stdio::piped())
214            .stderr(std::process::Stdio::piped())
215            .spawn();
216
217        let child = match child {
218            Ok(c) => c,
219            Err(e) => return Err(format!("Failed to spawn 'tldr': {}", e)),
220        };
221
222        // Simple timeout: wait in a thread, kill if exceeded.
223        let timeout = Duration::from_secs(self.timeout_secs);
224        let child_id = child.id();
225        let timed_out = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
226        let timed_out_clone = timed_out.clone();
227
228        let _watchdog = std::thread::spawn(move || {
229            std::thread::sleep(timeout);
230            timed_out_clone.store(true, std::sync::atomic::Ordering::SeqCst);
231            #[cfg(unix)]
232            unsafe {
233                libc::kill(child_id as libc::pid_t, libc::SIGKILL);
234            }
235            #[cfg(windows)]
236            unsafe {
237                let handle = windows_sys::Win32::System::Threading::OpenProcess(
238                    windows_sys::Win32::System::Threading::PROCESS_TERMINATE,
239                    0,
240                    child_id,
241                );
242                if handle != 0 {
243                    windows_sys::Win32::System::Threading::TerminateProcess(handle, 1);
244                    windows_sys::Win32::Foundation::CloseHandle(handle);
245                }
246            }
247        });
248
249        let output = child
250            .wait_with_output()
251            .map_err(|e| format!("Failed to read tldr output: {}", e))?;
252
253        if timed_out.load(std::sync::atomic::Ordering::SeqCst) {
254            return Err(format!("Timeout after {}s", self.timeout_secs));
255        }
256
257        let raw_stdout = String::from_utf8_lossy(&output.stdout).to_string();
258        let stdout = if raw_stdout.len() > MAX_OUTPUT_BYTES {
259            let mut truncated = raw_stdout;
260            truncated.truncate(MAX_OUTPUT_BYTES);
261            if let Some(last_newline) = truncated.rfind('\n') {
262                truncated.truncate(last_newline + 1);
263            }
264            truncated
265        } else {
266            raw_stdout
267        };
268
269        if stdout.trim().is_empty() {
270            return Err(format!(
271                "tldr {} produced empty output (exit code: {:?}, stderr: {})",
272                args.first().map(|s| s.as_str()).unwrap_or("?"),
273                output.status.code(),
274                String::from_utf8_lossy(&output.stderr),
275            ));
276        }
277
278        serde_json::from_str(&stdout)
279            .map_err(|e| format!("Failed to parse tldr JSON: {} (first 200 chars: {:?})", e, &stdout[..stdout.len().min(200)]))
280    }
281
282    /// Run all LOCAL commands on baseline and current temp files for a single changed file.
283    ///
284    /// Commands fall into two categories:
285    /// - **File-level** (`cognitive`, `smells`): accept a file path, return all functions.
286    /// - **Per-function** (`complexity`, `contracts`): require `<FILE> <FUNCTION>`, so we first
287    ///   discover function names via `cognitive` then invoke per-function.
288    fn analyze_local_commands(
289        &self,
290        file_path: &Path,
291        baseline_source: &str,
292        current_source: &str,
293        partial_reasons: &mut Vec<String>,
294    ) -> Vec<BugbotFinding> {
295        let mut findings = Vec::new();
296
297        let ext = file_path
298            .extension()
299            .and_then(|e| e.to_str())
300            .unwrap_or("py");
301
302        // Create temp dir for this file's analysis
303        let tmp_dir = match TempDir::new() {
304            Ok(d) => d,
305            Err(e) => {
306                partial_reasons.push(format!("tmpdir creation failed: {}", e));
307                return findings;
308            }
309        };
310
311        let baseline_file = tmp_dir.path().join(format!("baseline.{}", ext));
312        let current_file = tmp_dir.path().join(format!("current.{}", ext));
313
314        if std::fs::write(&baseline_file, baseline_source).is_err() {
315            partial_reasons.push(format!("write baseline tmpfile failed for {}", file_path.display()));
316            return findings;
317        }
318        if std::fs::write(&current_file, current_source).is_err() {
319            partial_reasons.push(format!("write current tmpfile failed for {}", file_path.display()));
320            return findings;
321        }
322
323        // === File-level commands: cognitive, smells ===
324        // These accept a path and return all functions or smells.
325        for cmd_name in &["cognitive", "smells"] {
326            let baseline_result = self.run_tldr_command(&[cmd_name], &baseline_file);
327            let current_result = self.run_tldr_command(&[cmd_name], &current_file);
328
329            match (baseline_result, current_result) {
330                (Ok(baseline_json), Ok(current_json)) => {
331                    let cmd_findings = self.diff_local_metrics(
332                        cmd_name,
333                        file_path,
334                        &baseline_json,
335                        &current_json,
336                    );
337                    findings.extend(cmd_findings);
338                }
339                (Err(e), _) | (_, Err(e)) => {
340                    partial_reasons.push(format!(
341                        "tldr {} failed for {}: {}",
342                        cmd_name,
343                        file_path.display(),
344                        e,
345                    ));
346                }
347            }
348        }
349
350        // === Per-function commands: complexity, contracts ===
351        // Discover function names from the cognitive output (which lists all functions).
352        let baseline_funcs = Self::discover_function_names_from_cognitive(
353            &self.run_tldr_command(&["cognitive"], &baseline_file),
354        );
355        let current_funcs = Self::discover_function_names_from_cognitive(
356            &self.run_tldr_command(&["cognitive"], &current_file),
357        );
358
359        // --- complexity: per-function ---
360        {
361            let mut baseline_entries: Vec<(String, serde_json::Value)> = Vec::new();
362            for func in &baseline_funcs {
363                match self.run_tldr_per_function("complexity", &baseline_file, func) {
364                    Ok(json) => baseline_entries.push((func.clone(), json)),
365                    Err(e) => {
366                        partial_reasons.push(format!("tldr complexity {} baseline: {}", func, e));
367                    }
368                }
369            }
370
371            let mut current_entries: Vec<(String, serde_json::Value)> = Vec::new();
372            for func in &current_funcs {
373                match self.run_tldr_per_function("complexity", &current_file, func) {
374                    Ok(json) => current_entries.push((func.clone(), json)),
375                    Err(e) => {
376                        partial_reasons.push(format!("tldr complexity {} current: {}", func, e));
377                    }
378                }
379            }
380
381            // Build aggregated JSON for diffing (wrap per-function results into
382            // the same { "functions": [...] } shape the diff_local_metrics expects)
383            let baseline_agg = Self::aggregate_per_function_complexity(&baseline_entries);
384            let current_agg = Self::aggregate_per_function_complexity(&current_entries);
385
386            let complexity_findings = self.diff_local_metrics(
387                "complexity",
388                file_path,
389                &baseline_agg,
390                &current_agg,
391            );
392            findings.extend(complexity_findings);
393        }
394
395        // --- contracts: per-function ---
396        {
397            let mut baseline_entries: Vec<(String, serde_json::Value)> = Vec::new();
398            for func in &baseline_funcs {
399                match self.run_tldr_per_function("contracts", &baseline_file, func) {
400                    Ok(json) => baseline_entries.push((func.clone(), json)),
401                    Err(e) => {
402                        partial_reasons.push(format!("tldr contracts {} baseline: {}", func, e));
403                    }
404                }
405            }
406
407            // For current contracts, also attempt functions that only appear in
408            // baseline_funcs. Cognitive discovery can miss simple functions (e.g.,
409            // `name()`, `default()`), so without this, functions present in
410            // baseline but absent from current_funcs would be falsely reported
411            // as "function deleted" by diff_contracts.
412            let current_func_set: std::collections::HashSet<&str> =
413                current_funcs.iter().map(|s| s.as_str()).collect();
414            let all_current_candidates: Vec<String> = current_funcs
415                .iter()
416                .cloned()
417                .chain(
418                    baseline_funcs
419                        .iter()
420                        .filter(|f| !current_func_set.contains(f.as_str()))
421                        .cloned(),
422                )
423                .collect();
424
425            let mut current_entries: Vec<(String, serde_json::Value)> = Vec::new();
426            for func in &all_current_candidates {
427                match self.run_tldr_per_function("contracts", &current_file, func) {
428                    Ok(json) => current_entries.push((func.clone(), json)),
429                    Err(e) => {
430                        partial_reasons.push(format!("tldr contracts {} current: {}", func, e));
431                    }
432                }
433            }
434
435            let baseline_agg = Self::aggregate_per_function_contracts(&baseline_entries);
436            let current_agg = Self::aggregate_per_function_contracts(&current_entries);
437
438            let contract_findings = self.diff_contracts(
439                file_path,
440                &baseline_agg,
441                &current_agg,
442                &all_current_candidates,
443            );
444            findings.extend(contract_findings);
445        }
446
447        findings
448    }
449
450    /// Discover function names from a cognitive command result.
451    ///
452    /// The cognitive JSON output has `{ "functions": [{ "name": "..." }, ...] }`.
453    /// Returns the list of function names found, or empty vec on error.
454    fn discover_function_names_from_cognitive(
455        result: &Result<serde_json::Value, String>,
456    ) -> Vec<String> {
457        match result {
458            Ok(json) => {
459                Self::extract_function_entries(json)
460                    .into_iter()
461                    .map(|(name, _)| name)
462                    .filter(|name| !is_test_function(name))
463                    .collect()
464            }
465            Err(_) => Vec::new(),
466        }
467    }
468
469    /// Aggregate per-function complexity results into the standard `{ "functions": [...] }` shape.
470    ///
471    /// Each per-function call returns `{ "function": "name", "cyclomatic": N, ... }`.
472    /// We wrap them into `{ "functions": [{ "name": "...", "cyclomatic": N }] }` for diff_local_metrics.
473    fn aggregate_per_function_complexity(entries: &[(String, serde_json::Value)]) -> serde_json::Value {
474        let functions: Vec<serde_json::Value> = entries
475            .iter()
476            .map(|(name, json)| {
477                let cyclomatic = json.get("cyclomatic").and_then(|v| v.as_f64()).unwrap_or(0.0);
478                let line = json.get("lines_of_code").and_then(|v| v.as_u64()).unwrap_or(1);
479                serde_json::json!({
480                    "name": name,
481                    "cyclomatic": cyclomatic,
482                    "line": line,
483                })
484            })
485            .collect();
486        serde_json::json!({ "functions": functions })
487    }
488
489    /// Aggregate per-function contracts results into the standard `{ "functions": [...] }` shape.
490    ///
491    /// Each per-function call returns `{ "function": "name", "preconditions": [...], ... }`.
492    fn aggregate_per_function_contracts(entries: &[(String, serde_json::Value)]) -> serde_json::Value {
493        let functions: Vec<serde_json::Value> = entries
494            .iter()
495            .map(|(name, json)| {
496                let preconditions = json.get("preconditions").cloned().unwrap_or(serde_json::json!([]));
497                let postconditions = json.get("postconditions").cloned().unwrap_or(serde_json::json!([]));
498                serde_json::json!({
499                    "name": name,
500                    "preconditions": preconditions,
501                    "postconditions": postconditions,
502                })
503            })
504            .collect();
505        serde_json::json!({ "functions": functions })
506    }
507
508    /// Diff baseline vs current JSON from a local tldr command.
509    ///
510    /// The JSON structure varies by command, but we use a generic approach:
511    /// look for per-function metrics (arrays of objects with "name" and numeric
512    /// fields), then compare matching functions.
513    fn diff_local_metrics(
514        &self,
515        command_name: &str,
516        file_path: &Path,
517        baseline_json: &serde_json::Value,
518        current_json: &serde_json::Value,
519    ) -> Vec<BugbotFinding> {
520        let mut findings = Vec::new();
521
522        match command_name {
523            "complexity" => {
524                findings.extend(self.diff_numeric_metrics(
525                    "complexity-increase",
526                    "cyclomatic",
527                    file_path,
528                    baseline_json,
529                    current_json,
530                ));
531            }
532            "cognitive" => {
533                findings.extend(self.diff_numeric_metrics(
534                    "cognitive-increase",
535                    "cognitive",
536                    file_path,
537                    baseline_json,
538                    current_json,
539                ));
540            }
541            "contracts" => {
542                // Note: When called via diff_local_metrics (fallback path),
543                // we don't have known_current_funcs context, so pass empty
544                // slice. The primary contracts path in analyze_per_function
545                // passes actual current_funcs for accurate deletion detection.
546                findings.extend(self.diff_contracts(
547                    file_path,
548                    baseline_json,
549                    current_json,
550                    &[],
551                ));
552            }
553            "smells" => {
554                findings.extend(self.diff_smells(
555                    file_path,
556                    baseline_json,
557                    current_json,
558                ));
559            }
560            _ => {}
561        }
562
563        findings
564    }
565
566    /// Extract function entries from tldr JSON output.
567    ///
568    /// Tldr commands typically output an object with a "functions" or "results"
569    /// array, where each entry has a "name" field. We try several common keys.
570    fn extract_function_entries(json: &serde_json::Value) -> Vec<(String, &serde_json::Value)> {
571        let mut entries = Vec::new();
572
573        // Try common top-level array keys
574        for key in &["functions", "results", "items", "entries", "metrics"] {
575            if let Some(arr) = json.get(key).and_then(|v| v.as_array()) {
576                for item in arr {
577                    if let Some(name) = item.get("name").and_then(|n| n.as_str()) {
578                        entries.push((name.to_string(), item));
579                    }
580                }
581                if !entries.is_empty() {
582                    return entries;
583                }
584            }
585        }
586
587        // Try the root itself if it's an array
588        if let Some(arr) = json.as_array() {
589            for item in arr {
590                if let Some(name) = item.get("name").and_then(|n| n.as_str()) {
591                    entries.push((name.to_string(), item));
592                }
593            }
594        }
595
596        entries
597    }
598
599    /// Diff a single numeric metric between baseline and current JSON.
600    ///
601    /// Finds matching functions by name, extracts the specified metric field,
602    /// and emits a finding if the value increased beyond the threshold.
603    fn diff_numeric_metrics(
604        &self,
605        finding_type: &str,
606        metric_field: &str,
607        file_path: &Path,
608        baseline_json: &serde_json::Value,
609        current_json: &serde_json::Value,
610    ) -> Vec<BugbotFinding> {
611        let mut findings = Vec::new();
612
613        let baseline_entries = Self::extract_function_entries(baseline_json);
614        let current_entries = Self::extract_function_entries(current_json);
615
616        let baseline_map: std::collections::HashMap<&str, &serde_json::Value> = baseline_entries
617            .iter()
618            .map(|(name, val)| (name.as_str(), *val))
619            .collect();
620
621        for (func_name, current_entry) in &current_entries {
622            let Some(baseline_entry) = baseline_map.get(func_name.as_str()) else {
623                // New function -- report as info for awareness
624                if let Some(current_val) = current_entry.get(metric_field).and_then(|v| v.as_f64()) {
625                    if current_val > 10.0 {
626                        findings.push(BugbotFinding {
627                            finding_type: finding_type.to_string(),
628                            severity: "info".to_string(),
629                            file: file_path.to_path_buf(),
630                            function: func_name.clone(),
631                            line: current_entry.get("line").and_then(|l| l.as_u64()).unwrap_or(1) as usize,
632                            message: format!(
633                                "New function `{}` has {} = {:.1}",
634                                func_name, metric_field, current_val,
635                            ),
636                            evidence: serde_json::json!({
637                                "command": finding_type.replace("-increase", ""),
638                                "metric": metric_field,
639                                "current_value": current_val,
640                                "new_function": true,
641                            }),
642                            confidence: Some("DETERMINISTIC".to_string()),
643                            finding_id: Some(compute_finding_id(finding_type, file_path, func_name, 0)),
644                        });
645                    }
646                }
647                continue;
648            };
649
650            let baseline_val = baseline_entry.get(metric_field).and_then(|v| v.as_f64()).unwrap_or(0.0);
651            let current_val = current_entry.get(metric_field).and_then(|v| v.as_f64()).unwrap_or(0.0);
652
653            if current_val > baseline_val {
654                let delta = current_val - baseline_val;
655
656                // Skip trivial absolute changes. Small deltas (e.g., 2→4) fire
657                // due to high percentage but are not actionable. Thresholds:
658                //   cognitive: delta >= 3  (informed by real-world validation)
659                //   complexity: delta >= 2  (cyclomatic is coarser-grained)
660                let min_delta = match finding_type {
661                    "cognitive-increase" => 3.0,
662                    "complexity-increase" => 2.0,
663                    _ => 1.0,
664                };
665                if delta < min_delta {
666                    continue;
667                }
668
669                let pct_increase = if baseline_val > 0.0 {
670                    (delta / baseline_val) * 100.0
671                } else {
672                    100.0
673                };
674
675                let severity = if pct_increase > 50.0 {
676                    "high"
677                } else if pct_increase > 20.0 {
678                    "medium"
679                } else {
680                    "low"
681                };
682
683                let line = current_entry.get("line").and_then(|l| l.as_u64()).unwrap_or(1) as usize;
684
685                findings.push(BugbotFinding {
686                    finding_type: finding_type.to_string(),
687                    severity: severity.to_string(),
688                    file: file_path.to_path_buf(),
689                    function: func_name.clone(),
690                    line,
691                    message: format!(
692                        "`{}` {} increased by {:.1} ({:.1} -> {:.1}, +{:.0}%)",
693                        func_name, metric_field, delta, baseline_val, current_val, pct_increase,
694                    ),
695                    evidence: serde_json::json!({
696                        "command": finding_type.replace("-increase", ""),
697                        "metric": metric_field,
698                        "old_value": baseline_val,
699                        "new_value": current_val,
700                        "delta": delta,
701                        "pct_increase": pct_increase,
702                    }),
703                    confidence: Some("DETERMINISTIC".to_string()),
704                    finding_id: Some(compute_finding_id(finding_type, file_path, func_name, line)),
705                });
706            }
707        }
708
709        findings
710    }
711
712    /// Diff contracts between baseline and current.
713    ///
714    /// Detects contracts (pre/postconditions) present in baseline but absent
715    /// in current, emitting a "contract-removed" finding.
716    ///
717    /// `known_current_funcs` contains the function names that actually exist in
718    /// the current version (from the AST diff). This prevents false positives
719    /// when `tldr contracts` fails to extract a function — without this check,
720    /// an extraction failure would be misinterpreted as "function deleted".
721    fn diff_contracts(
722        &self,
723        file_path: &Path,
724        baseline_json: &serde_json::Value,
725        current_json: &serde_json::Value,
726        known_current_funcs: &[String],
727    ) -> Vec<BugbotFinding> {
728        let mut findings = Vec::new();
729
730        let baseline_entries = Self::extract_function_entries(baseline_json);
731        let current_entries = Self::extract_function_entries(current_json);
732
733        let current_names: std::collections::HashSet<String> = current_entries
734            .iter()
735            .map(|(name, _)| name.clone())
736            .collect();
737
738        // Count contracts per function in baseline
739        let baseline_contract_count = |entry: &serde_json::Value| -> usize {
740            let pre = entry.get("preconditions").and_then(|v| v.as_array()).map(|a| a.len()).unwrap_or(0);
741            let post = entry.get("postconditions").and_then(|v| v.as_array()).map(|a| a.len()).unwrap_or(0);
742            pre + post
743        };
744
745        let current_map: std::collections::HashMap<&str, &serde_json::Value> = current_entries
746            .iter()
747            .map(|(name, val)| (name.as_str(), *val))
748            .collect();
749
750        for (func_name, baseline_entry) in &baseline_entries {
751            let b_count = baseline_contract_count(baseline_entry);
752            if b_count == 0 {
753                continue;
754            }
755
756            if let Some(current_entry) = current_map.get(func_name.as_str()) {
757                let c_count = baseline_contract_count(current_entry);
758                if c_count < b_count {
759                    let removed = b_count - c_count;
760                    findings.push(BugbotFinding {
761                        finding_type: "contract-removed".to_string(),
762                        severity: "medium".to_string(),
763                        file: file_path.to_path_buf(),
764                        function: func_name.clone(),
765                        line: 1,
766                        message: format!(
767                            "`{}` lost {} contract(s) ({} -> {})",
768                            func_name, removed, b_count, c_count,
769                        ),
770                        evidence: serde_json::json!({
771                            "command": "contracts",
772                            "baseline_contracts": b_count,
773                            "current_contracts": c_count,
774                            "removed": removed,
775                        }),
776                        confidence: Some("DETERMINISTIC".to_string()),
777                        finding_id: Some(compute_finding_id("contract-removed", file_path, func_name, 1)),
778                    });
779                }
780            } else if !current_names.contains(func_name.as_str()) {
781                // Check if the function actually exists in current version.
782                // If it does, contracts extraction just failed — not a deletion.
783                // That failure is already captured in partial_reasons upstream.
784                if known_current_funcs.iter().any(|f| f == func_name) {
785                    continue;
786                }
787                // Function with contracts was genuinely deleted
788                findings.push(BugbotFinding {
789                    finding_type: "contract-removed".to_string(),
790                    severity: "high".to_string(),
791                    file: file_path.to_path_buf(),
792                    function: func_name.clone(),
793                    line: 1,
794                    message: format!(
795                        "`{}` with {} contract(s) was removed entirely",
796                        func_name, b_count,
797                    ),
798                    evidence: serde_json::json!({
799                        "command": "contracts",
800                        "baseline_contracts": b_count,
801                        "current_contracts": 0,
802                        "function_deleted": true,
803                    }),
804                    confidence: Some("DETERMINISTIC".to_string()),
805                    finding_id: Some(compute_finding_id("contract-removed", file_path, func_name, 0)),
806                });
807            }
808        }
809
810        findings
811    }
812
813    /// Diff smells between baseline and current.
814    ///
815    /// Detects new code smells introduced in current that were not present in
816    /// baseline.
817    fn diff_smells(
818        &self,
819        file_path: &Path,
820        baseline_json: &serde_json::Value,
821        current_json: &serde_json::Value,
822    ) -> Vec<BugbotFinding> {
823        let mut findings = Vec::new();
824
825        let count_smells = |json: &serde_json::Value| -> usize {
826            // Smells output typically has a top-level "smells" or "issues" array
827            for key in &["smells", "issues", "findings", "results"] {
828                if let Some(arr) = json.get(key).and_then(|v| v.as_array()) {
829                    return arr.len();
830                }
831            }
832            if let Some(arr) = json.as_array() {
833                return arr.len();
834            }
835            0
836        };
837
838        let baseline_count = count_smells(baseline_json);
839        let current_count = count_smells(current_json);
840
841        // Skip when baseline has zero smells (new file) — no regression possible
842        if baseline_count == 0 {
843            return findings;
844        }
845
846        if current_count > baseline_count {
847            let introduced = current_count - baseline_count;
848
849            // Extract current smell entries directly
850            let current_smells: Vec<&serde_json::Value> = {
851                let mut result = Vec::new();
852                for key in &["smells", "issues", "findings", "results"] {
853                    if let Some(arr) = current_json.get(key).and_then(|v| v.as_array()) {
854                        result = arr.iter().collect();
855                        break;
856                    }
857                }
858                if result.is_empty() {
859                    if let Some(arr) = current_json.as_array() {
860                        result = arr.iter().collect();
861                    }
862                }
863                result
864            };
865
866            // Smell types that are too noisy to report. message_chain fires on
867            // idiomatic Rust iterator chains; long_parameter_list fires on
868            // constructors and builders that legitimately need many params.
869            const SUPPRESSED_SMELL_TYPES: &[&str] = &[
870                "message_chain",
871                "long_parameter_list",
872            ];
873
874            // Report each new smell (the last `introduced` entries are likely new)
875            for (i, smell) in current_smells.iter().rev().take(introduced).enumerate() {
876                let smell_type = smell.get("smell_type").or_else(|| smell.get("type")).or_else(|| smell.get("kind")).and_then(|v| v.as_str()).unwrap_or("unknown");
877
878                if SUPPRESSED_SMELL_TYPES.contains(&smell_type) {
879                    continue;
880                }
881
882                let func_name = smell.get("function").or_else(|| smell.get("name")).and_then(|v| v.as_str()).unwrap_or("(file-level)");
883                let line = smell.get("line").and_then(|l| l.as_u64()).unwrap_or(1) as usize;
884
885                // Severity by smell type: structural issues are medium,
886                // style issues stay low.
887                let severity = match smell_type {
888                    "god_class" | "feature_envy" | "data_clump" => "medium",
889                    _ => "low",
890                };
891
892                findings.push(BugbotFinding {
893                    finding_type: "smell-introduced".to_string(),
894                    severity: severity.to_string(),
895                    file: file_path.to_path_buf(),
896                    function: func_name.to_string(),
897                    line,
898                    message: format!(
899                        "New code smell `{}` introduced (total smells: {} -> {})",
900                        smell_type, baseline_count, current_count,
901                    ),
902                    evidence: serde_json::json!({
903                        "command": "smells",
904                        "smell_type": smell_type,
905                        "baseline_smell_count": baseline_count,
906                        "current_smell_count": current_count,
907                        "introduced": introduced,
908                        "index": i,
909                    }),
910                    confidence: Some("DETERMINISTIC".to_string()),
911                    finding_id: Some(compute_finding_id("smell-introduced", file_path, func_name, line)),
912                });
913            }
914        }
915
916        findings
917    }
918
919    /// Run all FLOW commands on the project root with baseline comparison.
920    ///
921    /// Creates a git worktree at `base_ref` for baseline, runs each flow
922    /// command on both baseline and current, and diffs the JSON outputs to
923    /// detect regressions. The `dead` command uses count-only analysis
924    /// (no baseline needed). Calls and deps use the cached `current_calls_json`
925    /// when available (deps are derived in-memory from the call graph).
926    /// Cohesion still requires a separate subprocess call. The `coupling`
927    /// command is skipped because it requires file pairs, not a project root.
928    ///
929    /// When `current_calls_json` is `Some`, only the baseline `tldr calls` and
930    /// baseline/current `tldr cohesion` subprocesses are spawned (3 calls
931    /// instead of 6). When `None`, falls back to running all subprocesses.
932    fn analyze_flow_commands(
933        &self,
934        project: &Path,
935        base_ref: &str,
936        language: &str,
937        current_calls_json: Option<&serde_json::Value>,
938        partial_reasons: &mut Vec<String>,
939    ) -> Vec<BugbotFinding> {
940        let mut findings = Vec::new();
941
942        // Flow commands analyze entire projects -- give them 5 minutes.
943        // The previous max(self.timeout_secs, 60) was too aggressive and
944        // killed legitimate long-running analysis on large repos.
945        let flow_engine = TldrDifferentialEngine::with_timeout(300);
946
947        // === Dead code: count-only, no baseline needed ===
948        for cmd in TLDR_COMMANDS.iter().filter(|c| c.category == TldrCategory::Flow && c.name == "dead") {
949            match flow_engine.run_tldr_flow_command(cmd.name, cmd.args, project, language) {
950                Ok(json) => {
951                    let dead_count = Self::count_dead_code_entries(&json);
952                    if dead_count > 0 {
953                        findings.push(BugbotFinding {
954                            finding_type: "dead-code-introduced".to_string(),
955                            severity: "info".to_string(),
956                            file: PathBuf::from("(project)"),
957                            function: "(project-level)".to_string(),
958                            line: 0,
959                            message: format!(
960                                "{} dead code entries detected in project",
961                                dead_count,
962                            ),
963                            evidence: serde_json::json!({
964                                "command": cmd.name,
965                                "dead_code_count": dead_count,
966                            }),
967                            confidence: Some("DETERMINISTIC".to_string()),
968                            finding_id: Some(compute_finding_id(
969                                "dead-code-introduced",
970                                Path::new("(project)"),
971                                "(project-level)",
972                                0,
973                            )),
974                        });
975                    }
976                }
977                Err(e) => {
978                    partial_reasons.push(format!("tldr {} failed: {}", cmd.name, e));
979                }
980            }
981        }
982
983        // === Try cached baseline call graph before creating a worktree ===
984        //
985        // Resolve base_ref to a commit hash and check if we have a cached
986        // baseline call graph for that commit. On cache hit we can diff
987        // calls/deps without a worktree (cohesion still needs one).
988        use crate::commands::bugbot::first_run::{
989            load_cached_baseline_call_graph, resolve_git_ref, save_baseline_call_graph,
990        };
991
992        let base_commit = resolve_git_ref(project, base_ref).ok();
993        let cached_baseline = base_commit
994            .as_deref()
995            .and_then(|hash| load_cached_baseline_call_graph(project, hash));
996
997        // Track whether we already handled calls/deps via cache
998        let mut calls_deps_done = false;
999
1000        if let Some(ref cached_cg) = cached_baseline {
1001            // --- Cache hit: diff calls/deps without worktree ---
1002            let current_calls_result: Result<std::borrow::Cow<'_, serde_json::Value>, String> =
1003                if let Some(cached) = current_calls_json {
1004                    Ok(std::borrow::Cow::Borrowed(cached))
1005                } else {
1006                    flow_engine
1007                        .run_tldr_flow_command("calls", &["calls"], project, language)
1008                        .map(std::borrow::Cow::Owned)
1009                };
1010
1011            match &current_calls_result {
1012                Ok(current_json) => {
1013                    findings.extend(self.diff_calls_json(cached_cg, current_json.as_ref()));
1014
1015                    let baseline_deps = Self::derive_deps_from_calls(cached_cg);
1016                    let current_deps = Self::derive_deps_from_calls(current_json.as_ref());
1017                    findings.extend(self.diff_deps_json(&baseline_deps, &current_deps));
1018                    calls_deps_done = true;
1019                }
1020                Err(e) => {
1021                    partial_reasons.push(format!("tldr calls (current) failed: {}", e));
1022                    calls_deps_done = true; // don't retry via worktree
1023                }
1024            }
1025        }
1026
1027        // === Baseline worktree for calls/deps (cache miss) + cohesion ===
1028        //
1029        // We still need a worktree for cohesion (always) and for calls/deps
1030        // when no cached baseline is available.
1031        let needs_worktree = true; // cohesion always needs baseline worktree
1032
1033        if needs_worktree {
1034            let baseline_dir = match tempfile::tempdir() {
1035                Ok(d) => d,
1036                Err(e) => {
1037                    partial_reasons.push(format!("tmpdir for baseline worktree: {}", e));
1038                    return findings;
1039                }
1040            };
1041            let worktree_path = baseline_dir.path().join("baseline");
1042
1043            let worktree_ok = match Command::new("git")
1044                .args(["worktree", "add", &worktree_path.to_string_lossy(), base_ref])
1045                .current_dir(project)
1046                .stdout(std::process::Stdio::null())
1047                .stderr(std::process::Stdio::piped())
1048                .status()
1049            {
1050                Ok(status) if status.success() => true,
1051                Ok(status) => {
1052                    partial_reasons.push(format!(
1053                        "git worktree add failed (exit {}); skipping baseline flow diff",
1054                        status
1055                    ));
1056                    false
1057                }
1058                Err(e) => {
1059                    partial_reasons.push(format!("git worktree add: {}; skipping baseline flow diff", e));
1060                    false
1061                }
1062            };
1063
1064            if worktree_ok {
1065                // Copy .tldrignore to worktree so baseline analysis uses
1066                // consistent filtering (vendored code excluded from both sides).
1067                let tldrignore_src = project.join(".tldrignore");
1068                if tldrignore_src.exists() {
1069                    let _ = std::fs::copy(&tldrignore_src, worktree_path.join(".tldrignore"));
1070                }
1071
1072                // --- Calls/deps: only if not already handled via cache ---
1073                if !calls_deps_done {
1074                    let baseline_calls = flow_engine.run_tldr_flow_command("calls", &["calls"], &worktree_path, language);
1075                    let current_calls_result: Result<std::borrow::Cow<'_, serde_json::Value>, String> =
1076                        if let Some(cached) = current_calls_json {
1077                            Ok(std::borrow::Cow::Borrowed(cached))
1078                        } else {
1079                            flow_engine
1080                                .run_tldr_flow_command("calls", &["calls"], project, language)
1081                                .map(std::borrow::Cow::Owned)
1082                        };
1083
1084                    match (&baseline_calls, &current_calls_result) {
1085                        (Ok(baseline_json), Ok(current_json)) => {
1086                            // Diff call graph edges
1087                            findings.extend(self.diff_calls_json(baseline_json, current_json.as_ref()));
1088
1089                            // Derive deps from calls in-memory instead of running `tldr deps`
1090                            let baseline_deps = Self::derive_deps_from_calls(baseline_json);
1091                            let current_deps = Self::derive_deps_from_calls(current_json.as_ref());
1092                            findings.extend(self.diff_deps_json(&baseline_deps, &current_deps));
1093
1094                            // Cache the baseline for next run (non-fatal).
1095                            if let Some(ref hash) = base_commit {
1096                                let _ = save_baseline_call_graph(project, baseline_json, hash, language);
1097                            }
1098                        }
1099                        (Err(e), _) => {
1100                            partial_reasons.push(format!("tldr calls (baseline) failed: {}", e));
1101                        }
1102                        (_, Err(e)) => {
1103                            partial_reasons.push(format!("tldr calls (current) failed: {}", e));
1104                        }
1105                    }
1106                }
1107
1108                // --- Cohesion: separate subprocess (requires LCOM4, not derivable from calls) ---
1109                for cmd in TLDR_COMMANDS.iter().filter(|c| c.category == TldrCategory::Flow && c.name == "cohesion") {
1110                    let baseline_result = flow_engine.run_tldr_flow_command(cmd.name, cmd.args, &worktree_path, language);
1111                    let current_result = flow_engine.run_tldr_flow_command(cmd.name, cmd.args, project, language);
1112                    match (baseline_result, current_result) {
1113                        (Ok(baseline_json), Ok(current_json)) => {
1114                            findings.extend(self.diff_cohesion_json(&baseline_json, &current_json));
1115                        }
1116                        (Err(e), _) => {
1117                            partial_reasons.push(format!("tldr cohesion (baseline) failed: {}", e));
1118                        }
1119                        (_, Err(e)) => {
1120                            partial_reasons.push(format!("tldr cohesion (current) failed: {}", e));
1121                        }
1122                    }
1123                }
1124
1125                // Clean up worktree
1126                let _ = Command::new("git")
1127                    .args(["worktree", "remove", "--force", &worktree_path.to_string_lossy()])
1128                    .current_dir(project)
1129                    .stdout(std::process::Stdio::null())
1130                    .stderr(std::process::Stdio::null())
1131                    .status();
1132            }
1133        }
1134
1135        findings
1136    }
1137
1138    /// Parse `tldr whatbreaks` JSON output into findings for a single file.
1139    ///
1140    /// Extracts `summary.importer_count`, `summary.direct_caller_count`, and
1141    /// `summary.affected_test_count` from the JSON. Emits a `downstream-impact`
1142    /// finding if `importer_count > 0` or `caller_count > 0`.
1143    ///
1144    /// Severity: `high` if importer_count > 10, `medium` if > 3, else `low`.
1145    fn parse_whatbreaks_findings(
1146        file_path: &Path,
1147        json: &serde_json::Value,
1148    ) -> Vec<BugbotFinding> {
1149        let mut findings = Vec::new();
1150
1151        let summary = json.get("summary").unwrap_or(json);
1152        let importer_count = summary
1153            .get("importer_count")
1154            .and_then(|v| v.as_u64())
1155            .unwrap_or(0);
1156        let caller_count = summary
1157            .get("direct_caller_count")
1158            .and_then(|v| v.as_u64())
1159            .unwrap_or(0);
1160        let test_count = summary
1161            .get("affected_test_count")
1162            .and_then(|v| v.as_u64())
1163            .unwrap_or(0);
1164
1165        if importer_count > 0 || caller_count > 0 {
1166            let severity = if importer_count > 10 {
1167                "high"
1168            } else if importer_count > 3 {
1169                "medium"
1170            } else {
1171                "low"
1172            };
1173
1174            findings.push(BugbotFinding {
1175                finding_type: "downstream-impact".to_string(),
1176                severity: severity.to_string(),
1177                file: file_path.to_path_buf(),
1178                function: "(file-level)".to_string(),
1179                line: 0,
1180                message: format!(
1181                    "Changed file has {} importers, {} direct callers, {} affected tests",
1182                    importer_count, caller_count, test_count,
1183                ),
1184                evidence: serde_json::json!({
1185                    "command": "whatbreaks",
1186                    "importer_count": importer_count,
1187                    "direct_caller_count": caller_count,
1188                    "affected_test_count": test_count,
1189                }),
1190                confidence: Some("DETERMINISTIC".to_string()),
1191                finding_id: Some(compute_finding_id(
1192                    "downstream-impact",
1193                    file_path,
1194                    "(file-level)",
1195                    0,
1196                )),
1197            });
1198        }
1199
1200        findings
1201    }
1202
1203    /// Parse `tldr impact` JSON output into findings for a single function.
1204    ///
1205    /// Looks for `targets.<function_name>.caller_count` and
1206    /// `targets.<function_name>.callers` in the JSON. Emits a
1207    /// `breaking-change-risk` finding if caller_count > 0.
1208    ///
1209    /// Severity: `high` if caller_count > 5, `medium` if 2-5, `info` if 1.
1210    ///
1211    /// Note: No longer called by `analyze_function_impact` (which now uses
1212    /// `parse_impact_findings_from_callgraph`), but retained for parsing
1213    /// raw `tldr impact` JSON output in other contexts and tested directly.
1214    pub fn parse_impact_findings(
1215        function_name: &str,
1216        json: &serde_json::Value,
1217    ) -> Vec<BugbotFinding> {
1218        let mut findings = Vec::new();
1219
1220        // Try targets.<function_name>.caller_count first
1221        let (caller_count, callers_preview) = if let Some(target) =
1222            json.get("targets").and_then(|t| t.get(function_name))
1223        {
1224            let count = target
1225                .get("caller_count")
1226                .and_then(|v| v.as_u64())
1227                .unwrap_or(0);
1228            let callers: Vec<String> = target
1229                .get("callers")
1230                .and_then(|v| v.as_array())
1231                .map(|arr| {
1232                    arr.iter()
1233                        .take(5)
1234                        .map(|c| {
1235                            let file = c.get("file").and_then(|v| v.as_str()).unwrap_or("?");
1236                            let func = c.get("function").and_then(|v| v.as_str()).unwrap_or("?");
1237                            format!("{}::{}", file, func)
1238                        })
1239                        .collect()
1240                })
1241                .unwrap_or_default();
1242            (count, callers)
1243        } else {
1244            // Fallback: try top-level caller_count
1245            let count = json
1246                .get("caller_count")
1247                .and_then(|v| v.as_u64())
1248                .unwrap_or(0);
1249            let callers: Vec<String> = json
1250                .get("callers")
1251                .and_then(|v| v.as_array())
1252                .map(|arr| {
1253                    arr.iter()
1254                        .take(5)
1255                        .map(|c| {
1256                            let file = c.get("file").and_then(|v| v.as_str()).unwrap_or("?");
1257                            let func = c.get("function").and_then(|v| v.as_str()).unwrap_or("?");
1258                            format!("{}::{}", file, func)
1259                        })
1260                        .collect()
1261                })
1262                .unwrap_or_default();
1263            (count, callers)
1264        };
1265
1266        if caller_count > 0 {
1267            let severity = if caller_count > 5 {
1268                "high"
1269            } else if caller_count >= 2 {
1270                "medium"
1271            } else {
1272                "info"
1273            };
1274
1275            findings.push(BugbotFinding {
1276                finding_type: "breaking-change-risk".to_string(),
1277                severity: severity.to_string(),
1278                file: PathBuf::from("(project)"),
1279                function: function_name.to_string(),
1280                line: 0,
1281                message: format!(
1282                    "Function `{}` has {} callers that may be affected by changes",
1283                    function_name, caller_count,
1284                ),
1285                evidence: serde_json::json!({
1286                    "command": "impact",
1287                    "caller_count": caller_count,
1288                    "callers_preview": callers_preview,
1289                }),
1290                confidence: Some("DETERMINISTIC".to_string()),
1291                finding_id: Some(compute_finding_id(
1292                    "breaking-change-risk",
1293                    Path::new("(project)"),
1294                    function_name,
1295                    0,
1296                )),
1297            });
1298        }
1299
1300        findings
1301    }
1302
1303    /// Build a reverse caller map from `tldr calls` JSON output.
1304    ///
1305    /// Inverts call graph edges so that each `dst_func` maps to a list of
1306    /// `(src_file, src_func)` pairs representing its callers. Edges with
1307    /// missing `src_file`, `src_func`, or `dst_func` fields are silently
1308    /// skipped.
1309    fn build_reverse_caller_map(
1310        calls_json: &serde_json::Value,
1311    ) -> HashMap<String, Vec<(String, String)>> {
1312        let mut map: HashMap<String, Vec<(String, String)>> = HashMap::new();
1313
1314        if let Some(edges) = calls_json.get("edges").and_then(|v| v.as_array()) {
1315            for edge in edges {
1316                let src_file = edge.get("src_file").and_then(|v| v.as_str());
1317                let src_func = edge.get("src_func").and_then(|v| v.as_str());
1318                let dst_func = edge.get("dst_func").and_then(|v| v.as_str());
1319
1320                if let (Some(sf), Some(sfn), Some(df)) = (src_file, src_func, dst_func) {
1321                    map.entry(df.to_string())
1322                        .or_default()
1323                        .push((sf.to_string(), sfn.to_string()));
1324                }
1325            }
1326        }
1327
1328        map
1329    }
1330
1331    /// Generate `breaking-change-risk` findings from a pre-built caller list.
1332    ///
1333    /// Unlike `parse_impact_findings` which parses `tldr impact` JSON, this
1334    /// method accepts an already-resolved list of `(file, function)` callers
1335    /// from the reverse caller map built by `build_reverse_caller_map`.
1336    ///
1337    /// Severity thresholds match `parse_impact_findings`:
1338    /// - `>5` callers = `high`
1339    /// - `2..=5` callers = `medium`
1340    /// - `1` caller = `info`
1341    /// - `0` callers = no finding emitted
1342    ///
1343    /// The evidence `command` field is set to `"calls"` (not `"impact"`).
1344    fn parse_impact_findings_from_callgraph(
1345        func_name: &str,
1346        callers: &[(String, String)],
1347    ) -> Vec<BugbotFinding> {
1348        let mut findings = Vec::new();
1349        let caller_count = callers.len();
1350
1351        if caller_count == 0 {
1352            return findings;
1353        }
1354
1355        let severity = if caller_count > 5 {
1356            "high"
1357        } else if caller_count >= 2 {
1358            "medium"
1359        } else {
1360            "info"
1361        };
1362
1363        let callers_preview: Vec<String> = callers
1364            .iter()
1365            .take(5)
1366            .map(|(file, func)| format!("{}::{}", file, func))
1367            .collect();
1368
1369        findings.push(BugbotFinding {
1370            finding_type: "breaking-change-risk".to_string(),
1371            severity: severity.to_string(),
1372            file: PathBuf::from("(project)"),
1373            function: func_name.to_string(),
1374            line: 0,
1375            message: format!(
1376                "Function `{}` has {} callers that may be affected by changes",
1377                func_name, caller_count
1378            ),
1379            evidence: serde_json::json!({
1380                "command": "calls",
1381                "caller_count": caller_count,
1382                "callers_preview": callers_preview,
1383            }),
1384            confidence: Some("DETERMINISTIC".to_string()),
1385            finding_id: Some(compute_finding_id(
1386                "breaking-change-risk",
1387                Path::new("(project)"),
1388                func_name,
1389                0,
1390            )),
1391        });
1392
1393        findings
1394    }
1395
1396    /// Detect downstream dependencies for changed files.
1397    ///
1398    /// When `current_calls_json` is `Some`, derives downstream impact metrics
1399    /// in-memory from the cached call graph JSON using `derive_downstream_from_calls`,
1400    /// eliminating per-file `tldr whatbreaks` subprocess calls.
1401    ///
1402    /// When `current_calls_json` is `None`, falls back to running
1403    /// `tldr whatbreaks <relative_path> --type file --quick <project> --lang <language> --format json`
1404    /// per changed file. Uses a 300-second timeout to accommodate large projects.
1405    fn analyze_downstream_impact(
1406        &self,
1407        project: &Path,
1408        changed_files: &[PathBuf],
1409        language: &str,
1410        current_calls_json: Option<&serde_json::Value>,
1411        partial_reasons: &mut Vec<String>,
1412    ) -> Vec<BugbotFinding> {
1413        let mut findings = Vec::new();
1414
1415        if let Some(calls_json) = current_calls_json {
1416            // Derive downstream impact from cached calls JSON
1417            let changed_file_strs: Vec<&str> = changed_files
1418                .iter()
1419                .map(|p| p.strip_prefix(project).unwrap_or(p))
1420                .filter_map(|p| p.to_str())
1421                .collect();
1422
1423            let downstream_results =
1424                Self::derive_downstream_from_calls(calls_json, &changed_file_strs);
1425            for (file_str, metrics) in &downstream_results {
1426                let file_path = project.join(file_str);
1427                let wb_json = serde_json::json!({ "summary": metrics });
1428                findings.extend(Self::parse_whatbreaks_findings(&file_path, &wb_json));
1429            }
1430        } else {
1431            // Fallback: run tldr whatbreaks subprocess per file
1432            let flow_engine = TldrDifferentialEngine::with_timeout(300);
1433
1434            for file_path in changed_files {
1435                let relative = file_path.strip_prefix(project).unwrap_or(file_path);
1436                let rel_str = relative.to_string_lossy().to_string();
1437
1438                let args = vec![
1439                    "whatbreaks".to_string(),
1440                    rel_str.clone(),
1441                    "--type".to_string(),
1442                    "file".to_string(),
1443                    "--quick".to_string(),
1444                    project.to_string_lossy().to_string(),
1445                    "--lang".to_string(),
1446                    language.to_string(),
1447                    "--format".to_string(),
1448                    "json".to_string(),
1449                ];
1450
1451                match flow_engine.run_tldr_raw(&args) {
1452                    Ok(json) => {
1453                        findings.extend(Self::parse_whatbreaks_findings(file_path, &json));
1454                    }
1455                    Err(e) => {
1456                        partial_reasons
1457                            .push(format!("tldr whatbreaks {} failed: {}", rel_str, e));
1458                    }
1459                }
1460            }
1461        }
1462
1463        findings
1464    }
1465
1466    /// Detect callers of changed functions via a single `tldr calls` invocation.
1467    ///
1468    /// Discovers function names via `tldr cognitive` on each changed file,
1469    /// caps the total at 20 functions, then uses the call graph to build a
1470    /// reverse caller map. Each discovered function is looked up to produce
1471    /// `breaking-change-risk` findings.
1472    ///
1473    /// When `current_calls_json` is `Some`, the cached call graph JSON is
1474    /// reused instead of running a `tldr calls` subprocess. When `None`,
1475    /// falls back to running `tldr calls` once at the project level.
1476    ///
1477    /// If `tldr calls` fails (and no cache is available), the error is logged
1478    /// to `partial_reasons` and an empty findings list is returned.
1479    fn analyze_function_impact(
1480        &self,
1481        project: &Path,
1482        changed_files: &[PathBuf],
1483        language: &str,
1484        current_calls_json: Option<&serde_json::Value>,
1485        partial_reasons: &mut Vec<String>,
1486    ) -> Vec<BugbotFinding> {
1487        let mut findings = Vec::new();
1488        let impact_engine = TldrDifferentialEngine::with_timeout(60);
1489
1490        // Step 1: Discover function names from changed files via cognitive analysis.
1491        let mut all_functions: Vec<String> = Vec::new();
1492        for file_path in changed_files {
1493            let relative = file_path.strip_prefix(project).unwrap_or(file_path);
1494            let full_path = project.join(relative);
1495
1496            let cognitive_result =
1497                impact_engine.run_tldr_command(&["cognitive"], &full_path);
1498            let func_names =
1499                Self::discover_function_names_from_cognitive(&cognitive_result);
1500            all_functions.extend(func_names);
1501        }
1502
1503        // Cap at 20 functions to limit analysis scope.
1504        all_functions.truncate(20);
1505
1506        if all_functions.is_empty() {
1507            return findings;
1508        }
1509
1510        // Step 2: Use cached calls JSON or run `tldr calls` once at project level.
1511        let calls_json_owned: Option<serde_json::Value>;
1512        let calls_json_ref: &serde_json::Value = if let Some(cached) = current_calls_json {
1513            cached
1514        } else {
1515            let args = vec![
1516                "calls".to_string(),
1517                project.to_string_lossy().to_string(),
1518                "--lang".to_string(),
1519                language.to_string(),
1520                "--format".to_string(),
1521                "json".to_string(),
1522            ];
1523
1524            match impact_engine.run_tldr_raw(&args) {
1525                Ok(json) => {
1526                    calls_json_owned = Some(json);
1527                    calls_json_owned.as_ref().unwrap()
1528                }
1529                Err(e) => {
1530                    partial_reasons.push(format!("tldr calls failed: {}", e));
1531                    return findings;
1532                }
1533            }
1534        };
1535
1536        // Step 3: Build reverse map (dst_func -> [(src_file, src_func)]).
1537        let reverse_map = Self::build_reverse_caller_map(calls_json_ref);
1538
1539        // Step 4: Look up callers for each discovered function.
1540        for func_name in &all_functions {
1541            let callers = reverse_map.get(func_name).cloned().unwrap_or_default();
1542            findings.extend(Self::parse_impact_findings_from_callgraph(
1543                func_name, &callers,
1544            ));
1545        }
1546
1547        findings
1548    }
1549
1550    /// Diff call graph edges between baseline and current.
1551    ///
1552    /// Extracts `edges` arrays from both JSON values, builds sets of
1553    /// `(src_file::src_func, dst_file::dst_func)` pairs, and reports
1554    /// new/removed edges as findings. More than 5 new edges produces a
1555    /// medium-severity summary finding.
1556    ///
1557    /// The actual `tldr calls --format json` schema uses:
1558    /// ```json
1559    /// { "edges": [{ "src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct" }] }
1560    /// ```
1561    fn diff_calls_json(
1562        &self,
1563        baseline: &serde_json::Value,
1564        current: &serde_json::Value,
1565    ) -> Vec<BugbotFinding> {
1566        let mut findings = Vec::new();
1567
1568        let extract_edges = |json: &serde_json::Value| -> std::collections::HashSet<(String, String)> {
1569            let mut set = std::collections::HashSet::new();
1570            if let Some(edges) = json.get("edges").and_then(|v| v.as_array()) {
1571                for edge in edges {
1572                    let from = format!(
1573                        "{}::{}",
1574                        edge.get("src_file").and_then(|v| v.as_str()).unwrap_or("?"),
1575                        edge.get("src_func").and_then(|v| v.as_str()).unwrap_or("?"),
1576                    );
1577                    let to = format!(
1578                        "{}::{}",
1579                        edge.get("dst_file").and_then(|v| v.as_str()).unwrap_or("?"),
1580                        edge.get("dst_func").and_then(|v| v.as_str()).unwrap_or("?"),
1581                    );
1582                    if from != "?::?" && to != "?::?" {
1583                        set.insert((from, to));
1584                    }
1585                }
1586            }
1587            set
1588        };
1589
1590        let baseline_edges = extract_edges(baseline);
1591        let current_edges = extract_edges(current);
1592
1593        // New edges: in current but not in baseline
1594        let new_edges: Vec<&(String, String)> = current_edges.difference(&baseline_edges).collect();
1595        // Removed edges: in baseline but not in current
1596        let removed_edges: Vec<&(String, String)> = baseline_edges.difference(&current_edges).collect();
1597
1598        if new_edges.is_empty() && removed_edges.is_empty() {
1599            return findings;
1600        }
1601
1602        // Report individual new edges as info
1603        for (from, to) in &new_edges {
1604            findings.push(BugbotFinding {
1605                finding_type: "call-graph-change".to_string(),
1606                severity: "info".to_string(),
1607                file: PathBuf::from("(project)"),
1608                function: "(project-level)".to_string(),
1609                line: 0,
1610                message: format!("New call edge: {} -> {}", from, to),
1611                evidence: serde_json::json!({
1612                    "change": "added",
1613                    "from": from,
1614                    "to": to,
1615                }),
1616                confidence: Some("DETERMINISTIC".to_string()),
1617                finding_id: Some(compute_finding_id(
1618                    "call-graph-change",
1619                    Path::new("(project)"),
1620                    &format!("{}:{}", from, to),
1621                    0,
1622                )),
1623            });
1624        }
1625
1626        // Report individual removed edges as info
1627        for (from, to) in &removed_edges {
1628            findings.push(BugbotFinding {
1629                finding_type: "call-graph-change".to_string(),
1630                severity: "info".to_string(),
1631                file: PathBuf::from("(project)"),
1632                function: "(project-level)".to_string(),
1633                line: 0,
1634                message: format!("Removed call edge: {} -> {}", from, to),
1635                evidence: serde_json::json!({
1636                    "change": "removed",
1637                    "from": from,
1638                    "to": to,
1639                }),
1640                confidence: Some("DETERMINISTIC".to_string()),
1641                finding_id: Some(compute_finding_id(
1642                    "call-graph-change",
1643                    Path::new("(project)"),
1644                    &format!("removed:{}:{}", from, to),
1645                    0,
1646                )),
1647            });
1648        }
1649
1650        // Summary finding at medium severity if many new edges
1651        if new_edges.len() > 5 {
1652            findings.push(BugbotFinding {
1653                finding_type: "call-graph-change".to_string(),
1654                severity: "medium".to_string(),
1655                file: PathBuf::from("(project)"),
1656                function: "(project-level)".to_string(),
1657                line: 0,
1658                message: format!(
1659                    "Significant call graph change: {} new edges, {} removed edges",
1660                    new_edges.len(),
1661                    removed_edges.len(),
1662                ),
1663                evidence: serde_json::json!({
1664                    "new_edge_count": new_edges.len(),
1665                    "removed_edge_count": removed_edges.len(),
1666                }),
1667                confidence: Some("DETERMINISTIC".to_string()),
1668                finding_id: Some(compute_finding_id(
1669                    "call-graph-change",
1670                    Path::new("(project)"),
1671                    "(summary)",
1672                    0,
1673                )),
1674            });
1675        }
1676
1677        findings
1678    }
1679
1680    /// Diff module dependencies between baseline and current.
1681    ///
1682    /// Compares `circular_dependencies` arrays: new circular deps get "high"
1683    /// severity. Compares `internal_dependencies` counts: significant increase
1684    /// gets "medium".
1685    ///
1686    /// The actual `tldr deps --format json` schema uses:
1687    /// ```json
1688    /// {
1689    ///   "internal_dependencies": { "file.rs": ["dep1.rs", "dep2.rs"], ... },
1690    ///   "circular_dependencies": [{ "path": ["a.rs", "b.rs", "a.rs"], "len": 3 }, ...],
1691    ///   "stats": { "total_internal_deps": 42, ... }
1692    /// }
1693    /// ```
1694    fn diff_deps_json(
1695        &self,
1696        baseline: &serde_json::Value,
1697        current: &serde_json::Value,
1698    ) -> Vec<BugbotFinding> {
1699        let mut findings = Vec::new();
1700
1701        // Extract circular dependencies as sets of sorted module lists.
1702        // Each circular dep is an object with a "path" array of module names.
1703        let extract_circular = |json: &serde_json::Value| -> std::collections::HashSet<String> {
1704            let mut set = std::collections::HashSet::new();
1705            if let Some(circs) = json.get("circular_dependencies").and_then(|v| v.as_array()) {
1706                for circ in circs {
1707                    // Each circular dep is an object: { "path": ["a.rs", "b.rs"], "len": N }
1708                    if let Some(path) = circ.get("path").and_then(|v| v.as_array()) {
1709                        let mut names: Vec<String> = path
1710                            .iter()
1711                            .filter_map(|m| m.as_str().map(|s| s.to_string()))
1712                            .collect();
1713                        names.sort();
1714                        set.insert(names.join(","));
1715                    }
1716                }
1717            }
1718            set
1719        };
1720
1721        let baseline_circular = extract_circular(baseline);
1722        let current_circular = extract_circular(current);
1723
1724        // New circular dependencies = high severity regression
1725        let new_circular: Vec<&String> = current_circular.difference(&baseline_circular).collect();
1726        for circ in &new_circular {
1727            findings.push(BugbotFinding {
1728                finding_type: "dependency-change".to_string(),
1729                severity: "high".to_string(),
1730                file: PathBuf::from("(project)"),
1731                function: "(project-level)".to_string(),
1732                line: 0,
1733                message: format!("New circular dependency detected: {}", circ),
1734                evidence: serde_json::json!({
1735                    "change": "new_circular",
1736                    "modules": circ,
1737                }),
1738                confidence: Some("DETERMINISTIC".to_string()),
1739                finding_id: Some(compute_finding_id(
1740                    "dependency-change",
1741                    Path::new("(project)"),
1742                    &format!("circular:{}", circ),
1743                    0,
1744                )),
1745            });
1746        }
1747
1748        // Compare internal dependency counts.
1749        // `internal_dependencies` is a dict (file -> [deps]), so count total deps
1750        // across all files. Alternatively, use `stats.total_internal_deps` if available.
1751        let count_internal_deps = |json: &serde_json::Value| -> usize {
1752            // Prefer stats.total_internal_deps for accuracy
1753            if let Some(total) = json.get("stats")
1754                .and_then(|s| s.get("total_internal_deps"))
1755                .and_then(|v| v.as_u64())
1756            {
1757                return total as usize;
1758            }
1759            // Fallback: sum up all dependency arrays in the dict
1760            json.get("internal_dependencies")
1761                .and_then(|v| v.as_object())
1762                .map(|obj| obj.values()
1763                    .filter_map(|v| v.as_array())
1764                    .map(|a| a.len())
1765                    .sum())
1766                .unwrap_or(0)
1767        };
1768
1769        let baseline_dep_count = count_internal_deps(baseline);
1770        let current_dep_count = count_internal_deps(current);
1771
1772        if current_dep_count > baseline_dep_count {
1773            let increase = current_dep_count - baseline_dep_count;
1774            // Significant increase = more than 20% growth or >5 new deps
1775            if increase > 5 || (baseline_dep_count > 0 && increase * 100 / baseline_dep_count > 20) {
1776                findings.push(BugbotFinding {
1777                    finding_type: "dependency-change".to_string(),
1778                    severity: "medium".to_string(),
1779                    file: PathBuf::from("(project)"),
1780                    function: "(project-level)".to_string(),
1781                    line: 0,
1782                    message: format!(
1783                        "Internal dependency count increased: {} -> {} (+{})",
1784                        baseline_dep_count, current_dep_count, increase,
1785                    ),
1786                    evidence: serde_json::json!({
1787                        "change": "dependency_count_increase",
1788                        "baseline_count": baseline_dep_count,
1789                        "current_count": current_dep_count,
1790                        "increase": increase,
1791                    }),
1792                    confidence: Some("DETERMINISTIC".to_string()),
1793                    finding_id: Some(compute_finding_id(
1794                        "dependency-change",
1795                        Path::new("(project)"),
1796                        "(dep-count)",
1797                        0,
1798                    )),
1799                });
1800            }
1801        }
1802
1803        findings
1804    }
1805
1806    /// Diff coupling metrics between baseline and current.
1807    ///
1808    /// Builds maps of `module -> {ca, ce, instability}` from `martin_metrics`
1809    /// arrays. Flags modules where instability increased or efferent coupling
1810    /// (ce) increased significantly.
1811    ///
1812    /// Note: No longer called by `analyze_flow_commands` (coupling is skipped
1813    /// because it requires file pairs, not a project root), but retained for
1814    /// diffing raw `tldr coupling` JSON output in other contexts and tested
1815    /// directly.
1816    pub fn diff_coupling_json(
1817        &self,
1818        baseline: &serde_json::Value,
1819        current: &serde_json::Value,
1820    ) -> Vec<BugbotFinding> {
1821        let mut findings = Vec::new();
1822
1823        let extract_metrics = |json: &serde_json::Value| -> std::collections::HashMap<String, (f64, f64, f64)> {
1824            let mut map = std::collections::HashMap::new();
1825            if let Some(metrics) = json.get("martin_metrics").and_then(|v| v.as_array()) {
1826                for entry in metrics {
1827                    let module = entry.get("module").and_then(|v| v.as_str()).unwrap_or("");
1828                    if module.is_empty() {
1829                        continue;
1830                    }
1831                    let ca = entry.get("ca").and_then(|v| v.as_f64()).unwrap_or(0.0);
1832                    let ce = entry.get("ce").and_then(|v| v.as_f64()).unwrap_or(0.0);
1833                    let instability = entry.get("instability").and_then(|v| v.as_f64()).unwrap_or(0.0);
1834                    map.insert(module.to_string(), (ca, ce, instability));
1835                }
1836            }
1837            map
1838        };
1839
1840        let baseline_metrics = extract_metrics(baseline);
1841        let current_metrics = extract_metrics(current);
1842
1843        for (module, (_, curr_ce, curr_instability)) in &current_metrics {
1844            if let Some((_, base_ce, base_instability)) = baseline_metrics.get(module) {
1845                // Flag instability increase
1846                let instability_delta = curr_instability - base_instability;
1847                let ce_delta = curr_ce - base_ce;
1848
1849                if instability_delta > 0.05 || ce_delta > 2.0 {
1850                    let severity = if instability_delta > 0.3 || ce_delta > 5.0 {
1851                        "high"
1852                    } else if instability_delta > 0.1 || ce_delta > 3.0 {
1853                        "medium"
1854                    } else {
1855                        "low"
1856                    };
1857
1858                    findings.push(BugbotFinding {
1859                        finding_type: "coupling-increase".to_string(),
1860                        severity: severity.to_string(),
1861                        file: PathBuf::from("(project)"),
1862                        function: "(project-level)".to_string(),
1863                        line: 0,
1864                        message: format!(
1865                            "Module '{}': instability {:.2} -> {:.2} (delta {:.2}), ce {} -> {}",
1866                            module, base_instability, curr_instability, instability_delta,
1867                            base_ce, curr_ce,
1868                        ),
1869                        evidence: serde_json::json!({
1870                            "module": module,
1871                            "baseline_instability": base_instability,
1872                            "current_instability": curr_instability,
1873                            "instability_delta": instability_delta,
1874                            "baseline_ce": base_ce,
1875                            "current_ce": curr_ce,
1876                            "ce_delta": ce_delta,
1877                        }),
1878                        confidence: Some("DETERMINISTIC".to_string()),
1879                        finding_id: Some(compute_finding_id(
1880                            "coupling-increase",
1881                            Path::new("(project)"),
1882                            module,
1883                            0,
1884                        )),
1885                    });
1886                }
1887            }
1888        }
1889
1890        findings
1891    }
1892
1893    /// Diff class cohesion (LCOM4) between baseline and current.
1894    ///
1895    /// Builds maps of `class name -> lcom4` from `classes` arrays.
1896    /// LCOM4 increase = less cohesive = regression. New classes with
1897    /// high LCOM4 (>3) get "info" findings.
1898    ///
1899    /// The actual `tldr cohesion --format json` schema uses:
1900    /// ```json
1901    /// { "classes": [{ "class_name": "Foo", "lcom4": 3, ... }] }
1902    /// ```
1903    fn diff_cohesion_json(
1904        &self,
1905        baseline: &serde_json::Value,
1906        current: &serde_json::Value,
1907    ) -> Vec<BugbotFinding> {
1908        let mut findings = Vec::new();
1909
1910        let extract_lcom4 = |json: &serde_json::Value| -> std::collections::HashMap<String, f64> {
1911            let mut map = std::collections::HashMap::new();
1912            if let Some(classes) = json.get("classes").and_then(|v| v.as_array()) {
1913                for cls in classes {
1914                    // Try "class_name" first (actual schema), fall back to "name"
1915                    let name = cls.get("class_name")
1916                        .or_else(|| cls.get("name"))
1917                        .and_then(|v| v.as_str())
1918                        .unwrap_or("");
1919                    if name.is_empty() {
1920                        continue;
1921                    }
1922                    let lcom4 = cls.get("lcom4").and_then(|v| v.as_f64()).unwrap_or(0.0);
1923                    map.insert(name.to_string(), lcom4);
1924                }
1925            }
1926            map
1927        };
1928
1929        let baseline_lcom = extract_lcom4(baseline);
1930        let current_lcom = extract_lcom4(current);
1931
1932        for (class_name, curr_lcom4) in &current_lcom {
1933            if let Some(base_lcom4) = baseline_lcom.get(class_name) {
1934                // LCOM4 increase = cohesion decrease = regression
1935                let delta = curr_lcom4 - base_lcom4;
1936                if delta > 0.5 {
1937                    let severity = if delta > 3.0 {
1938                        "high"
1939                    } else if delta > 1.0 {
1940                        "medium"
1941                    } else {
1942                        "low"
1943                    };
1944
1945                    findings.push(BugbotFinding {
1946                        finding_type: "cohesion-decrease".to_string(),
1947                        severity: severity.to_string(),
1948                        file: PathBuf::from("(project)"),
1949                        function: "(project-level)".to_string(),
1950                        line: 0,
1951                        message: format!(
1952                            "Class '{}': LCOM4 increased {} -> {} (less cohesive)",
1953                            class_name, base_lcom4, curr_lcom4,
1954                        ),
1955                        evidence: serde_json::json!({
1956                            "class": class_name,
1957                            "baseline_lcom4": base_lcom4,
1958                            "current_lcom4": curr_lcom4,
1959                            "delta": delta,
1960                        }),
1961                        confidence: Some("DETERMINISTIC".to_string()),
1962                        finding_id: Some(compute_finding_id(
1963                            "cohesion-decrease",
1964                            Path::new("(project)"),
1965                            class_name,
1966                            0,
1967                        )),
1968                    });
1969                }
1970            } else {
1971                // New class: flag if LCOM4 is high
1972                if *curr_lcom4 > 3.0 {
1973                    findings.push(BugbotFinding {
1974                        finding_type: "cohesion-decrease".to_string(),
1975                        severity: "info".to_string(),
1976                        file: PathBuf::from("(project)"),
1977                        function: "(project-level)".to_string(),
1978                        line: 0,
1979                        message: format!(
1980                            "New class '{}' has high LCOM4 ({}): consider splitting",
1981                            class_name, curr_lcom4,
1982                        ),
1983                        evidence: serde_json::json!({
1984                            "class": class_name,
1985                            "lcom4": curr_lcom4,
1986                            "new_class": true,
1987                        }),
1988                        confidence: Some("DETERMINISTIC".to_string()),
1989                        finding_id: Some(compute_finding_id(
1990                            "cohesion-decrease",
1991                            Path::new("(project)"),
1992                            class_name,
1993                            0,
1994                        )),
1995                    });
1996                }
1997            }
1998        }
1999
2000        findings
2001    }
2002
2003    /// Count dead code entries from `tldr dead` JSON output.
2004    ///
2005    /// The actual output uses `"dead_functions"` and `"possibly_dead"` arrays,
2006    /// plus a `"total_count"` field for convenience.
2007    fn count_dead_code_entries(json: &serde_json::Value) -> usize {
2008        // Try the summary field first
2009        if let Some(total) = json.get("total_count").and_then(|v| v.as_u64()) {
2010            return total as usize;
2011        }
2012        // Fallback: count array entries
2013        for key in &["dead_functions", "possibly_dead", "dead_code", "unreachable", "functions", "results"] {
2014            if let Some(arr) = json.get(key).and_then(|v| v.as_array()) {
2015                return arr.len();
2016            }
2017        }
2018        if let Some(arr) = json.as_array() {
2019            return arr.len();
2020        }
2021        0
2022    }
2023
2024    /// Derive module-level dependency information from a call-graph JSON.
2025    ///
2026    /// Reads `calls_json["edges"]`, groups cross-file edges into a dependency
2027    /// map (`src_file -> [dst_file, ...]`), and detects circular dependencies
2028    /// (A depends on B AND B depends on A).
2029    ///
2030    /// Returns a JSON value with `internal_dependencies`, `circular_dependencies`,
2031    /// and `stats.total_internal_deps`.
2032    pub fn derive_deps_from_calls(calls_json: &serde_json::Value) -> serde_json::Value {
2033        let empty_edges: Vec<serde_json::Value> = Vec::new();
2034        let edges = calls_json
2035            .get("edges")
2036            .and_then(|v| v.as_array())
2037            .unwrap_or(&empty_edges);
2038
2039        // Build dependency map: src_file -> BTreeSet<dst_file>
2040        let mut dep_map: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
2041        for edge in edges {
2042            let src_file = edge.get("src_file").and_then(|v| v.as_str()).unwrap_or("");
2043            let dst_file = edge.get("dst_file").and_then(|v| v.as_str()).unwrap_or("");
2044            // Skip intra-file edges
2045            if src_file.is_empty() || dst_file.is_empty() || src_file == dst_file {
2046                continue;
2047            }
2048            dep_map
2049                .entry(src_file.to_string())
2050                .or_default()
2051                .insert(dst_file.to_string());
2052        }
2053
2054        // Count total unique dependency pairs
2055        let total_internal_deps: usize = dep_map.values().map(|s| s.len()).sum();
2056
2057        // Detect circular dependencies: A depends on B AND B depends on A
2058        let mut circular: Vec<serde_json::Value> = Vec::new();
2059        let mut seen_cycles: BTreeSet<(String, String)> = BTreeSet::new();
2060        for (src, destinations) in &dep_map {
2061            for dst in destinations {
2062                if let Some(reverse_deps) = dep_map.get(dst) {
2063                    if reverse_deps.contains(src) {
2064                        let (a, b) = if src < dst {
2065                            (src.clone(), dst.clone())
2066                        } else {
2067                            (dst.clone(), src.clone())
2068                        };
2069                        if seen_cycles.insert((a.clone(), b.clone())) {
2070                            circular.push(serde_json::json!({
2071                                "path": [a, b]
2072                            }));
2073                        }
2074                    }
2075                }
2076            }
2077        }
2078
2079        // Build internal_dependencies as JSON object with sorted arrays
2080        let internal_deps: serde_json::Map<String, serde_json::Value> = dep_map
2081            .into_iter()
2082            .map(|(k, v)| {
2083                let arr: Vec<serde_json::Value> = v
2084                    .into_iter()
2085                    .map(serde_json::Value::String)
2086                    .collect();
2087                (k, serde_json::Value::Array(arr))
2088            })
2089            .collect();
2090
2091        serde_json::json!({
2092            "internal_dependencies": internal_deps,
2093            "circular_dependencies": circular,
2094            "stats": {
2095                "total_internal_deps": total_internal_deps
2096            }
2097        })
2098    }
2099
2100    /// Derive Martin coupling metrics (Ca, Ce, Instability) from a call-graph JSON.
2101    ///
2102    /// For each cross-file edge, increments efferent coupling (Ce) for the caller
2103    /// file and afferent coupling (Ca) for the callee file. Uses sets for
2104    /// deduplication: Ce counts unique destination files, Ca counts unique source
2105    /// files. Instability = Ce / (Ca + Ce).
2106    pub fn derive_coupling_from_calls(calls_json: &serde_json::Value) -> serde_json::Value {
2107        let empty_edges: Vec<serde_json::Value> = Vec::new();
2108        let edges = calls_json
2109            .get("edges")
2110            .and_then(|v| v.as_array())
2111            .unwrap_or(&empty_edges);
2112
2113        // Ce: for each module, the set of unique modules it calls (efferent)
2114        let mut ce_map: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
2115        // Ca: for each module, the set of unique modules that call into it (afferent)
2116        let mut ca_map: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
2117
2118        for edge in edges {
2119            let src_file = edge.get("src_file").and_then(|v| v.as_str()).unwrap_or("");
2120            let dst_file = edge.get("dst_file").and_then(|v| v.as_str()).unwrap_or("");
2121            // Skip intra-file edges
2122            if src_file.is_empty() || dst_file.is_empty() || src_file == dst_file {
2123                continue;
2124            }
2125            ce_map
2126                .entry(src_file.to_string())
2127                .or_default()
2128                .insert(dst_file.to_string());
2129            ca_map
2130                .entry(dst_file.to_string())
2131                .or_default()
2132                .insert(src_file.to_string());
2133        }
2134
2135        // Collect all modules
2136        let mut all_modules: BTreeSet<String> = BTreeSet::new();
2137        for k in ce_map.keys() {
2138            all_modules.insert(k.clone());
2139        }
2140        for k in ca_map.keys() {
2141            all_modules.insert(k.clone());
2142        }
2143
2144        let mut metrics: Vec<serde_json::Value> = Vec::new();
2145        for module in &all_modules {
2146            let ca = ca_map.get(module).map_or(0, |s| s.len());
2147            let ce = ce_map.get(module).map_or(0, |s| s.len());
2148            let instability = if ca + ce == 0 {
2149                0.0
2150            } else {
2151                ce as f64 / (ca + ce) as f64
2152            };
2153            metrics.push(serde_json::json!({
2154                "module": module,
2155                "ca": ca,
2156                "ce": ce,
2157                "instability": instability
2158            }));
2159        }
2160
2161        serde_json::json!({
2162            "martin_metrics": metrics
2163        })
2164    }
2165
2166    /// Derive downstream impact metrics for a set of changed files from a call-graph JSON.
2167    ///
2168    /// For each changed file, finds all cross-file edges where that file is the
2169    /// callee (`dst_file`). Counts importers, direct callers, and affected test
2170    /// files (using a path/name heuristic). Always returns one entry per changed
2171    /// file, even when counts are zero.
2172    pub fn derive_downstream_from_calls(
2173        calls_json: &serde_json::Value,
2174        changed_files: &[&str],
2175    ) -> Vec<(String, serde_json::Value)> {
2176        let empty_edges: Vec<serde_json::Value> = Vec::new();
2177        let edges = calls_json
2178            .get("edges")
2179            .and_then(|v| v.as_array())
2180            .unwrap_or(&empty_edges);
2181
2182        let mut results: Vec<(String, serde_json::Value)> = Vec::new();
2183
2184        for &changed_file in changed_files {
2185            let mut importers: BTreeSet<String> = BTreeSet::new();
2186            let mut test_importers: BTreeSet<String> = BTreeSet::new();
2187
2188            for edge in edges {
2189                let src_file = edge.get("src_file").and_then(|v| v.as_str()).unwrap_or("");
2190                let dst_file = edge.get("dst_file").and_then(|v| v.as_str()).unwrap_or("");
2191
2192                // Only count unique source files calling INTO the changed file
2193                if dst_file == changed_file && src_file != changed_file && !src_file.is_empty() {
2194                    importers.insert(src_file.to_string());
2195                    if src_file.contains("test") {
2196                        test_importers.insert(src_file.to_string());
2197                    }
2198                }
2199            }
2200
2201            let importer_count = importers.len() as u64;
2202            let affected_test_count = test_importers.len() as u64;
2203
2204            results.push((
2205                changed_file.to_string(),
2206                serde_json::json!({
2207                    "importer_count": importer_count,
2208                    "direct_caller_count": importer_count,
2209                    "affected_test_count": affected_test_count
2210                }),
2211            ));
2212        }
2213
2214        results
2215    }
2216}
2217
/// Build an identifier for a finding from its key fields.
///
/// Feeds `finding_type`, the lossy UTF-8 rendering of `file`, `function`, and
/// `line` (in that order) into a freshly constructed `DefaultHasher` and
/// renders the 64-bit digest as lowercase hexadecimal without zero padding.
///
/// NOTE(review): the same inputs give the same ID within one build, but the
/// standard library does not specify `DefaultHasher`'s algorithm (presumably
/// SipHash today), so IDs may change across Rust releases — confirm before
/// persisting them long-term.
fn compute_finding_id(finding_type: &str, file: &Path, function: &str, line: usize) -> String {
    let lossy_path = file.to_string_lossy();
    let path_text: &str = &lossy_path;

    let mut state = DefaultHasher::new();
    Hash::hash(finding_type, &mut state);
    Hash::hash(path_text, &mut state);
    Hash::hash(function, &mut state);
    Hash::hash(&line, &mut state);

    let digest = state.finish();
    format!("{:x}", digest)
}
2230
2231impl Default for TldrDifferentialEngine {
2232    fn default() -> Self {
2233        Self::new()
2234    }
2235}
2236
2237impl L2Engine for TldrDifferentialEngine {
2238    fn name(&self) -> &'static str {
2239        "TldrDifferentialEngine"
2240    }
2241
2242    fn finding_types(&self) -> &[&'static str] {
2243        FINDING_TYPES
2244    }
2245
2246    fn analyze(&self, ctx: &L2Context) -> L2AnalyzerOutput {
2247        let start = Instant::now();
2248        let mut all_findings = Vec::new();
2249        let mut partial_reasons = Vec::new();
2250
2251        // === LOCAL commands: per-file analysis (parallelized across cores) ===
2252        let work_items: Vec<_> = ctx
2253            .changed_files
2254            .iter()
2255            .filter_map(|file_path| {
2256                let baseline = ctx.baseline_contents.get(file_path)?;
2257                let current = ctx.current_contents.get(file_path)?;
2258                Some((file_path, baseline.as_str(), current.as_str()))
2259            })
2260            .collect();
2261
2262        let functions_skipped = ctx.changed_files.len() - work_items.len();
2263        let functions_analyzed = work_items.len();
2264
2265        let num_threads = std::thread::available_parallelism()
2266            .map(|n| n.get())
2267            .unwrap_or(1)
2268            .min(work_items.len().max(1));
2269
2270        if num_threads <= 1 || work_items.len() <= 1 {
2271            for (file_path, baseline_src, current_src) in &work_items {
2272                let mut file_reasons = Vec::new();
2273                let file_findings =
2274                    self.analyze_local_commands(file_path, baseline_src, current_src, &mut file_reasons);
2275                all_findings.extend(file_findings);
2276                partial_reasons.extend(file_reasons);
2277            }
2278        } else {
2279            let chunk_size = work_items.len().div_ceil(num_threads);
2280            std::thread::scope(|s| {
2281                let handles: Vec<_> = work_items
2282                    .chunks(chunk_size)
2283                    .map(|chunk| {
2284                        s.spawn(move || {
2285                            let mut findings = Vec::new();
2286                            let mut reasons = Vec::new();
2287                            for (file_path, baseline_src, current_src) in chunk {
2288                                let file_findings = self.analyze_local_commands(
2289                                    file_path,
2290                                    baseline_src,
2291                                    current_src,
2292                                    &mut reasons,
2293                                );
2294                                findings.extend(file_findings);
2295                            }
2296                            (findings, reasons)
2297                        })
2298                    })
2299                    .collect();
2300
2301                for handle in handles {
2302                    if let Ok((findings, reasons)) = handle.join() {
2303                        all_findings.extend(findings);
2304                        partial_reasons.extend(reasons);
2305                    }
2306                }
2307            });
2308        }
2309
2310        // === Run `tldr calls` ONCE for the current project ===
2311        let language_str = ctx.language.as_str();
2312        let calls_engine = TldrDifferentialEngine::with_timeout(300);
2313        let current_calls_json = calls_engine
2314            .run_tldr_flow_command("calls", &["calls"], &ctx.project, language_str)
2315            .ok();
2316
2317        // === FLOW commands: project-wide analysis ===
2318        let flow_findings = self.analyze_flow_commands(
2319            &ctx.project,
2320            &ctx.base_ref,
2321            language_str,
2322            current_calls_json.as_ref(),
2323            &mut partial_reasons,
2324        );
2325        all_findings.extend(flow_findings);
2326
2327        // === IMPACT commands: downstream dependency analysis ===
2328        let impact_findings = self.analyze_downstream_impact(
2329            &ctx.project,
2330            &ctx.changed_files,
2331            language_str,
2332            current_calls_json.as_ref(),
2333            &mut partial_reasons,
2334        );
2335        all_findings.extend(impact_findings);
2336
2337        let func_impact_findings = self.analyze_function_impact(
2338            &ctx.project,
2339            &ctx.changed_files,
2340            language_str,
2341            current_calls_json.as_ref(),
2342            &mut partial_reasons,
2343        );
2344        all_findings.extend(func_impact_findings);
2345
2346        let duration_ms = start.elapsed().as_millis() as u64;
2347
2348        let status = if partial_reasons.is_empty() {
2349            AnalyzerStatus::Complete
2350        } else {
2351            AnalyzerStatus::Partial {
2352                reason: partial_reasons.join("; "),
2353            }
2354        };
2355
2356        L2AnalyzerOutput {
2357            findings: all_findings,
2358            status,
2359            duration_ms,
2360            functions_analyzed,
2361            functions_skipped,
2362        }
2363    }
2364}
2365
2366#[cfg(test)]
2367mod tests {
2368    use super::*;
2369    use crate::commands::bugbot::l2::context::{FunctionDiff, L2Context};
2370    use std::collections::HashMap;
2371    use std::path::PathBuf;
2372    use tldr_core::Language;
2373
2374    fn empty_context() -> L2Context {
2375        L2Context::new(
2376            PathBuf::from("/tmp/test-project"),
2377            Language::Rust,
2378            vec![],
2379            FunctionDiff {
2380                changed: vec![],
2381                inserted: vec![],
2382                deleted: vec![],
2383            },
2384            HashMap::new(),
2385            HashMap::new(),
2386            HashMap::new(),
2387        )
2388    }
2389
2390    // =========================================================================
2391    // Engine metadata tests
2392    // =========================================================================
2393
2394    #[test]
2395    fn test_engine_name() {
2396        let engine = TldrDifferentialEngine::new();
2397        assert_eq!(engine.name(), "TldrDifferentialEngine");
2398    }
2399
2400    #[test]
2401    fn test_finding_types() {
2402        let engine = TldrDifferentialEngine::new();
2403        let types = engine.finding_types();
2404        assert_eq!(types.len(), 11);
2405        assert!(types.contains(&"complexity-increase"));
2406        assert!(types.contains(&"cognitive-increase"));
2407        assert!(types.contains(&"contract-removed"));
2408        assert!(types.contains(&"smell-introduced"));
2409        assert!(types.contains(&"call-graph-change"));
2410        assert!(types.contains(&"dependency-change"));
2411        assert!(types.contains(&"coupling-increase"));
2412        assert!(types.contains(&"cohesion-decrease"));
2413        assert!(types.contains(&"dead-code-introduced"));
2414        assert!(types.contains(&"downstream-impact"));
2415        assert!(types.contains(&"breaking-change-risk"));
2416    }
2417
2418    #[test]
2419    fn test_default() {
2420        let engine = TldrDifferentialEngine::default();
2421        assert_eq!(engine.name(), "TldrDifferentialEngine");
2422        assert_eq!(engine.timeout_secs, 30);
2423    }
2424
2425    #[test]
2426    fn test_with_timeout() {
2427        let engine = TldrDifferentialEngine::with_timeout(60);
2428        assert_eq!(engine.timeout_secs, 60);
2429    }
2430
2431    #[test]
2432    fn test_languages_empty() {
2433        let engine = TldrDifferentialEngine::new();
2434        assert!(
2435            engine.languages().is_empty(),
2436            "TldrDifferentialEngine is language-agnostic"
2437        );
2438    }
2439
2440    // =========================================================================
2441    // Empty context behavior
2442    // =========================================================================
2443
2444    #[test]
2445    fn test_empty_context() {
2446        let engine = TldrDifferentialEngine::new();
2447        let ctx = empty_context();
2448        let output = engine.analyze(&ctx);
2449
2450        assert!(
2451            output.findings.is_empty(),
2452            "Empty context should produce no findings"
2453        );
2454        assert_eq!(output.functions_analyzed, 0);
2455        assert_eq!(output.functions_skipped, 0);
2456        assert!(output.duration_ms < 5000, "Should complete quickly");
2457    }
2458
2459    #[test]
2460    fn test_empty_context_status() {
2461        let engine = TldrDifferentialEngine::new();
2462        let ctx = empty_context();
2463        let output = engine.analyze(&ctx);
2464
2465        // With no changed files, local commands produce Complete.
2466        // Flow commands may produce Partial if tldr isn't on PATH, but the
2467        // status check is for the overall output shape.
2468        // We accept either Complete or Partial here since flow commands run
2469        // on project root and may fail on /tmp/test-project.
2470        match &output.status {
2471            AnalyzerStatus::Complete => {} // ideal
2472            AnalyzerStatus::Partial { .. } => {} // acceptable (flow command failures)
2473            other => panic!("Unexpected status: {:?}", other),
2474        }
2475    }
2476
2477    // =========================================================================
2478    // Graceful degradation when tldr not available
2479    // =========================================================================
2480
2481    #[test]
2482    fn test_run_tldr_command_not_found() {
2483        // Use a nonexistent binary name to simulate tldr not on PATH
2484        // We test the error handling path directly
2485        let engine = TldrDifferentialEngine::new();
2486        let result = engine.run_tldr_command(&["complexity"], Path::new("/dev/null"));
2487
2488        // Should return an error, not panic
2489        // The result may be an error (binary not found) or success with empty
2490        // output depending on environment. Either way, no panic.
2491        match result {
2492            Ok(_) => {} // tldr is on PATH and ran, that's fine
2493            Err(e) => {
2494                assert!(
2495                    !e.is_empty(),
2496                    "Error message should not be empty"
2497                );
2498            }
2499        }
2500    }
2501
2502    // =========================================================================
2503    // Trait object safety
2504    // =========================================================================
2505
2506    #[test]
2507    fn test_as_trait_object() {
2508        let engine: Box<dyn L2Engine> = Box::new(TldrDifferentialEngine::new());
2509        assert_eq!(engine.name(), "TldrDifferentialEngine");
2510        assert_eq!(engine.finding_types().len(), 11);
2511        assert!(engine.languages().is_empty());
2512    }
2513
2514    // =========================================================================
2515    // Finding ID determinism
2516    // =========================================================================
2517
2518    #[test]
2519    fn test_finding_id_deterministic() {
2520        let id1 = compute_finding_id("complexity-increase", Path::new("a.py"), "foo", 10);
2521        let id2 = compute_finding_id("complexity-increase", Path::new("a.py"), "foo", 10);
2522        assert_eq!(id1, id2);
2523    }
2524
2525    #[test]
2526    fn test_finding_id_differs_for_different_inputs() {
2527        let id1 = compute_finding_id("complexity-increase", Path::new("a.py"), "foo", 10);
2528        let id2 = compute_finding_id("complexity-increase", Path::new("a.py"), "bar", 10);
2529        assert_ne!(id1, id2);
2530    }
2531
2532    // =========================================================================
2533    // Diff logic unit tests (using mock JSON)
2534    // =========================================================================
2535
2536    #[test]
2537    fn test_diff_numeric_metrics_increase_detected() {
2538        let engine = TldrDifferentialEngine::new();
2539
2540        let baseline = serde_json::json!({
2541            "functions": [
2542                { "name": "process", "cyclomatic": 2, "line": 1 }
2543            ]
2544        });
2545        let current = serde_json::json!({
2546            "functions": [
2547                { "name": "process", "cyclomatic": 10, "line": 1 }
2548            ]
2549        });
2550
2551        let findings = engine.diff_numeric_metrics(
2552            "complexity-increase",
2553            "cyclomatic",
2554            Path::new("src/lib.py"),
2555            &baseline,
2556            &current,
2557        );
2558
2559        assert!(!findings.is_empty(), "Should detect cyclomatic increase");
2560        assert_eq!(findings[0].finding_type, "complexity-increase");
2561        assert_eq!(findings[0].confidence, Some("DETERMINISTIC".to_string()));
2562        assert!(findings[0].finding_id.is_some());
2563
2564        // Verify severity: 2 -> 10 = +400%, should be "high"
2565        assert_eq!(findings[0].severity, "high");
2566
2567        let evidence = &findings[0].evidence;
2568        assert_eq!(evidence["old_value"], 2.0);
2569        assert_eq!(evidence["new_value"], 10.0);
2570        assert_eq!(evidence["delta"], 8.0);
2571    }
2572
2573    #[test]
2574    fn test_diff_numeric_metrics_decrease_not_flagged() {
2575        let engine = TldrDifferentialEngine::new();
2576
2577        let baseline = serde_json::json!({
2578            "functions": [
2579                { "name": "process", "cyclomatic": 10, "line": 1 }
2580            ]
2581        });
2582        let current = serde_json::json!({
2583            "functions": [
2584                { "name": "process", "cyclomatic": 2, "line": 1 }
2585            ]
2586        });
2587
2588        let findings = engine.diff_numeric_metrics(
2589            "complexity-increase",
2590            "cyclomatic",
2591            Path::new("src/lib.py"),
2592            &baseline,
2593            &current,
2594        );
2595
2596        assert!(
2597            findings.is_empty(),
2598            "Decrease should not produce a finding"
2599        );
2600    }
2601
2602    #[test]
2603    fn test_diff_numeric_metrics_new_function_info() {
2604        let engine = TldrDifferentialEngine::new();
2605
2606        let baseline = serde_json::json!({
2607            "functions": []
2608        });
2609        let current = serde_json::json!({
2610            "functions": [
2611                { "name": "new_func", "cyclomatic": 15, "line": 5 }
2612            ]
2613        });
2614
2615        let findings = engine.diff_numeric_metrics(
2616            "complexity-increase",
2617            "cyclomatic",
2618            Path::new("src/lib.py"),
2619            &baseline,
2620            &current,
2621        );
2622
2623        assert!(!findings.is_empty(), "New function with high metric should be reported");
2624        assert_eq!(findings[0].severity, "info");
2625        assert!(findings[0].evidence["new_function"].as_bool().unwrap_or(false));
2626    }
2627
2628    #[test]
2629    fn test_diff_numeric_metrics_no_change() {
2630        let engine = TldrDifferentialEngine::new();
2631
2632        let baseline = serde_json::json!({
2633            "functions": [
2634                { "name": "process", "cyclomatic": 5, "line": 1 }
2635            ]
2636        });
2637        let current = serde_json::json!({
2638            "functions": [
2639                { "name": "process", "cyclomatic": 5, "line": 1 }
2640            ]
2641        });
2642
2643        let findings = engine.diff_numeric_metrics(
2644            "complexity-increase",
2645            "cyclomatic",
2646            Path::new("src/lib.py"),
2647            &baseline,
2648            &current,
2649        );
2650
2651        assert!(
2652            findings.is_empty(),
2653            "No change should produce no findings"
2654        );
2655    }
2656
2657    #[test]
2658    fn test_diff_contracts_removed() {
2659        let engine = TldrDifferentialEngine::new();
2660
2661        let baseline = serde_json::json!({
2662            "functions": [
2663                {
2664                    "name": "validate",
2665                    "preconditions": [{"expr": "x > 0"}],
2666                    "postconditions": [{"expr": "result >= 0"}]
2667                }
2668            ]
2669        });
2670        let current = serde_json::json!({
2671            "functions": [
2672                {
2673                    "name": "validate",
2674                    "preconditions": [],
2675                    "postconditions": []
2676                }
2677            ]
2678        });
2679
2680        let findings = engine.diff_contracts(
2681            Path::new("src/lib.py"),
2682            &baseline,
2683            &current,
2684            &["validate".to_string()],
2685        );
2686
2687        assert!(!findings.is_empty(), "Should detect removed contracts");
2688        assert_eq!(findings[0].finding_type, "contract-removed");
2689        assert_eq!(findings[0].severity, "medium");
2690        assert_eq!(findings[0].evidence["removed"], 2);
2691    }
2692
2693    #[test]
2694    fn test_diff_contracts_function_deleted() {
2695        let engine = TldrDifferentialEngine::new();
2696
2697        let baseline = serde_json::json!({
2698            "functions": [
2699                {
2700                    "name": "validate",
2701                    "preconditions": [{"expr": "x > 0"}],
2702                    "postconditions": []
2703                }
2704            ]
2705        });
2706        let current = serde_json::json!({
2707            "functions": []
2708        });
2709
2710        // Pass empty known_current_funcs so "validate" is genuinely absent
2711        let findings = engine.diff_contracts(
2712            Path::new("src/lib.py"),
2713            &baseline,
2714            &current,
2715            &[],
2716        );
2717
2718        assert!(!findings.is_empty(), "Should detect deleted function with contracts");
2719        assert_eq!(findings[0].severity, "high");
2720        assert!(findings[0].evidence["function_deleted"].as_bool().unwrap_or(false));
2721    }
2722
2723    #[test]
2724    fn test_diff_contracts_extraction_failure_not_treated_as_deletion() {
2725        let engine = TldrDifferentialEngine::new();
2726
2727        let baseline = serde_json::json!({
2728            "functions": [
2729                {
2730                    "name": "validate",
2731                    "preconditions": [{"expr": "x > 0"}],
2732                    "postconditions": []
2733                }
2734            ]
2735        });
2736        // Current JSON has no entries for "validate" (extraction failed),
2737        // but the function still exists in the current version.
2738        let current = serde_json::json!({
2739            "functions": []
2740        });
2741
2742        // "validate" is in known_current_funcs — extraction failed, not deleted
2743        let findings = engine.diff_contracts(
2744            Path::new("src/lib.rs"),
2745            &baseline,
2746            &current,
2747            &["validate".to_string()],
2748        );
2749
2750        assert!(findings.is_empty(), "Should NOT emit contract-removed when function exists but extraction failed");
2751    }
2752
2753    #[test]
2754    fn test_diff_smells_introduced() {
2755        let engine = TldrDifferentialEngine::new();
2756
2757        let baseline = serde_json::json!({
2758            "smells": [
2759                { "smell_type": "long_method", "name": "process", "line": 1, "reason": "too long", "severity": 1 }
2760            ]
2761        });
2762        let current = serde_json::json!({
2763            "smells": [
2764                { "smell_type": "long_method", "name": "process", "line": 1, "reason": "too long", "severity": 1 },
2765                { "smell_type": "god_class", "name": "Handler", "line": 20, "reason": "too many methods", "severity": 2 }
2766            ]
2767        });
2768
2769        let findings = engine.diff_smells(
2770            Path::new("src/lib.py"),
2771            &baseline,
2772            &current,
2773        );
2774
2775        assert!(!findings.is_empty(), "Should detect introduced smell");
2776        assert_eq!(findings[0].finding_type, "smell-introduced");
2777        assert_eq!(findings[0].severity, "medium"); // god_class is structural → medium
2778        assert_eq!(findings[0].evidence["introduced"], 1);
2779        // Verify smell_type is correctly extracted (not "unknown")
2780        assert_eq!(findings[0].evidence["smell_type"], "god_class");
2781        assert!(findings[0].message.contains("god_class"));
2782    }
2783
2784    #[test]
2785    fn test_diff_smells_no_regression() {
2786        let engine = TldrDifferentialEngine::new();
2787
2788        let baseline = serde_json::json!({
2789            "smells": [
2790                { "smell_type": "long_method", "name": "process", "line": 1, "reason": "too long", "severity": 1 }
2791            ]
2792        });
2793        let current = serde_json::json!({
2794            "smells": [
2795                { "smell_type": "long_method", "name": "process", "line": 1, "reason": "too long", "severity": 1 }
2796            ]
2797        });
2798
2799        let findings = engine.diff_smells(
2800            Path::new("src/lib.py"),
2801            &baseline,
2802            &current,
2803        );
2804
2805        assert!(findings.is_empty(), "Same smells should produce no findings");
2806    }
2807
2808    #[test]
2809    fn test_diff_smells_new_file_baseline_empty() {
2810        let engine = TldrDifferentialEngine::new();
2811
2812        // New file: baseline has no smells, current has many.
2813        // This is NOT a regression — all code is new, so no findings should fire.
2814        let baseline = serde_json::json!({ "smells": [] });
2815        let current = serde_json::json!({
2816            "smells": [
2817                { "smell_type": "god_class", "name": "BigEngine", "line": 10, "reason": "too big", "severity": 2 },
2818                { "smell_type": "long_method", "name": "run", "line": 50, "reason": "too long", "severity": 1 },
2819                { "smell_type": "long_method", "name": "analyze", "line": 200, "reason": "too long", "severity": 1 }
2820            ]
2821        });
2822
2823        let findings = engine.diff_smells(
2824            Path::new("src/new_module.rs"),
2825            &baseline,
2826            &current,
2827        );
2828
2829        assert!(findings.is_empty(), "New file (empty baseline) should not trigger smell-introduced");
2830    }
2831
2832    #[test]
2833    fn test_diff_smells_real_tldr_schema() {
2834        // Test with exact JSON schema produced by `tldr smells --format json`
2835        let engine = TldrDifferentialEngine::new();
2836
2837        let baseline = serde_json::json!({
2838            "smells": [
2839                {
2840                    "smell_type": "long_method",
2841                    "file": "src/engine.rs",
2842                    "name": "analyze",
2843                    "line": 100,
2844                    "reason": "Method has 52 lines of code (threshold: 50)",
2845                    "severity": 1
2846                }
2847            ],
2848            "files_scanned": 1,
2849            "by_file": {},
2850            "summary": { "total": 1 }
2851        });
2852        let current = serde_json::json!({
2853            "smells": [
2854                {
2855                    "smell_type": "long_method",
2856                    "file": "src/engine.rs",
2857                    "name": "analyze",
2858                    "line": 100,
2859                    "reason": "Method has 80 lines of code (threshold: 50)",
2860                    "severity": 2
2861                },
2862                {
2863                    "smell_type": "feature_envy",
2864                    "file": "src/engine.rs",
2865                    "name": "diff_metrics",
2866                    "line": 200,
2867                    "reason": "Method accesses 5 foreign fields",
2868                    "severity": 1
2869                },
2870                {
2871                    "smell_type": "data_clump",
2872                    "file": "src/engine.rs",
2873                    "name": "analyze_batch",
2874                    "line": 300,
2875                    "reason": "3 parameters always appear together",
2876                    "severity": 1
2877                }
2878            ],
2879            "files_scanned": 1,
2880            "by_file": {},
2881            "summary": { "total": 3 }
2882        });
2883
2884        let findings = engine.diff_smells(
2885            Path::new("src/engine.rs"),
2886            &baseline,
2887            &current,
2888        );
2889
2890        assert_eq!(findings.len(), 2, "Should detect 2 introduced smells");
2891        // Verify types are extracted from smell_type field (not "unknown")
2892        let types: Vec<&str> = findings.iter().map(|f| f.evidence["smell_type"].as_str().unwrap()).collect();
2893        assert!(types.contains(&"feature_envy"), "Should extract feature_envy type");
2894        assert!(types.contains(&"data_clump"), "Should extract data_clump type");
2895        // Structural smells should be medium severity
2896        assert!(findings.iter().all(|f| f.severity == "medium"), "Structural smells should be medium severity");
2897        // None should be "unknown"
2898        assert!(!types.contains(&"unknown"), "No smell should have type 'unknown'");
2899    }
2900
2901    #[test]
2902    fn test_diff_smells_suppressed_types_filtered() {
2903        let engine = TldrDifferentialEngine::new();
2904
2905        let baseline = serde_json::json!({
2906            "smells": [
2907                { "smell_type": "long_method", "name": "process", "line": 1, "reason": "too long", "severity": 1 }
2908            ]
2909        });
2910        // Introduce only suppressed smell types (message_chain, long_parameter_list)
2911        let current = serde_json::json!({
2912            "smells": [
2913                { "smell_type": "long_method", "name": "process", "line": 1, "reason": "too long", "severity": 1 },
2914                { "smell_type": "message_chain", "name": "chain", "line": 50, "reason": "chain length 4", "severity": 1 },
2915                { "smell_type": "long_parameter_list", "name": "many_params", "line": 80, "reason": "6 params", "severity": 1 }
2916            ]
2917        });
2918
2919        let findings = engine.diff_smells(
2920            Path::new("src/lib.rs"),
2921            &baseline,
2922            &current,
2923        );
2924
2925        assert!(findings.is_empty(), "Suppressed smell types should produce no findings");
2926    }
2927
2928    #[test]
2929    fn test_extract_function_entries_from_functions_key() {
2930        let json = serde_json::json!({
2931            "functions": [
2932                { "name": "foo", "value": 1 },
2933                { "name": "bar", "value": 2 }
2934            ]
2935        });
2936
2937        let entries = TldrDifferentialEngine::extract_function_entries(&json);
2938        assert_eq!(entries.len(), 2);
2939        assert_eq!(entries[0].0, "foo");
2940        assert_eq!(entries[1].0, "bar");
2941    }
2942
2943    #[test]
2944    fn test_extract_function_entries_from_root_array() {
2945        let json = serde_json::json!([
2946            { "name": "foo", "value": 1 },
2947            { "name": "bar", "value": 2 }
2948        ]);
2949
2950        let entries = TldrDifferentialEngine::extract_function_entries(&json);
2951        assert_eq!(entries.len(), 2);
2952    }
2953
2954    #[test]
2955    fn test_extract_function_entries_empty() {
2956        let json = serde_json::json!({ "other": 42 });
2957        let entries = TldrDifferentialEngine::extract_function_entries(&json);
2958        assert!(entries.is_empty());
2959    }
2960
2961    #[test]
2962    fn test_count_dead_code_entries() {
2963        let json = serde_json::json!({
2964            "dead_code": [
2965                { "name": "unused_fn", "file": "src/lib.rs" },
2966                { "name": "old_helper", "file": "src/utils.rs" }
2967            ]
2968        });
2969        assert_eq!(TldrDifferentialEngine::count_dead_code_entries(&json), 2);
2970    }
2971
2972    #[test]
2973    fn test_count_dead_code_entries_empty() {
2974        let json = serde_json::json!({ "dead_code": [] });
2975        assert_eq!(TldrDifferentialEngine::count_dead_code_entries(&json), 0);
2976    }
2977
2978    #[test]
2979    fn test_severity_thresholds() {
2980        let engine = TldrDifferentialEngine::new();
2981
2982        // >50% increase = high
2983        let high = serde_json::json!({ "functions": [{ "name": "f", "metric": 2.0, "line": 1 }] });
2984        let high_curr = serde_json::json!({ "functions": [{ "name": "f", "metric": 10.0, "line": 1 }] });
2985        let findings = engine.diff_numeric_metrics("test-increase", "metric", Path::new("a.py"), &high, &high_curr);
2986        assert_eq!(findings[0].severity, "high");
2987
2988        // 20-50% increase = medium
2989        let med = serde_json::json!({ "functions": [{ "name": "f", "metric": 10.0, "line": 1 }] });
2990        let med_curr = serde_json::json!({ "functions": [{ "name": "f", "metric": 14.0, "line": 1 }] });
2991        let findings = engine.diff_numeric_metrics("test-increase", "metric", Path::new("a.py"), &med, &med_curr);
2992        assert_eq!(findings[0].severity, "medium");
2993
2994        // <20% increase = low
2995        let low = serde_json::json!({ "functions": [{ "name": "f", "metric": 10.0, "line": 1 }] });
2996        let low_curr = serde_json::json!({ "functions": [{ "name": "f", "metric": 11.0, "line": 1 }] });
2997        let findings = engine.diff_numeric_metrics("test-increase", "metric", Path::new("a.py"), &low, &low_curr);
2998        assert_eq!(findings[0].severity, "low");
2999    }
3000
3001    #[test]
3002    fn test_cognitive_delta_threshold_filters_trivial() {
3003        let engine = TldrDifferentialEngine::new();
3004
3005        // Cognitive delta of 2 (below threshold of 3) should be suppressed
3006        let baseline = serde_json::json!({ "functions": [{ "name": "f", "cognitive": 2.0, "line": 1 }] });
3007        let current = serde_json::json!({ "functions": [{ "name": "f", "cognitive": 4.0, "line": 1 }] });
3008        let findings = engine.diff_numeric_metrics("cognitive-increase", "cognitive", Path::new("a.rs"), &baseline, &current);
3009        assert!(findings.is_empty(), "Cognitive delta of 2 should be suppressed (threshold 3)");
3010
3011        // Cognitive delta of 3 (at threshold) should be reported
3012        let baseline = serde_json::json!({ "functions": [{ "name": "g", "cognitive": 5.0, "line": 1 }] });
3013        let current = serde_json::json!({ "functions": [{ "name": "g", "cognitive": 8.0, "line": 1 }] });
3014        let findings = engine.diff_numeric_metrics("cognitive-increase", "cognitive", Path::new("a.rs"), &baseline, &current);
3015        assert_eq!(findings.len(), 1, "Cognitive delta of 3 should be reported");
3016
3017        // Complexity delta of 1 (below threshold of 2) should be suppressed
3018        let baseline = serde_json::json!({ "functions": [{ "name": "h", "cyclomatic": 3.0, "line": 1 }] });
3019        let current = serde_json::json!({ "functions": [{ "name": "h", "cyclomatic": 4.0, "line": 1 }] });
3020        let findings = engine.diff_numeric_metrics("complexity-increase", "cyclomatic", Path::new("a.rs"), &baseline, &current);
3021        assert!(findings.is_empty(), "Complexity delta of 1 should be suppressed (threshold 2)");
3022
3023        // Complexity delta of 2 (at threshold) should be reported
3024        let baseline = serde_json::json!({ "functions": [{ "name": "j", "cyclomatic": 3.0, "line": 1 }] });
3025        let current = serde_json::json!({ "functions": [{ "name": "j", "cyclomatic": 5.0, "line": 1 }] });
3026        let findings = engine.diff_numeric_metrics("complexity-increase", "cyclomatic", Path::new("a.rs"), &baseline, &current);
3027        assert_eq!(findings.len(), 1, "Complexity delta of 2 should be reported");
3028    }
3029
3030    // =========================================================================
3031    // Integration test: complexity diff via actual tldr binary
3032    // =========================================================================
3033
3034    #[test]
3035    fn test_complexity_diff_real_tldr() {
3036        // Skip this test if tldr is not on PATH
3037        if Command::new("tldr").arg("--version").output().is_err() {
3038            eprintln!("Skipping test_complexity_diff_real_tldr: tldr not on PATH");
3039            return;
3040        }
3041
3042        let engine = TldrDifferentialEngine::with_timeout(10);
3043
3044        // Create a temp dir with baseline and current Python files
3045        let tmp_dir = TempDir::new().expect("create tmpdir");
3046        let baseline_file = tmp_dir.path().join("baseline.py");
3047        let current_file = tmp_dir.path().join("current.py");
3048
3049        std::fs::write(
3050            &baseline_file,
3051            "def process(x):\n    return x + 1\n",
3052        ).expect("write baseline");
3053
3054        std::fs::write(
3055            &current_file,
3056            "def process(x):\n    if x > 10:\n        if x > 20:\n            return x * 3\n        return x * 2\n    return x\n",
3057        ).expect("write current");
3058
3059        // Run complexity command on both
3060        let baseline_result = engine.run_tldr_command(&["complexity"], &baseline_file);
3061        let current_result = engine.run_tldr_command(&["complexity"], &current_file);
3062
3063        // Both should succeed (tldr is on PATH)
3064        match (baseline_result, current_result) {
3065            (Ok(baseline_json), Ok(current_json)) => {
3066                // The JSON should be parseable
3067                assert!(baseline_json.is_object() || baseline_json.is_array());
3068                assert!(current_json.is_object() || current_json.is_array());
3069            }
3070            (Err(e), _) => {
3071                // Acceptable: tldr might not support the command or file type
3072                eprintln!("Baseline complexity failed (acceptable): {}", e);
3073            }
3074            (_, Err(e)) => {
3075                eprintln!("Current complexity failed (acceptable): {}", e);
3076            }
3077        }
3078    }
3079
3080    // =========================================================================
3081    // TLDR_COMMANDS config tests
3082    // =========================================================================
3083
3084    #[test]
3085    fn test_tldr_commands_count() {
3086        assert_eq!(TLDR_COMMANDS.len(), 9);
3087    }
3088
3089    #[test]
3090    fn test_tldr_commands_local_count() {
3091        let local_count = TLDR_COMMANDS.iter().filter(|c| c.category == TldrCategory::Local).count();
3092        assert_eq!(local_count, 4);
3093    }
3094
3095    #[test]
3096    fn test_tldr_commands_flow_count() {
3097        let flow_count = TLDR_COMMANDS.iter().filter(|c| c.category == TldrCategory::Flow).count();
3098        assert_eq!(flow_count, 5);
3099    }
3100
3101    #[test]
3102    fn test_finding_types_match_commands() {
3103        // Every TLDR_COMMANDS entry should have a corresponding finding type.
3104        // FINDING_TYPES also includes "downstream-impact" and "breaking-change-risk"
3105        // which come from whatbreaks/impact commands (not in TLDR_COMMANDS).
3106        assert_eq!(FINDING_TYPES.len(), TLDR_COMMANDS.len() + 2);
3107        // Verify the extra types are the impact ones
3108        assert!(FINDING_TYPES.contains(&"downstream-impact"));
3109        assert!(FINDING_TYPES.contains(&"breaking-change-risk"));
3110    }
3111
3112    // =========================================================================
3113    // Flow command baseline diffing: diff_calls_json
3114    // =========================================================================
3115
3116    #[test]
3117    fn test_diff_calls_new_edges_detected() {
3118        let engine = TldrDifferentialEngine::new();
3119        let baseline = serde_json::json!({
3120            "edges": [{"src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct"}],
3121            "edge_count": 1
3122        });
3123        let current = serde_json::json!({
3124            "edges": [
3125                {"src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct"},
3126                {"src_file": "a.rs", "src_func": "foo", "dst_file": "c.rs", "dst_func": "baz", "call_type": "direct"}
3127            ],
3128            "edge_count": 2
3129        });
3130        let findings = engine.diff_calls_json(&baseline, &current);
3131        assert!(!findings.is_empty(), "Should detect new call graph edge");
3132        assert_eq!(findings[0].finding_type, "call-graph-change");
3133        assert_eq!(findings[0].confidence, Some("DETERMINISTIC".to_string()));
3134        assert!(findings[0].finding_id.is_some());
3135    }
3136
3137    #[test]
3138    fn test_diff_calls_no_change() {
3139        let engine = TldrDifferentialEngine::new();
3140        let json = serde_json::json!({
3141            "edges": [{"src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct"}],
3142            "edge_count": 1
3143        });
3144        let findings = engine.diff_calls_json(&json, &json);
3145        assert!(findings.is_empty(), "No change should produce no findings");
3146    }
3147
3148    #[test]
3149    fn test_diff_calls_removed_edge_reported() {
3150        let engine = TldrDifferentialEngine::new();
3151        let baseline = serde_json::json!({
3152            "edges": [
3153                {"src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct"},
3154                {"src_file": "a.rs", "src_func": "foo", "dst_file": "c.rs", "dst_func": "baz", "call_type": "direct"}
3155            ],
3156            "edge_count": 2
3157        });
3158        let current = serde_json::json!({
3159            "edges": [{"src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct"}],
3160            "edge_count": 1
3161        });
3162        let findings = engine.diff_calls_json(&baseline, &current);
3163        assert!(!findings.is_empty(), "Should detect removed call graph edge");
3164        assert_eq!(findings[0].finding_type, "call-graph-change");
3165    }
3166
3167    #[test]
3168    fn test_diff_calls_many_new_edges_medium_severity() {
3169        let engine = TldrDifferentialEngine::new();
3170        let baseline = serde_json::json!({
3171            "edges": [{"src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct"}],
3172            "edge_count": 1
3173        });
3174        // Add 6 new edges (>5 threshold for medium severity)
3175        let current = serde_json::json!({
3176            "edges": [
3177                {"src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct"},
3178                {"src_file": "a.rs", "src_func": "foo", "dst_file": "c.rs", "dst_func": "baz", "call_type": "direct"},
3179                {"src_file": "a.rs", "src_func": "foo", "dst_file": "d.rs", "dst_func": "qux", "call_type": "direct"},
3180                {"src_file": "a.rs", "src_func": "foo", "dst_file": "e.rs", "dst_func": "quux", "call_type": "direct"},
3181                {"src_file": "b.rs", "src_func": "bar", "dst_file": "c.rs", "dst_func": "baz", "call_type": "direct"},
3182                {"src_file": "b.rs", "src_func": "bar", "dst_file": "d.rs", "dst_func": "qux", "call_type": "direct"},
3183                {"src_file": "b.rs", "src_func": "bar", "dst_file": "e.rs", "dst_func": "quux", "call_type": "direct"}
3184            ],
3185            "edge_count": 7
3186        });
3187        let findings = engine.diff_calls_json(&baseline, &current);
3188        assert!(!findings.is_empty());
3189        // At least one finding should have medium severity when >5 new edges
3190        let has_medium = findings.iter().any(|f| f.severity == "medium");
3191        assert!(has_medium, "Should produce a medium-severity summary finding for >5 new edges");
3192    }
3193
3194    // =========================================================================
3195    // Flow command baseline diffing: diff_deps_json
3196    // =========================================================================
3197
3198    #[test]
3199    fn test_diff_deps_new_circular_dep_high_severity() {
3200        let engine = TldrDifferentialEngine::new();
3201        let baseline = serde_json::json!({
3202            "internal_dependencies": {"a.rs": ["b.rs"]},
3203            "circular_dependencies": [],
3204            "stats": {"total_internal_deps": 1}
3205        });
3206        let current = serde_json::json!({
3207            "internal_dependencies": {"a.rs": ["b.rs"], "b.rs": ["a.rs"]},
3208            "circular_dependencies": [{"path": ["a.rs", "b.rs", "a.rs"], "len": 3}],
3209            "stats": {"total_internal_deps": 2}
3210        });
3211        let findings = engine.diff_deps_json(&baseline, &current);
3212        assert!(!findings.is_empty(), "Should detect new circular dependency");
3213        assert_eq!(findings[0].finding_type, "dependency-change");
3214        assert_eq!(findings[0].severity, "high");
3215    }
3216
3217    #[test]
3218    fn test_diff_deps_no_change() {
3219        let engine = TldrDifferentialEngine::new();
3220        let json = serde_json::json!({
3221            "internal_dependencies": {"a.rs": ["b.rs"]},
3222            "circular_dependencies": [],
3223            "stats": {"total_internal_deps": 1}
3224        });
3225        let findings = engine.diff_deps_json(&json, &json);
3226        assert!(findings.is_empty(), "No change should produce no findings");
3227    }
3228
3229    #[test]
3230    fn test_diff_deps_removed_circular_not_flagged() {
3231        let engine = TldrDifferentialEngine::new();
3232        let baseline = serde_json::json!({
3233            "internal_dependencies": {"a.rs": ["b.rs"], "b.rs": ["a.rs"]},
3234            "circular_dependencies": [{"path": ["a.rs", "b.rs", "a.rs"], "len": 3}],
3235            "stats": {"total_internal_deps": 2}
3236        });
3237        let current = serde_json::json!({
3238            "internal_dependencies": {"a.rs": ["b.rs"]},
3239            "circular_dependencies": [],
3240            "stats": {"total_internal_deps": 1}
3241        });
3242        let findings = engine.diff_deps_json(&baseline, &current);
3243        // Removing a circular dependency is an improvement, not a regression
3244        let has_high = findings.iter().any(|f| f.severity == "high");
3245        assert!(!has_high, "Removing circular dependency should not produce high severity finding");
3246    }
3247
3248    #[test]
3249    fn test_diff_deps_internal_deps_dict_count() {
3250        // Verify that internal_dependencies as a dict is counted correctly
3251        let engine = TldrDifferentialEngine::new();
3252        let baseline = serde_json::json!({
3253            "internal_dependencies": {"a.rs": ["b.rs"]},
3254            "circular_dependencies": [],
3255            "stats": {"total_internal_deps": 1}
3256        });
3257        let current = serde_json::json!({
3258            "internal_dependencies": {"a.rs": ["b.rs", "c.rs", "d.rs", "e.rs", "f.rs", "g.rs", "h.rs"]},
3259            "circular_dependencies": [],
3260            "stats": {"total_internal_deps": 7}
3261        });
3262        let findings = engine.diff_deps_json(&baseline, &current);
3263        assert!(!findings.is_empty(), "Should detect dependency count increase of 6 (>5 threshold)");
3264        assert_eq!(findings[0].finding_type, "dependency-change");
3265        assert_eq!(findings[0].severity, "medium");
3266    }
3267
3268    #[test]
3269    fn test_diff_deps_fallback_to_dict_counting_without_stats() {
3270        // When stats.total_internal_deps is missing, fall back to counting dict entries
3271        let engine = TldrDifferentialEngine::new();
3272        let baseline = serde_json::json!({
3273            "internal_dependencies": {"a.rs": ["b.rs"]},
3274            "circular_dependencies": []
3275        });
3276        let current = serde_json::json!({
3277            "internal_dependencies": {"a.rs": ["b.rs", "c.rs", "d.rs", "e.rs", "f.rs", "g.rs", "h.rs"]},
3278            "circular_dependencies": []
3279        });
3280        let findings = engine.diff_deps_json(&baseline, &current);
3281        assert!(!findings.is_empty(), "Should detect dependency count increase even without stats field");
3282    }
3283
3284    // =========================================================================
3285    // Flow command baseline diffing: diff_coupling_json
3286    // =========================================================================
3287
3288    #[test]
3289    fn test_diff_coupling_instability_increase_detected() {
3290        let engine = TldrDifferentialEngine::new();
3291        let baseline = serde_json::json!({
3292            "martin_metrics": [
3293                {"module": "core", "ca": 5, "ce": 2, "instability": 0.29, "abstractness": 0.1}
3294            ],
3295            "pairwise_coupling": []
3296        });
3297        let current = serde_json::json!({
3298            "martin_metrics": [
3299                {"module": "core", "ca": 5, "ce": 8, "instability": 0.62, "abstractness": 0.1}
3300            ],
3301            "pairwise_coupling": []
3302        });
3303        let findings = engine.diff_coupling_json(&baseline, &current);
3304        assert!(!findings.is_empty(), "Should detect instability increase");
3305        assert_eq!(findings[0].finding_type, "coupling-increase");
3306    }
3307
3308    #[test]
3309    fn test_diff_coupling_no_change() {
3310        let engine = TldrDifferentialEngine::new();
3311        let json = serde_json::json!({
3312            "martin_metrics": [
3313                {"module": "core", "ca": 5, "ce": 2, "instability": 0.29, "abstractness": 0.1}
3314            ],
3315            "pairwise_coupling": []
3316        });
3317        let findings = engine.diff_coupling_json(&json, &json);
3318        assert!(findings.is_empty(), "No change should produce no findings");
3319    }
3320
3321    #[test]
3322    fn test_diff_coupling_improvement_not_flagged() {
3323        let engine = TldrDifferentialEngine::new();
3324        let baseline = serde_json::json!({
3325            "martin_metrics": [
3326                {"module": "core", "ca": 5, "ce": 8, "instability": 0.62, "abstractness": 0.1}
3327            ],
3328            "pairwise_coupling": []
3329        });
3330        let current = serde_json::json!({
3331            "martin_metrics": [
3332                {"module": "core", "ca": 5, "ce": 2, "instability": 0.29, "abstractness": 0.1}
3333            ],
3334            "pairwise_coupling": []
3335        });
3336        let findings = engine.diff_coupling_json(&baseline, &current);
3337        assert!(findings.is_empty(), "Coupling decrease should not produce findings");
3338    }
3339
3340    // =========================================================================
3341    // Flow command baseline diffing: diff_cohesion_json
3342    // =========================================================================
3343
3344    #[test]
3345    fn test_diff_cohesion_lcom4_increase_detected() {
3346        let engine = TldrDifferentialEngine::new();
3347        let baseline = serde_json::json!({
3348            "classes": [
3349                {"class_name": "Engine", "lcom4": 1, "method_count": 5, "field_count": 3}
3350            ],
3351            "summary": {"total_classes": 1}
3352        });
3353        let current = serde_json::json!({
3354            "classes": [
3355                {"class_name": "Engine", "lcom4": 4, "method_count": 8, "field_count": 3}
3356            ],
3357            "summary": {"total_classes": 1}
3358        });
3359        let findings = engine.diff_cohesion_json(&baseline, &current);
3360        assert!(!findings.is_empty(), "Should detect LCOM4 increase");
3361        assert_eq!(findings[0].finding_type, "cohesion-decrease");
3362    }
3363
3364    #[test]
3365    fn test_diff_cohesion_no_change() {
3366        let engine = TldrDifferentialEngine::new();
3367        let json = serde_json::json!({
3368            "classes": [
3369                {"class_name": "Engine", "lcom4": 2, "method_count": 5, "field_count": 3}
3370            ],
3371            "summary": {"total_classes": 1}
3372        });
3373        let findings = engine.diff_cohesion_json(&json, &json);
3374        assert!(findings.is_empty(), "No change should produce no findings");
3375    }
3376
3377    #[test]
3378    fn test_diff_cohesion_improvement_not_flagged() {
3379        let engine = TldrDifferentialEngine::new();
3380        let baseline = serde_json::json!({
3381            "classes": [
3382                {"class_name": "Engine", "lcom4": 5, "method_count": 10, "field_count": 3}
3383            ],
3384            "summary": {"total_classes": 1}
3385        });
3386        let current = serde_json::json!({
3387            "classes": [
3388                {"class_name": "Engine", "lcom4": 1, "method_count": 4, "field_count": 3}
3389            ],
3390            "summary": {"total_classes": 1}
3391        });
3392        let findings = engine.diff_cohesion_json(&baseline, &current);
3393        assert!(findings.is_empty(), "LCOM4 decrease is an improvement, should not produce findings");
3394    }
3395
3396    #[test]
3397    fn test_diff_cohesion_new_class_high_lcom4_info() {
3398        let engine = TldrDifferentialEngine::new();
3399        let baseline = serde_json::json!({
3400            "classes": [],
3401            "summary": {"total_classes": 0}
3402        });
3403        let current = serde_json::json!({
3404            "classes": [
3405                {"class_name": "GodObject", "lcom4": 5, "method_count": 12, "field_count": 0, "verdict": "split_candidate"}
3406            ],
3407            "summary": {"total_classes": 1}
3408        });
3409        let findings = engine.diff_cohesion_json(&baseline, &current);
3410        assert!(!findings.is_empty(), "New class with high LCOM4 should be flagged");
3411        assert_eq!(findings[0].severity, "info");
3412    }
3413
3414    #[test]
3415    fn test_diff_cohesion_backward_compat_name_field() {
3416        // Verify backward compatibility: "name" field still works as fallback
3417        let engine = TldrDifferentialEngine::new();
3418        let baseline = serde_json::json!({
3419            "classes": [{"name": "Legacy", "lcom4": 1}],
3420            "summary": {"total_classes": 1}
3421        });
3422        let current = serde_json::json!({
3423            "classes": [{"name": "Legacy", "lcom4": 4}],
3424            "summary": {"total_classes": 1}
3425        });
3426        let findings = engine.diff_cohesion_json(&baseline, &current);
3427        assert!(!findings.is_empty(), "Should still work with 'name' field as fallback");
3428    }
3429
3430    // =========================================================================
3431    // L2Context base_ref field
3432    // =========================================================================
3433
3434    #[test]
3435    fn test_l2context_default_base_ref() {
3436        let ctx = empty_context();
3437        assert_eq!(ctx.base_ref, "HEAD", "Default base_ref should be HEAD");
3438    }
3439
3440    #[test]
3441    fn test_l2context_with_base_ref() {
3442        let ctx = empty_context().with_base_ref(String::from("main"));
3443        assert_eq!(ctx.base_ref, "main");
3444    }
3445
3446    // =========================================================================
3447    // analyze_flow_commands takes base_ref
3448    // =========================================================================
3449
3450    #[test]
3451    fn test_analyze_flow_commands_accepts_base_ref_and_language() {
3452        let engine = TldrDifferentialEngine::new();
3453        let mut partial_reasons = Vec::new();
3454        // Should not panic — graceful failure when project dir doesn't exist
3455        let _findings = engine.analyze_flow_commands(
3456            Path::new("/tmp/nonexistent-project-for-test"),
3457            "HEAD",
3458            "rust",
3459            None,
3460            &mut partial_reasons,
3461        );
3462        // Flow commands should fail gracefully on non-existent project
3463        // (either empty findings or partial_reasons populated, but no panic)
3464    }
3465
3466    // =========================================================================
3467    // run_tldr_flow_command: --lang and --respect-ignore filtering
3468    // =========================================================================
3469
3470    #[test]
3471    fn test_run_tldr_flow_command_exists() {
3472        // Verify the method signature exists and is callable
3473        let engine = TldrDifferentialEngine::new();
3474        // Calling with a nonexistent path should return Err, not panic
3475        let result = engine.run_tldr_flow_command(
3476            "calls",
3477            &["calls"],
3478            Path::new("/tmp/nonexistent-project"),
3479            "rust",
3480        );
3481        // Either Ok (if tldr is available) or Err (spawn/parse failure) — no panic
3482        let _ = result;
3483    }
3484
3485    #[test]
3486    fn test_run_tldr_flow_command_builds_args_with_lang() {
3487        // Verify the method constructs correct args by testing the public interface.
3488        // We test indirectly: the method should produce the same result as run_tldr_command
3489        // but with additional --lang and possibly --respect-ignore flags.
3490        // Since we can't inspect the internal args directly, we verify the method
3491        // is callable with various language strings.
3492        let engine = TldrDifferentialEngine::with_timeout(1);
3493
3494        for lang in &["python", "rust", "typescript", "go", "java"] {
3495            let result = engine.run_tldr_flow_command(
3496                "dead",
3497                &["dead"],
3498                Path::new("/tmp/nonexistent"),
3499                lang,
3500            );
3501            // Should not panic for any language
3502            let _ = result;
3503        }
3504    }
3505
3506    #[test]
3507    fn test_run_tldr_flow_command_calls_gets_respect_ignore() {
3508        // The `calls` command should get --respect-ignore.
3509        // We verify indirectly that the method distinguishes command names.
3510        let engine = TldrDifferentialEngine::with_timeout(1);
3511
3512        // Both should be callable without panic, but `calls` gets --respect-ignore
3513        let _calls_result = engine.run_tldr_flow_command(
3514            "calls",
3515            &["calls"],
3516            Path::new("/tmp/nonexistent"),
3517            "rust",
3518        );
3519        let _deps_result = engine.run_tldr_flow_command(
3520            "deps",
3521            &["deps"],
3522            Path::new("/tmp/nonexistent"),
3523            "rust",
3524        );
3525    }
3526
3527    // =========================================================================
3528    // Flow timeout: 300s for flow commands
3529    // =========================================================================
3530
3531    #[test]
3532    fn test_flow_engine_timeout_is_300s() {
3533        // The analyze method should use 300s timeout for flow commands,
3534        // not the artificial max(self.timeout_secs, 60).
3535        // We verify via analyze_flow_commands: the flow_engine inside uses 300s.
3536        // Since we can't inspect the internal flow_engine directly, we verify
3537        // that analyze_flow_commands completes without artificial timeout issues
3538        // by checking it uses a generous timeout.
3539        let engine = TldrDifferentialEngine::with_timeout(10);
3540        let mut partial_reasons = Vec::new();
3541        let _findings = engine.analyze_flow_commands(
3542            Path::new("/tmp/nonexistent-project"),
3543            "HEAD",
3544            "python",
3545            None,
3546            &mut partial_reasons,
3547        );
3548        // The fact that it runs without panic is sufficient;
3549        // the timeout change is an internal implementation detail.
3550    }
3551
3552    // =========================================================================
3553    // analyze() passes language to analyze_flow_commands
3554    // =========================================================================
3555
3556    #[test]
3557    fn test_analyze_passes_language_to_flow_commands() {
3558        // Verify that analyze() correctly derives language string from ctx.language
3559        // and passes it to flow commands.
3560        let engine = TldrDifferentialEngine::new();
3561        let ctx = L2Context::new(
3562            PathBuf::from("/tmp/test-project-lang"),
3563            Language::Python,
3564            vec![],
3565            FunctionDiff {
3566                changed: vec![],
3567                inserted: vec![],
3568                deleted: vec![],
3569            },
3570            HashMap::new(),
3571            HashMap::new(),
3572            HashMap::new(),
3573        );
3574        let output = engine.analyze(&ctx);
3575        // Should complete without panic. Flow commands will fail on /tmp path,
3576        // but the important thing is the language plumbing works.
3577        match &output.status {
3578            AnalyzerStatus::Complete => {}
3579            AnalyzerStatus::Partial { .. } => {}
3580            other => panic!("Unexpected status: {:?}", other),
3581        }
3582    }
3583
3584    // =========================================================================
3585    // downstream-impact (whatbreaks) parsing tests
3586    // =========================================================================
3587
3588    #[test]
3589    fn test_finding_types_includes_impact() {
3590        let engine = TldrDifferentialEngine::new();
3591        let types = engine.finding_types();
3592        assert!(
3593            types.contains(&"downstream-impact"),
3594            "FINDING_TYPES must include downstream-impact"
3595        );
3596        assert!(
3597            types.contains(&"breaking-change-risk"),
3598            "FINDING_TYPES must include breaking-change-risk"
3599        );
3600    }
3601
3602    #[test]
3603    fn test_downstream_impact_severity_high() {
3604        let json = serde_json::json!({
3605            "summary": {
3606                "importer_count": 15,
3607                "direct_caller_count": 3,
3608                "affected_test_count": 2
3609            }
3610        });
3611        let file = PathBuf::from("src/lib.rs");
3612        let findings = TldrDifferentialEngine::parse_whatbreaks_findings(&file, &json);
3613        assert_eq!(findings.len(), 1);
3614        assert_eq!(findings[0].finding_type, "downstream-impact");
3615        assert_eq!(findings[0].severity, "high");
3616        assert_eq!(findings[0].function, "(file-level)");
3617        assert_eq!(findings[0].file, file);
3618        assert_eq!(
3619            findings[0].confidence.as_deref(),
3620            Some("DETERMINISTIC")
3621        );
3622        assert!(findings[0].finding_id.is_some());
3623
3624        // Verify evidence fields
3625        let ev = &findings[0].evidence;
3626        assert_eq!(ev["command"], "whatbreaks");
3627        assert_eq!(ev["importer_count"], 15);
3628        assert_eq!(ev["direct_caller_count"], 3);
3629        assert_eq!(ev["affected_test_count"], 2);
3630    }
3631
3632    #[test]
3633    fn test_downstream_impact_severity_medium() {
3634        let json = serde_json::json!({
3635            "summary": {
3636                "importer_count": 7,
3637                "direct_caller_count": 1,
3638                "affected_test_count": 0
3639            }
3640        });
3641        let file = PathBuf::from("src/core.rs");
3642        let findings = TldrDifferentialEngine::parse_whatbreaks_findings(&file, &json);
3643        assert_eq!(findings.len(), 1);
3644        assert_eq!(findings[0].severity, "medium");
3645    }
3646
3647    #[test]
3648    fn test_downstream_impact_severity_low() {
3649        let json = serde_json::json!({
3650            "summary": {
3651                "importer_count": 2,
3652                "direct_caller_count": 0,
3653                "affected_test_count": 1
3654            }
3655        });
3656        let file = PathBuf::from("src/utils.rs");
3657        let findings = TldrDifferentialEngine::parse_whatbreaks_findings(&file, &json);
3658        assert_eq!(findings.len(), 1);
3659        assert_eq!(findings[0].severity, "low");
3660    }
3661
3662    #[test]
3663    fn test_downstream_impact_no_findings_when_no_importers() {
3664        let json = serde_json::json!({
3665            "summary": {
3666                "importer_count": 0,
3667                "direct_caller_count": 0,
3668                "affected_test_count": 0
3669            }
3670        });
3671        let file = PathBuf::from("src/leaf.rs");
3672        let findings = TldrDifferentialEngine::parse_whatbreaks_findings(&file, &json);
3673        assert!(
3674            findings.is_empty(),
3675            "Zero importers and zero callers should produce no findings"
3676        );
3677    }
3678
3679    #[test]
3680    fn test_downstream_impact_boundary_importer_3() {
3681        // importer_count == 3 is NOT > 3, so severity should be "low"
3682        let json = serde_json::json!({
3683            "summary": {
3684                "importer_count": 3,
3685                "direct_caller_count": 0,
3686                "affected_test_count": 0
3687            }
3688        });
3689        let file = PathBuf::from("src/boundary.rs");
3690        let findings = TldrDifferentialEngine::parse_whatbreaks_findings(&file, &json);
3691        assert_eq!(findings.len(), 1);
3692        assert_eq!(findings[0].severity, "low");
3693    }
3694
3695    #[test]
3696    fn test_downstream_impact_boundary_importer_4() {
3697        // importer_count == 4 is > 3 but NOT > 10, so severity should be "medium"
3698        let json = serde_json::json!({
3699            "summary": {
3700                "importer_count": 4,
3701                "direct_caller_count": 0,
3702                "affected_test_count": 0
3703            }
3704        });
3705        let file = PathBuf::from("src/boundary4.rs");
3706        let findings = TldrDifferentialEngine::parse_whatbreaks_findings(&file, &json);
3707        assert_eq!(findings.len(), 1);
3708        assert_eq!(findings[0].severity, "medium");
3709    }
3710
3711    #[test]
3712    fn test_downstream_impact_boundary_importer_10() {
3713        // importer_count == 10 is NOT > 10, so severity should be "medium"
3714        let json = serde_json::json!({
3715            "summary": {
3716                "importer_count": 10,
3717                "direct_caller_count": 0,
3718                "affected_test_count": 0
3719            }
3720        });
3721        let file = PathBuf::from("src/boundary10.rs");
3722        let findings = TldrDifferentialEngine::parse_whatbreaks_findings(&file, &json);
3723        assert_eq!(findings.len(), 1);
3724        assert_eq!(findings[0].severity, "medium");
3725    }
3726
3727    #[test]
3728    fn test_downstream_impact_boundary_importer_11() {
3729        // importer_count == 11 is > 10, so severity should be "high"
3730        let json = serde_json::json!({
3731            "summary": {
3732                "importer_count": 11,
3733                "direct_caller_count": 0,
3734                "affected_test_count": 0
3735            }
3736        });
3737        let file = PathBuf::from("src/boundary11.rs");
3738        let findings = TldrDifferentialEngine::parse_whatbreaks_findings(&file, &json);
3739        assert_eq!(findings.len(), 1);
3740        assert_eq!(findings[0].severity, "high");
3741    }
3742
3743    #[test]
3744    fn test_downstream_impact_callers_only() {
3745        // 0 importers but positive caller_count still emits a finding
3746        let json = serde_json::json!({
3747            "summary": {
3748                "importer_count": 0,
3749                "direct_caller_count": 5,
3750                "affected_test_count": 0
3751            }
3752        });
3753        let file = PathBuf::from("src/callers.rs");
3754        let findings = TldrDifferentialEngine::parse_whatbreaks_findings(&file, &json);
3755        assert_eq!(findings.len(), 1);
3756        assert_eq!(findings[0].severity, "low");
3757        assert!(findings[0].message.contains("5 direct callers"));
3758    }
3759
3760    #[test]
3761    fn test_downstream_impact_summary_at_top_level() {
3762        // When summary fields are at top level (no "summary" wrapper)
3763        let json = serde_json::json!({
3764            "importer_count": 6,
3765            "direct_caller_count": 2,
3766            "affected_test_count": 1
3767        });
3768        let file = PathBuf::from("src/flat.rs");
3769        let findings = TldrDifferentialEngine::parse_whatbreaks_findings(&file, &json);
3770        assert_eq!(findings.len(), 1);
3771        assert_eq!(findings[0].severity, "medium");
3772    }
3773
3774    // =========================================================================
3775    // breaking-change-risk (impact) parsing tests
3776    // =========================================================================
3777
3778    #[test]
3779    fn test_function_impact_high_severity() {
3780        let json = serde_json::json!({
3781            "targets": {
3782                "process_data": {
3783                    "caller_count": 8,
3784                    "callers": [
3785                        { "file": "main.rs", "function": "run" },
3786                        { "file": "handler.rs", "function": "handle" },
3787                        { "file": "api.rs", "function": "endpoint" },
3788                        { "file": "worker.rs", "function": "execute" },
3789                        { "file": "batch.rs", "function": "process_all" },
3790                        { "file": "test.rs", "function": "test_it" },
3791                    ]
3792                }
3793            }
3794        });
3795        let findings =
3796            TldrDifferentialEngine::parse_impact_findings("process_data", &json);
3797        assert_eq!(findings.len(), 1);
3798        assert_eq!(findings[0].finding_type, "breaking-change-risk");
3799        assert_eq!(findings[0].severity, "high");
3800        assert_eq!(findings[0].function, "process_data");
3801        assert_eq!(findings[0].file, PathBuf::from("(project)"));
3802        assert_eq!(
3803            findings[0].confidence.as_deref(),
3804            Some("DETERMINISTIC")
3805        );
3806        assert!(findings[0].finding_id.is_some());
3807
3808        // Verify evidence
3809        let ev = &findings[0].evidence;
3810        assert_eq!(ev["command"], "impact");
3811        assert_eq!(ev["caller_count"], 8);
3812        // callers_preview capped at 5
3813        let preview = ev["callers_preview"].as_array().unwrap();
3814        assert_eq!(preview.len(), 5);
3815    }
3816
3817    #[test]
3818    fn test_function_impact_medium_severity() {
3819        let json = serde_json::json!({
3820            "targets": {
3821                "helper_fn": {
3822                    "caller_count": 3,
3823                    "callers": [
3824                        { "file": "a.rs", "function": "foo" },
3825                        { "file": "b.rs", "function": "bar" },
3826                        { "file": "c.rs", "function": "baz" },
3827                    ]
3828                }
3829            }
3830        });
3831        let findings =
3832            TldrDifferentialEngine::parse_impact_findings("helper_fn", &json);
3833        assert_eq!(findings.len(), 1);
3834        assert_eq!(findings[0].severity, "medium");
3835    }
3836
3837    #[test]
3838    fn test_function_impact_info_severity() {
3839        let json = serde_json::json!({
3840            "targets": {
3841                "rare_fn": {
3842                    "caller_count": 1,
3843                    "callers": [
3844                        { "file": "only.rs", "function": "sole_caller" }
3845                    ]
3846                }
3847            }
3848        });
3849        let findings =
3850            TldrDifferentialEngine::parse_impact_findings("rare_fn", &json);
3851        assert_eq!(findings.len(), 1);
3852        assert_eq!(findings[0].severity, "info");
3853    }
3854
3855    #[test]
3856    fn test_function_impact_no_callers() {
3857        let json = serde_json::json!({
3858            "targets": {
3859                "leaf_fn": {
3860                    "caller_count": 0,
3861                    "callers": []
3862                }
3863            }
3864        });
3865        let findings =
3866            TldrDifferentialEngine::parse_impact_findings("leaf_fn", &json);
3867        assert!(
3868            findings.is_empty(),
3869            "Function with zero callers should produce no findings"
3870        );
3871    }
3872
3873    #[test]
3874    fn test_function_impact_missing_target() {
3875        // Function name not found in targets -- should produce no findings
3876        let json = serde_json::json!({
3877            "targets": {
3878                "other_fn": {
3879                    "caller_count": 5,
3880                    "callers": []
3881                }
3882            }
3883        });
3884        let findings =
3885            TldrDifferentialEngine::parse_impact_findings("missing_fn", &json);
3886        assert!(
3887            findings.is_empty(),
3888            "Missing target key should produce no findings"
3889        );
3890    }
3891
3892    #[test]
3893    fn test_function_impact_fallback_top_level() {
3894        // When caller data is at top level (no "targets" wrapper)
3895        let json = serde_json::json!({
3896            "caller_count": 4,
3897            "callers": [
3898                { "file": "x.rs", "function": "a" },
3899                { "file": "y.rs", "function": "b" },
3900                { "file": "z.rs", "function": "c" },
3901                { "file": "w.rs", "function": "d" },
3902            ]
3903        });
3904        let findings =
3905            TldrDifferentialEngine::parse_impact_findings("any_fn", &json);
3906        assert_eq!(findings.len(), 1);
3907        assert_eq!(findings[0].severity, "medium");
3908        assert_eq!(findings[0].evidence["caller_count"], 4);
3909    }
3910
3911    #[test]
3912    fn test_function_impact_boundary_caller_2() {
3913        // caller_count == 2 is >= 2, so severity should be "medium"
3914        let json = serde_json::json!({
3915            "targets": {
3916                "boundary_fn": {
3917                    "caller_count": 2,
3918                    "callers": [
3919                        { "file": "a.rs", "function": "x" },
3920                        { "file": "b.rs", "function": "y" },
3921                    ]
3922                }
3923            }
3924        });
3925        let findings =
3926            TldrDifferentialEngine::parse_impact_findings("boundary_fn", &json);
3927        assert_eq!(findings.len(), 1);
3928        assert_eq!(findings[0].severity, "medium");
3929    }
3930
3931    #[test]
3932    fn test_function_impact_boundary_caller_5() {
3933        // caller_count == 5 is NOT > 5, so severity should be "medium"
3934        let json = serde_json::json!({
3935            "targets": {
3936                "five_fn": {
3937                    "caller_count": 5,
3938                    "callers": []
3939                }
3940            }
3941        });
3942        let findings =
3943            TldrDifferentialEngine::parse_impact_findings("five_fn", &json);
3944        assert_eq!(findings.len(), 1);
3945        assert_eq!(findings[0].severity, "medium");
3946    }
3947
3948    #[test]
3949    fn test_function_impact_boundary_caller_6() {
3950        // caller_count == 6 is > 5, so severity should be "high"
3951        let json = serde_json::json!({
3952            "targets": {
3953                "six_fn": {
3954                    "caller_count": 6,
3955                    "callers": []
3956                }
3957            }
3958        });
3959        let findings =
3960            TldrDifferentialEngine::parse_impact_findings("six_fn", &json);
3961        assert_eq!(findings.len(), 1);
3962        assert_eq!(findings[0].severity, "high");
3963    }
3964
3965    #[test]
3966    fn test_downstream_impact_finding_id_deterministic() {
3967        // Same inputs should produce the same finding_id
3968        let json = serde_json::json!({
3969            "summary": {
3970                "importer_count": 5,
3971                "direct_caller_count": 2,
3972                "affected_test_count": 1
3973            }
3974        });
3975        let file = PathBuf::from("src/stable.rs");
3976        let findings1 = TldrDifferentialEngine::parse_whatbreaks_findings(&file, &json);
3977        let findings2 = TldrDifferentialEngine::parse_whatbreaks_findings(&file, &json);
3978        assert_eq!(findings1[0].finding_id, findings2[0].finding_id);
3979    }
3980
3981    #[test]
3982    fn test_function_impact_finding_id_deterministic() {
3983        let json = serde_json::json!({
3984            "targets": {
3985                "stable_fn": {
3986                    "caller_count": 3,
3987                    "callers": []
3988                }
3989            }
3990        });
3991        let findings1 =
3992            TldrDifferentialEngine::parse_impact_findings("stable_fn", &json);
3993        let findings2 =
3994            TldrDifferentialEngine::parse_impact_findings("stable_fn", &json);
3995        assert_eq!(findings1[0].finding_id, findings2[0].finding_id);
3996    }
3997
3998    // =========================================================================
3999    // build_reverse_caller_map tests
4000    // =========================================================================
4001
4002    #[test]
4003    fn test_build_reverse_caller_map_basic() {
4004        // Two edges pointing to same dst_func "bar"
4005        // Expected: map has 1 key "bar" with 2 callers
4006        let json = serde_json::json!({
4007            "edges": [
4008                { "src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct" },
4009                { "src_file": "c.rs", "src_func": "baz", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct" }
4010            ]
4011        });
4012        let map = TldrDifferentialEngine::build_reverse_caller_map(&json);
4013        assert_eq!(map.len(), 1);
4014        assert_eq!(map["bar"].len(), 2);
4015        assert!(map["bar"].contains(&("a.rs".to_string(), "foo".to_string())));
4016        assert!(map["bar"].contains(&("c.rs".to_string(), "baz".to_string())));
4017    }
4018
4019    #[test]
4020    fn test_build_reverse_caller_map_multiple_targets() {
4021        // Edges to different dst_funcs
4022        let json = serde_json::json!({
4023            "edges": [
4024                { "src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct" },
4025                { "src_file": "c.rs", "src_func": "baz", "dst_file": "d.rs", "dst_func": "qux", "call_type": "direct" }
4026            ]
4027        });
4028        let map = TldrDifferentialEngine::build_reverse_caller_map(&json);
4029        assert_eq!(map.len(), 2);
4030        assert_eq!(map["bar"].len(), 1);
4031        assert_eq!(map["qux"].len(), 1);
4032    }
4033
4034    #[test]
4035    fn test_build_reverse_caller_map_empty_edges() {
4036        let json = serde_json::json!({ "edges": [] });
4037        let map = TldrDifferentialEngine::build_reverse_caller_map(&json);
4038        assert!(map.is_empty());
4039    }
4040
4041    #[test]
4042    fn test_build_reverse_caller_map_no_edges_key() {
4043        let json = serde_json::json!({ "nodes": [] });
4044        let map = TldrDifferentialEngine::build_reverse_caller_map(&json);
4045        assert!(map.is_empty());
4046    }
4047
4048    #[test]
4049    fn test_build_reverse_caller_map_malformed_edges_skipped() {
4050        // Edges missing required fields should be skipped
4051        let json = serde_json::json!({
4052            "edges": [
4053                { "src_file": "a.rs", "src_func": "foo" },
4054                { "src_func": "bar", "dst_func": "baz" },
4055                { "src_file": "valid.rs", "src_func": "caller", "dst_file": "t.rs", "dst_func": "target", "call_type": "direct" }
4056            ]
4057        });
4058        let map = TldrDifferentialEngine::build_reverse_caller_map(&json);
4059        // Only the valid edge should be in the map
4060        assert_eq!(map.len(), 1);
4061        assert_eq!(map["target"].len(), 1);
4062    }
4063
4064    // =========================================================================
4065    // parse_impact_findings_from_callgraph tests
4066    // =========================================================================
4067
4068    #[test]
4069    fn test_parse_impact_from_callgraph_high_severity() {
4070        // >5 callers = high severity
4071        let callers = vec![
4072            ("main.rs".to_string(), "run".to_string()),
4073            ("handler.rs".to_string(), "handle".to_string()),
4074            ("api.rs".to_string(), "endpoint".to_string()),
4075            ("worker.rs".to_string(), "execute".to_string()),
4076            ("batch.rs".to_string(), "process_all".to_string()),
4077            ("scheduler.rs".to_string(), "schedule".to_string()),
4078        ];
4079        let findings =
4080            TldrDifferentialEngine::parse_impact_findings_from_callgraph("process_data", &callers);
4081
4082        assert_eq!(findings.len(), 1);
4083        assert_eq!(findings[0].finding_type, "breaking-change-risk");
4084        assert_eq!(findings[0].severity, "high");
4085        assert_eq!(findings[0].evidence["caller_count"], 6);
4086        assert_eq!(findings[0].evidence["command"], "calls");
4087        assert!(findings[0].message.contains("process_data"));
4088        assert!(findings[0].message.contains("6 callers"));
4089        // Callers preview capped at 5
4090        let preview = findings[0].evidence["callers_preview"].as_array().unwrap();
4091        assert_eq!(preview.len(), 5);
4092    }
4093
4094    #[test]
4095    fn test_parse_impact_from_callgraph_medium_severity() {
4096        // 2-5 callers = medium severity
4097        let callers = vec![
4098            ("a.rs".to_string(), "foo".to_string()),
4099            ("b.rs".to_string(), "bar".to_string()),
4100            ("c.rs".to_string(), "baz".to_string()),
4101        ];
4102        let findings =
4103            TldrDifferentialEngine::parse_impact_findings_from_callgraph("helper", &callers);
4104
4105        assert_eq!(findings.len(), 1);
4106        assert_eq!(findings[0].severity, "medium");
4107        assert_eq!(findings[0].evidence["caller_count"], 3);
4108    }
4109
4110    #[test]
4111    fn test_parse_impact_from_callgraph_info_severity() {
4112        // 1 caller = info severity
4113        let callers = vec![("main.rs".to_string(), "run".to_string())];
4114        let findings =
4115            TldrDifferentialEngine::parse_impact_findings_from_callgraph("private_fn", &callers);
4116
4117        assert_eq!(findings.len(), 1);
4118        assert_eq!(findings[0].severity, "info");
4119        assert_eq!(findings[0].evidence["caller_count"], 1);
4120    }
4121
4122    #[test]
4123    fn test_parse_impact_from_callgraph_no_callers() {
4124        // 0 callers = no finding
4125        let callers: Vec<(String, String)> = vec![];
4126        let findings =
4127            TldrDifferentialEngine::parse_impact_findings_from_callgraph("unused_fn", &callers);
4128        assert!(findings.is_empty());
4129    }
4130
4131    #[test]
4132    fn test_parse_impact_from_callgraph_callers_preview_format() {
4133        // Preview format should be "file::func"
4134        let callers = vec![
4135            ("main.rs".to_string(), "run".to_string()),
4136            ("handler.rs".to_string(), "handle".to_string()),
4137        ];
4138        let findings =
4139            TldrDifferentialEngine::parse_impact_findings_from_callgraph("target", &callers);
4140
4141        let preview = findings[0].evidence["callers_preview"].as_array().unwrap();
4142        assert_eq!(preview[0], "main.rs::run");
4143        assert_eq!(preview[1], "handler.rs::handle");
4144    }
4145
4146    #[test]
4147    fn test_parse_impact_from_callgraph_finding_fields() {
4148        // Verify all finding fields match expected values
4149        let callers = vec![
4150            ("src.rs".to_string(), "caller".to_string()),
4151            ("other.rs".to_string(), "other_caller".to_string()),
4152        ];
4153        let findings =
4154            TldrDifferentialEngine::parse_impact_findings_from_callgraph("my_func", &callers);
4155
4156        assert_eq!(findings[0].finding_type, "breaking-change-risk");
4157        assert_eq!(findings[0].file, PathBuf::from("(project)"));
4158        assert_eq!(findings[0].function, "my_func");
4159        assert_eq!(findings[0].line, 0);
4160        assert_eq!(findings[0].confidence, Some("DETERMINISTIC".to_string()));
4161        assert!(findings[0].finding_id.is_some());
4162    }
4163
4164    #[test]
4165    fn test_parse_impact_from_callgraph_boundary_5_callers() {
4166        // Exactly 5 callers = medium (not high, which requires >5)
4167        let callers: Vec<(String, String)> = (0..5)
4168            .map(|i| (format!("f{}.rs", i), format!("fn{}", i)))
4169            .collect();
4170        let findings =
4171            TldrDifferentialEngine::parse_impact_findings_from_callgraph("boundary_fn", &callers);
4172
4173        assert_eq!(findings[0].severity, "medium");
4174        // Preview should include all 5 (cap is 5)
4175        let preview = findings[0].evidence["callers_preview"].as_array().unwrap();
4176        assert_eq!(preview.len(), 5);
4177    }
4178
4179    #[test]
4180    fn test_parse_impact_from_callgraph_boundary_2_callers() {
4181        // Exactly 2 callers = medium (>= 2)
4182        let callers = vec![
4183            ("a.rs".to_string(), "fa".to_string()),
4184            ("b.rs".to_string(), "fb".to_string()),
4185        ];
4186        let findings =
4187            TldrDifferentialEngine::parse_impact_findings_from_callgraph("edge_fn", &callers);
4188        assert_eq!(findings[0].severity, "medium");
4189    }
4190
4191    // =========================================================================
4192    // Derivation function tests (flow cache refactoring)
4193    // =========================================================================
4194
4195    // --- derive_deps_from_calls ---
4196
4197    #[test]
4198    fn test_bugbot_derive_deps_basic() {
4199        // One cross-file edge → one dependency
4200        let calls = serde_json::json!({
4201            "edges": [
4202                {"src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct"}
4203            ]
4204        });
4205        let deps = TldrDifferentialEngine::derive_deps_from_calls(&calls);
4206        let internal = deps["internal_dependencies"].as_object().unwrap();
4207        assert!(internal.contains_key("a.rs"));
4208        let a_deps = internal["a.rs"].as_array().unwrap();
4209        assert_eq!(a_deps.len(), 1);
4210        assert!(a_deps.iter().any(|v| v.as_str() == Some("b.rs")));
4211        assert_eq!(deps["stats"]["total_internal_deps"].as_u64().unwrap(), 1);
4212    }
4213
4214    #[test]
4215    fn test_bugbot_derive_deps_intra_file_excluded() {
4216        // Same-file edge should NOT produce a dependency
4217        let calls = serde_json::json!({
4218            "edges": [
4219                {"src_file": "a.rs", "src_func": "foo", "dst_file": "a.rs", "dst_func": "bar", "call_type": "direct"}
4220            ]
4221        });
4222        let deps = TldrDifferentialEngine::derive_deps_from_calls(&calls);
4223        let internal = deps["internal_dependencies"].as_object().unwrap();
4224        assert!(internal.is_empty() || internal.values().all(|v| v.as_array().unwrap().is_empty()));
4225        assert_eq!(deps["stats"]["total_internal_deps"].as_u64().unwrap(), 0);
4226    }
4227
4228    #[test]
4229    fn test_bugbot_derive_deps_deduplication() {
4230        // Two edges between same files → only one dependency entry
4231        let calls = serde_json::json!({
4232            "edges": [
4233                {"src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct"},
4234                {"src_file": "a.rs", "src_func": "baz", "dst_file": "b.rs", "dst_func": "qux", "call_type": "direct"}
4235            ]
4236        });
4237        let deps = TldrDifferentialEngine::derive_deps_from_calls(&calls);
4238        let a_deps = deps["internal_dependencies"]["a.rs"].as_array().unwrap();
4239        assert_eq!(a_deps.len(), 1);
4240        assert_eq!(deps["stats"]["total_internal_deps"].as_u64().unwrap(), 1);
4241    }
4242
4243    #[test]
4244    fn test_bugbot_derive_deps_circular_detection() {
4245        // a.rs → b.rs → a.rs forms a cycle
4246        let calls = serde_json::json!({
4247            "edges": [
4248                {"src_file": "a.rs", "src_func": "f1", "dst_file": "b.rs", "dst_func": "f2", "call_type": "direct"},
4249                {"src_file": "b.rs", "src_func": "f2", "dst_file": "a.rs", "dst_func": "f3", "call_type": "direct"}
4250            ]
4251        });
4252        let deps = TldrDifferentialEngine::derive_deps_from_calls(&calls);
4253        let circular = deps["circular_dependencies"].as_array().unwrap();
4254        assert!(!circular.is_empty(), "should detect circular dependency between a.rs and b.rs");
4255        // The cycle path should mention both files
4256        let path = circular[0]["path"].as_array().unwrap();
4257        let path_strs: Vec<&str> = path.iter().map(|v| v.as_str().unwrap()).collect();
4258        assert!(path_strs.contains(&"a.rs"));
4259        assert!(path_strs.contains(&"b.rs"));
4260    }
4261
4262    #[test]
4263    fn test_bugbot_derive_deps_empty_edges() {
4264        let calls = serde_json::json!({ "edges": [] });
4265        let deps = TldrDifferentialEngine::derive_deps_from_calls(&calls);
4266        let internal = deps["internal_dependencies"].as_object().unwrap();
4267        assert!(internal.is_empty());
4268        let circular = deps["circular_dependencies"].as_array().unwrap();
4269        assert!(circular.is_empty());
4270        assert_eq!(deps["stats"]["total_internal_deps"].as_u64().unwrap(), 0);
4271    }
4272
4273    #[test]
4274    fn test_bugbot_derive_deps_no_edges_key() {
4275        // Graceful handling when edges key is missing
4276        let calls = serde_json::json!({ "nodes": ["a.rs:foo"] });
4277        let deps = TldrDifferentialEngine::derive_deps_from_calls(&calls);
4278        assert_eq!(deps["stats"]["total_internal_deps"].as_u64().unwrap(), 0);
4279    }
4280
4281    // --- derive_coupling_from_calls ---
4282
4283    #[test]
4284    fn test_bugbot_derive_coupling_basic() {
4285        // a.rs→b.rs and c.rs→b.rs: b.rs has Ca=2, Ce=0
4286        let calls = serde_json::json!({
4287            "edges": [
4288                {"src_file": "a.rs", "src_func": "f1", "dst_file": "b.rs", "dst_func": "g1", "call_type": "direct"},
4289                {"src_file": "c.rs", "src_func": "f2", "dst_file": "b.rs", "dst_func": "g2", "call_type": "direct"}
4290            ]
4291        });
4292        let coupling = TldrDifferentialEngine::derive_coupling_from_calls(&calls);
4293        let metrics = coupling["martin_metrics"].as_array().unwrap();
4294
4295        // Find b.rs entry
4296        let b_metric = metrics.iter().find(|m| m["module"].as_str() == Some("b.rs")).unwrap();
4297        assert_eq!(b_metric["ca"].as_u64().unwrap(), 2);
4298        assert_eq!(b_metric["ce"].as_u64().unwrap(), 0);
4299        assert!((b_metric["instability"].as_f64().unwrap() - 0.0).abs() < 0.01);
4300
4301        // a.rs: Ca=0, Ce=1, instability=1.0
4302        let a_metric = metrics.iter().find(|m| m["module"].as_str() == Some("a.rs")).unwrap();
4303        assert_eq!(a_metric["ca"].as_u64().unwrap(), 0);
4304        assert_eq!(a_metric["ce"].as_u64().unwrap(), 1);
4305        assert!((a_metric["instability"].as_f64().unwrap() - 1.0).abs() < 0.01);
4306    }
4307
4308    #[test]
4309    fn test_bugbot_derive_coupling_bidirectional() {
4310        // a.rs↔b.rs: both have Ca=1, Ce=1, instability=0.5
4311        let calls = serde_json::json!({
4312            "edges": [
4313                {"src_file": "a.rs", "src_func": "f1", "dst_file": "b.rs", "dst_func": "g1", "call_type": "direct"},
4314                {"src_file": "b.rs", "src_func": "g2", "dst_file": "a.rs", "dst_func": "f2", "call_type": "direct"}
4315            ]
4316        });
4317        let coupling = TldrDifferentialEngine::derive_coupling_from_calls(&calls);
4318        let metrics = coupling["martin_metrics"].as_array().unwrap();
4319
4320        for module_name in &["a.rs", "b.rs"] {
4321            let m = metrics.iter().find(|m| m["module"].as_str() == Some(*module_name))
4322                .unwrap_or_else(|| panic!("missing metric for {}", module_name));
4323            assert_eq!(m["ca"].as_u64().unwrap(), 1, "{} Ca should be 1", module_name);
4324            assert_eq!(m["ce"].as_u64().unwrap(), 1, "{} Ce should be 1", module_name);
4325            assert!((m["instability"].as_f64().unwrap() - 0.5).abs() < 0.01,
4326                "{} instability should be 0.5", module_name);
4327        }
4328    }
4329
4330    #[test]
4331    fn test_bugbot_derive_coupling_self_calls_excluded() {
4332        // Self-call should not contribute to coupling
4333        let calls = serde_json::json!({
4334            "edges": [
4335                {"src_file": "a.rs", "src_func": "f1", "dst_file": "a.rs", "dst_func": "f2", "call_type": "direct"}
4336            ]
4337        });
4338        let coupling = TldrDifferentialEngine::derive_coupling_from_calls(&calls);
4339        let metrics = coupling["martin_metrics"].as_array().unwrap();
4340        // Either empty or a.rs with Ca=0, Ce=0
4341        if !metrics.is_empty() {
4342            let a = metrics.iter().find(|m| m["module"].as_str() == Some("a.rs"));
4343            if let Some(a_metric) = a {
4344                assert_eq!(a_metric["ca"].as_u64().unwrap(), 0);
4345                assert_eq!(a_metric["ce"].as_u64().unwrap(), 0);
4346            }
4347        }
4348    }
4349
4350    #[test]
4351    fn test_bugbot_derive_coupling_empty() {
4352        let calls = serde_json::json!({ "edges": [] });
4353        let coupling = TldrDifferentialEngine::derive_coupling_from_calls(&calls);
4354        let metrics = coupling["martin_metrics"].as_array().unwrap();
4355        assert!(metrics.is_empty());
4356    }
4357
4358    // --- derive_downstream_from_calls ---
4359
4360    #[test]
4361    fn test_bugbot_derive_downstream_basic() {
4362        let calls = serde_json::json!({
4363            "edges": [
4364                {"src_file": "main.rs", "src_func": "run", "dst_file": "lib.rs", "dst_func": "process", "call_type": "direct"}
4365            ]
4366        });
4367        let results = TldrDifferentialEngine::derive_downstream_from_calls(&calls, &["lib.rs"]);
4368        assert_eq!(results.len(), 1);
4369        let (file, metrics) = &results[0];
4370        assert_eq!(file, "lib.rs");
4371        assert_eq!(metrics["importer_count"].as_u64().unwrap(), 1);
4372        assert_eq!(metrics["direct_caller_count"].as_u64().unwrap(), 1);
4373    }
4374
4375    #[test]
4376    fn test_bugbot_derive_downstream_multiple_importers() {
4377        let calls = serde_json::json!({
4378            "edges": [
4379                {"src_file": "a.rs", "src_func": "f1", "dst_file": "lib.rs", "dst_func": "process", "call_type": "direct"},
4380                {"src_file": "b.rs", "src_func": "f2", "dst_file": "lib.rs", "dst_func": "process", "call_type": "direct"},
4381                {"src_file": "c.rs", "src_func": "f3", "dst_file": "lib.rs", "dst_func": "init", "call_type": "direct"}
4382            ]
4383        });
4384        let results = TldrDifferentialEngine::derive_downstream_from_calls(&calls, &["lib.rs"]);
4385        let (_, metrics) = &results[0];
4386        assert_eq!(metrics["importer_count"].as_u64().unwrap(), 3);
4387        assert_eq!(metrics["direct_caller_count"].as_u64().unwrap(), 3);
4388    }
4389
4390    #[test]
4391    fn test_bugbot_derive_downstream_no_callers() {
4392        // No edges point to the changed file
4393        let calls = serde_json::json!({
4394            "edges": [
4395                {"src_file": "a.rs", "src_func": "f1", "dst_file": "b.rs", "dst_func": "g1", "call_type": "direct"}
4396            ]
4397        });
4398        let results = TldrDifferentialEngine::derive_downstream_from_calls(&calls, &["lib.rs"]);
4399        assert_eq!(results.len(), 1);
4400        let (_, metrics) = &results[0];
4401        assert_eq!(metrics["importer_count"].as_u64().unwrap(), 0);
4402        assert_eq!(metrics["direct_caller_count"].as_u64().unwrap(), 0);
4403    }
4404
4405    #[test]
4406    fn test_bugbot_derive_downstream_test_heuristic() {
4407        // Caller from a test file should be counted in affected_test_count
4408        let calls = serde_json::json!({
4409            "edges": [
4410                {"src_file": "tests/test_lib.rs", "src_func": "test_process", "dst_file": "lib.rs", "dst_func": "process", "call_type": "direct"},
4411                {"src_file": "main.rs", "src_func": "run", "dst_file": "lib.rs", "dst_func": "process", "call_type": "direct"}
4412            ]
4413        });
4414        let results = TldrDifferentialEngine::derive_downstream_from_calls(&calls, &["lib.rs"]);
4415        let (_, metrics) = &results[0];
4416        assert!(metrics["affected_test_count"].as_u64().unwrap() >= 1,
4417            "test callers should be detected via path/name heuristic");
4418        assert_eq!(metrics["importer_count"].as_u64().unwrap(), 2);
4419    }
4420
4421    #[test]
4422    fn test_bugbot_derive_downstream_self_calls_excluded() {
4423        // Edges from the same file should not count as importers
4424        let calls = serde_json::json!({
4425            "edges": [
4426                {"src_file": "lib.rs", "src_func": "helper", "dst_file": "lib.rs", "dst_func": "process", "call_type": "direct"},
4427                {"src_file": "main.rs", "src_func": "run", "dst_file": "lib.rs", "dst_func": "process", "call_type": "direct"}
4428            ]
4429        });
4430        let results = TldrDifferentialEngine::derive_downstream_from_calls(&calls, &["lib.rs"]);
4431        let (_, metrics) = &results[0];
4432        assert_eq!(metrics["importer_count"].as_u64().unwrap(), 1, "self-calls should be excluded");
4433    }
4434
4435    #[test]
4436    fn test_bugbot_derive_downstream_same_importer_multiple_calls() {
4437        // Same importer calling multiple functions should count as 1 importer
4438        let calls = serde_json::json!({
4439            "edges": [
4440                {"src_file": "main.rs", "src_func": "run", "dst_file": "lib.rs", "dst_func": "init", "call_type": "direct"},
4441                {"src_file": "main.rs", "src_func": "run", "dst_file": "lib.rs", "dst_func": "process", "call_type": "direct"},
4442                {"src_file": "main.rs", "src_func": "shutdown", "dst_file": "lib.rs", "dst_func": "cleanup", "call_type": "direct"}
4443            ]
4444        });
4445        let results = TldrDifferentialEngine::derive_downstream_from_calls(&calls, &["lib.rs"]);
4446        let (_, metrics) = &results[0];
4447        assert_eq!(metrics["importer_count"].as_u64().unwrap(), 1, "3 edges from same file = 1 importer");
4448        assert_eq!(metrics["direct_caller_count"].as_u64().unwrap(), 1);
4449    }
4450
4451    // =========================================================================
4452    // Calls JSON caching: rewired signatures accept cached calls
4453    // =========================================================================
4454
4455    #[test]
4456    fn test_analyze_flow_commands_accepts_cached_calls_json() {
4457        // analyze_flow_commands should accept an optional current_calls_json
4458        // parameter. When None, it falls back to running the subprocess.
4459        let engine = TldrDifferentialEngine::new();
4460        let mut partial_reasons = Vec::new();
4461        let _findings = engine.analyze_flow_commands(
4462            Path::new("/tmp/nonexistent-project-for-cache-test"),
4463            "HEAD",
4464            "rust",
4465            None, // no cached calls — fallback behavior
4466            &mut partial_reasons,
4467        );
4468        // Should not panic
4469    }
4470
4471    #[test]
4472    fn test_analyze_flow_commands_uses_cached_calls_for_deps() {
4473        // When current_calls_json is Some, analyze_flow_commands should derive
4474        // deps from it instead of running `tldr deps` subprocess.
4475        let engine = TldrDifferentialEngine::new();
4476        let mut partial_reasons = Vec::new();
4477        let calls_json = serde_json::json!({
4478            "edges": [
4479                {"src_file": "a.rs", "src_func": "foo", "dst_file": "b.rs", "dst_func": "bar", "call_type": "direct"}
4480            ]
4481        });
4482        // With cached calls, the method should not need to run tldr deps subprocess.
4483        // On a nonexistent project, the worktree will fail, so we won't get findings,
4484        // but the important thing is it doesn't panic and accepts the parameter.
4485        let _findings = engine.analyze_flow_commands(
4486            Path::new("/tmp/nonexistent-project-for-cache-test"),
4487            "HEAD",
4488            "rust",
4489            Some(&calls_json),
4490            &mut partial_reasons,
4491        );
4492    }
4493
4494    #[test]
4495    fn test_analyze_downstream_impact_accepts_cached_calls_json() {
4496        // analyze_downstream_impact should accept an optional current_calls_json.
4497        // When Some, it derives downstream impact from the calls JSON instead
4498        // of running tldr whatbreaks per file.
4499        let engine = TldrDifferentialEngine::new();
4500        let mut partial_reasons = Vec::new();
4501        let calls_json = serde_json::json!({
4502            "edges": [
4503                {"src_file": "main.rs", "src_func": "run", "dst_file": "lib.rs", "dst_func": "process", "call_type": "direct"},
4504                {"src_file": "tests/test_lib.rs", "src_func": "test_it", "dst_file": "lib.rs", "dst_func": "process", "call_type": "direct"}
4505            ]
4506        });
4507
4508        let project = Path::new("/tmp/nonexistent-downstream-test");
4509        let changed_files = vec![project.join("lib.rs")];
4510        let findings = engine.analyze_downstream_impact(
4511            project,
4512            &changed_files,
4513            "rust",
4514            Some(&calls_json),
4515            &mut partial_reasons,
4516        );
4517
4518        // With 2 cross-file edges into lib.rs, should produce a downstream-impact finding
4519        assert!(!findings.is_empty(), "cached calls should produce downstream findings");
4520        assert_eq!(findings[0].finding_type, "downstream-impact");
4521    }
4522
4523    #[test]
4524    fn test_analyze_downstream_impact_none_falls_back() {
4525        // When current_calls_json is None, analyze_downstream_impact should
4526        // fall back to running tldr whatbreaks subprocess (which will fail
4527        // gracefully on nonexistent paths).
4528        let engine = TldrDifferentialEngine::new();
4529        let mut partial_reasons = Vec::new();
4530        let project = Path::new("/tmp/nonexistent-downstream-fallback");
4531        let changed_files = vec![project.join("lib.rs")];
4532        let _findings = engine.analyze_downstream_impact(
4533            project,
4534            &changed_files,
4535            "rust",
4536            None,
4537            &mut partial_reasons,
4538        );
4539        // Should not panic — graceful fallback
4540    }
4541
4542    #[test]
4543    fn test_analyze_function_impact_accepts_cached_calls_json() {
4544        // analyze_function_impact should accept an optional current_calls_json.
4545        // When Some, it reuses the cached JSON instead of running tldr calls.
4546        let engine = TldrDifferentialEngine::new();
4547        let mut partial_reasons = Vec::new();
4548        let calls_json = serde_json::json!({
4549            "edges": [
4550                {"src_file": "caller.rs", "src_func": "caller_fn", "dst_file": "lib.rs", "dst_func": "target_fn", "call_type": "direct"}
4551            ]
4552        });
4553        let project = Path::new("/tmp/nonexistent-function-impact-test");
4554        let changed_files = vec![project.join("lib.rs")];
4555        let _findings = engine.analyze_function_impact(
4556            project,
4557            &changed_files,
4558            "rust",
4559            Some(&calls_json),
4560            &mut partial_reasons,
4561        );
4562        // Should not panic and should accept the parameter
4563    }
4564
4565    #[test]
4566    fn test_analyze_function_impact_none_falls_back() {
4567        // When current_calls_json is None, falls back to subprocess
4568        let engine = TldrDifferentialEngine::new();
4569        let mut partial_reasons = Vec::new();
4570        let project = Path::new("/tmp/nonexistent-function-impact-fallback");
4571        let changed_files = vec![project.join("lib.rs")];
4572        let _findings = engine.analyze_function_impact(
4573            project,
4574            &changed_files,
4575            "rust",
4576            None,
4577            &mut partial_reasons,
4578        );
4579        // Should not panic — graceful fallback to subprocess
4580    }
4581
4582    #[test]
4583    fn test_analyze_downstream_with_cached_calls_produces_correct_findings() {
4584        // When using cached calls, the downstream findings should match
4585        // what derive_downstream_from_calls produces fed through parse_whatbreaks_findings.
4586        let engine = TldrDifferentialEngine::new();
4587        let mut partial_reasons = Vec::new();
4588        let calls_json = serde_json::json!({
4589            "edges": [
4590                {"src_file": "a.rs", "src_func": "f1", "dst_file": "target.rs", "dst_func": "process", "call_type": "direct"},
4591                {"src_file": "b.rs", "src_func": "f2", "dst_file": "target.rs", "dst_func": "init", "call_type": "direct"},
4592                {"src_file": "c.rs", "src_func": "f3", "dst_file": "target.rs", "dst_func": "run", "call_type": "direct"},
4593                {"src_file": "d.rs", "src_func": "f4", "dst_file": "target.rs", "dst_func": "cleanup", "call_type": "direct"},
4594            ]
4595        });
4596
4597        let project = Path::new("/tmp/nonexistent-downstream-correct");
4598        let changed_files = vec![project.join("target.rs")];
4599        let findings = engine.analyze_downstream_impact(
4600            project,
4601            &changed_files,
4602            "rust",
4603            Some(&calls_json),
4604            &mut partial_reasons,
4605        );
4606
4607        // 4 importers → medium severity (>3 but <=10)
4608        assert_eq!(findings.len(), 1);
4609        assert_eq!(findings[0].severity, "medium");
4610        assert_eq!(findings[0].finding_type, "downstream-impact");
4611        // Evidence should contain the counts
4612        assert_eq!(findings[0].evidence["importer_count"], 4);
4613    }
4614}
tldr_cli/commands/bugbot/l2/engines/tldr_differential.rs

tldr_cli/commands/bugbot/l2/engines/
tldr_differential.rs