go_brrr/callgraph/
impact.rs

1//! Impact analysis (reverse call graph).
2//!
3//! Analyzes the impact of changing a function by finding all functions
4//! that directly or transitively call it. Uses BFS traversal to track
5//! exact distance from the target function.
6//!
7//! # Depth Semantics
8//!
9//! The `max_depth` parameter controls how many levels of the call hierarchy
10//! are **explored** (matching Python semantics):
11//!
12//! - `max_depth = 0`: NO exploration - returns only direct callers (distance 1), truncated
13//! - `max_depth = 1`: Explore direct callers (distance 1), finding their callers (distance 2)
14//! - `max_depth = 2`: Explore up to distance 2, finding callers up to distance 3
15//! - `max_depth = N`: Explore up to distance N, potentially discovering callers at distance N+1
16//!
17//! This matches Python's recursive `_build_caller_tree` where `depth <= 0` causes
18//! immediate truncation with no further exploration.
19//!
20//! # Usage
21//!
22//! ```ignore
23//! use go_brrr::callgraph::impact::{analyze_impact, ImpactConfig};
24//!
25//! let config = ImpactConfig::new()
26//!     .with_depth(3)  // Explore up to depth 3, find callers up to depth 4
27//!     .exclude_tests();
28//!
29//! let result = analyze_impact(&graph, "process_data", config);
30//! println!("{}", result.to_llm_context());
31//! ```
32
33use std::collections::{HashMap, HashSet, VecDeque};
34
35use serde::{Deserialize, Serialize};
36
37use crate::callgraph::types::{CallGraph, FunctionRef};
38
39/// Configuration for impact analysis.
40#[derive(Debug, Clone)]
41pub struct ImpactConfig {
42    /// Maximum depth to traverse (0 = NO traversal, direct callers only).
43    ///
44    /// This controls how many levels of the call hierarchy are explored:
45    /// - `max_depth = 0`: NO exploration - returns only direct callers (distance 1)
46    /// - `max_depth = 1`: Explore direct callers to find distance 2 callers
47    /// - `max_depth = 2`: Explore up to distance 2, finding callers up to distance 3
48    /// - `max_depth = N`: Explore up to distance N, potentially finding callers at distance N+1
49    ///
50    /// This matches Python semantics where `depth <= 0` causes immediate truncation.
51    /// Note: The depth controls exploration, not the maximum distance in results.
52    pub max_depth: usize,
53    /// Filter by language (e.g., "python", "rust").
54    pub language: Option<String>,
55    /// Glob patterns to include (e.g., "src/**/*.rs").
56    pub include_patterns: Vec<String>,
57    /// Glob patterns to exclude (e.g., "**/test/**").
58    pub exclude_patterns: Vec<String>,
59    /// Whether to exclude test files from results.
60    pub exclude_tests: bool,
61    /// Whether to include call site information.
62    pub include_call_sites: bool,
63    /// If true, each node appears only once (first path wins, BFS behavior).
64    /// If false, all paths are explored with per-branch visited sets (DFS, Python semantics).
65    /// When false, the minimum distance across all paths is recorded for each node.
66    ///
67    /// For diamond-shaped call graphs like:
68    /// ```text
69    ///     A
70    ///    / \
71    ///   B   C
72    ///    \ /
73    ///     D (target)
74    /// ```
75    /// - `deduplicate_paths = true`: A appears once at distance 2 (via first explored path)
76    /// - `deduplicate_paths = false`: All paths explored, A at minimum distance 2 with aggregated call sites
77    pub deduplicate_paths: bool,
78}
79
80impl Default for ImpactConfig {
81    fn default() -> Self {
82        Self {
83            max_depth: 0,
84            language: None,
85            include_patterns: Vec::new(),
86            exclude_patterns: Vec::new(),
87            exclude_tests: false,
88            include_call_sites: false,
89            deduplicate_paths: true, // Default to current Rust behavior for backward compatibility
90        }
91    }
92}
93
94impl ImpactConfig {
95    /// Create a new default configuration.
96    pub fn new() -> Self {
97        Self::default()
98    }
99
100    /// Set maximum traversal depth.
101    ///
102    /// The depth controls how many levels of callers are explored. With depth N,
103    /// the algorithm explores callers up to distance N from the target, which
104    /// means callers at distance N+1 may be discovered but not explored further.
105    pub fn with_depth(mut self, depth: usize) -> Self {
106        self.max_depth = depth;
107        self
108    }
109
110    /// Filter results by language.
111    // Builder API for library consumers
112    #[allow(dead_code)]
113    pub fn with_language(mut self, lang: &str) -> Self {
114        self.language = Some(lang.to_string());
115        self
116    }
117
118    /// Add file patterns to include.
119    // Builder API for library consumers
120    #[allow(dead_code)]
121    pub fn with_includes(mut self, patterns: &[&str]) -> Self {
122        self.include_patterns = patterns.iter().map(|s| (*s).to_string()).collect();
123        self
124    }
125
126    /// Add file patterns to exclude.
127    // Builder API for library consumers
128    #[allow(dead_code)]
129    pub fn with_excludes(mut self, patterns: &[&str]) -> Self {
130        self.exclude_patterns = patterns.iter().map(|s| (*s).to_string()).collect();
131        self
132    }
133
134    /// Exclude test files from results.
135    // Builder API for library consumers
136    #[allow(dead_code)]
137    pub fn exclude_tests(mut self) -> Self {
138        self.exclude_tests = true;
139        self
140    }
141
142    /// Include call site line numbers in results.
143    pub fn with_call_sites(mut self) -> Self {
144        self.include_call_sites = true;
145        self
146    }
147
148    /// Enable or disable path deduplication.
149    ///
150    /// When enabled (default), each node appears only once using BFS traversal.
151    /// When disabled, all paths are explored using DFS with per-branch visited sets,
152    /// matching Python's `_build_caller_tree` semantics where `visited.copy()` is used.
153    ///
154    /// Use `false` for complete path exploration in diamond-shaped call graphs.
155    // Builder API for library consumers
156    #[allow(dead_code)]
157    pub fn with_deduplicate_paths(mut self, deduplicate: bool) -> Self {
158        self.deduplicate_paths = deduplicate;
159        self
160    }
161
162    /// Disable path deduplication to explore all paths (Python-compatible mode).
163    ///
164    /// This is a convenience method equivalent to `with_deduplicate_paths(false)`.
165    // Builder API for library consumers
166    #[allow(dead_code)]
167    pub fn explore_all_paths(mut self) -> Self {
168        self.deduplicate_paths = false;
169        self
170    }
171}
172
173/// Result of impact analysis.
174#[derive(Debug, Clone, Serialize, Deserialize)]
175pub struct ImpactResult {
176    /// Target function analyzed.
177    pub target: String,
178    /// File containing the target (if resolved to single file).
179    pub target_file: Option<String>,
180    /// Depth of analysis performed.
181    pub depth: usize,
182    /// Functions that directly or indirectly call the target.
183    pub callers: Vec<CallerInfo>,
184    /// Total count of affected functions.
185    pub total_affected: usize,
186    /// Callers grouped by distance level.
187    pub by_distance: HashMap<usize, usize>,
188    /// Callers grouped by file.
189    pub by_file: HashMap<String, usize>,
190}
191
192impl ImpactResult {
193    /// Generate LLM-ready context string.
194    ///
195    /// Produces a structured text format suitable for LLM consumption,
196    /// organized by distance from the target function.
197    // Public API for library consumers
198    #[allow(dead_code)]
199    pub fn to_llm_context(&self) -> String {
200        let mut output = String::with_capacity(4096);
201
202        // Header
203        output.push_str(&format!("# Impact Analysis: {}\n\n", self.target));
204
205        if let Some(ref file) = self.target_file {
206            output.push_str(&format!("Target file: {}\n", file));
207        }
208
209        output.push_str(&format!(
210            "Total affected: {} functions at {} depth levels\n\n",
211            self.total_affected,
212            self.by_distance.len()
213        ));
214
215        // Group callers by distance
216        let mut by_distance: HashMap<usize, Vec<&CallerInfo>> = HashMap::new();
217        for caller in &self.callers {
218            by_distance.entry(caller.distance).or_default().push(caller);
219        }
220
221        // Output by distance level
222        let mut distances: Vec<_> = by_distance.keys().copied().collect();
223        distances.sort();
224
225        for distance in distances {
226            let callers = &by_distance[&distance];
227            output.push_str(&format!(
228                "## Distance {} ({} functions)\n\n",
229                distance,
230                callers.len()
231            ));
232
233            // Group by file within distance level
234            let mut by_file: HashMap<&str, Vec<&CallerInfo>> = HashMap::new();
235            for caller in callers {
236                by_file.entry(&caller.file).or_default().push(caller);
237            }
238
239            let mut files: Vec<_> = by_file.keys().copied().collect();
240            files.sort();
241
242            for file in files {
243                let file_callers = &by_file[file];
244                output.push_str(&format!("### {}\n", file));
245
246                for caller in file_callers {
247                    output.push_str(&format!("- {}", caller.name));
248                    if !caller.call_sites.is_empty() {
249                        let sites: Vec<_> =
250                            caller.call_sites.iter().map(|s| s.to_string()).collect();
251                        output.push_str(&format!(" (lines: {})", sites.join(", ")));
252                    }
253                    output.push('\n');
254                }
255                output.push('\n');
256            }
257        }
258
259        // Summary section
260        output.push_str("## Summary by File\n\n");
261        let mut file_counts: Vec<_> = self.by_file.iter().collect();
262        file_counts.sort_by(|a, b| b.1.cmp(a.1)); // Sort by count descending
263
264        for (file, count) in file_counts.iter().take(10) {
265            output.push_str(&format!("- {}: {} functions\n", file, count));
266        }
267
268        if file_counts.len() > 10 {
269            output.push_str(&format!(
270                "- ... and {} more files\n",
271                file_counts.len() - 10
272            ));
273        }
274
275        output
276    }
277
278    /// Convert to JSON for API consumption.
279    // Public API for library consumers
280    #[allow(dead_code)]
281    pub fn to_json(&self) -> serde_json::Value {
282        serde_json::json!({
283            "target": self.target,
284            "target_file": self.target_file,
285            "depth": self.depth,
286            "total_affected": self.total_affected,
287            "callers": self.callers,
288            "by_distance": self.by_distance,
289            "by_file": self.by_file
290        })
291    }
292}
293
294/// Information about a caller function.
295#[derive(Debug, Clone, Serialize, Deserialize)]
296pub struct CallerInfo {
297    /// File containing the caller.
298    pub file: String,
299    /// Function name.
300    pub name: String,
301    /// Fully qualified name (if available).
302    pub qualified_name: Option<String>,
303    /// Distance from target (1 = direct caller, 2 = caller of caller, etc.).
304    pub distance: usize,
305    /// Line numbers where calls to the target occur.
306    pub call_sites: Vec<usize>,
307}
308
309/// Analyze impact of changing a function.
310///
311/// Finds all functions that directly or indirectly call the target function,
312/// tracking the exact distance (number of call chain hops) from the target.
313///
314/// The traversal strategy depends on `config.deduplicate_paths`:
315/// - `true` (default): BFS with global visited set - each node appears once, first path wins
316/// - `false`: DFS with per-path visited sets - all paths explored, minimum distance recorded
317///
318/// # Arguments
319///
320/// * `graph` - The call graph to analyze
321/// * `target` - Name of the target function (can be unqualified)
322/// * `config` - Configuration for filtering and depth limits
323///
324/// # Returns
325///
326/// An `ImpactResult` containing all callers with their distances and call sites.
327pub fn analyze_impact(graph: &CallGraph, target: &str, config: ImpactConfig) -> ImpactResult {
328    // Build reverse index: callee -> [(caller, call_line)]
329    // The reverse index uses full FunctionRef identity (file + name + qualified_name),
330    // ensuring that functions with the same name in different files are tracked separately.
331    let reverse_index = build_reverse_index(graph);
332
333    // Build name index for O(1) lookup by function name.
334    // This enables efficient target matching without O(E) linear scans.
335    // Complexity: O(V) where V = unique callees, done once before BFS/DFS.
336    let name_index = build_name_index(&reverse_index);
337
338    // Find all functions matching the target name using O(1) name index lookup.
339    // Previous implementation used O(E) scan through all_functions().
340    // Use HashSet for O(1) membership testing (BUG-018 fix: prevents O(n) linear search)
341    let target_matches: HashSet<FunctionRef> = find_matching_targets_indexed(&name_index, target)
342        .into_iter()
343        .cloned()
344        .collect();
345
346    if target_matches.is_empty() {
347        return ImpactResult {
348            target: target.to_string(),
349            target_file: None,
350            depth: 0,
351            callers: Vec::new(),
352            total_affected: 0,
353            by_distance: HashMap::new(),
354            by_file: HashMap::new(),
355        };
356    }
357
358    // Determine target file (if unique)
359    let target_file = if target_matches.len() == 1 {
360        target_matches.iter().next().map(|f| f.file.clone())
361    } else {
362        None
363    };
364
365    // Branch based on deduplication mode
366    let callers_map = if config.deduplicate_paths {
367        // BFS with global visited set (original Rust behavior)
368        analyze_impact_bfs(&reverse_index, &target_matches, &config)
369    } else {
370        // DFS with per-path visited sets (Python-compatible behavior)
371        analyze_impact_dfs(&reverse_index, &target_matches, &config)
372    };
373
374    // Convert to CallerInfo list
375    let mut callers: Vec<CallerInfo> = callers_map
376        .into_iter()
377        .map(|(func, (distance, call_sites))| CallerInfo {
378            file: func.file.clone(),
379            name: func.name.clone(),
380            qualified_name: func.qualified_name.clone(),
381            distance,
382            call_sites,
383        })
384        .collect();
385
386    // Sort by distance, then by file, then by name
387    callers.sort_by(|a, b| {
388        a.distance
389            .cmp(&b.distance)
390            .then_with(|| a.file.cmp(&b.file))
391            .then_with(|| a.name.cmp(&b.name))
392    });
393
394    // Calculate statistics
395    let total_affected = callers.len();
396    let max_distance = callers.iter().map(|c| c.distance).max().unwrap_or(0);
397
398    let mut by_distance: HashMap<usize, usize> = HashMap::new();
399    let mut by_file: HashMap<String, usize> = HashMap::new();
400
401    for caller in &callers {
402        *by_distance.entry(caller.distance).or_insert(0) += 1;
403        *by_file.entry(caller.file.clone()).or_insert(0) += 1;
404    }
405
406    ImpactResult {
407        target: target.to_string(),
408        target_file,
409        depth: max_distance,
410        callers,
411        total_affected,
412        by_distance,
413        by_file,
414    }
415}
416
417/// BFS traversal with global visited set (original Rust behavior).
418///
419/// Each node appears only once - the first path to reach it determines its distance.
420/// This is efficient for large graphs but may miss some path information in diamond
421/// patterns.
422///
423/// Complexity: O(V + E) where V = visited nodes, E = edges traversed.
424/// Uses O(1) HashMap lookups via reverse_index (keyed by full FunctionRef).
425fn analyze_impact_bfs(
426    reverse_index: &ReverseIndex,
427    target_matches: &HashSet<FunctionRef>,
428    config: &ImpactConfig,
429) -> HashMap<FunctionRef, (usize, Vec<usize>)> {
430    let mut visited: HashSet<FunctionRef> = HashSet::new();
431    let mut callers_map: HashMap<FunctionRef, (usize, Vec<usize>)> = HashMap::new();
432    let mut queue: VecDeque<(FunctionRef, usize)> = VecDeque::new();
433
434    // Initialize queue with direct callers of all matching targets
435    for target_ref in target_matches {
436        if let Some(callers) = reverse_index.get(target_ref) {
437            for (caller, call_line) in callers {
438                if !visited.contains(caller) && should_include(caller, config) {
439                    visited.insert(caller.clone());
440                    let entry = callers_map.entry(caller.clone()).or_insert((1, Vec::new()));
441                    if config.include_call_sites {
442                        entry.1.push(*call_line);
443                    }
444                    queue.push_back((caller.clone(), 1));
445                }
446            }
447        }
448    }
449
450    // BFS traversal - max_depth controls exploration depth
451    let max_depth = config.max_depth;
452
453    while let Some((func, distance)) = queue.pop_front() {
454        // Stop exploring beyond max depth
455        if distance > max_depth {
456            continue;
457        }
458
459        if let Some(callers) = reverse_index.get(&func) {
460            for (caller, call_line) in callers {
461                if !visited.contains(caller) && should_include(caller, config) {
462                    visited.insert(caller.clone());
463                    let new_distance = distance + 1;
464                    let entry = callers_map
465                        .entry(caller.clone())
466                        .or_insert((new_distance, Vec::new()));
467                    if config.include_call_sites {
468                        entry.1.push(*call_line);
469                    }
470                    queue.push_back((caller.clone(), new_distance));
471                }
472            }
473        }
474    }
475
476    callers_map
477}
478
479/// DFS traversal with per-path visited sets (Python-compatible behavior).
480///
481/// Explores all paths independently, like Python's `_build_caller_tree` with
482/// `visited.copy()`. Each node can be explored through multiple paths, but
483/// appears once in the output with the minimum distance across all paths.
484/// Call sites are aggregated from all paths.
485///
486/// This matches Python semantics for diamond-shaped call graphs:
487/// ```text
488///     A
489///    / \
490///   B   C
491///    \ /
492///     D (target)
493/// ```
494/// Both paths D->B->A and D->C->A are explored, and A gets the minimum distance.
495///
496/// Complexity: O(V + E) for simple graphs, potentially O(V * paths) for diamond patterns.
497/// Uses O(1) HashMap lookups via reverse_index (keyed by full FunctionRef).
498fn analyze_impact_dfs(
499    reverse_index: &ReverseIndex,
500    target_matches: &HashSet<FunctionRef>,
501    config: &ImpactConfig,
502) -> HashMap<FunctionRef, (usize, Vec<usize>)> {
503    let mut all_callers: HashMap<FunctionRef, (usize, Vec<usize>)> = HashMap::new();
504    let max_depth = config.max_depth;
505
506    /// Recursive DFS helper with per-path visited set.
507    fn dfs_explore(
508        func: &FunctionRef,
509        distance: usize,
510        max_depth: usize,
511        visited: &mut HashSet<FunctionRef>,
512        reverse_index: &HashMap<FunctionRef, Vec<(FunctionRef, usize)>>,
513        all_callers: &mut HashMap<FunctionRef, (usize, Vec<usize>)>,
514        config: &ImpactConfig,
515    ) {
516        // Base case: beyond max exploration depth or cycle in current path
517        if distance > max_depth || visited.contains(func) {
518            return;
519        }
520
521        // Mark as visited in THIS path
522        visited.insert(func.clone());
523
524        // Explore callers of this function
525        if let Some(callers) = reverse_index.get(func) {
526            for (caller, call_line) in callers {
527                if !should_include(caller, config) {
528                    continue;
529                }
530
531                let new_distance = distance + 1;
532
533                // Update or insert caller with minimum distance
534                let entry = all_callers
535                    .entry(caller.clone())
536                    .or_insert((new_distance, Vec::new()));
537
538                // Take minimum distance if seen via multiple paths
539                entry.0 = entry.0.min(new_distance);
540
541                // Aggregate call sites from all paths (avoid duplicates)
542                if config.include_call_sites && !entry.1.contains(call_line) {
543                    entry.1.push(*call_line);
544                }
545
546                // Recurse with CLONED visited set (Python semantics: visited.copy())
547                let mut branch_visited = visited.clone();
548                dfs_explore(
549                    caller,
550                    new_distance,
551                    max_depth,
552                    &mut branch_visited,
553                    reverse_index,
554                    all_callers,
555                    config,
556                );
557            }
558        }
559    }
560
561    // Start DFS from each target's direct callers
562    for target_ref in target_matches {
563        if let Some(callers) = reverse_index.get(target_ref) {
564            for (caller, call_line) in callers {
565                if !should_include(caller, config) {
566                    continue;
567                }
568
569                // Record direct caller (distance 1)
570                let entry = all_callers.entry(caller.clone()).or_insert((1, Vec::new()));
571                entry.0 = entry.0.min(1);
572                if config.include_call_sites && !entry.1.contains(call_line) {
573                    entry.1.push(*call_line);
574                }
575
576                // Start DFS from this direct caller with fresh per-path visited set
577                let mut visited = HashSet::new();
578                visited.insert(target_ref.clone()); // Don't revisit target
579                dfs_explore(
580                    caller,
581                    1,
582                    max_depth,
583                    &mut visited,
584                    reverse_index,
585                    &mut all_callers,
586                    config,
587                );
588            }
589        }
590    }
591
592    all_callers
593}
594
595/// Reverse index: callee -> [(caller, call_line)].
596type ReverseIndex = HashMap<FunctionRef, Vec<(FunctionRef, usize)>>;
597
598/// Name-based index for O(1) lookup of functions by name.
599/// Maps function name -> all FunctionRefs with that name.
600type NameIndex<'a> = HashMap<&'a str, Vec<&'a FunctionRef>>;
601
602/// Build reverse index: callee -> [(caller, call_line)].
603fn build_reverse_index(graph: &CallGraph) -> ReverseIndex {
604    let mut index: ReverseIndex = HashMap::new();
605
606    for edge in &graph.edges {
607        index
608            .entry(edge.callee.clone())
609            .or_default()
610            .push((edge.caller.clone(), edge.call_line));
611    }
612
613    index
614}
615
616/// Build name index for O(1) lookup by function name.
617/// This avoids O(E) linear scans when matching by name during BFS/DFS.
618fn build_name_index(reverse_index: &ReverseIndex) -> NameIndex<'_> {
619    let mut name_index: NameIndex<'_> = HashMap::new();
620
621    for func_ref in reverse_index.keys() {
622        name_index
623            .entry(func_ref.name.as_str())
624            .or_default()
625            .push(func_ref);
626    }
627
628    name_index
629}
630
631/// Find all function refs that match the target name using O(1) name index lookup.
632///
633/// This replaces the previous O(V) scan with O(1) lookup by name, plus O(k) where
634/// k is the number of functions with matching names (typically small).
635fn find_matching_targets_indexed<'a>(
636    name_index: &'a NameIndex<'a>,
637    target: &str,
638) -> Vec<&'a FunctionRef> {
639    let mut matches = Vec::new();
640
641    // O(1) lookup by exact name match
642    if let Some(funcs) = name_index.get(target) {
643        matches.extend(funcs.iter().copied());
644    }
645
646    // Also check for qualified name matches (target might be "Class.method")
647    // This requires checking all entries, but only when target contains '.'
648    if target.contains('.') {
649        for (_name, funcs) in name_index.iter() {
650            for func in funcs.iter() {
651                if let Some(ref qn) = func.qualified_name {
652                    if qn == target || qn.ends_with(&format!(".{}", target)) {
653                        // Avoid duplicates (already matched by name)
654                        if func.name != target {
655                            matches.push(*func);
656                        }
657                    }
658                }
659            }
660        }
661    } else {
662        // Check if any function has qualified_name ending with ".{target}"
663        // This is for cases like searching "method" when qualified_name is "Class.method"
664        for funcs in name_index.values() {
665            for func in funcs.iter() {
666                if func.name != target {
667                    if let Some(ref qn) = func.qualified_name {
668                        if qn.ends_with(&format!(".{}", target)) {
669                            matches.push(*func);
670                        }
671                    }
672                }
673            }
674        }
675    }
676
677    matches
678}
679
680/// Find all function refs that match the target name (legacy O(V) implementation).
681///
682/// Prefer `find_matching_targets_indexed` when a name index is available.
683/// Kept for API compatibility and as fallback when name index is not available.
684#[allow(dead_code)]
685fn find_matching_targets(graph: &CallGraph, target: &str) -> Vec<FunctionRef> {
686    let all_funcs = graph.all_functions();
687
688    all_funcs
689        .iter()
690        .filter(|f| {
691            // Match by name
692            if f.name == target {
693                return true;
694            }
695
696            // Match by qualified name
697            if let Some(ref qn) = f.qualified_name {
698                if qn == target || qn.ends_with(&format!(".{}", target)) {
699                    return true;
700                }
701            }
702
703            false
704        })
705        .cloned()
706        .collect()
707}
708
709/// Check if a function should be included based on config filters.
710fn should_include(func: &FunctionRef, config: &ImpactConfig) -> bool {
711    // Language filter
712    if let Some(ref lang) = config.language {
713        if !matches_language(&func.file, lang) {
714            return false;
715        }
716    }
717
718    // Test file exclusion
719    if config.exclude_tests && is_test_file(&func.file) {
720        return false;
721    }
722
723    // Include patterns (if any, file must match at least one)
724    if !config.include_patterns.is_empty() {
725        let matches_any = config
726            .include_patterns
727            .iter()
728            .any(|p| glob_match(p, &func.file));
729        if !matches_any {
730            return false;
731        }
732    }
733
734    // Exclude patterns
735    for pattern in &config.exclude_patterns {
736        if glob_match(pattern, &func.file) {
737            return false;
738        }
739    }
740
741    true
742}
743
744/// Check if a file path matches a language by extension.
745fn matches_language(file: &str, lang: &str) -> bool {
746    let extensions: &[&str] = match lang {
747        "python" => &[".py", ".pyi"],
748        "typescript" => &[".ts", ".tsx"],
749        "javascript" => &[".js", ".jsx", ".mjs"],
750        "rust" => &[".rs"],
751        "go" => &[".go"],
752        "java" => &[".java"],
753        "c" => &[".c", ".h"],
754        "cpp" => &[".cpp", ".cc", ".cxx", ".hpp", ".hxx"],
755        "csharp" => &[".cs"],
756        "ruby" => &[".rb"],
757        "php" => &[".php"],
758        "swift" => &[".swift"],
759        "kotlin" => &[".kt", ".kts"],
760        "scala" => &[".scala", ".sc"],
761        _ => return true, // Unknown language, don't filter
762    };
763
764    extensions.iter().any(|ext| file.ends_with(ext))
765}
766
767/// Check if a file is likely a test file.
768///
769/// Supports common test file conventions across languages:
770/// - Python: `test_*.py`, `*_test.py`
771/// - Java/Kotlin: `*Test.java`, `Test*.java`, `*Test.kt`, `Test*.kt`
772/// - JavaScript/TypeScript: `*.test.js`, `*.spec.ts`, etc.
773/// - Go: `*_test.go`
774/// - Rust: `*_test.rs`, `tests.rs`
775/// - Ruby: `*_spec.rb`
776fn is_test_file(file: &str) -> bool {
777    let path_lower = file.to_lowercase();
778
779    // Common test directory patterns (check both embedded and prefix positions)
780    if path_lower.contains("/test/")
781        || path_lower.contains("/tests/")
782        || path_lower.contains("/__tests__/")
783        || path_lower.contains("/spec/")
784        || path_lower.contains("/specs/")
785        || path_lower.starts_with("test/")
786        || path_lower.starts_with("tests/")
787        || path_lower.starts_with("__tests__/")
788        || path_lower.starts_with("spec/")
789        || path_lower.starts_with("specs/")
790    {
791        return true;
792    }
793
794    // Extract filename (handle both Unix and Windows paths)
795    let filename = file
796        .rsplit(|c| c == '/' || c == '\\')
797        .next()
798        .unwrap_or(file);
799    let filename_lower = filename.to_lowercase();
800
801    // Python test patterns: test_*.py, *_test.py
802    if filename_lower.starts_with("test_") && filename_lower.ends_with(".py") {
803        return true;
804    }
805    if filename_lower.ends_with("_test.py") {
806        return true;
807    }
808
809    // Java test patterns (case-sensitive for proper conventions)
810    // JUnit convention: UserTest.java, ServiceTest.java
811    if filename.ends_with("Test.java") {
812        return true;
813    }
814    // TestNG convention: TestUser.java, TestService.java
815    // Requires "Test" + uppercase letter to avoid matching "Testing...", "Testable...", etc.
816    if filename.starts_with("Test")
817        && filename.ends_with(".java")
818        && filename.len() > 9
819        && filename.chars().nth(4).map_or(false, |c| c.is_uppercase())
820    {
821        return true;
822    }
823    // Python-style in Java: test_user.java (lowercase check)
824    if filename_lower.starts_with("test_") && filename_lower.ends_with(".java") {
825        return true;
826    }
827
828    // Kotlin test patterns (mirrors Java conventions)
829    if filename.ends_with("Test.kt") {
830        return true;
831    }
832    // TestNG convention for Kotlin: TestUser.kt, TestService.kt
833    if filename.starts_with("Test")
834        && filename.ends_with(".kt")
835        && filename.len() > 7
836        && filename.chars().nth(4).map_or(false, |c| c.is_uppercase())
837    {
838        return true;
839    }
840    if filename_lower.starts_with("test_") && filename_lower.ends_with(".kt") {
841        return true;
842    }
843
844    // JavaScript/TypeScript test patterns
845    if filename_lower.ends_with(".test.js")
846        || filename_lower.ends_with(".test.ts")
847        || filename_lower.ends_with(".test.jsx")
848        || filename_lower.ends_with(".test.tsx")
849        || filename_lower.ends_with(".spec.js")
850        || filename_lower.ends_with(".spec.ts")
851        || filename_lower.ends_with(".spec.jsx")
852        || filename_lower.ends_with(".spec.tsx")
853    {
854        return true;
855    }
856
857    // Go test pattern: *_test.go
858    if filename_lower.ends_with("_test.go") {
859        return true;
860    }
861
862    // Rust test patterns: *_test.rs, tests.rs
863    if filename_lower.ends_with("_test.rs") || filename_lower == "tests.rs" {
864        return true;
865    }
866
867    // Ruby test patterns: *_spec.rb, *_test.rb
868    if filename_lower.ends_with("_spec.rb") || filename_lower.ends_with("_test.rb") {
869        return true;
870    }
871
872    // C# test patterns: *Tests.cs, *Test.cs
873    if filename.ends_with("Test.cs") || filename.ends_with("Tests.cs") {
874        return true;
875    }
876
877    // Generic test_ prefix (catches remaining cases)
878    if filename_lower.starts_with("test_") {
879        return true;
880    }
881
882    false
883}
884
885/// Simple glob matching for file patterns.
886///
887/// Supports:
888/// - `*` matches any sequence of characters except `/`
889/// - `**` matches any sequence including `/`
890/// - `?` matches any single character
891fn glob_match(pattern: &str, path: &str) -> bool {
892    // Normalize paths for consistent matching
893    let pattern = pattern.replace('\\', "/");
894    let path = path.replace('\\', "/");
895
896    glob_match_recursive(&pattern, &path)
897}
898
899fn glob_match_recursive(pattern: &str, path: &str) -> bool {
900    let mut pat_chars = pattern.chars().peekable();
901    let mut path_chars = path.chars().peekable();
902
903    while let Some(pc) = pat_chars.next() {
904        match pc {
905            '*' => {
906                // Check for **
907                if pat_chars.peek() == Some(&'*') {
908                    pat_chars.next(); // consume second *
909
910                    // Skip optional path separator after **
911                    if pat_chars.peek() == Some(&'/') {
912                        pat_chars.next();
913                    }
914
915                    let remaining_pattern: String = pat_chars.collect();
916
917                    // ** matches everything
918                    if remaining_pattern.is_empty() {
919                        return true;
920                    }
921
922                    // Try matching remaining pattern at every position
923                    let remaining_path: String = path_chars.collect();
924                    for i in 0..=remaining_path.len() {
925                        if glob_match_recursive(&remaining_pattern, &remaining_path[i..]) {
926                            return true;
927                        }
928                    }
929                    return false;
930                } else {
931                    // Single * - match until next /
932                    let remaining_pattern: String = pat_chars.collect();
933
934                    if remaining_pattern.is_empty() {
935                        // * at end matches rest of current segment
936                        return !path_chars.any(|c| c == '/');
937                    }
938
939                    // Try matching remaining pattern after consuming non-/ chars
940                    let remaining_path: String = path_chars.collect();
941                    for i in 0..=remaining_path.len() {
942                        if i > 0 && remaining_path.chars().nth(i - 1) == Some('/') {
943                            break; // * doesn't match across /
944                        }
945                        if glob_match_recursive(&remaining_pattern, &remaining_path[i..]) {
946                            return true;
947                        }
948                    }
949                    return false;
950                }
951            }
952            '?' => {
953                // Match any single character except /
954                match path_chars.next() {
955                    Some(c) if c != '/' => continue,
956                    _ => return false,
957                }
958            }
959            c => {
960                // Literal character match
961                match path_chars.next() {
962                    Some(pc) if pc == c => continue,
963                    _ => return false,
964                }
965            }
966        }
967    }
968
969    // Pattern consumed - path should also be consumed
970    path_chars.next().is_none()
971}
972
973#[cfg(test)]
974mod tests {
975    use super::*;
976    use crate::callgraph::types::CallEdge;
977
978    fn create_test_graph() -> CallGraph {
979        let mut graph = CallGraph::default();
980
981        // Build a call chain: main -> process -> validate -> helper
982        let edges = vec![
983            CallEdge {
984                caller: FunctionRef {
985                    file: "src/main.py".to_string(),
986                    name: "main".to_string(),
987                    qualified_name: None,
988                },
989                callee: FunctionRef {
990                    file: "src/process.py".to_string(),
991                    name: "process".to_string(),
992                    qualified_name: None,
993                },
994                call_line: 10,
995            },
996            CallEdge {
997                caller: FunctionRef {
998                    file: "src/process.py".to_string(),
999                    name: "process".to_string(),
1000                    qualified_name: None,
1001                },
1002                callee: FunctionRef {
1003                    file: "src/validate.py".to_string(),
1004                    name: "validate".to_string(),
1005                    qualified_name: None,
1006                },
1007                call_line: 25,
1008            },
1009            CallEdge {
1010                caller: FunctionRef {
1011                    file: "src/validate.py".to_string(),
1012                    name: "validate".to_string(),
1013                    qualified_name: None,
1014                },
1015                callee: FunctionRef {
1016                    file: "src/helper.py".to_string(),
1017                    name: "helper".to_string(),
1018                    qualified_name: None,
1019                },
1020                call_line: 15,
1021            },
1022            // Additional caller of helper from tests
1023            CallEdge {
1024                caller: FunctionRef {
1025                    file: "tests/test_helper.py".to_string(),
1026                    name: "test_helper".to_string(),
1027                    qualified_name: None,
1028                },
1029                callee: FunctionRef {
1030                    file: "src/helper.py".to_string(),
1031                    name: "helper".to_string(),
1032                    qualified_name: None,
1033                },
1034                call_line: 8,
1035            },
1036        ];
1037
1038        graph.edges = edges;
1039        graph.build_indexes();
1040        graph
1041    }
1042
1043    #[test]
1044    fn test_basic_impact_analysis() {
1045        let graph = create_test_graph();
1046        let config = ImpactConfig::new().with_depth(10);
1047        let result = analyze_impact(&graph, "helper", config);
1048
1049        assert_eq!(result.target, "helper");
1050        assert_eq!(result.total_affected, 4); // validate, process, main, test_helper
1051    }
1052
1053    #[test]
1054    fn test_impact_with_depth_limit() {
1055        let graph = create_test_graph();
1056        // depth=1 means: explore distance 1 nodes, which discovers distance 2 callers
1057        // With the call chain: main -> process -> validate -> helper
1058        // - validate, test_helper are at distance 1 (direct callers)
1059        // - process is at distance 2 (found by exploring validate)
1060        let config = ImpactConfig::new().with_depth(1);
1061        let result = analyze_impact(&graph, "helper", config);
1062
1063        // Distance 1 callers (validate, test_helper) + distance 2 (process)
1064        assert_eq!(result.total_affected, 3);
1065        assert!(result.callers.iter().all(|c| c.distance <= 2));
1066
1067        // Verify we have both distance 1 and distance 2 callers
1068        let distance_1_count = result.callers.iter().filter(|c| c.distance == 1).count();
1069        let distance_2_count = result.callers.iter().filter(|c| c.distance == 2).count();
1070        assert_eq!(distance_1_count, 2); // validate, test_helper
1071        assert_eq!(distance_2_count, 1); // process
1072    }
1073
1074    #[test]
1075    fn test_impact_depth_zero_no_traversal() {
1076        // BUG-014 FIX: Verify max_depth=0 means NO traversal (Python semantics)
1077        // Previously, max_depth=0 was incorrectly converted to usize::MAX (unlimited)
1078        let graph = create_test_graph();
1079        // depth=0 means: NO exploration - return only direct callers, don't explore them
1080        // With the call chain: main -> process -> validate -> helper
1081        // - validate, test_helper are at distance 1 (direct callers) - INCLUDED
1082        // - process, main should NOT be found because we don't explore distance 1 nodes
1083        let config = ImpactConfig::new().with_depth(0);
1084        let result = analyze_impact(&graph, "helper", config);
1085
1086        // Should only have direct callers (distance 1), no further exploration
1087        assert_eq!(result.total_affected, 2); // validate and test_helper only
1088        assert!(
1089            result.callers.iter().all(|c| c.distance == 1),
1090            "All callers should be at distance 1 when max_depth=0"
1091        );
1092
1093        // Verify we have the right direct callers
1094        let caller_names: Vec<&str> = result.callers.iter().map(|c| c.name.as_str()).collect();
1095        assert!(
1096            caller_names.contains(&"validate"),
1097            "validate should be a direct caller"
1098        );
1099        assert!(
1100            caller_names.contains(&"test_helper"),
1101            "test_helper should be a direct caller"
1102        );
1103
1104        // Verify we do NOT have indirect callers (would be present if bug existed)
1105        assert!(
1106            !caller_names.contains(&"process"),
1107            "process should NOT be found with max_depth=0"
1108        );
1109        assert!(
1110            !caller_names.contains(&"main"),
1111            "main should NOT be found with max_depth=0"
1112        );
1113    }
1114
1115    #[test]
1116    fn test_impact_exclude_tests() {
1117        let graph = create_test_graph();
1118        let config = ImpactConfig::new().with_depth(10).exclude_tests();
1119        let result = analyze_impact(&graph, "helper", config);
1120
1121        // Should exclude test_helper
1122        assert_eq!(result.total_affected, 3);
1123        assert!(!result.callers.iter().any(|c| c.name == "test_helper"));
1124    }
1125
1126    #[test]
1127    fn test_impact_language_filter() {
1128        let graph = create_test_graph();
1129        let config = ImpactConfig::new().with_depth(10).with_language("python");
1130        let result = analyze_impact(&graph, "helper", config);
1131
1132        // All files are .py, so all should match
1133        assert_eq!(result.total_affected, 4);
1134    }
1135
1136    #[test]
1137    fn test_impact_distance_tracking() {
1138        let graph = create_test_graph();
1139        let config = ImpactConfig::new().with_depth(10).exclude_tests();
1140        let result = analyze_impact(&graph, "helper", config);
1141
1142        // Check distances
1143        let validate = result
1144            .callers
1145            .iter()
1146            .find(|c| c.name == "validate")
1147            .unwrap();
1148        assert_eq!(validate.distance, 1);
1149
1150        let process = result.callers.iter().find(|c| c.name == "process").unwrap();
1151        assert_eq!(process.distance, 2);
1152
1153        let main_fn = result.callers.iter().find(|c| c.name == "main").unwrap();
1154        assert_eq!(main_fn.distance, 3);
1155    }
1156
1157    #[test]
1158    fn test_impact_nonexistent_target() {
1159        let graph = create_test_graph();
1160        let config = ImpactConfig::new();
1161        let result = analyze_impact(&graph, "nonexistent", config);
1162
1163        assert_eq!(result.total_affected, 0);
1164        assert!(result.callers.is_empty());
1165    }
1166
1167    #[test]
1168    fn test_glob_match() {
1169        // Basic matches
1170        assert!(glob_match("*.py", "test.py"));
1171        assert!(!glob_match("*.py", "test.rs"));
1172
1173        // Double star
1174        assert!(glob_match("**/*.py", "src/lib/test.py"));
1175        assert!(glob_match("**/test/**", "foo/test/bar.py"));
1176
1177        // Question mark
1178        assert!(glob_match("test?.py", "test1.py"));
1179        assert!(!glob_match("test?.py", "test12.py"));
1180    }
1181
1182    #[test]
1183    fn test_is_test_file() {
1184        // Directory-based detection
1185        assert!(is_test_file("tests/test_main.py"));
1186        assert!(is_test_file("src/__tests__/component.test.ts"));
1187        assert!(is_test_file("test/helper_test.go"));
1188        assert!(is_test_file("src/spec/model_spec.rb"));
1189        assert!(is_test_file("specs/integration.rb"));
1190
1191        // Python patterns
1192        assert!(is_test_file("test_main.py"));
1193        assert!(is_test_file("src/test_utils.py"));
1194        assert!(is_test_file("helper_test.py"));
1195
1196        // Java patterns (BUG-008 fix: JUnit and TestNG conventions)
1197        assert!(is_test_file("UserTest.java")); // JUnit convention
1198        assert!(is_test_file("src/UserServiceTest.java"));
1199        assert!(is_test_file("TestUser.java")); // TestNG convention
1200        assert!(is_test_file("src/TestUserService.java"));
1201        assert!(is_test_file("test_user.java")); // Python-style
1202        assert!(is_test_file("Test.java")); // Matches JUnit pattern (*Test.java)
1203        assert!(!is_test_file("Contest.java")); // Contains "test" but not a test file
1204
1205        // Kotlin patterns
1206        assert!(is_test_file("UserTest.kt"));
1207        assert!(is_test_file("TestUser.kt"));
1208        assert!(is_test_file("test_user.kt"));
1209        assert!(is_test_file("Test.kt")); // Matches JUnit pattern (*Test.kt)
1210
1211        // JavaScript/TypeScript patterns
1212        assert!(is_test_file("component.test.js"));
1213        assert!(is_test_file("component.test.ts"));
1214        assert!(is_test_file("component.test.jsx"));
1215        assert!(is_test_file("component.test.tsx"));
1216        assert!(is_test_file("component.spec.js"));
1217        assert!(is_test_file("component.spec.ts"));
1218
1219        // Go patterns
1220        assert!(is_test_file("main_test.go"));
1221        assert!(is_test_file("handler_test.go"));
1222
1223        // Rust patterns
1224        assert!(is_test_file("parser_test.rs"));
1225        assert!(is_test_file("tests.rs"));
1226
1227        // Ruby patterns
1228        assert!(is_test_file("model_spec.rb"));
1229        assert!(is_test_file("helper_test.rb"));
1230
1231        // C# patterns
1232        assert!(is_test_file("UserTest.cs"));
1233        assert!(is_test_file("UserTests.cs"));
1234
1235        // Non-test files
1236        assert!(!is_test_file("src/main.py"));
1237        assert!(!is_test_file("lib/process.rs"));
1238        assert!(!is_test_file("src/TestingFramework.java")); // Not a test, just has "Testing" in name
1239        assert!(!is_test_file("src/LatestNews.java")); // Contains "test" substring
1240    }
1241
1242    #[test]
1243    fn test_llm_context_output() {
1244        let graph = create_test_graph();
1245        let config = ImpactConfig::new().with_depth(10).exclude_tests();
1246        let result = analyze_impact(&graph, "helper", config);
1247
1248        let context = result.to_llm_context();
1249
1250        assert!(context.contains("# Impact Analysis: helper"));
1251        assert!(context.contains("Distance 1"));
1252        assert!(context.contains("validate"));
1253        assert!(context.contains("Distance 2"));
1254        assert!(context.contains("process"));
1255    }
1256
1257    /// Test that functions with the same name in different files are NOT conflated.
1258    /// This is BUG-010: Name-only matching was incorrectly treating callers of
1259    /// a.py:helper and b.py:helper as callers of the same function.
1260    #[test]
1261    fn test_same_name_different_files_not_conflated() {
1262        let mut graph = CallGraph::default();
1263
1264        // Create two helper functions in different files
1265        // a.py:helper is called by caller_a
1266        // b.py:helper is called by caller_b
1267        let edges = vec![
1268            CallEdge {
1269                caller: FunctionRef {
1270                    file: "src/caller_a.py".to_string(),
1271                    name: "caller_a".to_string(),
1272                    qualified_name: None,
1273                },
1274                callee: FunctionRef {
1275                    file: "src/a.py".to_string(),
1276                    name: "helper".to_string(),
1277                    qualified_name: None,
1278                },
1279                call_line: 10,
1280            },
1281            CallEdge {
1282                caller: FunctionRef {
1283                    file: "src/caller_b.py".to_string(),
1284                    name: "caller_b".to_string(),
1285                    qualified_name: None,
1286                },
1287                callee: FunctionRef {
1288                    file: "src/b.py".to_string(),
1289                    name: "helper".to_string(),
1290                    qualified_name: None,
1291                },
1292                call_line: 20,
1293            },
1294            // Also add transitive callers to test BFS doesn't conflate
1295            CallEdge {
1296                caller: FunctionRef {
1297                    file: "src/main_a.py".to_string(),
1298                    name: "main_a".to_string(),
1299                    qualified_name: None,
1300                },
1301                callee: FunctionRef {
1302                    file: "src/caller_a.py".to_string(),
1303                    name: "caller_a".to_string(),
1304                    qualified_name: None,
1305                },
1306                call_line: 5,
1307            },
1308            CallEdge {
1309                caller: FunctionRef {
1310                    file: "src/main_b.py".to_string(),
1311                    name: "main_b".to_string(),
1312                    qualified_name: None,
1313                },
1314                callee: FunctionRef {
1315                    file: "src/caller_b.py".to_string(),
1316                    name: "caller_b".to_string(),
1317                    qualified_name: None,
1318                },
1319                call_line: 15,
1320            },
1321        ];
1322
1323        graph.edges = edges;
1324        graph.build_indexes();
1325
1326        // Analyze impact of a.py:helper - should only find caller_a and main_a
1327        let config = ImpactConfig::new().with_depth(10);
1328
1329        // Search for the specific file's helper by using find_matching_targets behavior
1330        // When searching for "helper", both a.py:helper and b.py:helper match
1331        // BUT their callers should be tracked separately
1332        let result = analyze_impact(&graph, "helper", config);
1333
1334        // With the fix: searching for "helper" matches BOTH a.py:helper and b.py:helper
1335        // So we get callers of both: caller_a, caller_b, main_a, main_b
1336        assert_eq!(result.total_affected, 4);
1337
1338        // Verify caller_a is at distance 1 (direct caller of a.py:helper)
1339        let caller_a = result
1340            .callers
1341            .iter()
1342            .find(|c| c.name == "caller_a")
1343            .expect("caller_a should be found");
1344        assert_eq!(caller_a.distance, 1);
1345        assert_eq!(caller_a.file, "src/caller_a.py");
1346
1347        // Verify caller_b is at distance 1 (direct caller of b.py:helper)
1348        let caller_b = result
1349            .callers
1350            .iter()
1351            .find(|c| c.name == "caller_b")
1352            .expect("caller_b should be found");
1353        assert_eq!(caller_b.distance, 1);
1354        assert_eq!(caller_b.file, "src/caller_b.py");
1355
1356        // Verify main_a is at distance 2 (calls caller_a which calls a.py:helper)
1357        let main_a = result
1358            .callers
1359            .iter()
1360            .find(|c| c.name == "main_a")
1361            .expect("main_a should be found");
1362        assert_eq!(main_a.distance, 2);
1363
1364        // Verify main_b is at distance 2 (calls caller_b which calls b.py:helper)
1365        let main_b = result
1366            .callers
1367            .iter()
1368            .find(|c| c.name == "main_b")
1369            .expect("main_b should be found");
1370        assert_eq!(main_b.distance, 2);
1371
1372        // CRITICAL: main_a should NOT be reachable from b.py:helper's call chain
1373        // and main_b should NOT be reachable from a.py:helper's call chain
1374        // Before the fix, name-only matching would have made them appear at wrong distances
1375        // or created false transitive relationships.
1376    }
1377
1378    /// Test that targeting a specific file's function only returns its callers.
1379    #[test]
1380    fn test_qualified_target_isolates_file() {
1381        let mut graph = CallGraph::default();
1382
1383        // Same setup: two helper functions in different files
1384        let edges = vec![
1385            CallEdge {
1386                caller: FunctionRef {
1387                    file: "src/caller_a.py".to_string(),
1388                    name: "caller_a".to_string(),
1389                    qualified_name: Some("module_a.caller_a".to_string()),
1390                },
1391                callee: FunctionRef {
1392                    file: "src/a.py".to_string(),
1393                    name: "helper".to_string(),
1394                    qualified_name: Some("module_a.helper".to_string()),
1395                },
1396                call_line: 10,
1397            },
1398            CallEdge {
1399                caller: FunctionRef {
1400                    file: "src/caller_b.py".to_string(),
1401                    name: "caller_b".to_string(),
1402                    qualified_name: Some("module_b.caller_b".to_string()),
1403                },
1404                callee: FunctionRef {
1405                    file: "src/b.py".to_string(),
1406                    name: "helper".to_string(),
1407                    qualified_name: Some("module_b.helper".to_string()),
1408                },
1409                call_line: 20,
1410            },
1411        ];
1412
1413        graph.edges = edges;
1414        graph.build_indexes();
1415
1416        // Search by qualified name - should only find caller_a
1417        let config = ImpactConfig::new().with_depth(10);
1418        let result = analyze_impact(&graph, "module_a.helper", config.clone());
1419
1420        // Should only find caller_a, not caller_b
1421        assert_eq!(result.total_affected, 1);
1422        assert_eq!(result.callers[0].name, "caller_a");
1423        assert_eq!(result.callers[0].file, "src/caller_a.py");
1424
1425        // Search for module_b.helper - should only find caller_b
1426        let result_b = analyze_impact(&graph, "module_b.helper", config);
1427        assert_eq!(result_b.total_affected, 1);
1428        assert_eq!(result_b.callers[0].name, "caller_b");
1429        assert_eq!(result_b.callers[0].file, "src/caller_b.py");
1430    }
1431
1432    /// Create a diamond-shaped call graph for testing path deduplication.
1433    ///
1434    /// ```text
1435    ///     A
1436    ///    / \
1437    ///   B   C
1438    ///    \ /
1439    ///     D (target)
1440    /// ```
1441    fn create_diamond_graph() -> CallGraph {
1442        let mut graph = CallGraph::default();
1443
1444        let edges = vec![
1445            // D is called by B and C (two paths to reach A)
1446            CallEdge {
1447                caller: FunctionRef {
1448                    file: "src/b.py".to_string(),
1449                    name: "B".to_string(),
1450                    qualified_name: None,
1451                },
1452                callee: FunctionRef {
1453                    file: "src/d.py".to_string(),
1454                    name: "D".to_string(),
1455                    qualified_name: None,
1456                },
1457                call_line: 10,
1458            },
1459            CallEdge {
1460                caller: FunctionRef {
1461                    file: "src/c.py".to_string(),
1462                    name: "C".to_string(),
1463                    qualified_name: None,
1464                },
1465                callee: FunctionRef {
1466                    file: "src/d.py".to_string(),
1467                    name: "D".to_string(),
1468                    qualified_name: None,
1469                },
1470                call_line: 20,
1471            },
1472            // A calls both B and C
1473            CallEdge {
1474                caller: FunctionRef {
1475                    file: "src/a.py".to_string(),
1476                    name: "A".to_string(),
1477                    qualified_name: None,
1478                },
1479                callee: FunctionRef {
1480                    file: "src/b.py".to_string(),
1481                    name: "B".to_string(),
1482                    qualified_name: None,
1483                },
1484                call_line: 5,
1485            },
1486            CallEdge {
1487                caller: FunctionRef {
1488                    file: "src/a.py".to_string(),
1489                    name: "A".to_string(),
1490                    qualified_name: None,
1491                },
1492                callee: FunctionRef {
1493                    file: "src/c.py".to_string(),
1494                    name: "C".to_string(),
1495                    qualified_name: None,
1496                },
1497                call_line: 6,
1498            },
1499        ];
1500
1501        graph.edges = edges;
1502        graph.build_indexes();
1503        graph
1504    }
1505
1506    /// Test BFS behavior (deduplicate_paths=true, default) on diamond graph.
1507    ///
1508    /// BFS with global visited: A appears once at distance 2 via the first explored path.
1509    #[test]
1510    fn test_diamond_graph_bfs_deduplication() {
1511        let graph = create_diamond_graph();
1512        let config = ImpactConfig::new().with_depth(10); // Default: deduplicate_paths = true
1513
1514        let result = analyze_impact(&graph, "D", config);
1515
1516        // B and C are direct callers (distance 1)
1517        // A is at distance 2 (caller of B and C)
1518        assert_eq!(result.total_affected, 3);
1519
1520        // Verify B and C are at distance 1
1521        let b = result.callers.iter().find(|c| c.name == "B").unwrap();
1522        assert_eq!(b.distance, 1);
1523
1524        let c = result.callers.iter().find(|c| c.name == "C").unwrap();
1525        assert_eq!(c.distance, 1);
1526
1527        // Verify A is at distance 2 (via whichever path was explored first)
1528        let a = result.callers.iter().find(|c| c.name == "A").unwrap();
1529        assert_eq!(a.distance, 2);
1530
1531        // With BFS deduplication, A appears exactly once
1532        let a_count = result.callers.iter().filter(|c| c.name == "A").count();
1533        assert_eq!(a_count, 1);
1534    }
1535
1536    /// Test DFS behavior (deduplicate_paths=false) on diamond graph.
1537    ///
1538    /// DFS with per-path visited: All paths explored, A recorded with minimum distance.
1539    #[test]
1540    fn test_diamond_graph_dfs_all_paths() {
1541        let graph = create_diamond_graph();
1542        let config = ImpactConfig::new()
1543            .with_depth(10)
1544            .explore_all_paths(); // deduplicate_paths = false
1545
1546        let result = analyze_impact(&graph, "D", config);
1547
1548        // Same result count - A appears once in output (with aggregated info)
1549        assert_eq!(result.total_affected, 3);
1550
1551        // Verify B and C are at distance 1
1552        let b = result.callers.iter().find(|c| c.name == "B").unwrap();
1553        assert_eq!(b.distance, 1);
1554
1555        let c = result.callers.iter().find(|c| c.name == "C").unwrap();
1556        assert_eq!(c.distance, 1);
1557
1558        // Verify A is at distance 2 (minimum across all paths)
1559        let a = result.callers.iter().find(|c| c.name == "A").unwrap();
1560        assert_eq!(a.distance, 2);
1561
1562        // A still appears once in output (but both paths were explored internally)
1563        let a_count = result.callers.iter().filter(|c| c.name == "A").count();
1564        assert_eq!(a_count, 1);
1565    }
1566
1567    /// Test that DFS mode aggregates call sites from all paths.
1568    #[test]
1569    fn test_diamond_graph_dfs_aggregates_call_sites() {
1570        let graph = create_diamond_graph();
1571        let config = ImpactConfig::new()
1572            .with_depth(10)
1573            .with_call_sites()
1574            .explore_all_paths();
1575
1576        let result = analyze_impact(&graph, "D", config);
1577
1578        // A calls B at line 5 and C at line 6
1579        // In DFS mode, A's call sites should include both paths' call info
1580        let a = result.callers.iter().find(|c| c.name == "A").unwrap();
1581
1582        // A should have call site info from both paths (lines 5 and 6 where it calls B and C)
1583        // Note: The call sites recorded are where A calls B/C, not where B/C call D
1584        assert_eq!(a.call_sites.len(), 2, "A should have 2 call sites (via B and C)");
1585        assert!(a.call_sites.contains(&5), "Should include call site line 5 (A->B)");
1586        assert!(a.call_sites.contains(&6), "Should include call site line 6 (A->C)");
1587    }
1588
1589    /// Test that BFS mode may miss some call sites in diamond patterns.
1590    #[test]
1591    fn test_diamond_graph_bfs_call_sites() {
1592        let graph = create_diamond_graph();
1593        let config = ImpactConfig::new()
1594            .with_depth(10)
1595            .with_call_sites(); // Default: deduplicate_paths = true
1596
1597        let result = analyze_impact(&graph, "D", config);
1598
1599        // With BFS, A is visited via the first path only
1600        // So it may only have one call site (depends on traversal order)
1601        let a = result.callers.iter().find(|c| c.name == "A").unwrap();
1602
1603        // BFS records call site from the first path that discovers A
1604        assert!(
1605            a.call_sites.len() >= 1,
1606            "A should have at least 1 call site"
1607        );
1608    }
1609
1610    /// Test that explore_all_paths builder method works correctly.
1611    #[test]
1612    fn test_explore_all_paths_builder() {
1613        let config = ImpactConfig::new().explore_all_paths();
1614        assert!(!config.deduplicate_paths);
1615
1616        let config2 = ImpactConfig::new().with_deduplicate_paths(false);
1617        assert!(!config2.deduplicate_paths);
1618
1619        let config3 = ImpactConfig::new().with_deduplicate_paths(true);
1620        assert!(config3.deduplicate_paths);
1621    }
1622
1623    /// Test default config has deduplicate_paths=true for backward compatibility.
1624    #[test]
1625    fn test_default_config_backward_compatible() {
1626        let config = ImpactConfig::default();
1627        assert!(
1628            config.deduplicate_paths,
1629            "Default should use deduplication for backward compatibility"
1630        );
1631
1632        let config2 = ImpactConfig::new();
1633        assert!(
1634            config2.deduplicate_paths,
1635            "ImpactConfig::new() should also use deduplication"
1636        );
1637    }
1638}