go_brrr/callgraph/
dead.rs

1//! Dead code detection.
2//!
3//! Identifies unreachable functions in a codebase by computing reachability
4//! from detected entry points. Uses heuristics to minimize false positives
5//! from callbacks, event handlers, and dynamically-invoked functions.
6//!
7//! # Algorithm
8//!
9//! 1. Detect entry points (main, tests, CLI handlers, API endpoints, exports)
10//! 2. BFS traversal from entry points marking all reachable functions
11//! 3. Filter remaining functions for false positives (callbacks, handlers)
12//! 4. Return truly unreachable functions as dead code
13//!
14//! # Entry Point Patterns
15//!
16//! - Main functions: `main`, `Main`, `run`, `app`, `start`
17//! - Test functions: `test_*`, `Test*`, `*_test`, `*Tests`, `spec_*`
18//! - CLI handlers: `cmd_*`, `handle_*`, `run_*`, `execute_*`
19//! - API endpoints: `api_*`, `get_*`, `post_*`, `put_*`, `delete_*`, `patch_*`
20//! - Framework hooks: `setup`, `teardown`, `init`, `cleanup`, `configure`
21//! - Pytest hooks: `pytest_*` (pytest_configure, pytest_collection, pytest_runtest_setup, etc.)
22//! - Python dunder: `__init__`, `__main__`, `__call__`, etc.
23
24use std::collections::{HashSet, VecDeque};
25
26use serde::{Deserialize, Serialize};
27
28use crate::callgraph::types::{CallGraph, FunctionRef};
29
/// Result of dead code analysis.
///
/// Built by `analyze_dead_code_with_config`; bundles the reported functions
/// with the entry points and counters collected while analyzing the graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeadCodeResult {
    /// Functions identified as dead (unreachable) whose confidence met the
    /// configured threshold
    pub dead_functions: Vec<DeadFunction>,
    /// Total count; equals `dead_functions.len()`
    pub total_dead: usize,
    /// Names of the entry points used as reachability roots
    pub entry_points: Vec<String>,
    /// Number of unreachable functions suppressed as likely false positives
    pub filtered_count: usize,
    /// Analysis statistics
    pub stats: DeadCodeStats,
}
44
/// A dead (unreachable) function.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeadFunction {
    /// File containing the function
    pub file: String,
    /// Function name
    pub name: String,
    /// Fully qualified name, when the call graph recorded one
    pub qualified_name: Option<String>,
    /// Line number; the analysis in this module currently always leaves
    /// this as `None`
    pub line: Option<usize>,
    /// Reason this function is considered dead
    pub reason: DeadReason,
    /// Confidence score (0.0 - 1.0); higher means more likely to be truly dead
    pub confidence: f64,
}
61
62/// Reason why a function is considered dead.
63#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
64pub enum DeadReason {
65    /// Not reachable from any entry point
66    Unreachable,
67    /// Defined but never called anywhere
68    NeverCalled,
69    /// Only called from other dead functions
70    CalledOnlyByDead,
71}
72
/// Statistics from dead code analysis.
///
/// The `filtered_as_*` counters break down the false-positive filter by the
/// reason tag returned from `check_false_positive`. Candidates suppressed by
/// a user-supplied filter pattern are not counted in any of them.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DeadCodeStats {
    /// Total functions analyzed
    pub total_functions: usize,
    /// Functions marked as entry points
    pub entry_point_count: usize,
    /// Functions reachable from entry points
    pub reachable_count: usize,
    /// Functions filtered with the "callback" reason (callback-like names)
    pub filtered_as_callback: usize,
    /// Functions filtered with the "handler" reason (names ending in
    /// `_event` / `Event`)
    pub filtered_as_handler: usize,
    /// Functions filtered with the "decorator" reason (`route_`, `task_`,
    /// `hook_`, ... prefixes)
    pub filtered_as_decorator: usize,
    /// Functions filtered with the "dynamic" reason (visitor/dispatch names,
    /// `impl_` prefix, protocol methods)
    pub filtered_as_dynamic: usize,
}
88
/// Configuration for dead code detection.
#[derive(Debug, Clone)]
pub struct DeadCodeConfig {
    /// Minimum confidence (0.0 - 1.0) a candidate must reach to be reported
    pub min_confidence: f64,
    /// Additional entry point patterns.
    ///
    /// Each pattern is matched as a plain substring of the function name
    /// (no regex or glob syntax is interpreted).
    pub extra_entry_patterns: Vec<String>,
    /// Additional false positive patterns to filter.
    ///
    /// Each pattern is matched as a plain substring of the function name.
    pub filter_patterns: Vec<String>,
    /// Language-specific mode (intended to enable language-specific heuristics).
    ///
    /// NOTE(review): nothing in the visible part of this module reads this
    /// field - confirm where it is consumed.
    pub language: Option<String>,
    /// Treat functions matching public API naming patterns as entry points.
    ///
    /// WARNING: this is very permissive and hides real dead code.
    /// `is_likely_public_api()` accepts any non-underscore-prefixed name that:
    /// - starts with an uppercase character (PascalCase)
    /// - starts with `get_`/`set_`, or `get`/`set` followed by an uppercase character
    /// - starts with `is_`/`has_`/`can_`/`should_`
    /// - starts with `with_`/`from_`/`to_`/`as_`
    ///
    /// Default: false.
    ///
    /// Only enable this to be extremely conservative about false positives,
    /// at the cost of missing most actual dead code.
    pub include_public_api_patterns: bool,
}
115
impl Default for DeadCodeConfig {
    /// Default configuration: report at confidence 0.7 or higher, no custom
    /// entry or filter patterns, no language mode, and public API name
    /// patterns disabled.
    fn default() -> Self {
        Self {
            min_confidence: 0.7,
            extra_entry_patterns: Vec::new(),
            filter_patterns: Vec::new(),
            language: None,
            include_public_api_patterns: false, // Opt-in only - too permissive to be on by default
        }
    }
}
127
/// Entry point category for classification.
///
/// Plain fieldless enum; derives `Eq` and `Hash` in addition to `PartialEq`
/// so kinds can be used as map/set keys and in exhaustive equality checks.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EntryPointKind {
    /// Program entry such as `main`, `Main` or `__main__`
    Main,
    /// Test function or test fixture (`test_*`, `*_test`, `setUp`, ...)
    Test,
    /// Command-line handler (`cmd_*`, `handle_*`, `run_*`, ...)
    CliHandler,
    /// REST-style API endpoint (`api_*`, `get_*`, `post_*`, ...)
    ApiEndpoint,
    /// Framework lifecycle hook (`setup`, `render`, `pytest_*`, ...)
    FrameworkHook,
    /// Python special method (`__init__`, `__call__`, ...)
    PythonDunder,
}
138
139/// Detect entry points in the call graph with custom configuration.
140///
141/// Entry points are functions that are never called but are likely
142/// intentionally public (main, test functions, exported API, etc.)
143///
144/// This function uses comprehensive heuristics to identify entry points
145/// across multiple programming paradigms and frameworks.
146pub fn detect_entry_points_with_config(graph: &CallGraph, config: &DeadCodeConfig) -> Vec<FunctionRef> {
147    let all_funcs = graph.all_functions();
148    let called: HashSet<_> = graph.edges.iter().map(|e| &e.callee).collect();
149
150    all_funcs
151        .iter()
152        .filter(|f| !called.contains(f) || is_definitely_entry_point(&f.name))
153        .filter(|f| is_likely_entry_point(&f.name, Some(config)))
154        .cloned()
155        .collect()
156}
157
158/// Classify the type of entry point for a function name.
159pub fn classify_entry_point(name: &str) -> Option<EntryPointKind> {
160    // Main entry points
161    if name == "main" || name == "Main" || name == "__main__" {
162        return Some(EntryPointKind::Main);
163    }
164
165    // Test functions - comprehensive patterns
166    if name.starts_with("test_")
167        || name.starts_with("Test")
168        || name.ends_with("_test")
169        || name.ends_with("Test")
170        || name.ends_with("Tests")
171        || name.starts_with("spec_")
172        || name.ends_with("_spec")
173        || name.starts_with("it_")
174        || name.starts_with("should_")
175        || name == "setUp"
176        || name == "tearDown"
177        || name == "setUpClass"
178        || name == "tearDownClass"
179        || name == "beforeEach"
180        || name == "afterEach"
181        || name == "beforeAll"
182        || name == "afterAll"
183    {
184        return Some(EntryPointKind::Test);
185    }
186
187    // Pytest plugin hooks (pytest_configure, pytest_collection, pytest_runtest_setup, etc.)
188    // These are dynamically called by the pytest framework and should not be reported as dead code
189    if name.starts_with("pytest_") {
190        return Some(EntryPointKind::FrameworkHook);
191    }
192
193    // Conftest hooks (conftest.py functions called by pytest)
194    if name.starts_with("conftest_") {
195        return Some(EntryPointKind::FrameworkHook);
196    }
197
198    // CLI handlers
199    if name.starts_with("cmd_")
200        || name.starts_with("handle_")
201        || name.starts_with("run_")
202        || name.starts_with("execute_")
203        || name.starts_with("do_")
204        || name.starts_with("action_")
205        || name.starts_with("command_")
206    {
207        return Some(EntryPointKind::CliHandler);
208    }
209
210    // API endpoints - REST patterns
211    if name.starts_with("api_")
212        || name.starts_with("get_")
213        || name.starts_with("post_")
214        || name.starts_with("put_")
215        || name.starts_with("delete_")
216        || name.starts_with("patch_")
217        || name.starts_with("list_")
218        || name.starts_with("create_")
219        || name.starts_with("update_")
220        || name.starts_with("destroy_")
221        || name.starts_with("index_")
222        || name.starts_with("show_")
223        || name.starts_with("new_")
224        || name.starts_with("edit_")
225    {
226        return Some(EntryPointKind::ApiEndpoint);
227    }
228
229    // Framework hooks and lifecycle methods
230    if name == "setup"
231        || name == "teardown"
232        || name == "init"
233        || name == "cleanup"
234        || name == "configure"
235        || name == "register"
236        || name == "bootstrap"
237        || name == "mount"
238        || name == "unmount"
239        || name == "render"
240        || name == "componentDidMount"
241        || name == "componentWillUnmount"
242        || name == "ngOnInit"
243        || name == "ngOnDestroy"
244        || name == "created"
245        || name == "mounted"
246        || name == "destroyed"
247    {
248        return Some(EntryPointKind::FrameworkHook);
249    }
250
251    // Python dunder methods (special methods)
252    if name.starts_with("__") && name.ends_with("__") {
253        return Some(EntryPointKind::PythonDunder);
254    }
255
256    None
257}
258
259/// Check if a function name looks like an entry point.
260///
261/// When `config` is None, uses conservative defaults (public API patterns disabled).
262/// When `config` is Some, respects the `include_public_api_patterns` and `extra_entry_patterns` settings.
263fn is_likely_entry_point(name: &str, config: Option<&DeadCodeConfig>) -> bool {
264    // Core entry point patterns (always checked)
265    if classify_entry_point(name).is_some() || is_likely_callback(name) || is_likely_factory(name) {
266        return true;
267    }
268
269    // Check user-defined custom entry patterns
270    if let Some(cfg) = config {
271        for pattern in &cfg.extra_entry_patterns {
272            if name.contains(pattern) {
273                return true;
274            }
275        }
276
277        // Only check public API patterns if explicitly enabled (opt-in)
278        // This is disabled by default because it matches 50-70% of functions
279        if cfg.include_public_api_patterns && is_likely_public_api(name) {
280            return true;
281        }
282    }
283
284    false
285}
286
/// Returns `true` for the small fixed set of names that are treated as
/// program entry points whether or not anything calls them.
fn is_definitely_entry_point(name: &str) -> bool {
    matches!(name, "main" | "Main" | "__main__" | "app" | "start" | "run")
}
296
/// Check if name looks like a callback function (likely dynamically invoked).
///
/// Accepted shapes:
///
/// - `on_*` and `handle_*` prefixes
/// - `on`, `On` or `handle` immediately followed by an ASCII uppercase
///   letter (`onClick`, `OnSubmit`, `handleClick`); this keeps ordinary
///   words such as `once`, `online`, `Ongoing`, `handler`, `handling` out
/// - `_callback`, `_handler`, `_listener` or `Listener` suffixes
/// - `Callback` or `Handler` anywhere in the name
fn is_likely_callback(name: &str) -> bool {
    const MARKER_SUFFIXES: [&str; 4] = ["_callback", "_handler", "_listener", "Listener"];
    const MARKER_SUBSTRINGS: [&str; 2] = ["Callback", "Handler"];
    const CAMEL_STEMS: [&str; 3] = ["on", "On", "handle"];

    // True when `name` is `stem` directly followed by an ASCII capital.
    let capital_follows = |stem: &str| {
        name.strip_prefix(stem)
            .and_then(|rest| rest.chars().next())
            .map_or(false, |c| c.is_ascii_uppercase())
    };

    if name.starts_with("on_") || name.starts_with("handle_") {
        return true;
    }
    if CAMEL_STEMS.iter().any(|&stem| capital_follows(stem)) {
        return true;
    }
    if MARKER_SUFFIXES.iter().any(|&suffix| name.ends_with(suffix)) {
        return true;
    }
    MARKER_SUBSTRINGS.iter().any(|&marker| name.contains(marker))
}
330
/// Check if name looks like a factory function.
///
/// Matches the snake_case prefixes `create_`, `make_`, `build_`, `new_`,
/// their PascalCase counterparts `Create`, `Make`, `Build`, `New`, and the
/// suffixes `_factory` / `Factory`.
fn is_likely_factory(name: &str) -> bool {
    const LEADING: [&str; 8] = [
        "create_", "make_", "build_", "new_", "Create", "Make", "Build", "New",
    ];
    const TRAILING: [&str; 2] = ["_factory", "Factory"];

    LEADING.iter().any(|&prefix| name.starts_with(prefix))
        || TRAILING.iter().any(|&suffix| name.ends_with(suffix))
}
344
/// Check if name looks like intentionally public API.
///
/// Names starting with `_` are never public. Otherwise a name qualifies if
/// it has one of the snake_case accessor/builder prefixes (`get_`, `set_`,
/// `is_`, `has_`, `can_`, `should_`, `with_`, `from_`, `to_`, `as_`), is a
/// camelCase accessor (`get`/`set` directly followed by an uppercase
/// character, which excludes words like `gettext`, `getter`, `settings`,
/// `setup`), or starts with an uppercase character.
fn is_likely_public_api(name: &str) -> bool {
    const SNAKE_PREFIXES: [&str; 10] = [
        "get_", "set_", "is_", "has_", "can_", "should_", "with_", "from_", "to_", "as_",
    ];
    const CAMEL_STEMS: [&str; 2] = ["get", "set"];

    if name.starts_with('_') {
        return false;
    }

    if SNAKE_PREFIXES.iter().any(|&prefix| name.starts_with(prefix)) {
        return true;
    }

    // `getX` / `setX`: the character right after the stem must be uppercase.
    let camel_accessor = CAMEL_STEMS.iter().any(|&stem| {
        name.strip_prefix(stem)
            .and_then(|rest| rest.chars().next())
            .map_or(false, char::is_uppercase)
    });

    // PascalCase: the very first character is uppercase.
    let pascal_case = name.chars().next().map_or(false, char::is_uppercase);

    camel_accessor || pascal_case
}
389
390/// Analyze dead code in a project.
391#[allow(dead_code)]
392pub fn analyze_dead_code(graph: &CallGraph) -> DeadCodeResult {
393    analyze_dead_code_with_config(graph, &DeadCodeConfig::default())
394}
395
396/// Analyze dead code with custom configuration.
397pub fn analyze_dead_code_with_config(graph: &CallGraph, config: &DeadCodeConfig) -> DeadCodeResult {
398    let entry_points = detect_entry_points_with_config(graph, config);
399    let all_funcs = graph.all_functions();
400
401    let mut stats = DeadCodeStats {
402        total_functions: all_funcs.len(),
403        entry_point_count: entry_points.len(),
404        ..Default::default()
405    };
406
407    // Compute reachable functions via BFS
408    let reachable = compute_reachability(graph, &entry_points);
409    stats.reachable_count = reachable.len();
410
411    // Find potentially dead functions
412    let mut potentially_dead: Vec<_> = all_funcs
413        .difference(&reachable)
414        .filter(|f| !entry_points.contains(f))
415        .cloned()
416        .collect();
417
418    // Filter false positives and compute confidence
419    let mut dead_functions = Vec::new();
420    let mut filtered_count = 0;
421
422    for func in potentially_dead.drain(..) {
423        let (is_false_positive, filter_reason) = check_false_positive(&func, config);
424
425        if is_false_positive {
426            filtered_count += 1;
427            match filter_reason.as_str() {
428                "callback" => stats.filtered_as_callback += 1,
429                "handler" => stats.filtered_as_handler += 1,
430                "decorator" => stats.filtered_as_decorator += 1,
431                "dynamic" => stats.filtered_as_dynamic += 1,
432                _ => {}
433            }
434            continue;
435        }
436
437        let confidence = compute_confidence(&func, graph, &reachable);
438
439        if confidence >= config.min_confidence {
440            let reason = determine_dead_reason(&func, graph, &reachable);
441
442            dead_functions.push(DeadFunction {
443                file: func.file.clone(),
444                name: func.name.clone(),
445                qualified_name: func.qualified_name.clone(),
446                line: None, // Would need index for line numbers
447                reason,
448                confidence,
449            });
450        }
451    }
452
453    DeadCodeResult {
454        total_dead: dead_functions.len(),
455        dead_functions,
456        entry_points: entry_points.iter().map(|f| f.name.clone()).collect(),
457        filtered_count,
458        stats,
459    }
460}
461
462/// Compute reachability from entry points using BFS.
463///
464/// Returns the set of all functions reachable from any entry point.
465///
466/// # Performance
467///
468/// Uses index-based BFS to avoid cloning `FunctionRef` during traversal.
469/// Only clones once at the end when building the final result set.
470fn compute_reachability(graph: &CallGraph, entry_points: &[FunctionRef]) -> HashSet<FunctionRef> {
471    use std::collections::HashMap;
472
473    // Collect all functions once and build index mapping (uses cached result)
474    let all_funcs: Vec<FunctionRef> = graph.all_functions().iter().cloned().collect();
475    if all_funcs.is_empty() {
476        return HashSet::new();
477    }
478
479    let func_to_idx: HashMap<&FunctionRef, usize> = all_funcs
480        .iter()
481        .enumerate()
482        .map(|(i, f)| (f, i))
483        .collect();
484
485    // Use Vec<bool> for O(1) visited tracking instead of HashSet lookups
486    let mut visited = vec![false; all_funcs.len()];
487
488    // Initialize queue with entry point indices
489    let mut queue: VecDeque<usize> = entry_points
490        .iter()
491        .filter_map(|f| func_to_idx.get(f).copied())
492        .collect();
493
494    // BFS traversal using indices (no cloning during traversal)
495    while let Some(idx) = queue.pop_front() {
496        if !visited[idx] {
497            visited[idx] = true;
498            let func = &all_funcs[idx];
499
500            // Add all callees to queue
501            if let Some(callees) = graph.callees.get(func) {
502                for callee in callees {
503                    if let Some(&callee_idx) = func_to_idx.get(callee) {
504                        if !visited[callee_idx] {
505                            queue.push_back(callee_idx);
506                        }
507                    }
508                }
509            }
510        }
511    }
512
513    // Convert back to FunctionRef set (single clone per reachable function)
514    visited
515        .into_iter()
516        .enumerate()
517        .filter(|(_, v)| *v)
518        .map(|(i, _)| all_funcs[i].clone())
519        .collect()
520}
521
522/// Check if a function is likely a false positive for dead code.
523///
524/// Returns (is_false_positive, reason).
525fn check_false_positive(func: &FunctionRef, config: &DeadCodeConfig) -> (bool, String) {
526    let name = &func.name;
527
528    // Callback patterns - dynamically invoked
529    // Note: is_likely_callback() already handles on_, onX (camelCase), OnX (PascalCase)
530    // with strict uppercase checks to avoid false positives like "once", "ongoing"
531    if is_likely_callback(name) {
532        return (true, "callback".to_string());
533    }
534
535    // Event-specific patterns NOT covered by is_likely_callback()
536    // (which handles _handler, Handler, _callback, Callback, _listener, Listener)
537    if name.ends_with("_event") || name.ends_with("Event") {
538        return (true, "handler".to_string());
539    }
540
541    // Decorator-registered patterns
542    if name.starts_with("route_")
543        || name.starts_with("endpoint_")
544        || name.starts_with("task_")
545        || name.starts_with("job_")
546        || name.starts_with("signal_")
547        || name.starts_with("hook_")
548    {
549        return (true, "decorator".to_string());
550    }
551
552    // Dynamic dispatch patterns (visitor pattern, dispatch methods)
553    // Note: process_* is intentionally NOT included as it's a common naming
554    // pattern for regular data processing functions
555    if name.starts_with("visit_")
556        || name.starts_with("Visit")
557        || name.starts_with("dispatch_")
558        || name.starts_with("Dispatch")
559        || name.contains("Strategy")
560        || name.contains("Visitor")
561    {
562        return (true, "dynamic".to_string());
563    }
564
565    // Protocol/interface implementation patterns
566    if name.starts_with("impl_") || is_protocol_method(name) {
567        return (true, "dynamic".to_string());
568    }
569
570    // Check user-defined filter patterns
571    for pattern in &config.filter_patterns {
572        if name.contains(pattern) {
573            return (true, "user_filter".to_string());
574        }
575    }
576
577    (false, String::new())
578}
579
/// Check if name is a common protocol/interface method.
///
/// The comparison is exact and case-sensitive against a fixed list of
/// iterator, comparison, resource-management, I/O and socket method names.
fn is_protocol_method(name: &str) -> bool {
    const PROTOCOL_METHODS: [&str; 24] = [
        // iteration / container / comparison
        "next", "iter", "len", "hash", "eq", "cmp",
        // ownership / access
        "clone", "drop", "deref", "index", "call",
        // context management
        "enter", "exit",
        // stream I/O
        "read", "write", "close", "flush", "seek",
        // sockets
        "accept", "connect", "bind", "listen", "send", "recv",
    ];

    PROTOCOL_METHODS.iter().any(|&method| method == name)
}
610
/// Check if a file lives under a commonly called, public-facing module
/// directory.
///
/// Returns `true` when any directory component of `file_path` (split on
/// `/`) is exactly one of `api`, `public`, `lib`, `handlers`, `routes`,
/// `controllers`, `endpoints`, `views`, `services` or `commands`. The final
/// component is treated as the file name and never counts as a directory.
///
/// Matching works on whole components, so relative paths such as
/// `api/users.py` are recognised as well as `/src/api/users.py`, while
/// look-alikes such as `src/apiary/x.py` are not.
fn is_in_common_module_path(file_path: &str) -> bool {
    // Public-facing module directories that are less likely to hold dead code
    const COMMON_DIRS: &[&str] = &[
        "api",
        "public",
        "lib",
        "handlers",
        "routes",
        "controllers",
        "endpoints",
        "views",
        "services",
        "commands",
    ];

    let mut components = file_path.split('/');
    // Drop the trailing component: it is the file name (or empty after a
    // trailing slash), not a directory.
    components.next_back();

    components.any(|dir| COMMON_DIRS.iter().any(|common| *common == dir))
}
637
638/// Compute confidence score for a potentially dead function.
639///
640/// Higher confidence means more likely to be truly dead.
641/// Uses balanced scoring starting from neutral (0.5) with symmetric
642/// increases and decreases based on multiple factors.
643///
644/// # Scoring Factors
645///
646/// **Increases (more likely dead):**
647/// - No callers at all: +0.2
648/// - Calls other dead functions: +0.1 per dead callee (capped at +0.3)
649/// - Private naming convention (_prefix): +0.1
650/// - Not in common module paths: +0.1
651///
652/// **Decreases (less likely dead):**
653/// - Public naming convention (PascalCase): -0.15
654/// - Very short name (likely utility/interface method): -0.1
655/// - Factory/builder pattern: -0.15
656/// - In public module path (/api/, /public/): -0.2
657fn compute_confidence(
658    func: &FunctionRef,
659    graph: &CallGraph,
660    reachable: &HashSet<FunctionRef>,
661) -> f64 {
662    let mut confidence: f64 = 0.5; // Start neutral
663    let name = &func.name;
664
665    // === INCREASE confidence (more likely dead) ===
666
667    // Has no callers at all - strong indicator of dead code
668    if !graph.callers.contains_key(func) {
669        confidence += 0.2;
670    }
671
672    // Calls other dead functions - if callees are dead, caller is likely dead too
673    if let Some(callees) = graph.callees.get(func) {
674        let dead_callees = callees.iter().filter(|c| !reachable.contains(*c)).count();
675        if dead_callees > 0 {
676            // Scale bonus: +0.1 per dead callee, capped at +0.3
677            confidence += 0.1 * (dead_callees.min(3) as f64);
678        }
679    }
680
681    // Private naming convention (single underscore prefix, not dunder)
682    // Private functions are more likely to be dead if unreachable
683    if name.starts_with('_') && !name.starts_with("__") {
684        confidence += 0.1;
685    }
686
687    // Not in commonly called module paths
688    if !is_in_common_module_path(&func.file) {
689        confidence += 0.1;
690    }
691
692    // === DECREASE confidence (less likely dead) ===
693
694    // Public naming convention (PascalCase) - may be called externally
695    if name.chars().next().map(|c| c.is_uppercase()).unwrap_or(false) {
696        confidence -= 0.15;
697    }
698
699    // Very short name (likely utility or interface method)
700    if name.len() <= 3 {
701        confidence -= 0.1;
702    }
703
704    // Factory/builder pattern - often called via dependency injection
705    if is_likely_factory(name) {
706        confidence -= 0.15;
707    }
708
709    // In public module path - higher chance of external usage
710    if func.file.contains("/api/") || func.file.contains("/public/") {
711        confidence -= 0.2;
712    }
713
714    confidence.clamp(0.0, 1.0)
715}
716
717/// Determine why a function is considered dead.
718fn determine_dead_reason(
719    func: &FunctionRef,
720    graph: &CallGraph,
721    reachable: &HashSet<FunctionRef>,
722) -> DeadReason {
723    // Check if never called
724    if !graph.callers.contains_key(func) {
725        return DeadReason::NeverCalled;
726    }
727
728    // Check if only called by other dead functions
729    if let Some(callers) = graph.callers.get(func) {
730        let live_callers = callers.iter().filter(|c| reachable.contains(*c)).count();
731        if live_callers == 0 {
732            return DeadReason::CalledOnlyByDead;
733        }
734    }
735
736    DeadReason::Unreachable
737}
738
739#[cfg(test)]
740mod tests {
741    use super::*;
742    use crate::callgraph::types::CallEdge;
743
744    fn create_test_graph() -> CallGraph {
745        let mut graph = CallGraph::default();
746
747        // main -> helper -> utility
748        // orphan_func (not called)
749        // test_something (entry point)
750        // dead_island -> dead_helper (isolated)
751
752        let main_ref = FunctionRef {
753            file: "main.py".to_string(),
754            name: "main".to_string(),
755            qualified_name: Some("main.main".to_string()),
756        };
757        let helper_ref = FunctionRef {
758            file: "main.py".to_string(),
759            name: "helper".to_string(),
760            qualified_name: Some("main.helper".to_string()),
761        };
762        let utility_ref = FunctionRef {
763            file: "utils.py".to_string(),
764            name: "utility".to_string(),
765            qualified_name: Some("utils.utility".to_string()),
766        };
767        let orphan_ref = FunctionRef {
768            file: "orphan.py".to_string(),
769            name: "orphan_func".to_string(),
770            qualified_name: Some("orphan.orphan_func".to_string()),
771        };
772        let test_ref = FunctionRef {
773            file: "test_main.py".to_string(),
774            name: "test_something".to_string(),
775            qualified_name: Some("test_main.test_something".to_string()),
776        };
777        let dead_island_ref = FunctionRef {
778            file: "dead.py".to_string(),
779            name: "dead_island".to_string(),
780            qualified_name: Some("dead.dead_island".to_string()),
781        };
782        let dead_helper_ref = FunctionRef {
783            file: "dead.py".to_string(),
784            name: "dead_helper".to_string(),
785            qualified_name: Some("dead.dead_helper".to_string()),
786        };
787
788        graph.edges.push(CallEdge {
789            caller: main_ref.clone(),
790            callee: helper_ref.clone(),
791            call_line: 5,
792        });
793        graph.edges.push(CallEdge {
794            caller: helper_ref.clone(),
795            callee: utility_ref.clone(),
796            call_line: 10,
797        });
798        graph.edges.push(CallEdge {
799            caller: test_ref.clone(),
800            callee: helper_ref.clone(),
801            call_line: 3,
802        });
803        graph.edges.push(CallEdge {
804            caller: dead_island_ref.clone(),
805            callee: dead_helper_ref.clone(),
806            call_line: 2,
807        });
808        // orphan_ref has no edges (never called, calls nothing)
809
810        // Need to add orphan to the graph somehow - add a self-reference edge
811        // Actually, for testing, let's add it calling utility
812        graph.edges.push(CallEdge {
813            caller: orphan_ref.clone(),
814            callee: utility_ref.clone(),
815            call_line: 1,
816        });
817
818        graph.build_indexes();
819        graph
820    }
821
822    #[test]
823    fn test_detect_entry_points() {
824        let graph = create_test_graph();
825        let entry_points = detect_entry_points_with_config(&graph, &DeadCodeConfig::default());
826
827        // Should detect main and test_something as entry points
828        let names: Vec<_> = entry_points.iter().map(|f| f.name.as_str()).collect();
829        assert!(names.contains(&"main"));
830        assert!(names.contains(&"test_something"));
831    }
832
833    #[test]
834    fn test_classify_entry_point() {
835        assert_eq!(classify_entry_point("main"), Some(EntryPointKind::Main));
836        assert_eq!(classify_entry_point("test_foo"), Some(EntryPointKind::Test));
837        assert_eq!(
838            classify_entry_point("cmd_deploy"),
839            Some(EntryPointKind::CliHandler)
840        );
841        assert_eq!(
842            classify_entry_point("api_users"),
843            Some(EntryPointKind::ApiEndpoint)
844        );
845        assert_eq!(
846            classify_entry_point("__init__"),
847            Some(EntryPointKind::PythonDunder)
848        );
849        assert_eq!(
850            classify_entry_point("setup"),
851            Some(EntryPointKind::FrameworkHook)
852        );
853        assert_eq!(classify_entry_point("random_name"), None);
854    }
855
856    #[test]
857    fn test_classify_entry_point_pytest_hooks() {
858        // BUG-006 fix: pytest plugin hooks should be classified as FrameworkHook
859        // These are dynamically called by pytest and should not be reported as dead code
860
861        // Common pytest plugin hooks
862        assert_eq!(
863            classify_entry_point("pytest_configure"),
864            Some(EntryPointKind::FrameworkHook)
865        );
866        assert_eq!(
867            classify_entry_point("pytest_collection"),
868            Some(EntryPointKind::FrameworkHook)
869        );
870        assert_eq!(
871            classify_entry_point("pytest_runtest_setup"),
872            Some(EntryPointKind::FrameworkHook)
873        );
874        assert_eq!(
875            classify_entry_point("pytest_runtest_teardown"),
876            Some(EntryPointKind::FrameworkHook)
877        );
878        assert_eq!(
879            classify_entry_point("pytest_sessionstart"),
880            Some(EntryPointKind::FrameworkHook)
881        );
882        assert_eq!(
883            classify_entry_point("pytest_sessionfinish"),
884            Some(EntryPointKind::FrameworkHook)
885        );
886        assert_eq!(
887            classify_entry_point("pytest_addoption"),
888            Some(EntryPointKind::FrameworkHook)
889        );
890        assert_eq!(
891            classify_entry_point("pytest_collection_modifyitems"),
892            Some(EntryPointKind::FrameworkHook)
893        );
894        assert_eq!(
895            classify_entry_point("pytest_generate_tests"),
896            Some(EntryPointKind::FrameworkHook)
897        );
898
899        // Conftest hooks
900        assert_eq!(
901            classify_entry_point("conftest_setup"),
902            Some(EntryPointKind::FrameworkHook)
903        );
904        assert_eq!(
905            classify_entry_point("conftest_teardown"),
906            Some(EntryPointKind::FrameworkHook)
907        );
908
909        // Ensure other patterns are not affected
910        assert_eq!(classify_entry_point("test_pytest_works"), Some(EntryPointKind::Test));
911        assert_eq!(classify_entry_point("regular_function"), None);
912    }
913
914    #[test]
915    fn test_is_likely_callback() {
916        // Positive cases - should be detected as callbacks
917        assert!(is_likely_callback("on_click"));
918        assert!(is_likely_callback("on_submit"));
919        assert!(is_likely_callback("onClick"));
920        assert!(is_likely_callback("onSubmit"));
921        assert!(is_likely_callback("OnClick"));
922        assert!(is_likely_callback("OnSubmit"));
923        assert!(is_likely_callback("button_callback"));
924        assert!(is_likely_callback("MyHandler"));
925        assert!(is_likely_callback("handleClick"));
926        assert!(is_likely_callback("handleSubmit"));
927        assert!(is_likely_callback("handle_click"));
928        assert!(is_likely_callback("event_listener"));
929        assert!(is_likely_callback("EventListener"));
930        assert!(is_likely_callback("MyCallback"));
931
932        // Negative cases - should NOT be detected as callbacks
933        assert!(!is_likely_callback("process_data"));
934        assert!(!is_likely_callback("calculate"));
935    }
936
937    #[test]
938    fn test_callback_detection_not_too_broad() {
939        // BUG-001: These common English words should NOT be detected as callbacks
940        // They were false positives due to overly broad starts_with("on")
941        assert!(!is_likely_callback("once"));
942        assert!(!is_likely_callback("online"));
943        assert!(!is_likely_callback("only"));
944        assert!(!is_likely_callback("ongoing"));
945        assert!(!is_likely_callback("onward"));
946        assert!(!is_likely_callback("onset"));
947
948        // PascalCase variants should also not match
949        assert!(!is_likely_callback("Once"));
950        assert!(!is_likely_callback("Online"));
951        assert!(!is_likely_callback("Only"));
952        assert!(!is_likely_callback("Ongoing"));
953
954        // "handle" as part of other words should not match
955        // Note: "handler" does NOT end with "Handler" (uppercase H), so it correctly does not match
956        assert!(!is_likely_callback("handler"));
957        assert!(!is_likely_callback("handling"));
958        assert!(!is_likely_callback("handled"));
959
960        // Edge cases - too short
961        assert!(!is_likely_callback("on"));
962        assert!(!is_likely_callback("On"));
963
964        // But valid callbacks should still work
965        assert!(is_likely_callback("onClick"));
966        assert!(is_likely_callback("on_click"));
967        assert!(is_likely_callback("OnClick"));
968        assert!(is_likely_callback("handleClick"));
969        assert!(is_likely_callback("handle_click"));
970    }
971
972    #[test]
973    fn test_is_likely_factory() {
974        assert!(is_likely_factory("create_user"));
975        assert!(is_likely_factory("make_config"));
976        assert!(is_likely_factory("build_query"));
977        assert!(is_likely_factory("UserFactory"));
978        assert!(!is_likely_factory("process_user"));
979    }
980
981    #[test]
982    fn test_is_likely_public_api() {
983        // Positive cases - should be detected as public API
984
985        // Getters - snake_case with underscore
986        assert!(is_likely_public_api("get_user"));
987        assert!(is_likely_public_api("get_value"));
988        assert!(is_likely_public_api("get_config"));
989
990        // Getters - camelCase
991        assert!(is_likely_public_api("getUser"));
992        assert!(is_likely_public_api("getValue"));
993        assert!(is_likely_public_api("getConfig"));
994
995        // Setters - snake_case with underscore
996        assert!(is_likely_public_api("set_user"));
997        assert!(is_likely_public_api("set_value"));
998        assert!(is_likely_public_api("set_config"));
999
1000        // Setters - camelCase
1001        assert!(is_likely_public_api("setUser"));
1002        assert!(is_likely_public_api("setValue"));
1003        assert!(is_likely_public_api("setConfig"));
1004
1005        // Boolean accessors
1006        assert!(is_likely_public_api("is_valid"));
1007        assert!(is_likely_public_api("has_data"));
1008        assert!(is_likely_public_api("can_proceed"));
1009        assert!(is_likely_public_api("should_retry"));
1010
1011        // Builder/converter patterns
1012        assert!(is_likely_public_api("with_timeout"));
1013        assert!(is_likely_public_api("from_bytes"));
1014        assert!(is_likely_public_api("to_string"));
1015        assert!(is_likely_public_api("as_ref"));
1016
1017        // PascalCase (public types/classes)
1018        assert!(is_likely_public_api("UserManager"));
1019        assert!(is_likely_public_api("Config"));
1020    }
1021
1022    #[test]
1023    fn test_public_api_detection_not_too_broad() {
1024        // BUG-002: These common English words should NOT be detected as public API
1025        // They were false positives due to overly broad starts_with("get")/starts_with("set")
1026
1027        // get* false positives
1028        assert!(!is_likely_public_api("gettext"));
1029        assert!(!is_likely_public_api("getter"));
1030        assert!(!is_likely_public_api("getaway"));
1031        assert!(!is_likely_public_api("getopt"));
1032        assert!(!is_likely_public_api("getenv")); // lowercase 'e' after "get"
1033
1034        // set* false positives
1035        assert!(!is_likely_public_api("settings"));
1036        assert!(!is_likely_public_api("settle"));
1037        assert!(!is_likely_public_api("setup"));
1038        assert!(!is_likely_public_api("setter"));
1039        assert!(!is_likely_public_api("setback"));
1040
1041        // Edge cases - too short
1042        assert!(!is_likely_public_api("get"));
1043        assert!(!is_likely_public_api("set"));
1044
1045        // Private functions (underscore prefix)
1046        assert!(!is_likely_public_api("_get_value"));
1047        assert!(!is_likely_public_api("_set_value"));
1048        assert!(!is_likely_public_api("_private"));
1049
1050        // Regular function names (not public API patterns)
1051        assert!(!is_likely_public_api("process_data"));
1052        assert!(!is_likely_public_api("calculate"));
1053        assert!(!is_likely_public_api("helper"));
1054    }
1055
1056    #[test]
1057    fn test_analyze_dead_code() {
1058        let graph = create_test_graph();
1059        let result = analyze_dead_code(&graph);
1060
1061        // dead_island and dead_helper should be detected as dead
1062        // orphan_func might be detected depending on heuristics
1063        assert!(result.total_dead > 0);
1064
1065        // Check stats
1066        assert!(result.stats.entry_point_count > 0);
1067        assert!(result.stats.reachable_count > 0);
1068    }
1069
1070    #[test]
1071    fn test_compute_reachability() {
1072        let graph = create_test_graph();
1073        let entry_points = detect_entry_points_with_config(&graph, &DeadCodeConfig::default());
1074        let reachable = compute_reachability(&graph, &entry_points);
1075
1076        // main, helper, utility should be reachable
1077        // test_something should be reachable (it's an entry point)
1078        assert!(reachable.iter().any(|f| f.name == "main"));
1079        assert!(reachable.iter().any(|f| f.name == "helper"));
1080        assert!(reachable.iter().any(|f| f.name == "utility"));
1081        assert!(reachable.iter().any(|f| f.name == "test_something"));
1082
1083        // dead_island should NOT be reachable
1084        assert!(!reachable.iter().any(|f| f.name == "dead_island"));
1085    }
1086
1087    #[test]
1088    fn test_check_false_positive() {
1089        let config = DeadCodeConfig::default();
1090
1091        let callback_func = FunctionRef {
1092            file: "test.py".to_string(),
1093            name: "on_click".to_string(),
1094            qualified_name: None,
1095        };
1096        let (is_fp, reason) = check_false_positive(&callback_func, &config);
1097        assert!(is_fp);
1098        assert_eq!(reason, "callback");
1099
1100        let normal_func = FunctionRef {
1101            file: "test.py".to_string(),
1102            name: "process_data".to_string(),
1103            qualified_name: None,
1104        };
1105        let (is_fp, _) = check_false_positive(&normal_func, &config);
1106        assert!(!is_fp);
1107    }
1108
1109    #[test]
1110    fn test_check_false_positive_no_redundant_handler_detection() {
1111        // BUG-007 fix: check_false_positive() should NOT have redundant on_/On checks
1112        // that bypass is_likely_callback()'s strict uppercase requirements.
1113        // Words like "Ongoing", "Online", "Once" should NOT be false positives.
1114        let config = DeadCodeConfig::default();
1115
1116        // These common words should NOT be marked as handlers/callbacks
1117        // because they don't follow the callback pattern (third char must be uppercase)
1118        let false_positive_cases = ["Ongoing", "Online", "Once", "Onward", "Onset"];
1119        for name in false_positive_cases {
1120            let func = FunctionRef {
1121                file: "test.py".to_string(),
1122                name: name.to_string(),
1123                qualified_name: None,
1124            };
1125            let (is_fp, reason) = check_false_positive(&func, &config);
1126            assert!(
1127                !is_fp,
1128                "{} should NOT be a false positive, but got reason: {}",
1129                name,
1130                reason
1131            );
1132        }
1133
1134        // Proper callbacks with uppercase third char SHOULD still be detected
1135        let valid_callbacks = ["OnClick", "OnSubmit", "OnChange", "on_click", "onClick"];
1136        for name in valid_callbacks {
1137            let func = FunctionRef {
1138                file: "test.py".to_string(),
1139                name: name.to_string(),
1140                qualified_name: None,
1141            };
1142            let (is_fp, reason) = check_false_positive(&func, &config);
1143            assert!(
1144                is_fp && reason == "callback",
1145                "{} should be detected as callback, but got: is_fp={}, reason={}",
1146                name,
1147                is_fp,
1148                reason
1149            );
1150        }
1151
1152        // Event patterns (not covered by is_likely_callback) should still be detected
1153        let event_handlers = ["user_event", "MouseEvent", "handle_event", "KeyboardEvent"];
1154        for name in event_handlers {
1155            let func = FunctionRef {
1156                file: "test.py".to_string(),
1157                name: name.to_string(),
1158                qualified_name: None,
1159            };
1160            let (is_fp, _) = check_false_positive(&func, &config);
1161            assert!(
1162                is_fp,
1163                "{} should be detected as false positive (event handler)",
1164                name
1165            );
1166        }
1167    }
1168
1169    #[test]
1170    fn test_dead_reason_classification() {
1171        let mut graph = CallGraph::default();
1172
1173        let caller = FunctionRef {
1174            file: "a.py".to_string(),
1175            name: "caller".to_string(),
1176            qualified_name: None,
1177        };
1178        let callee = FunctionRef {
1179            file: "a.py".to_string(),
1180            name: "callee".to_string(),
1181            qualified_name: None,
1182        };
1183        let orphan = FunctionRef {
1184            file: "a.py".to_string(),
1185            name: "orphan".to_string(),
1186            qualified_name: None,
1187        };
1188
1189        graph.edges.push(CallEdge {
1190            caller: caller.clone(),
1191            callee: callee.clone(),
1192            call_line: 1,
1193        });
1194        graph.build_indexes();
1195
1196        let mut reachable = HashSet::new();
1197        reachable.insert(caller.clone());
1198
1199        // orphan is never called
1200        let reason = determine_dead_reason(&orphan, &graph, &reachable);
1201        assert_eq!(reason, DeadReason::NeverCalled);
1202
1203        // callee is called only by reachable caller, so actually not dead
1204        // But if we make caller not reachable...
1205        let empty_reachable = HashSet::new();
1206        let reason = determine_dead_reason(&callee, &graph, &empty_reachable);
1207        assert_eq!(reason, DeadReason::CalledOnlyByDead);
1208    }
1209
1210    #[test]
1211    fn test_config_min_confidence() {
1212        let graph = create_test_graph();
1213
1214        // With high confidence threshold, should filter more
1215        let config = DeadCodeConfig {
1216            min_confidence: 0.99,
1217            ..Default::default()
1218        };
1219        let result = analyze_dead_code_with_config(&graph, &config);
1220
1221        // With lower threshold, should report more
1222        let config_low = DeadCodeConfig {
1223            min_confidence: 0.1,
1224            ..Default::default()
1225        };
1226        let result_low = analyze_dead_code_with_config(&graph, &config_low);
1227
1228        // Lower threshold should find same or more dead code
1229        assert!(result_low.total_dead >= result.total_dead);
1230    }
1231
1232    #[test]
1233    fn test_user_defined_filter_patterns() {
1234        let graph = create_test_graph();
1235
1236        // Add custom filter pattern
1237        let config = DeadCodeConfig {
1238            filter_patterns: vec!["orphan".to_string()],
1239            ..Default::default()
1240        };
1241
1242        let result = analyze_dead_code_with_config(&graph, &config);
1243
1244        // orphan_func should be filtered out
1245        assert!(!result
1246            .dead_functions
1247            .iter()
1248            .any(|f| f.name.contains("orphan")));
1249    }
1250
1251    #[test]
1252    fn test_include_public_api_patterns_opt_in() {
1253        // BUG-003 fix verification: is_likely_public_api() is too permissive
1254        // and should be opt-in via config.include_public_api_patterns
1255
1256        let default_config = DeadCodeConfig::default();
1257        assert!(!default_config.include_public_api_patterns); // Should be false by default
1258
1259        // PascalCase functions should NOT be entry points with default config
1260        // (Note: these don't start with Create/Make/Build/New which would be factories)
1261        assert!(!is_likely_entry_point("UserManager", Some(&default_config)));
1262        assert!(!is_likely_entry_point("Config", Some(&default_config)));
1263        assert!(!is_likely_entry_point("DatabaseConnection", Some(&default_config)));
1264
1265        // camelCase getters/setters should NOT be entry points with default config
1266        // (Note: get_/set_ with underscore ARE entry points via classify_entry_point
1267        // as API endpoints, so we test camelCase versions like getUser, setValue)
1268        assert!(!is_likely_entry_point("getUser", Some(&default_config)));
1269        assert!(!is_likely_entry_point("setValue", Some(&default_config)));
1270        assert!(!is_likely_entry_point("getData", Some(&default_config)));
1271        assert!(!is_likely_entry_point("setConfig", Some(&default_config)));
1272
1273        // Boolean accessors should NOT be entry points with default config
1274        // (is_, has_, can_ are in is_likely_public_api but NOT in classify_entry_point)
1275        assert!(!is_likely_entry_point("is_valid", Some(&default_config)));
1276        assert!(!is_likely_entry_point("has_data", Some(&default_config)));
1277        assert!(!is_likely_entry_point("can_proceed", Some(&default_config)));
1278
1279        // Builder/converter patterns should NOT be entry points with default config
1280        // (from_, to_, with_, as_ are in is_likely_public_api but NOT classify_entry_point)
1281        assert!(!is_likely_entry_point("from_bytes", Some(&default_config)));
1282        assert!(!is_likely_entry_point("to_string", Some(&default_config)));
1283        assert!(!is_likely_entry_point("with_timeout", Some(&default_config)));
1284        assert!(!is_likely_entry_point("as_ref", Some(&default_config)));
1285
1286        // But they SHOULD be entry points when include_public_api_patterns is enabled
1287        let permissive_config = DeadCodeConfig {
1288            include_public_api_patterns: true,
1289            ..Default::default()
1290        };
1291
1292        assert!(is_likely_entry_point("UserManager", Some(&permissive_config)));
1293        assert!(is_likely_entry_point("getUser", Some(&permissive_config)));
1294        assert!(is_likely_entry_point("is_valid", Some(&permissive_config)));
1295        assert!(is_likely_entry_point("from_bytes", Some(&permissive_config)));
1296        assert!(is_likely_entry_point("to_string", Some(&permissive_config)));
1297
1298        // Core entry points should ALWAYS work regardless of config
1299        assert!(is_likely_entry_point("main", Some(&default_config)));
1300        assert!(is_likely_entry_point("test_something", Some(&default_config)));
1301        assert!(is_likely_entry_point("onClick", Some(&default_config))); // callback
1302        assert!(is_likely_entry_point("create_user", Some(&default_config))); // factory (create_ prefix)
1303        assert!(is_likely_entry_point("get_user", Some(&default_config))); // API endpoint (get_ prefix)
1304    }
1305
1306    #[test]
1307    fn test_extra_entry_patterns() {
1308        // Test that extra_entry_patterns config works correctly
1309
1310        let config = DeadCodeConfig {
1311            extra_entry_patterns: vec!["plugin_".to_string(), "hook_".to_string()],
1312            ..Default::default()
1313        };
1314
1315        // Custom patterns should match
1316        assert!(is_likely_entry_point("plugin_load", Some(&config)));
1317        assert!(is_likely_entry_point("plugin_unload", Some(&config)));
1318        assert!(is_likely_entry_point("hook_before", Some(&config)));
1319        assert!(is_likely_entry_point("hook_after", Some(&config)));
1320
1321        // Without config, these should NOT match
1322        assert!(!is_likely_entry_point("plugin_load", Some(&DeadCodeConfig::default())));
1323        assert!(!is_likely_entry_point("hook_before", Some(&DeadCodeConfig::default())));
1324
1325        // Regular functions should still not match
1326        assert!(!is_likely_entry_point("process_data", Some(&config)));
1327        assert!(!is_likely_entry_point("helper", Some(&config)));
1328    }
1329
1330    #[test]
1331    fn test_balanced_confidence_scoring() {
1332        // BUG-008 fix verification: Confidence scoring should be balanced
1333        // - Start at 0.5 (neutral), not 1.0
1334        // - Short utility functions should NOT get high confidence
1335        // - Multiple factors should contribute symmetrically
1336
1337        let mut graph = CallGraph::default();
1338
1339        // Create test functions with different characteristics
1340        let short_func = FunctionRef {
1341            file: "utils.py".to_string(),
1342            name: "x".to_string(), // Very short name (1 char)
1343            qualified_name: None,
1344        };
1345
1346        let pascal_case_func = FunctionRef {
1347            file: "models.py".to_string(),
1348            name: "UserManager".to_string(), // PascalCase
1349            qualified_name: None,
1350        };
1351
1352        let private_func = FunctionRef {
1353            file: "internal.py".to_string(),
1354            name: "_helper".to_string(), // Private naming
1355            qualified_name: None,
1356        };
1357
1358        let api_func = FunctionRef {
1359            file: "src/api/routes.py".to_string(),
1360            name: "process_request".to_string(),
1361            qualified_name: None,
1362        };
1363
1364        let dead_caller = FunctionRef {
1365            file: "dead.py".to_string(),
1366            name: "dead_caller".to_string(),
1367            qualified_name: None,
1368        };
1369
1370        let dead_callee1 = FunctionRef {
1371            file: "dead.py".to_string(),
1372            name: "dead_callee1".to_string(),
1373            qualified_name: None,
1374        };
1375
1376        let dead_callee2 = FunctionRef {
1377            file: "dead.py".to_string(),
1378            name: "dead_callee2".to_string(),
1379            qualified_name: None,
1380        };
1381
1382        let dead_callee3 = FunctionRef {
1383            file: "dead.py".to_string(),
1384            name: "dead_callee3".to_string(),
1385            qualified_name: None,
1386        };
1387
1388        let factory_func = FunctionRef {
1389            file: "factories.py".to_string(),
1390            name: "create_user".to_string(), // Factory pattern
1391            qualified_name: None,
1392        };
1393
1394        // Add edges for dead_caller calling multiple dead functions
1395        graph.edges.push(CallEdge {
1396            caller: dead_caller.clone(),
1397            callee: dead_callee1.clone(),
1398            call_line: 1,
1399        });
1400        graph.edges.push(CallEdge {
1401            caller: dead_caller.clone(),
1402            callee: dead_callee2.clone(),
1403            call_line: 2,
1404        });
1405        graph.edges.push(CallEdge {
1406            caller: dead_caller.clone(),
1407            callee: dead_callee3.clone(),
1408            call_line: 3,
1409        });
1410
1411        graph.build_indexes();
1412
1413        // Empty reachable set (all functions are "dead" for testing purposes)
1414        let reachable = HashSet::new();
1415
1416        // Test 1: Short function should NOT get high confidence (was 0.8 with old code)
1417        // With balanced scoring: 0.5 (base) + 0.2 (no callers) + 0.1 (not in common path) - 0.1 (short) = 0.7
1418        let short_conf = compute_confidence(&short_func, &graph, &reachable);
1419        assert!(
1420            short_conf < 0.8,
1421            "Short function 'x' should have confidence < 0.8, got {}",
1422            short_conf
1423        );
1424        assert!(
1425            short_conf >= 0.6 && short_conf <= 0.75,
1426            "Short function 'x' should have balanced confidence around 0.7, got {}",
1427            short_conf
1428        );
1429
1430        // Test 2: PascalCase function should have lower confidence
1431        // 0.5 (base) + 0.2 (no callers) + 0.1 (not in common path) - 0.15 (PascalCase) = 0.65
1432        let pascal_conf = compute_confidence(&pascal_case_func, &graph, &reachable);
1433        assert!(
1434            pascal_conf < short_conf,
1435            "PascalCase function should have lower confidence than short function"
1436        );
1437
1438        // Test 3: Private function should have higher confidence
1439        // 0.5 (base) + 0.2 (no callers) + 0.1 (not in common path) + 0.1 (private) = 0.9
1440        let private_conf = compute_confidence(&private_func, &graph, &reachable);
1441        assert!(
1442            private_conf > short_conf,
1443            "Private function should have higher confidence than short function"
1444        );
1445
1446        // Test 4: Function in /api/ should have lower confidence
1447        // 0.5 (base) + 0.2 (no callers) - 0.2 (in /api/) = 0.5
1448        // Note: is_in_common_module_path returns true, so no +0.1 for that
1449        let api_conf = compute_confidence(&api_func, &graph, &reachable);
1450        assert!(
1451            api_conf <= 0.55,
1452            "Function in /api/ path should have confidence around 0.5, got {}",
1453            api_conf
1454        );
1455
1456        // Test 5: Function calling multiple dead functions should have high confidence
1457        // 0.5 (base) + 0.1 (not in common path) + 0.3 (3 dead callees, capped) = 0.9
1458        // Note: has callers/callees so no +0.2 for no callers
1459        let dead_caller_conf = compute_confidence(&dead_caller, &graph, &reachable);
1460        assert!(
1461            dead_caller_conf >= 0.8,
1462            "Function calling 3 dead functions should have confidence >= 0.8, got {}",
1463            dead_caller_conf
1464        );
1465
1466        // Test 6: Factory function should have reduced confidence
1467        // 0.5 (base) + 0.2 (no callers) + 0.1 (not in common path) - 0.15 (factory) = 0.65
1468        let factory_conf = compute_confidence(&factory_func, &graph, &reachable);
1469        assert!(
1470            factory_conf < 0.7,
1471            "Factory function should have confidence < 0.7, got {}",
1472            factory_conf
1473        );
1474
1475        // Test 7: Verify base confidence is now 0.5 (neutral) instead of 1.0
1476        // A function with no special characteristics should be around 0.5 + 0.2 (no callers) = 0.7
1477        let neutral_func = FunctionRef {
1478            file: "src/lib/module.py".to_string(), // In /lib/ path
1479            name: "process".to_string(),           // Generic name, 7 chars
1480            qualified_name: None,
1481        };
1482        let neutral_conf = compute_confidence(&neutral_func, &graph, &reachable);
1483        // 0.5 (base) + 0.2 (no callers) = 0.7 (in common path, so no +0.1)
1484        assert!(
1485            neutral_conf >= 0.6 && neutral_conf <= 0.75,
1486            "Neutral function should have confidence around 0.7, got {}",
1487            neutral_conf
1488        );
1489    }
1490
1491    #[test]
1492    fn test_is_in_common_module_path() {
1493        // Test the helper function for module path detection
1494        assert!(is_in_common_module_path("/project/api/routes.py"));
1495        assert!(is_in_common_module_path("src/public/index.html"));
1496        assert!(is_in_common_module_path("/app/lib/utils.py"));
1497        assert!(is_in_common_module_path("src/handlers/user.py"));
1498        assert!(is_in_common_module_path("/project/routes/auth.ts"));
1499        assert!(is_in_common_module_path("app/controllers/main.rb"));
1500        assert!(is_in_common_module_path("src/endpoints/v1.py"));
1501        assert!(is_in_common_module_path("/app/views/home.py"));
1502        assert!(is_in_common_module_path("backend/services/auth.py"));
1503        assert!(is_in_common_module_path("cli/commands/deploy.py"));
1504
1505        // Non-common paths should return false
1506        assert!(!is_in_common_module_path("src/utils/helper.py"));
1507        assert!(!is_in_common_module_path("internal/processor.py"));
1508        assert!(!is_in_common_module_path("core/database.py"));
1509        assert!(!is_in_common_module_path("models/user.py"));
1510        assert!(!is_in_common_module_path("tests/test_main.py"));
1511    }
1512}