Skip to main content

perl_parser/dead_code/
mod.rs

1//! Dead code detection for Perl codebases (stub implementation)
2//!
3//! This module identifies unused code including unreachable code and unused symbols.
4//! Currently a stub implementation to demonstrate the architecture.
5
6use perl_workspace::workspace_index::{SymbolKind, WorkspaceIndex, fs_path_to_uri, uri_to_fs_path};
7use serde::{Deserialize, Serialize};
8use std::collections::HashSet;
9use std::path::{Path, PathBuf};
10
11/// Types of dead code detected during Perl script analysis
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
13pub enum DeadCodeType {
14    /// Subroutine defined but never called
15    UnusedSubroutine,
16    /// Variable declared but never used
17    UnusedVariable,
18    /// Constant defined but never referenced
19    UnusedConstant,
20    /// Package declared but never used
21    UnusedPackage,
22    /// Code that can never be executed
23    UnreachableCode,
24    /// Conditional branch that is never taken
25    DeadBranch,
26    /// Module imported but never used
27    UnusedImport,
28    /// Function exported but never used externally
29    UnusedExport,
30}
31
32/// A piece of dead code detected during analysis
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct DeadCode {
35    /// Type of dead code (subroutine, variable, etc.)
36    pub code_type: DeadCodeType,
37    /// Name of the dead code element if available
38    pub name: Option<String>,
39    /// File path where the dead code is located
40    pub file_path: PathBuf,
41    /// Starting line number (1-based)
42    pub start_line: usize,
43    /// Ending line number (1-based)
44    pub end_line: usize,
45    /// Human-readable explanation of why this is considered dead code
46    pub reason: String,
47    /// Confidence level (0.0-1.0) in the detection accuracy
48    pub confidence: f32,
49    /// Optional suggestion for fixing the dead code
50    pub suggestion: Option<String>,
51}
52
53/// Dead code analysis result for a Perl workspace
54#[derive(Debug, Serialize, Deserialize)]
55pub struct DeadCodeAnalysis {
56    /// List of all dead code instances found
57    pub dead_code: Vec<DeadCode>,
58    /// Statistical summary of dead code analysis
59    pub stats: DeadCodeStats,
60    /// Number of files analyzed in the workspace
61    pub files_analyzed: usize,
62    /// Total lines of code analyzed
63    pub total_lines: usize,
64}
65
66/// Statistical summary of dead code analysis results
67#[derive(Debug, Default, Serialize, Deserialize)]
68pub struct DeadCodeStats {
69    /// Number of unused subroutines detected
70    pub unused_subroutines: usize,
71    /// Number of unused variables detected
72    pub unused_variables: usize,
73    /// Number of unused constants detected
74    pub unused_constants: usize,
75    /// Number of unused packages detected
76    pub unused_packages: usize,
77    /// Number of unreachable code statements
78    pub unreachable_statements: usize,
79    /// Number of dead conditional branches
80    pub dead_branches: usize,
81    /// Total lines of dead code identified
82    pub total_dead_lines: usize,
83}
84
85/// Dead code detector
86pub struct DeadCodeDetector {
87    workspace_index: WorkspaceIndex,
88    entry_points: HashSet<PathBuf>,
89}
90
91impl DeadCodeDetector {
92    /// Create a new dead code detector with the given workspace index
93    ///
94    /// # Arguments
95    /// * `workspace_index` - Indexed workspace containing symbol definitions and references
96    pub fn new(workspace_index: WorkspaceIndex) -> Self {
97        Self { workspace_index, entry_points: HashSet::new() }
98    }
99
100    /// Add an entry point (main script)
101    pub fn add_entry_point(&mut self, path: PathBuf) {
102        self.entry_points.insert(path);
103    }
104
105    /// Analyze a single file for dead code
106    pub fn analyze_file(&self, file_path: &Path) -> Result<Vec<DeadCode>, String> {
107        let uri = fs_path_to_uri(file_path).map_err(|e| e.to_string())?;
108        let text = self
109            .workspace_index
110            .document_store()
111            .get_text(&uri)
112            .ok_or_else(|| "file not indexed".to_string())?;
113
114        let mut dead = Vec::new();
115        let mut block_depth = 0usize;
116        let mut terminator: Option<(usize, usize, String)> = None;
117
118        for (i, line) in text.lines().enumerate() {
119            let trimmed = line.trim();
120            let current_depth = block_depth;
121
122            if let Some((term_line, term_depth, term_kw)) = &terminator {
123                if current_depth < *term_depth {
124                    terminator = None;
125                } else if current_depth == *term_depth
126                    && !trimmed.is_empty()
127                    && !trimmed.starts_with('#')
128                    && !is_structural_line(trimmed)
129                {
130                    dead.push(DeadCode {
131                        code_type: DeadCodeType::UnreachableCode,
132                        name: None,
133                        file_path: file_path.to_path_buf(),
134                        start_line: i + 1,
135                        end_line: i + 1,
136                        reason: format!(
137                            "Code is unreachable after `{}` on line {}",
138                            term_kw, term_line
139                        ),
140                        confidence: 0.9,
141                        suggestion: Some("Remove or restructure this code".to_string()),
142                    });
143                    break;
144                }
145            }
146
147            if let Some(term_kw) = detect_unconditional_terminator(trimmed) {
148                terminator = Some((i + 1, current_depth, term_kw.to_string()));
149            }
150
151            block_depth += line.chars().filter(|&ch| ch == '{').count();
152            block_depth = block_depth.saturating_sub(line.chars().filter(|&ch| ch == '}').count());
153        }
154
155        // Dead branch detection: scan for constant-condition patterns.
156        detect_dead_branches(file_path, &text, &mut dead);
157
158        Ok(dead)
159    }
160
161    /// Analyze entire workspace for dead code
162    pub fn analyze_workspace(&self) -> DeadCodeAnalysis {
163        let docs = self.workspace_index.document_store().all_documents();
164        let mut dead_code = Vec::new();
165        let mut total_lines = 0;
166
167        // Per-file unreachable code
168        for doc in &docs {
169            total_lines += doc.text.lines().count();
170            if let Some(path) = uri_to_fs_path(&doc.uri) {
171                if let Ok(mut file_dead) = self.analyze_file(&path) {
172                    dead_code.append(&mut file_dead);
173                }
174            }
175        }
176
177        // Unused symbols across workspace
178        for sym in self.workspace_index.find_unused_symbols() {
179            let code_type = match sym.kind {
180                SymbolKind::Subroutine => DeadCodeType::UnusedSubroutine,
181                SymbolKind::Variable(_) => DeadCodeType::UnusedVariable,
182                SymbolKind::Constant => DeadCodeType::UnusedConstant,
183                SymbolKind::Package => DeadCodeType::UnusedPackage,
184                _ => continue,
185            };
186
187            let file_path = uri_to_fs_path(&sym.uri).unwrap_or_else(|| PathBuf::from(&sym.uri));
188
189            dead_code.push(DeadCode {
190                code_type,
191                name: Some(sym.name.clone()),
192                file_path,
193                start_line: sym.range.start.line as usize + 1,
194                end_line: sym.range.end.line as usize + 1,
195                reason: "Symbol is never used".to_string(),
196                confidence: 0.9,
197                suggestion: Some("Remove or use this symbol".to_string()),
198            });
199        }
200
201        // Compute stats
202        let mut stats = DeadCodeStats::default();
203        for item in &dead_code {
204            let lines = item.end_line.saturating_sub(item.start_line) + 1;
205            stats.total_dead_lines += lines;
206            match item.code_type {
207                DeadCodeType::UnusedSubroutine => stats.unused_subroutines += 1,
208                DeadCodeType::UnusedVariable => stats.unused_variables += 1,
209                DeadCodeType::UnusedConstant => stats.unused_constants += 1,
210                DeadCodeType::UnusedPackage => stats.unused_packages += 1,
211                DeadCodeType::UnreachableCode => stats.unreachable_statements += 1,
212                DeadCodeType::DeadBranch => stats.dead_branches += 1,
213                _ => {}
214            }
215        }
216
217        DeadCodeAnalysis { dead_code, stats, files_analyzed: docs.len(), total_lines }
218    }
219}
220
/// Report whether `trimmed` is pure block punctuation: one or more `}` or `;`
/// characters and nothing else. An empty string is not structural.
fn is_structural_line(trimmed: &str) -> bool {
    if trimmed.is_empty() {
        return false;
    }
    // Strip every leading `}` / `;`; the line is structural when nothing is left.
    trimmed.trim_start_matches(|ch: char| matches!(ch, '}' | ';')).is_empty()
}
224
225fn detect_unconditional_terminator(trimmed: &str) -> Option<&str> {
226    const TERMINATORS: [&str; 4] = ["return", "die", "exit", "CORE::exit"];
227
228    let first = trimmed
229        .split(|ch: char| ch.is_whitespace() || matches!(ch, ';' | '('))
230        .next()
231        .unwrap_or_default();
232    if !TERMINATORS.contains(&first) {
233        return None;
234    }
235
236    let after_terminator = &trimmed[first.len()..];
237    let remainder = match after_terminator.split_once('#') {
238        Some((before_comment, _)) => before_comment,
239        None => after_terminator,
240    }
241    .trim_start();
242    if contains_postfix_modifier(remainder) {
243        return None;
244    }
245
246    Some(first)
247}
248
249fn contains_postfix_modifier(remainder: &str) -> bool {
250    const POSTFIX_MODIFIERS: [&str; 7] =
251        ["if", "unless", "when", "while", "until", "for", "foreach"];
252    POSTFIX_MODIFIERS.iter().any(|keyword| contains_keyword(remainder, keyword))
253}
254
255fn contains_keyword(text: &str, keyword: &str) -> bool {
256    text.match_indices(keyword).any(|(idx, _)| {
257        let before = text[..idx].chars().next_back();
258        let after = text[idx + keyword.len()..].chars().next();
259        is_keyword_boundary(before) && is_keyword_boundary(after)
260    })
261}
262
/// Report whether `ch` can delimit a keyword: either there is no character
/// at all, or it is neither an ASCII letter/digit nor an underscore.
fn is_keyword_boundary(ch: Option<char>) -> bool {
    match ch {
        None => true,
        Some(c) => !(c.is_ascii_alphanumeric() || c == '_'),
    }
}
266
/// Returns `true` if `condition` is a trivially-false constant expression.
///
/// Matches the values Perl treats as false in boolean context when written
/// as a bare literal: `0` (also spelled `00`, `0.0`, …), the strings `"0"` /
/// `'0'`, the empty strings `""` / `''`, and `undef` — optionally wrapped in
/// parentheses, e.g. `(0)` or `( 0 )`. These are the standard idioms used to
/// write permanently-dead `if`/`while`/`elsif` blocks.
///
/// Strings such as `"0.0"` or `"00"` are *true* in Perl and are not matched.
///
/// Note: `for`/`foreach` are intentionally **not** guarded by this function.
/// `for (0) {}` iterates once with `$_ = 0`; it is a list iterator, not a
/// boolean guard, so it is never dead code.
fn is_always_false(condition: &str) -> bool {
    let c = condition.trim();
    if matches!(c, "0" | "\"\"" | "''" | "undef" | "\"0\"" | "'0'") {
        return true;
    }

    // Unsigned decimal zero written with extra digits or a decimal point.
    // Restricting the alphabet to digits and `.` keeps signs, exponents and
    // barewords out of the numeric path.
    let is_plain_number = !c.is_empty() && c.bytes().all(|b| b.is_ascii_digit() || b == b'.');
    if is_plain_number && c.parse::<f64>().is_ok_and(|n| n == 0.0) {
        return true;
    }

    // Parenthesised: judge the inner expression.
    c.starts_with('(') && c.ends_with(')') && is_always_false(&c[1..c.len() - 1])
}
280
/// Returns `true` if `condition` is a trivially-true constant expression.
///
/// Matches: any non-zero integer or float literal (`1`, `-3`, `2.5`), a
/// single non-empty quoted string literal other than `"0"` / `'0'`, and any
/// of those wrapped in parentheses (`(1)`).
///
/// The check is deliberately conservative:
/// - a candidate string literal that contains its own quote character is
///   rejected, because it may be several literals joined by an operator
///   (`"a" eq "b"`); this also rejects literals with escaped quotes;
/// - a double-quoted string containing `$` or `@` is rejected, because it may
///   interpolate a variable and so is not a constant;
/// - the numeric path requires at least one digit, because Rust's float
///   parser also accepts `inf`, `infinity` and `nan`, which are barewords
///   rather than numeric literals in Perl.
fn is_always_true(condition: &str) -> bool {
    let c = condition.trim();

    if c.bytes().any(|b| b.is_ascii_digit()) {
        // Non-zero integer literal
        if c.parse::<i64>().is_ok_and(|n| n != 0) {
            return true;
        }
        // Non-zero float literal
        if c.parse::<f64>().is_ok_and(|n| n != 0.0) {
            return true;
        }
    }

    // Quoted non-empty string that is not "0"
    for quote in ['"', '\''] {
        if c.len() > 2 && c.starts_with(quote) && c.ends_with(quote) {
            let inner = &c[1..c.len() - 1];
            // Another quote inside means this is not provably one literal.
            if inner.contains(quote) {
                return false;
            }
            // Double-quoted strings interpolate `$var` / `@var`.
            if quote == '"' && inner.contains(|ch: char| ch == '$' || ch == '@') {
                return false;
            }
            return inner != "0";
        }
    }

    // Parenthesised
    c.starts_with('(') && c.ends_with(')') && is_always_true(&c[1..c.len() - 1])
}
304
305/// Scan `text` for constant-condition dead branches and append `DeadBranch`
306/// entries to `out`.
307///
308/// Detects:
309/// - `if (0) { ... }`  — body is never executed
310/// - `while (0) { ... }` — loop body is never executed
311/// - `unless (1) { ... }` — equivalent to `if (0)`
312/// - `until (1) { ... }` — equivalent to `while (0)`
313/// - `else` block following an always-true `if (1)` — dead else branch
314///
315/// Uses a simple brace-counting heuristic to locate the block extent.
316/// Only fires for single-line condition + opening brace patterns (the most
317/// common idiom); multi-line conditions are skipped to avoid false positives.
318fn detect_dead_branches(file_path: &Path, text: &str, out: &mut Vec<DeadCode>) {
319    let lines: Vec<&str> = text.lines().collect();
320    let n = lines.len();
321    let mut i = 0;
322
323    while i < n {
324        let trimmed = lines[i].trim();
325
326        // Determine if this line opens a dead branch.
327        // We look for: KEYWORD WHITESPACE? ( CONDITION ) WHITESPACE? {
328        //
329        // `for` and `foreach` are intentionally excluded: they are list
330        // iterators in Perl, not boolean guards. `for (0) {}` executes once
331        // with $_ = 0; it is not dead code.
332        let dead_reason_and_keyword: Option<(String, &str)> = 'detect: {
333            for kw in &["if", "while", "elsif", "unless", "until"] {
334                let rest = match trimmed.strip_prefix(kw) {
335                    Some(r)
336                        if r.is_empty()
337                            || r.starts_with(|c: char| c.is_whitespace() || c == '(') =>
338                    {
339                        r.trim_start()
340                    }
341                    _ => continue,
342                };
343                // Extract balanced parentheses for the condition.
344                if !rest.starts_with('(') {
345                    continue;
346                }
347                let condition = extract_balanced_parens(rest);
348                let condition = match condition {
349                    Some(c) => c,
350                    None => continue,
351                };
352                let after_cond = rest[condition.len() + 2..].trim(); // skip '(' ... ')'
353                // Only fire if opening brace is on the same line.
354                if !after_cond.starts_with('{') && !after_cond.is_empty() {
355                    continue;
356                }
357                let inner = condition.trim();
358
359                let reason = if matches!(*kw, "unless" | "until") {
360                    // unless/until: body is dead when condition is always-true
361                    if is_always_true(inner) {
362                        Some(format!(
363                            "`{kw}` condition `{inner}` is always true — block is never executed"
364                        ))
365                    } else {
366                        None
367                    }
368                } else {
369                    // if/while/elsif: body is dead when condition is always-false
370                    if is_always_false(inner) {
371                        Some(format!(
372                            "`{kw}` condition `{inner}` is always false — block is never executed"
373                        ))
374                    } else {
375                        None
376                    }
377                };
378
379                if let Some(r) = reason {
380                    break 'detect Some((r, *kw));
381                }
382            }
383
384            // Also check `else` block following always-true `if`.
385            // We handle this by looking back at the previously emitted entry
386            // or by a simple heuristic: `} else {` on its own line after an
387            // always-true if that we tracked.  This is deferred to a follow-up;
388            // for now focus on always-false/always-true keyword conditions.
389            None
390        };
391
392        if let Some((reason, _kw)) = dead_reason_and_keyword {
393            // Find the closing brace of this block by counting brace depth.
394            let block_start = i + 1; // 1-based
395            let end_line = find_block_end(&lines, i);
396            out.push(DeadCode {
397                code_type: DeadCodeType::DeadBranch,
398                name: None,
399                file_path: file_path.to_path_buf(),
400                start_line: block_start,
401                end_line,
402                reason,
403                confidence: 0.9,
404                suggestion: Some("Remove this dead branch or fix the condition".to_string()),
405            });
406            // Skip to after the block to avoid nested false positives.
407            i = end_line;
408            continue;
409        }
410
411        i += 1;
412    }
413}
414
/// Return the text enclosed by the balanced `(...)` group that opens at the
/// very start of `s`, without the outer parentheses.
///
/// Yields `None` when `s` does not begin with `(` or when the opening
/// parenthesis is never closed.
fn extract_balanced_parens(s: &str) -> Option<&str> {
    let body = s.strip_prefix('(')?;

    // One group is already open: the parenthesis that was just stripped.
    let mut open = 1usize;
    for (offset, ch) in body.char_indices() {
        if ch == '(' {
            open += 1;
        } else if ch == ')' {
            open -= 1;
            if open == 0 {
                return Some(&body[..offset]);
            }
        }
    }
    None
}
437
/// Locate the closing `}` of the block that opens on `lines[open_line]`
/// (`open_line` is a 0-based index) and return its 1-based line number.
///
/// Braces are simply counted from `open_line` onwards; the first `}` that
/// brings the count back to zero ends the block, which may be on the opening
/// line itself. When no such brace exists the result is `lines.len()`.
fn find_block_end(lines: &[&str], open_line: usize) -> usize {
    let mut depth = 0i32;

    // Offset, relative to `open_line`, of the first line on which a `}`
    // returns the running brace count to zero.
    let closing_offset = lines.iter().skip(open_line).position(|line| {
        line.bytes().any(|byte| match byte {
            b'{' => {
                depth += 1;
                false
            }
            b'}' => {
                depth -= 1;
                depth == 0
            }
            _ => false,
        })
    });

    match closing_offset {
        Some(offset) => open_line + offset + 1, // 1-based
        None => lines.len(),                    // fallback: end of file
    }
}
460
461/// Generate a report from dead code analysis
462pub fn generate_report(analysis: &DeadCodeAnalysis) -> String {
463    let mut report = String::new();
464
465    report.push_str("=== Dead Code Analysis Report ===\n\n");
466
467    report.push_str(&format!("Files analyzed: {}\n", analysis.files_analyzed));
468    report.push_str(&format!("Total lines: {}\n", analysis.total_lines));
469    report.push_str(&format!("Dead code items: {}\n\n", analysis.dead_code.len()));
470
471    report.push_str("Statistics:\n");
472    report.push_str(&format!("  Unused subroutines: {}\n", analysis.stats.unused_subroutines));
473    report.push_str(&format!("  Unused variables: {}\n", analysis.stats.unused_variables));
474    report.push_str(&format!("  Unused constants: {}\n", analysis.stats.unused_constants));
475    report.push_str(&format!("  Unused packages: {}\n", analysis.stats.unused_packages));
476    report.push_str(&format!(
477        "  Unreachable statements: {}\n",
478        analysis.stats.unreachable_statements
479    ));
480    report.push_str(&format!("  Dead branches: {}\n", analysis.stats.dead_branches));
481    report.push_str(&format!("  Total dead lines: {}\n", analysis.stats.total_dead_lines));
482
483    report
484}