Skip to main content

perl_parser/dead_code/
mod.rs

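//! Dead code detection for Perl codebases (heuristic implementation)
//!
//! This module identifies unused code, including unreachable statements,
//! constant-condition branches, and unused symbols.
//! The per-file checks are lightweight, line-based heuristics rather than a full
//! syntax-tree analysis; they exist to demonstrate the architecture.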
5
6use perl_workspace::workspace_index::{SymbolKind, WorkspaceIndex, fs_path_to_uri, uri_to_fs_path};
7use serde::{Deserialize, Serialize};
8use std::collections::HashSet;
9use std::path::{Path, PathBuf};
10
11/// Types of dead code detected during Perl script analysis
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
13pub enum DeadCodeType {
14    /// Subroutine defined but never called
15    UnusedSubroutine,
16    /// Variable declared but never used
17    UnusedVariable,
18    /// Constant defined but never referenced
19    UnusedConstant,
20    /// Package declared but never used
21    UnusedPackage,
22    /// Code that can never be executed
23    UnreachableCode,
24    /// Conditional branch that is never taken
25    DeadBranch,
26    /// Module imported but never used
27    UnusedImport,
28    /// Function exported but never used externally
29    UnusedExport,
30}
31
32/// A piece of dead code detected during analysis
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct DeadCode {
35    /// Type of dead code (subroutine, variable, etc.)
36    pub code_type: DeadCodeType,
37    /// Name of the dead code element if available
38    pub name: Option<String>,
39    /// File path where the dead code is located
40    pub file_path: PathBuf,
41    /// Starting line number (1-based)
42    pub start_line: usize,
43    /// Ending line number (1-based)
44    pub end_line: usize,
45    /// Human-readable explanation of why this is considered dead code
46    pub reason: String,
47    /// Confidence level (0.0-1.0) in the detection accuracy
48    pub confidence: f32,
49    /// Optional suggestion for fixing the dead code
50    pub suggestion: Option<String>,
51}
52
53/// Dead code analysis result for a Perl workspace
54#[derive(Debug, Serialize, Deserialize)]
55pub struct DeadCodeAnalysis {
56    /// List of all dead code instances found
57    pub dead_code: Vec<DeadCode>,
58    /// Statistical summary of dead code analysis
59    pub stats: DeadCodeStats,
60    /// Number of files analyzed in the workspace
61    pub files_analyzed: usize,
62    /// Total lines of code analyzed
63    pub total_lines: usize,
64}
65
66/// Statistical summary of dead code analysis results
67#[derive(Debug, Default, Serialize, Deserialize)]
68pub struct DeadCodeStats {
69    /// Number of unused subroutines detected
70    pub unused_subroutines: usize,
71    /// Number of unused variables detected
72    pub unused_variables: usize,
73    /// Number of unused constants detected
74    pub unused_constants: usize,
75    /// Number of unused packages detected
76    pub unused_packages: usize,
77    /// Number of unreachable code statements
78    pub unreachable_statements: usize,
79    /// Number of dead conditional branches
80    pub dead_branches: usize,
81    /// Total lines of dead code identified
82    pub total_dead_lines: usize,
83}
84
85/// Dead code detector
86pub struct DeadCodeDetector {
87    workspace_index: WorkspaceIndex,
88    entry_points: HashSet<PathBuf>,
89}
90
91impl DeadCodeDetector {
92    /// Create a new dead code detector with the given workspace index
93    ///
94    /// # Arguments
95    /// * `workspace_index` - Indexed workspace containing symbol definitions and references
96    pub fn new(workspace_index: WorkspaceIndex) -> Self {
97        Self { workspace_index, entry_points: HashSet::new() }
98    }
99
100    /// Add an entry point (main script)
101    pub fn add_entry_point(&mut self, path: PathBuf) {
102        self.entry_points.insert(path);
103    }
104
105    /// Analyze a single file for dead code
106    pub fn analyze_file(&self, file_path: &Path) -> Result<Vec<DeadCode>, String> {
107        let uri = fs_path_to_uri(file_path).map_err(|e| e.to_string())?;
108        let text = self
109            .workspace_index
110            .document_store()
111            .get_text(&uri)
112            .ok_or_else(|| "file not indexed".to_string())?;
113
114        let mut dead = Vec::new();
115        let mut terminator: Option<(usize, String)> = None;
116
117        for (i, line) in text.lines().enumerate() {
118            let trimmed = line.trim();
119            if let Some((term_line, term_kw)) = &terminator {
120                if !trimmed.is_empty() {
121                    dead.push(DeadCode {
122                        code_type: DeadCodeType::UnreachableCode,
123                        name: None,
124                        file_path: file_path.to_path_buf(),
125                        start_line: i + 1,
126                        end_line: i + 1,
127                        reason: format!(
128                            "Code is unreachable after `{}` on line {}",
129                            term_kw, term_line
130                        ),
131                        confidence: 0.5,
132                        suggestion: Some("Remove or restructure this code".to_string()),
133                    });
134                    break;
135                }
136            }
137
138            if ["return", "die", "exit"].iter().any(|kw| trimmed.starts_with(kw)) {
139                if let Some(first_word) = trimmed.split_whitespace().next() {
140                    terminator = Some((i + 1, first_word.to_string()));
141                }
142            }
143        }
144
145        // Dead branch detection: scan for constant-condition patterns.
146        detect_dead_branches(file_path, &text, &mut dead);
147
148        Ok(dead)
149    }
150
151    /// Analyze entire workspace for dead code
152    pub fn analyze_workspace(&self) -> DeadCodeAnalysis {
153        let docs = self.workspace_index.document_store().all_documents();
154        let mut dead_code = Vec::new();
155        let mut total_lines = 0;
156
157        // Per-file unreachable code
158        for doc in &docs {
159            total_lines += doc.text.lines().count();
160            if let Some(path) = uri_to_fs_path(&doc.uri) {
161                if let Ok(mut file_dead) = self.analyze_file(&path) {
162                    dead_code.append(&mut file_dead);
163                }
164            }
165        }
166
167        // Unused symbols across workspace
168        for sym in self.workspace_index.find_unused_symbols() {
169            let code_type = match sym.kind {
170                SymbolKind::Subroutine => DeadCodeType::UnusedSubroutine,
171                SymbolKind::Variable(_) => DeadCodeType::UnusedVariable,
172                SymbolKind::Constant => DeadCodeType::UnusedConstant,
173                SymbolKind::Package => DeadCodeType::UnusedPackage,
174                _ => continue,
175            };
176
177            let file_path = uri_to_fs_path(&sym.uri).unwrap_or_else(|| PathBuf::from(&sym.uri));
178
179            dead_code.push(DeadCode {
180                code_type,
181                name: Some(sym.name.clone()),
182                file_path,
183                start_line: sym.range.start.line as usize + 1,
184                end_line: sym.range.end.line as usize + 1,
185                reason: "Symbol is never used".to_string(),
186                confidence: 0.9,
187                suggestion: Some("Remove or use this symbol".to_string()),
188            });
189        }
190
191        // Compute stats
192        let mut stats = DeadCodeStats::default();
193        for item in &dead_code {
194            let lines = item.end_line.saturating_sub(item.start_line) + 1;
195            stats.total_dead_lines += lines;
196            match item.code_type {
197                DeadCodeType::UnusedSubroutine => stats.unused_subroutines += 1,
198                DeadCodeType::UnusedVariable => stats.unused_variables += 1,
199                DeadCodeType::UnusedConstant => stats.unused_constants += 1,
200                DeadCodeType::UnusedPackage => stats.unused_packages += 1,
201                DeadCodeType::UnreachableCode => stats.unreachable_statements += 1,
202                DeadCodeType::DeadBranch => stats.dead_branches += 1,
203                _ => {}
204            }
205        }
206
207        DeadCodeAnalysis { dead_code, stats, files_analyzed: docs.len(), total_lines }
208    }
209}
210
/// Returns `true` if `condition` is a trivially-false constant expression.
///
/// Matches the constant forms of Perl's false values: `undef`, the empty
/// strings `""` / `''`, the strings `"0"` / `'0'`, and numeric literals equal
/// to zero (`0`, `00`, `0.0`). Any of these may be wrapped in parentheses,
/// e.g. `(0)` or `( 0 )`.
fn is_always_false(condition: &str) -> bool {
    let c = condition.trim();

    if matches!(c, "0" | "\"\"" | "''" | "\"0\"" | "'0'" | "undef") {
        return true;
    }

    // Only plain decimal literals are parsed: the float parser would otherwise
    // also accept words such as `nan`, which are not numeric literals in Perl.
    let looks_numeric = c.bytes().any(|b| b.is_ascii_digit())
        && c.bytes().all(|b| b.is_ascii_digit() || matches!(b, b'.' | b'e' | b'E' | b'+' | b'-'));
    if looks_numeric && c.parse::<f64>().is_ok_and(|n| n == 0.0) {
        return true;
    }

    // Parenthesised constant, checked recursively.
    c.strip_prefix('(').and_then(|rest| rest.strip_suffix(')')).is_some_and(is_always_false)
}
220
/// Returns `true` if `condition` is a trivially-true constant expression.
///
/// Matches non-zero numeric literals (`1`, `42`, `3.5`), non-empty quoted
/// string literals other than `"0"` (`"1"`, `'yes'`), and any of these wrapped
/// in parentheses (`(1)`).
///
/// A quoted string only counts as a constant when it is a single literal: text
/// containing another quote character (`"a" eq "b"`), a backslash escape, or —
/// for double quotes — an interpolation sigil (`"$debug"`) is rejected.
fn is_always_true(condition: &str) -> bool {
    let c = condition.trim();

    // Gate on literal characters first: the float parser also accepts `inf`,
    // `infinity` and `nan`, which are not numeric literals in Perl.
    let looks_numeric = c.bytes().any(|b| b.is_ascii_digit())
        && c.bytes().all(|b| b.is_ascii_digit() || matches!(b, b'.' | b'e' | b'E' | b'+' | b'-'));
    if looks_numeric && c.parse::<f64>().is_ok_and(|n| n != 0.0) {
        return true;
    }

    // Quoted string literal.
    for quote in ['"', '\''] {
        let Some(inner) = c.strip_prefix(quote).and_then(|rest| rest.strip_suffix(quote)) else {
            continue;
        };
        // Double-quoted strings interpolate variables, so their value is not constant.
        let interpolates = quote == '"' && inner.contains(|ch: char| ch == '$' || ch == '@');
        let single_literal = !inner.contains(quote) && !inner.contains('\\');
        return single_literal && !interpolates && !inner.is_empty() && inner != "0";
    }

    // Parenthesised constant, checked recursively.
    c.strip_prefix('(').and_then(|rest| rest.strip_suffix(')')).is_some_and(is_always_true)
}
244
245/// Scan `text` for constant-condition dead branches and append `DeadBranch`
246/// entries to `out`.
247///
248/// Detects:
249/// - `if (0) { ... }`  — body is never executed
250/// - `while (0) { ... }` — loop body is never executed
251/// - `unless (1) { ... }` — equivalent to `if (0)`
252/// - `until (1) { ... }` — equivalent to `while (0)`
253/// - `else` block following an always-true `if (1)` — dead else branch
254///
255/// Uses a simple brace-counting heuristic to locate the block extent.
256/// Only fires for single-line condition + opening brace patterns (the most
257/// common idiom); multi-line conditions are skipped to avoid false positives.
258fn detect_dead_branches(file_path: &Path, text: &str, out: &mut Vec<DeadCode>) {
259    let lines: Vec<&str> = text.lines().collect();
260    let n = lines.len();
261    let mut i = 0;
262
263    while i < n {
264        let trimmed = lines[i].trim();
265
266        // Determine if this line opens a dead branch.
267        // We look for: KEYWORD WHITESPACE? ( CONDITION ) WHITESPACE? {
268        let dead_reason_and_keyword: Option<(String, &str)> = 'detect: {
269            for kw in &["if", "while", "elsif", "unless", "until", "for", "foreach"] {
270                let rest = match trimmed.strip_prefix(kw) {
271                    Some(r)
272                        if r.is_empty()
273                            || r.starts_with(|c: char| c.is_whitespace() || c == '(') =>
274                    {
275                        r.trim_start()
276                    }
277                    _ => continue,
278                };
279                // Extract balanced parentheses for the condition.
280                if !rest.starts_with('(') {
281                    continue;
282                }
283                let condition = extract_balanced_parens(rest);
284                let condition = match condition {
285                    Some(c) => c,
286                    None => continue,
287                };
288                let after_cond = rest[condition.len() + 2..].trim(); // skip '(' ... ')'
289                // Only fire if opening brace is on the same line.
290                if !after_cond.starts_with('{') && !after_cond.is_empty() {
291                    continue;
292                }
293                let inner = condition.trim();
294
295                let reason = if matches!(*kw, "unless" | "until") {
296                    // unless/until: body is dead when condition is always-true
297                    if is_always_true(inner) {
298                        Some(format!(
299                            "`{kw}` condition `{inner}` is always true — block is never executed"
300                        ))
301                    } else {
302                        None
303                    }
304                } else {
305                    // if/while/for/foreach: body is dead when condition is always-false
306                    if is_always_false(inner) {
307                        Some(format!(
308                            "`{kw}` condition `{inner}` is always false — block is never executed"
309                        ))
310                    } else {
311                        None
312                    }
313                };
314
315                if let Some(r) = reason {
316                    break 'detect Some((r, *kw));
317                }
318            }
319
320            // Also check `else` block following always-true `if`.
321            // We handle this by looking back at the previously emitted entry
322            // or by a simple heuristic: `} else {` on its own line after an
323            // always-true if that we tracked.  This is deferred to a follow-up;
324            // for now focus on always-false/always-true keyword conditions.
325            None
326        };
327
328        if let Some((reason, _kw)) = dead_reason_and_keyword {
329            // Find the closing brace of this block by counting brace depth.
330            let block_start = i + 1; // 1-based
331            let end_line = find_block_end(&lines, i);
332            out.push(DeadCode {
333                code_type: DeadCodeType::DeadBranch,
334                name: None,
335                file_path: file_path.to_path_buf(),
336                start_line: block_start,
337                end_line,
338                reason,
339                confidence: 0.9,
340                suggestion: Some("Remove this dead branch or fix the condition".to_string()),
341            });
342            // Skip to after the block to avoid nested false positives.
343            i = end_line;
344            continue;
345        }
346
347        i += 1;
348    }
349}
350
/// Return the text enclosed by the balanced `(...)` group that starts at the
/// first character of `s`, without the outer parentheses.
///
/// Yields `None` when `s` does not begin with `(` or when the opening
/// parenthesis is never closed.
fn extract_balanced_parens(s: &str) -> Option<&str> {
    if !s.starts_with('(') {
        return None;
    }

    // Parentheses are ASCII, so byte offsets are valid slice boundaries.
    let mut open = 0usize;
    let close_at = s.bytes().position(|byte| match byte {
        b'(' => {
            open += 1;
            false
        }
        b')' => {
            open -= 1;
            open == 0
        }
        _ => false,
    })?;

    Some(&s[1..close_at])
}
373
/// Locate the line holding the `}` that closes the block opened at
/// `open_line` (a 0-based index into `lines`), using plain brace counting.
///
/// The result is a 1-based line number: `open_line + 1` when the block closes
/// on its opening line, and `lines.len()` when the braces never balance.
fn find_block_end(lines: &[&str], open_line: usize) -> usize {
    let mut nesting = 0i32;

    let closing = lines.iter().enumerate().skip(open_line).find(|(_, text)| {
        // Stops at the first `}` that brings the nesting back to zero.
        text.bytes().any(|byte| match byte {
            b'{' => {
                nesting += 1;
                false
            }
            b'}' => {
                nesting -= 1;
                nesting == 0
            }
            _ => false,
        })
    });

    // Convert the 0-based index to a 1-based line; fall back to end of file.
    closing.map_or(lines.len(), |(idx, _)| idx + 1)
}
396
397/// Generate a report from dead code analysis
398pub fn generate_report(analysis: &DeadCodeAnalysis) -> String {
399    let mut report = String::new();
400
401    report.push_str("=== Dead Code Analysis Report ===\n\n");
402
403    report.push_str(&format!("Files analyzed: {}\n", analysis.files_analyzed));
404    report.push_str(&format!("Total lines: {}\n", analysis.total_lines));
405    report.push_str(&format!("Dead code items: {}\n\n", analysis.dead_code.len()));
406
407    report.push_str("Statistics:\n");
408    report.push_str(&format!("  Unused subroutines: {}\n", analysis.stats.unused_subroutines));
409    report.push_str(&format!("  Unused variables: {}\n", analysis.stats.unused_variables));
410    report.push_str(&format!("  Unused constants: {}\n", analysis.stats.unused_constants));
411    report.push_str(&format!("  Unused packages: {}\n", analysis.stats.unused_packages));
412    report.push_str(&format!(
413        "  Unreachable statements: {}\n",
414        analysis.stats.unreachable_statements
415    ));
416    report.push_str(&format!("  Dead branches: {}\n", analysis.stats.dead_branches));
417    report.push_str(&format!("  Total dead lines: {}\n", analysis.stats.total_dead_lines));
418
419    report
420}