// perl_parser/dead_code/mod.rs

//! Dead code detection for Perl codebases (stub implementation)
//!
//! This module identifies unused code, including unreachable code and unused symbols.
//! It is currently a stub implementation intended to demonstrate the architecture.

use perl_workspace::workspace_index::{SymbolKind, WorkspaceIndex, fs_path_to_uri, uri_to_fs_path};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::path::{Path, PathBuf};

11/// Types of dead code detected during Perl script analysis
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
13pub enum DeadCodeType {
14    /// Subroutine defined but never called
15    UnusedSubroutine,
16    /// Variable declared but never used
17    UnusedVariable,
18    /// Constant defined but never referenced
19    UnusedConstant,
20    /// Package declared but never used
21    UnusedPackage,
22    /// Code that can never be executed
23    UnreachableCode,
24    /// Conditional branch that is never taken
25    DeadBranch,
26    /// Module imported but never used
27    UnusedImport,
28    /// Function exported but never used externally
29    UnusedExport,
30}
31
32/// A piece of dead code detected during analysis
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct DeadCode {
35    /// Type of dead code (subroutine, variable, etc.)
36    pub code_type: DeadCodeType,
37    /// Name of the dead code element if available
38    pub name: Option<String>,
39    /// File path where the dead code is located
40    pub file_path: PathBuf,
41    /// Starting line number (1-based)
42    pub start_line: usize,
43    /// Ending line number (1-based)
44    pub end_line: usize,
45    /// Human-readable explanation of why this is considered dead code
46    pub reason: String,
47    /// Confidence level (0.0-1.0) in the detection accuracy
48    pub confidence: f32,
49    /// Optional suggestion for fixing the dead code
50    pub suggestion: Option<String>,
51}
52
53/// Dead code analysis result for a Perl workspace
54#[derive(Debug, Serialize, Deserialize)]
55pub struct DeadCodeAnalysis {
56    /// List of all dead code instances found
57    pub dead_code: Vec<DeadCode>,
58    /// Statistical summary of dead code analysis
59    pub stats: DeadCodeStats,
60    /// Number of files analyzed in the workspace
61    pub files_analyzed: usize,
62    /// Total lines of code analyzed
63    pub total_lines: usize,
64}
65
66/// Statistical summary of dead code analysis results
67#[derive(Debug, Default, Serialize, Deserialize)]
68pub struct DeadCodeStats {
69    /// Number of unused subroutines detected
70    pub unused_subroutines: usize,
71    /// Number of unused variables detected
72    pub unused_variables: usize,
73    /// Number of unused constants detected
74    pub unused_constants: usize,
75    /// Number of unused packages detected
76    pub unused_packages: usize,
77    /// Number of unreachable code statements
78    pub unreachable_statements: usize,
79    /// Number of dead conditional branches
80    pub dead_branches: usize,
81    /// Total lines of dead code identified
82    pub total_dead_lines: usize,
83}
84
85/// Dead code detector
86pub struct DeadCodeDetector {
87    workspace_index: WorkspaceIndex,
88    entry_points: HashSet<PathBuf>,
89}
90
91impl DeadCodeDetector {
92    /// Create a new dead code detector with the given workspace index
93    ///
94    /// # Arguments
95    /// * `workspace_index` - Indexed workspace containing symbol definitions and references
96    pub fn new(workspace_index: WorkspaceIndex) -> Self {
97        Self { workspace_index, entry_points: HashSet::new() }
98    }
99
100    /// Add an entry point (main script)
101    pub fn add_entry_point(&mut self, path: PathBuf) {
102        self.entry_points.insert(path);
103    }
104
105    /// Analyze a single file for dead code
106    pub fn analyze_file(&self, file_path: &Path) -> Result<Vec<DeadCode>, String> {
107        let uri = fs_path_to_uri(file_path).map_err(|e| e.to_string())?;
108        let text = self
109            .workspace_index
110            .document_store()
111            .get_text(&uri)
112            .ok_or_else(|| "file not indexed".to_string())?;
113
114        let mut dead = Vec::new();
115        let mut block_depth = 0usize;
116        let mut terminator: Option<(usize, usize, String)> = None;
117
118        for (i, line) in text.lines().enumerate() {
119            let trimmed = line.trim();
120            let current_depth = block_depth;
121
122            if let Some((term_line, term_depth, term_kw)) = &terminator {
123                if current_depth < *term_depth {
124                    terminator = None;
125                } else if current_depth == *term_depth
126                    && !trimmed.is_empty()
127                    && !trimmed.starts_with('#')
128                    && !is_structural_line(trimmed)
129                {
130                    dead.push(DeadCode {
131                        code_type: DeadCodeType::UnreachableCode,
132                        name: None,
133                        file_path: file_path.to_path_buf(),
134                        start_line: i + 1,
135                        end_line: i + 1,
136                        reason: format!(
137                            "Code is unreachable after `{}` on line {}",
138                            term_kw, term_line
139                        ),
140                        confidence: 0.9,
141                        suggestion: Some("Remove or restructure this code".to_string()),
142                    });
143                    break;
144                }
145            }
146
147            if let Some(term_kw) = detect_unconditional_terminator(trimmed) {
148                terminator = Some((i + 1, current_depth, term_kw.to_string()));
149            }
150
151            block_depth += line.chars().filter(|&ch| ch == '{').count();
152            block_depth = block_depth.saturating_sub(line.chars().filter(|&ch| ch == '}').count());
153        }
154
155        // Dead branch detection: scan for constant-condition patterns.
156        detect_dead_branches(file_path, &text, &mut dead);
157
158        Ok(dead)
159    }
160
161    /// Analyze entire workspace for dead code
162    pub fn analyze_workspace(&self) -> DeadCodeAnalysis {
163        let docs = self.workspace_index.document_store().all_documents();
164        let mut dead_code = Vec::new();
165        let mut total_lines = 0;
166
167        // Per-file unreachable code
168        for doc in &docs {
169            total_lines += doc.text.lines().count();
170            if let Some(path) = uri_to_fs_path(&doc.uri) {
171                if let Ok(mut file_dead) = self.analyze_file(&path) {
172                    dead_code.append(&mut file_dead);
173                }
174            }
175        }
176
177        // Unused symbols across workspace
178        for sym in self.workspace_index.find_unused_symbols() {
179            let code_type = match sym.kind {
180                SymbolKind::Subroutine => DeadCodeType::UnusedSubroutine,
181                SymbolKind::Variable(_) => DeadCodeType::UnusedVariable,
182                SymbolKind::Constant => DeadCodeType::UnusedConstant,
183                SymbolKind::Package => DeadCodeType::UnusedPackage,
184                _ => continue,
185            };
186
187            let file_path = uri_to_fs_path(&sym.uri).unwrap_or_else(|| PathBuf::from(&sym.uri));
188
189            dead_code.push(DeadCode {
190                code_type,
191                name: Some(sym.name.clone()),
192                file_path,
193                start_line: sym.range.start.line as usize + 1,
194                end_line: sym.range.end.line as usize + 1,
195                reason: "Symbol is never used".to_string(),
196                confidence: 0.9,
197                suggestion: Some("Remove or use this symbol".to_string()),
198            });
199        }
200
201        // Compute stats
202        let mut stats = DeadCodeStats::default();
203        for item in &dead_code {
204            let lines = item.end_line.saturating_sub(item.start_line) + 1;
205            stats.total_dead_lines += lines;
206            match item.code_type {
207                DeadCodeType::UnusedSubroutine => stats.unused_subroutines += 1,
208                DeadCodeType::UnusedVariable => stats.unused_variables += 1,
209                DeadCodeType::UnusedConstant => stats.unused_constants += 1,
210                DeadCodeType::UnusedPackage => stats.unused_packages += 1,
211                DeadCodeType::UnreachableCode => stats.unreachable_statements += 1,
212                DeadCodeType::DeadBranch => stats.dead_branches += 1,
213                _ => {}
214            }
215        }
216
217        DeadCodeAnalysis { dead_code, stats, files_analyzed: docs.len(), total_lines }
218    }
219}
220
/// Returns `true` when `trimmed` is non-empty and consists solely of closing braces
/// and semicolons (for example `}`, `};` or `;`), i.e. it carries no executable code.
fn is_structural_line(trimmed: &str) -> bool {
    !trimmed.is_empty() && trimmed.chars().all(|ch| matches!(ch, '}' | ';'))
}

/// Returns the terminating keyword when `trimmed` is a complete, unconditional
/// `return`, `die`, `exit` or `CORE::exit` statement.
///
/// Returns `None` when:
/// - the line does not start with one of those keywords;
/// - the statement does not end with `;` on this line (it continues on the next line,
///   so the following lines belong to it and are not unreachable);
/// - the statement carries a postfix condition or loop modifier (`if`, `unless`, ...).
///
/// A trailing `# comment` is ignored. Keyword matching is textual, so a modifier
/// keyword inside a string literal also makes the statement count as conditional,
/// which errs on the side of not reporting.
fn detect_unconditional_terminator(trimmed: &str) -> Option<&str> {
    const TERMINATORS: [&str; 4] = ["return", "die", "exit", "CORE::exit"];

    let first = trimmed
        .split(|ch: char| ch.is_whitespace() || matches!(ch, ';' | '('))
        .next()
        .unwrap_or_default();
    if !TERMINATORS.contains(&first) {
        return None;
    }

    let statement = strip_trailing_comment(&trimmed[first.len()..]).trim();
    if !statement.ends_with(';') || contains_postfix_condition(statement) {
        return None;
    }

    Some(first)
}

/// Returns `text` without its trailing `# comment`, if it has one.
///
/// A `#` inside a single- or double-quoted string, or directly after `$` (as in
/// `$#array`), does not start a comment. Other quoting constructs (`q{}`, `m#...#`,
/// heredocs) are not recognised.
fn strip_trailing_comment(text: &str) -> &str {
    let mut quote: Option<char> = None;
    let mut escaped = false;
    let mut previous: Option<char> = None;

    for (idx, ch) in text.char_indices() {
        match quote {
            Some(open) => {
                if escaped {
                    escaped = false;
                } else if ch == '\\' {
                    escaped = true;
                } else if ch == open {
                    quote = None;
                }
            }
            None => match ch {
                '"' | '\'' => quote = Some(ch),
                '#' if previous != Some('$') => return &text[..idx],
                _ => {}
            },
        }
        previous = Some(ch);
    }
    text
}

/// Returns `true` when `remainder` contains a statement-modifier keyword
/// (`if`, `unless`, `when`, `while`, `until`, `for`, `foreach`) as a whole word.
fn contains_postfix_condition(remainder: &str) -> bool {
    const CONDITIONS: [&str; 7] = ["if", "unless", "when", "while", "until", "for", "foreach"];
    CONDITIONS.iter().any(|keyword| contains_keyword(remainder, keyword))
}

/// Returns `true` when `keyword` occurs in `text` delimited on both sides by a
/// non-identifier character or by the start/end of `text`.
fn contains_keyword(text: &str, keyword: &str) -> bool {
    text.match_indices(keyword).any(|(idx, matched)| {
        let before = text[..idx].chars().next_back();
        let after = text[idx + matched.len()..].chars().next();
        is_keyword_boundary(before) && is_keyword_boundary(after)
    })
}

/// Returns `true` when `ch` is absent or is not an identifier character
/// (ASCII alphanumeric or `_`).
fn is_keyword_boundary(ch: Option<char>) -> bool {
    !ch.is_some_and(|ch| ch.is_ascii_alphanumeric() || ch == '_')
}

/// Returns `true` if `condition` is a trivially-false constant expression.
///
/// Matches `0`, `""`, `''`, `"0"`, `'0'`, `undef`, numeric literals equal to zero
/// (`0.0`, `00`, `-0`), and any of these wrapped in parentheses such as `(0)` or
/// `( 0 )`. These are the standard Perl idioms for permanently-dead `if`/`while`
/// blocks. An empty condition is not treated as false.
fn is_always_false(condition: &str) -> bool {
    let c = condition.trim();

    if matches!(c, "0" | "\"\"" | "''" | "\"0\"" | "'0'" | "undef") {
        return true;
    }

    // Numeric literal that evaluates to zero. The character check keeps barewords
    // that `f64::from_str` would accept out of the numeric path.
    let looks_numeric = !c.is_empty()
        && c.chars().all(|ch| ch.is_ascii_digit() || matches!(ch, '+' | '-' | '.' | 'e' | 'E'));
    if looks_numeric && c.parse::<f64>().is_ok_and(|n| n == 0.0) {
        return true;
    }

    // Parenthesised constant.
    c.starts_with('(') && c.ends_with(')') && is_always_false(&c[1..c.len() - 1])
}

/// Returns `true` if `condition` is a trivially-true constant expression.
///
/// Matches non-zero numeric literals (`1`, `42`, `-1`, `1.5`), quoted string
/// literals that are non-empty and not `"0"` (`"1"`, `'yes'`), and any of these
/// wrapped in parentheses such as `(1)`.
///
/// A quoted operand only counts when it is provably one constant literal. Text
/// containing another quote of the same kind (`"a" eq "b"`), a backslash escape, or,
/// in double quotes, an interpolation sigil (`$`, `@`) is rejected.
fn is_always_true(condition: &str) -> bool {
    let c = condition.trim();

    // Non-zero numeric literal. `f64::from_str` also accepts barewords such as `inf`
    // and `nan`, so require numeric-looking characters first.
    let looks_numeric =
        c.chars().all(|ch| ch.is_ascii_digit() || matches!(ch, '+' | '-' | '.' | 'e' | 'E'));
    if looks_numeric && c.parse::<f64>().is_ok_and(|n| n != 0.0) {
        return true;
    }

    // Quoted non-empty string literal that is not "0".
    for quote in ['"', '\''] {
        if c.len() > 2 && c.starts_with(quote) && c.ends_with(quote) {
            let inner = &c[1..c.len() - 1];
            let interpolates =
                quote == '"' && inner.contains(|ch: char| matches!(ch, '$' | '@'));
            let is_single_literal =
                !inner.contains(quote) && !inner.contains('\\') && !interpolates;
            return is_single_literal && inner != "0";
        }
    }

    // Parenthesised constant.
    c.starts_with('(') && c.ends_with(')') && is_always_true(&c[1..c.len() - 1])
}

300/// Scan `text` for constant-condition dead branches and append `DeadBranch`
301/// entries to `out`.
302///
303/// Detects:
304/// - `if (0) { ... }`  — body is never executed
305/// - `while (0) { ... }` — loop body is never executed
306/// - `unless (1) { ... }` — equivalent to `if (0)`
307/// - `until (1) { ... }` — equivalent to `while (0)`
308/// - `else` block following an always-true `if (1)` — dead else branch
309///
310/// Uses a simple brace-counting heuristic to locate the block extent.
311/// Only fires for single-line condition + opening brace patterns (the most
312/// common idiom); multi-line conditions are skipped to avoid false positives.
313fn detect_dead_branches(file_path: &Path, text: &str, out: &mut Vec<DeadCode>) {
314    let lines: Vec<&str> = text.lines().collect();
315    let n = lines.len();
316    let mut i = 0;
317
318    while i < n {
319        let trimmed = lines[i].trim();
320
321        // Determine if this line opens a dead branch.
322        // We look for: KEYWORD WHITESPACE? ( CONDITION ) WHITESPACE? {
323        let dead_reason_and_keyword: Option<(String, &str)> = 'detect: {
324            for kw in &["if", "while", "elsif", "unless", "until", "for", "foreach"] {
325                let rest = match trimmed.strip_prefix(kw) {
326                    Some(r)
327                        if r.is_empty()
328                            || r.starts_with(|c: char| c.is_whitespace() || c == '(') =>
329                    {
330                        r.trim_start()
331                    }
332                    _ => continue,
333                };
334                // Extract balanced parentheses for the condition.
335                if !rest.starts_with('(') {
336                    continue;
337                }
338                let condition = extract_balanced_parens(rest);
339                let condition = match condition {
340                    Some(c) => c,
341                    None => continue,
342                };
343                let after_cond = rest[condition.len() + 2..].trim(); // skip '(' ... ')'
344                // Only fire if opening brace is on the same line.
345                if !after_cond.starts_with('{') && !after_cond.is_empty() {
346                    continue;
347                }
348                let inner = condition.trim();
349
350                let reason = if matches!(*kw, "unless" | "until") {
351                    // unless/until: body is dead when condition is always-true
352                    if is_always_true(inner) {
353                        Some(format!(
354                            "`{kw}` condition `{inner}` is always true — block is never executed"
355                        ))
356                    } else {
357                        None
358                    }
359                } else {
360                    // if/while/for/foreach: body is dead when condition is always-false
361                    if is_always_false(inner) {
362                        Some(format!(
363                            "`{kw}` condition `{inner}` is always false — block is never executed"
364                        ))
365                    } else {
366                        None
367                    }
368                };
369
370                if let Some(r) = reason {
371                    break 'detect Some((r, *kw));
372                }
373            }
374
375            // Also check `else` block following always-true `if`.
376            // We handle this by looking back at the previously emitted entry
377            // or by a simple heuristic: `} else {` on its own line after an
378            // always-true if that we tracked.  This is deferred to a follow-up;
379            // for now focus on always-false/always-true keyword conditions.
380            None
381        };
382
383        if let Some((reason, _kw)) = dead_reason_and_keyword {
384            // Find the closing brace of this block by counting brace depth.
385            let block_start = i + 1; // 1-based
386            let end_line = find_block_end(&lines, i);
387            out.push(DeadCode {
388                code_type: DeadCodeType::DeadBranch,
389                name: None,
390                file_path: file_path.to_path_buf(),
391                start_line: block_start,
392                end_line,
393                reason,
394                confidence: 0.9,
395                suggestion: Some("Remove this dead branch or fix the condition".to_string()),
396            });
397            // Skip to after the block to avoid nested false positives.
398            i = end_line;
399            continue;
400        }
401
402        i += 1;
403    }
404}
405
/// Extract the content of the first balanced `(...)` starting at the
/// beginning of `s`.
///
/// Returns the inner content (without the outer parentheses), or `None` if `s`
/// does not start with `(` or the parentheses never balance. Parentheses are counted
/// textually, so ones inside string literals are counted as well.
fn extract_balanced_parens(s: &str) -> Option<&str> {
    let inner = s.strip_prefix('(')?;

    // The opening parenthesis is already consumed, so depth stays >= 1 until the
    // matching `)` is found and the subtraction below cannot underflow.
    let mut depth = 1usize;
    for (idx, ch) in inner.char_indices() {
        match ch {
            '(' => depth += 1,
            ')' => {
                depth -= 1;
                if depth == 0 {
                    return Some(&inner[..idx]);
                }
            }
            _ => {}
        }
    }
    None
}

/// Find the 1-based line number of the closing `}` for the block that opens
/// on line `open_line` (0-based index).
///
/// Uses simple brace counting from `open_line` onwards. Returns `open_line + 1` when
/// the block closes on the line it opens on, and `lines.len()` when the braces never
/// balance. Braces inside strings or comments are counted as well.
fn find_block_end(lines: &[&str], open_line: usize) -> usize {
    let mut depth = 0i32;
    for (idx, line) in lines.iter().enumerate().skip(open_line) {
        for brace in line.chars().filter(|ch| matches!(ch, '{' | '}')) {
            if brace == '{' {
                depth += 1;
                continue;
            }
            depth -= 1;
            if depth == 0 {
                return idx + 1; // 1-based
            }
        }
    }
    lines.len() // fallback: end of file
}

452/// Generate a report from dead code analysis
453pub fn generate_report(analysis: &DeadCodeAnalysis) -> String {
454    let mut report = String::new();
455
456    report.push_str("=== Dead Code Analysis Report ===\n\n");
457
458    report.push_str(&format!("Files analyzed: {}\n", analysis.files_analyzed));
459    report.push_str(&format!("Total lines: {}\n", analysis.total_lines));
460    report.push_str(&format!("Dead code items: {}\n\n", analysis.dead_code.len()));
461
462    report.push_str("Statistics:\n");
463    report.push_str(&format!("  Unused subroutines: {}\n", analysis.stats.unused_subroutines));
464    report.push_str(&format!("  Unused variables: {}\n", analysis.stats.unused_variables));
465    report.push_str(&format!("  Unused constants: {}\n", analysis.stats.unused_constants));
466    report.push_str(&format!("  Unused packages: {}\n", analysis.stats.unused_packages));
467    report.push_str(&format!(
468        "  Unreachable statements: {}\n",
469        analysis.stats.unreachable_statements
470    ));
471    report.push_str(&format!("  Dead branches: {}\n", analysis.stats.dead_branches));
472    report.push_str(&format!("  Total dead lines: {}\n", analysis.stats.total_dead_lines));
473
474    report
475}