Skip to main content

perl_dead_code/
lib.rs

//! Dead code detection for Perl codebases (stub implementation).
//!
//! This module identifies unused code, including unreachable code and unused symbols.
//! It is currently a stub implementation intended to demonstrate the architecture.
5
6use perl_workspace_index::workspace_index::{
7    SymbolKind, WorkspaceIndex, fs_path_to_uri, uri_to_fs_path,
8};
9use serde::{Deserialize, Serialize};
10use std::collections::HashSet;
11use std::path::{Path, PathBuf};
12
13/// Types of dead code detected during Perl script analysis
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
15pub enum DeadCodeType {
16    /// Subroutine defined but never called
17    UnusedSubroutine,
18    /// Variable declared but never used
19    UnusedVariable,
20    /// Constant defined but never referenced
21    UnusedConstant,
22    /// Package declared but never used
23    UnusedPackage,
24    /// Code that can never be executed
25    UnreachableCode,
26    /// Conditional branch that is never taken
27    DeadBranch,
28    /// Module imported but never used
29    UnusedImport,
30    /// Function exported but never used externally
31    UnusedExport,
32}
33
34/// A piece of dead code detected during analysis
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct DeadCode {
37    /// Type of dead code (subroutine, variable, etc.)
38    pub code_type: DeadCodeType,
39    /// Name of the dead code element if available
40    pub name: Option<String>,
41    /// File path where the dead code is located
42    pub file_path: PathBuf,
43    /// Starting line number (1-based)
44    pub start_line: usize,
45    /// Ending line number (1-based)
46    pub end_line: usize,
47    /// Human-readable explanation of why this is considered dead code
48    pub reason: String,
49    /// Confidence level (0.0-1.0) in the detection accuracy
50    pub confidence: f32,
51    /// Optional suggestion for fixing the dead code
52    pub suggestion: Option<String>,
53}
54
55/// Dead code analysis result for a Perl workspace
56#[derive(Debug, Serialize, Deserialize)]
57pub struct DeadCodeAnalysis {
58    /// List of all dead code instances found
59    pub dead_code: Vec<DeadCode>,
60    /// Statistical summary of dead code analysis
61    pub stats: DeadCodeStats,
62    /// Number of files analyzed in the workspace
63    pub files_analyzed: usize,
64    /// Total lines of code analyzed
65    pub total_lines: usize,
66}
67
68/// Statistical summary of dead code analysis results
69#[derive(Debug, Default, Serialize, Deserialize)]
70pub struct DeadCodeStats {
71    /// Number of unused subroutines detected
72    pub unused_subroutines: usize,
73    /// Number of unused variables detected
74    pub unused_variables: usize,
75    /// Number of unused constants detected
76    pub unused_constants: usize,
77    /// Number of unused packages detected
78    pub unused_packages: usize,
79    /// Number of unreachable code statements
80    pub unreachable_statements: usize,
81    /// Number of dead conditional branches
82    pub dead_branches: usize,
83    /// Total lines of dead code identified
84    pub total_dead_lines: usize,
85}
86
87/// Dead code detector
88pub struct DeadCodeDetector {
89    workspace_index: WorkspaceIndex,
90    entry_points: HashSet<PathBuf>,
91}
92
93impl DeadCodeDetector {
94    /// Create a new dead code detector with the given workspace index
95    ///
96    /// # Arguments
97    /// * `workspace_index` - Indexed workspace containing symbol definitions and references
98    pub fn new(workspace_index: WorkspaceIndex) -> Self {
99        Self { workspace_index, entry_points: HashSet::new() }
100    }
101
102    /// Add an entry point (main script)
103    pub fn add_entry_point(&mut self, path: PathBuf) {
104        self.entry_points.insert(path);
105    }
106
107    /// Analyze a single file for dead code
108    pub fn analyze_file(&self, file_path: &Path) -> Result<Vec<DeadCode>, String> {
109        let uri = fs_path_to_uri(file_path).map_err(|e| e.to_string())?;
110        let text = self
111            .workspace_index
112            .document_store()
113            .get_text(&uri)
114            .ok_or_else(|| "file not indexed".to_string())?;
115
116        let mut dead = Vec::new();
117        let mut terminator: Option<(usize, String)> = None;
118
119        for (i, line) in text.lines().enumerate() {
120            let trimmed = line.trim();
121            if let Some((term_line, term_kw)) = &terminator {
122                if !trimmed.is_empty() {
123                    dead.push(DeadCode {
124                        code_type: DeadCodeType::UnreachableCode,
125                        name: None,
126                        file_path: file_path.to_path_buf(),
127                        start_line: i + 1,
128                        end_line: i + 1,
129                        reason: format!(
130                            "Code is unreachable after `{}` on line {}",
131                            term_kw, term_line
132                        ),
133                        confidence: 0.5,
134                        suggestion: Some("Remove or restructure this code".to_string()),
135                    });
136                    break;
137                }
138            }
139
140            if ["return", "die", "exit"].iter().any(|kw| trimmed.starts_with(kw)) {
141                if let Some(first_word) = trimmed.split_whitespace().next() {
142                    terminator = Some((i + 1, first_word.to_string()));
143                }
144            }
145        }
146
147        // Dead branch detection: scan for constant-condition patterns.
148        detect_dead_branches(file_path, &text, &mut dead);
149
150        Ok(dead)
151    }
152
153    /// Analyze entire workspace for dead code
154    pub fn analyze_workspace(&self) -> DeadCodeAnalysis {
155        let docs = self.workspace_index.document_store().all_documents();
156        let mut dead_code = Vec::new();
157        let mut total_lines = 0;
158
159        // Per-file unreachable code
160        for doc in &docs {
161            total_lines += doc.text.lines().count();
162            if let Some(path) = uri_to_fs_path(&doc.uri) {
163                if let Ok(mut file_dead) = self.analyze_file(&path) {
164                    dead_code.append(&mut file_dead);
165                }
166            }
167        }
168
169        // Unused symbols across workspace
170        for sym in self.workspace_index.find_unused_symbols() {
171            let code_type = match sym.kind {
172                SymbolKind::Subroutine => DeadCodeType::UnusedSubroutine,
173                SymbolKind::Variable(_) => DeadCodeType::UnusedVariable,
174                SymbolKind::Constant => DeadCodeType::UnusedConstant,
175                SymbolKind::Package => DeadCodeType::UnusedPackage,
176                _ => continue,
177            };
178
179            let file_path = uri_to_fs_path(&sym.uri).unwrap_or_else(|| PathBuf::from(&sym.uri));
180
181            dead_code.push(DeadCode {
182                code_type,
183                name: Some(sym.name.clone()),
184                file_path,
185                start_line: sym.range.start.line as usize + 1,
186                end_line: sym.range.end.line as usize + 1,
187                reason: "Symbol is never used".to_string(),
188                confidence: 0.9,
189                suggestion: Some("Remove or use this symbol".to_string()),
190            });
191        }
192
193        // Compute stats
194        let mut stats = DeadCodeStats::default();
195        for item in &dead_code {
196            let lines = item.end_line.saturating_sub(item.start_line) + 1;
197            stats.total_dead_lines += lines;
198            match item.code_type {
199                DeadCodeType::UnusedSubroutine => stats.unused_subroutines += 1,
200                DeadCodeType::UnusedVariable => stats.unused_variables += 1,
201                DeadCodeType::UnusedConstant => stats.unused_constants += 1,
202                DeadCodeType::UnusedPackage => stats.unused_packages += 1,
203                DeadCodeType::UnreachableCode => stats.unreachable_statements += 1,
204                DeadCodeType::DeadBranch => stats.dead_branches += 1,
205                _ => {}
206            }
207        }
208
209        DeadCodeAnalysis { dead_code, stats, files_analyzed: docs.len(), total_lines }
210    }
211}
212
/// Returns `true` if `condition` is a trivially-false constant expression.
///
/// Matches Perl's constant false values, optionally wrapped in parentheses:
/// - `0` and other plain decimal zero literals (`00`, `0.0`, `.0`);
/// - the empty string (`""`, `''`) and the string zero (`"0"`, `'0'`);
/// - `undef`.
///
/// Other strings such as `'0.0'` or `"00"` are true in Perl and are not
/// matched. These are the standard idioms used to write permanently-dead
/// `if`/`while` blocks.
fn is_always_false(condition: &str) -> bool {
    let c = condition.trim();
    if matches!(c, "0" | "\"\"" | "''" | "undef" | "\"0\"" | "'0'") {
        return true;
    }

    // Plain decimal literal (digits and dots only) whose value is zero.
    // Restricting the alphabet keeps signs, exponents and words out.
    let plain_decimal = c.bytes().all(|b| b.is_ascii_digit() || b == b'.')
        && c.bytes().any(|b| b.is_ascii_digit());
    if plain_decimal && c.parse::<f64>().is_ok_and(|n| n == 0.0) {
        return true;
    }

    // Parenthesised: both delimiters are single-byte, so the slice is valid.
    c.starts_with('(') && c.ends_with(')') && is_always_false(&c[1..c.len() - 1])
}
222
/// Returns `true` if `condition` is a trivially-true constant expression.
///
/// Matches, optionally wrapped in parentheses:
/// - non-zero numeric literals (`1`, `-3`, `1.5`, `1e3`);
/// - a single quoted string literal that is neither empty nor `"0"`
///   (`"yes"`, `'1'`, `'0.0'`).
///
/// Deliberately not matched, because the value is not a known constant:
/// - barewords such as `inf` or `nan`, which Rust's float parser would accept
///   but which are not numeric literals in Perl;
/// - double-quoted strings containing interpolation (`$`, `@`) or backslash
///   escapes;
/// - expressions that merely start and end with a quote (`"a" eq "b"`).
fn is_always_true(condition: &str) -> bool {
    /// Whether `inner` is the body of exactly one string literal delimited by
    /// `quote`, with a value that can be read off the source text.
    fn is_constant_body(inner: &str, quote: char) -> bool {
        if quote == '"' {
            // Interpolation and escapes change the value; an inner quote
            // means this is more than one literal.
            return !inner.contains(|ch: char| matches!(ch, '"' | '$' | '@' | '\\'));
        }
        // Single-quoted: only an unescaped quote ends the literal early.
        let mut escaped = false;
        for ch in inner.chars() {
            match ch {
                _ if escaped => escaped = false,
                '\\' => escaped = true,
                '\'' => return false,
                _ => {}
            }
        }
        // A trailing backslash would escape the closing quote.
        !escaped
    }

    let c = condition.trim();

    // Non-zero numeric literal. Requiring a digit rejects `inf`, `infinity`
    // and `nan`, which `f64::from_str` parses successfully.
    if c.bytes().any(|b| b.is_ascii_digit()) && c.parse::<f64>().is_ok_and(|n| n != 0.0) {
        return true;
    }

    // Quoted non-empty string that is not "0"
    for quote in ['"', '\''] {
        if c.len() > 2 && c.starts_with(quote) && c.ends_with(quote) {
            let inner = &c[1..c.len() - 1];
            return is_constant_body(inner, quote) && inner != "0";
        }
    }

    // Parenthesised
    c.starts_with('(') && c.ends_with(')') && is_always_true(&c[1..c.len() - 1])
}
246
247/// Scan `text` for constant-condition dead branches and append `DeadBranch`
248/// entries to `out`.
249///
250/// Detects:
251/// - `if (0) { ... }`  — body is never executed
252/// - `while (0) { ... }` — loop body is never executed
253/// - `unless (1) { ... }` — equivalent to `if (0)`
254/// - `until (1) { ... }` — equivalent to `while (0)`
255/// - `else` block following an always-true `if (1)` — dead else branch
256///
257/// Uses a simple brace-counting heuristic to locate the block extent.
258/// Only fires for single-line condition + opening brace patterns (the most
259/// common idiom); multi-line conditions are skipped to avoid false positives.
260fn detect_dead_branches(file_path: &Path, text: &str, out: &mut Vec<DeadCode>) {
261    let lines: Vec<&str> = text.lines().collect();
262    let n = lines.len();
263    let mut i = 0;
264
265    while i < n {
266        let trimmed = lines[i].trim();
267
268        // Determine if this line opens a dead branch.
269        // We look for: KEYWORD WHITESPACE? ( CONDITION ) WHITESPACE? {
270        let dead_reason_and_keyword: Option<(String, &str)> = 'detect: {
271            for kw in &["if", "while", "elsif", "unless", "until", "for", "foreach"] {
272                let rest = match trimmed.strip_prefix(kw) {
273                    Some(r)
274                        if r.is_empty()
275                            || r.starts_with(|c: char| c.is_whitespace() || c == '(') =>
276                    {
277                        r.trim_start()
278                    }
279                    _ => continue,
280                };
281                // Extract balanced parentheses for the condition.
282                if !rest.starts_with('(') {
283                    continue;
284                }
285                let condition = extract_balanced_parens(rest);
286                let condition = match condition {
287                    Some(c) => c,
288                    None => continue,
289                };
290                let after_cond = rest[condition.len() + 2..].trim(); // skip '(' ... ')'
291                // Only fire if opening brace is on the same line.
292                if !after_cond.starts_with('{') && !after_cond.is_empty() {
293                    continue;
294                }
295                let inner = condition.trim();
296
297                let reason = if matches!(*kw, "unless" | "until") {
298                    // unless/until: body is dead when condition is always-true
299                    if is_always_true(inner) {
300                        Some(format!(
301                            "`{kw}` condition `{inner}` is always true — block is never executed"
302                        ))
303                    } else {
304                        None
305                    }
306                } else {
307                    // if/while/for/foreach: body is dead when condition is always-false
308                    if is_always_false(inner) {
309                        Some(format!(
310                            "`{kw}` condition `{inner}` is always false — block is never executed"
311                        ))
312                    } else {
313                        None
314                    }
315                };
316
317                if let Some(r) = reason {
318                    break 'detect Some((r, *kw));
319                }
320            }
321
322            // Also check `else` block following always-true `if`.
323            // We handle this by looking back at the previously emitted entry
324            // or by a simple heuristic: `} else {` on its own line after an
325            // always-true if that we tracked.  This is deferred to a follow-up;
326            // for now focus on always-false/always-true keyword conditions.
327            None
328        };
329
330        if let Some((reason, _kw)) = dead_reason_and_keyword {
331            // Find the closing brace of this block by counting brace depth.
332            let block_start = i + 1; // 1-based
333            let end_line = find_block_end(&lines, i);
334            out.push(DeadCode {
335                code_type: DeadCodeType::DeadBranch,
336                name: None,
337                file_path: file_path.to_path_buf(),
338                start_line: block_start,
339                end_line,
340                reason,
341                confidence: 0.9,
342                suggestion: Some("Remove this dead branch or fix the condition".to_string()),
343            });
344            // Skip to after the block to avoid nested false positives.
345            i = end_line;
346            continue;
347        }
348
349        i += 1;
350    }
351}
352
/// Return the text enclosed by the parenthesis that opens `s`.
///
/// The result excludes the outer `(` and its matching `)`; nested pairs are
/// kept verbatim. Yields `None` when `s` does not begin with `(` or when the
/// opening parenthesis is never closed.
fn extract_balanced_parens(s: &str) -> Option<&str> {
    let body = s.strip_prefix('(')?;

    // The stripped prefix already opened one level.
    let mut open = 1usize;
    for (offset, ch) in body.char_indices() {
        if ch == '(' {
            open += 1;
        } else if ch == ')' {
            open -= 1;
            if open == 0 {
                return Some(&body[..offset]);
            }
        }
    }
    None
}
375
/// Locate the line on which the block opened at `open_line` is closed.
///
/// `open_line` is a 0-based index into `lines`; the result is a 1-based line
/// number. Braces are counted from `open_line` onwards and the first `}` that
/// brings the count back to zero wins, so a block closed on its opening line
/// yields `open_line + 1`. When no such brace exists the number of lines
/// (i.e. the last line of the file) is returned.
fn find_block_end(lines: &[&str], open_line: usize) -> usize {
    let mut open_braces = 0i32;

    // Pair every line with its 1-based number before skipping to the start.
    for (line_no, text) in (1..).zip(lines).skip(open_line) {
        // Braces are ASCII, so scanning bytes is equivalent to scanning chars.
        for byte in text.bytes() {
            if byte == b'{' {
                open_braces += 1;
            } else if byte == b'}' {
                open_braces -= 1;
                if open_braces == 0 {
                    return line_no;
                }
            }
        }
    }

    lines.len() // fallback: end of file
}
398
399/// Generate a report from dead code analysis
400pub fn generate_report(analysis: &DeadCodeAnalysis) -> String {
401    let mut report = String::new();
402
403    report.push_str("=== Dead Code Analysis Report ===\n\n");
404
405    report.push_str(&format!("Files analyzed: {}\n", analysis.files_analyzed));
406    report.push_str(&format!("Total lines: {}\n", analysis.total_lines));
407    report.push_str(&format!("Dead code items: {}\n\n", analysis.dead_code.len()));
408
409    report.push_str("Statistics:\n");
410    report.push_str(&format!("  Unused subroutines: {}\n", analysis.stats.unused_subroutines));
411    report.push_str(&format!("  Unused variables: {}\n", analysis.stats.unused_variables));
412    report.push_str(&format!("  Unused constants: {}\n", analysis.stats.unused_constants));
413    report.push_str(&format!("  Unused packages: {}\n", analysis.stats.unused_packages));
414    report.push_str(&format!(
415        "  Unreachable statements: {}\n",
416        analysis.stats.unreachable_statements
417    ));
418    report.push_str(&format!("  Dead branches: {}\n", analysis.stats.dead_branches));
419    report.push_str(&format!("  Total dead lines: {}\n", analysis.stats.total_dead_lines));
420
421    report
422}