perl_lsp_diagnostics/lints/
unreachable_code.rs

1//! Unreachable code detection (PL406)
2//!
3//! Identifies statements that cannot execute because they follow an unconditional
4//! control-flow exit (`return`, `die`, `exit`, `croak`, `confess`, `last`,
5//! `next`, `redo`).
6//!
7//! # Algorithm
8//!
9//! The lint uses **recursive statement-slice analysis** rather than a flat
10//! pre-order AST walk. This is the only correct approach: a pre-order visitor
11//! with a `reachable: bool` flag cannot distinguish "visiting a child of this
12//! node" from "visiting the next sibling", so a `return` inside a nested
13//! subroutine body would incorrectly poison sibling statements in the outer
14//! scope.
15//!
16//! The correct algorithm:
17//! 1. `check_unreachable_code` dispatches on the root, then calls `visit_node`
18//!    for each statement in top-level lists.
19//! 2. `check_statement_list` iterates a `&[Node]` linearly. When an
20//!    unconditional exit is found, all subsequent siblings get a PL406
21//!    diagnostic. Nested blocks are recursed into freshly.
22//! 3. Subroutine and method bodies (`Subroutine`, `Method`) trigger a fresh
23//!    call to `visit_node`, so a `return` in an inner sub never affects the
24//!    outer statement list.
25//! 4. `eval { }` blocks are intentionally **not** recursed into: `die` inside
26//!    `eval { }` is caught and does not exit the outer scope.
27//!
28//! # Scope of detection
29//!
30//! | Unconditional exit | Detected? |
31//! |--------------------|-----------|
32//! | `return`           | Yes |
33//! | `die "msg"`        | Yes (direct FunctionCall at statement level) |
34//! | `exit $code`       | Yes |
35//! | `croak "msg"`      | Yes |
36//! | `Carp::croak "msg"` | Yes |
37//! | `confess "msg"`    | Yes |
38//! | `Carp::confess "msg"` | Yes |
39//! | `last` in loop body | Yes |
40//! | `next` in loop body | Yes |
41//! | `redo` in loop body | Yes |
42//! | `return if $cond`  | No (conditional via StatementModifier) |
43//! | `die` inside `or`  | No (right operand of Binary, not a direct statement) |
44//! | `die` inside `eval { }` | No (caught by eval) |
45
46use perl_diagnostics_codes::DiagnosticCode;
47use perl_lsp_diagnostic_types::{Diagnostic, DiagnosticSeverity, DiagnosticTag};
48use perl_parser_core::ast::{Node, NodeKind};
49
50/// Entry point for unreachable code detection.
51///
52/// Walk the AST and emit `PL406` diagnostics for any statements that cannot
53/// be reached due to a preceding unconditional control-flow exit.
54pub fn check_unreachable_code(root: &Node, diagnostics: &mut Vec<Diagnostic>) {
55    visit_node(root, diagnostics);
56}
57
58/// Dispatch on a single node: recurse into any block-like children using fresh
59/// reachability state, and process statement lists as slices.
60fn visit_node(node: &Node, diagnostics: &mut Vec<Diagnostic>) {
61    match &node.kind {
62        // Top-level program: walk all top-level statements as a slice
63        NodeKind::Program { statements } => {
64            check_statement_list(statements, diagnostics);
65        }
66
67        // Subroutine body: fresh reachability scope — return here does not
68        // affect the outer statement list
69        NodeKind::Subroutine { body, .. } | NodeKind::Method { body, .. } => {
70            visit_node(body, diagnostics);
71        }
72
73        // Plain block: walk its statements as a slice
74        NodeKind::Block { statements } => {
75            check_statement_list(statements, diagnostics);
76        }
77
78        // If/unless: each branch is an independent scope
79        NodeKind::If { then_branch, elsif_branches, else_branch, .. } => {
80            visit_node(then_branch, diagnostics);
81            for (_, branch_body) in elsif_branches {
82                visit_node(branch_body, diagnostics);
83            }
84            if let Some(else_body) = else_branch {
85                visit_node(else_body, diagnostics);
86            }
87        }
88
89        // Loop bodies: each body is an independent scope
90        NodeKind::While { body, .. }
91        | NodeKind::For { body, .. }
92        | NodeKind::Foreach { body, .. } => {
93            visit_node(body, diagnostics);
94        }
95
96        // Given/when/default
97        NodeKind::Given { body, .. } | NodeKind::When { body, .. } | NodeKind::Default { body } => {
98            visit_node(body, diagnostics);
99        }
100
101        // PhaseBlock (BEGIN, END, etc.): walk its block
102        NodeKind::PhaseBlock { block, .. } => {
103            visit_node(block, diagnostics);
104        }
105
106        // Class body
107        NodeKind::Class { body, .. } => {
108            visit_node(body, diagnostics);
109        }
110
111        // Do block: fresh scope (do { ... })
112        NodeKind::Do { block } => {
113            visit_node(block, diagnostics);
114        }
115
116        // Try body and catch blocks: each is an independent scope
117        NodeKind::Try { body, catch_blocks, finally_block } => {
118            visit_node(body, diagnostics);
119            for (_, catch_body) in catch_blocks {
120                visit_node(catch_body, diagnostics);
121            }
122            if let Some(finally) = finally_block {
123                visit_node(finally, diagnostics);
124            }
125        }
126
127        // ExpressionStatement: recurse into the expression to catch nested
128        // subroutine literals (e.g., `my $f = sub { return 1; };`)
129        NodeKind::ExpressionStatement { expression } => {
130            visit_expr(expression, diagnostics);
131        }
132
133        // Variable declarations with initializers may contain anonymous subs
134        NodeKind::VariableDeclaration { initializer: Some(init), .. }
135        | NodeKind::VariableListDeclaration { initializer: Some(init), .. } => {
136            visit_expr(init, diagnostics);
137        }
138
139        // Eval: intentionally NOT recursed into.
140        // die inside eval { } is caught — the outer scope continues normally.
141        NodeKind::Eval { .. } => {}
142
143        // LabeledStatement: recurse into the inner statement
144        NodeKind::LabeledStatement { statement, .. } => {
145            visit_node(statement, diagnostics);
146        }
147
148        // All other nodes have no statement-list children
149        _ => {}
150    }
151}
152
153/// Recursively visit expression nodes looking for anonymous subroutine literals
154/// (so that `return` inside an anonymous sub does not appear to be a direct
155/// child of the outer statement list).
156fn visit_expr(expr: &Node, diagnostics: &mut Vec<Diagnostic>) {
157    match &expr.kind {
158        // Anonymous sub literal: fresh reachability scope
159        NodeKind::Subroutine { body, .. } => {
160            visit_node(body, diagnostics);
161        }
162
163        // Walk children of common expression forms
164        NodeKind::Assignment { lhs, rhs, .. } => {
165            visit_expr(lhs, diagnostics);
166            visit_expr(rhs, diagnostics);
167        }
168        NodeKind::Binary { left, right, .. } => {
169            visit_expr(left, diagnostics);
170            visit_expr(right, diagnostics);
171        }
172        NodeKind::Unary { operand, .. } => {
173            visit_expr(operand, diagnostics);
174        }
175        NodeKind::Ternary { condition, then_expr, else_expr } => {
176            visit_expr(condition, diagnostics);
177            visit_expr(then_expr, diagnostics);
178            visit_expr(else_expr, diagnostics);
179        }
180        NodeKind::FunctionCall { args, .. } | NodeKind::MethodCall { args, .. } => {
181            for arg in args {
182                visit_expr(arg, diagnostics);
183            }
184        }
185        NodeKind::ArrayLiteral { elements } => {
186            for elem in elements {
187                visit_expr(elem, diagnostics);
188            }
189        }
190        NodeKind::HashLiteral { pairs } => {
191            for (key, val) in pairs {
192                visit_expr(key, diagnostics);
193                visit_expr(val, diagnostics);
194            }
195        }
196        // Other expression forms don't contain sub literals; stop recursing
197        _ => {}
198    }
199}
200
201/// Walk a statement slice linearly. When an unconditional exit is found, emit
202/// PL406 for all remaining siblings in the same slice.
203///
204/// The key correctness property: after calling `check_statement_list`, nested
205/// blocks are always entered with a *fresh* call to `visit_node`, which starts
206/// with `found_exit = false`. This prevents a `return` in an inner sub from
207/// poisoning the outer statement list.
208fn check_statement_list(stmts: &[Node], diagnostics: &mut Vec<Diagnostic>) {
209    let mut found_exit = false;
210
211    for stmt in stmts {
212        if found_exit {
213            // Emit PL406 for this unreachable statement
214            diagnostics.push(Diagnostic {
215                range: (stmt.location.start, stmt.location.end),
216                severity: DiagnosticSeverity::Hint,
217                code: Some(DiagnosticCode::UnreachableCode.as_str().to_string()),
218                message: "Unreachable code: this statement cannot be executed".to_string(),
219                related_information: vec![],
220                tags: vec![DiagnosticTag::Unnecessary],
221                suggestion: Some("Remove unreachable code".to_string()),
222            });
223            // Still recurse into the unreachable node: nested subs deserve
224            // independent analysis even if their containing block is dead.
225            visit_node(stmt, diagnostics);
226        } else {
227            // Recurse first (to handle nested subs), then check for exit
228            visit_node(stmt, diagnostics);
229            if is_unconditional_exit(stmt) {
230                found_exit = true;
231            }
232        }
233    }
234}
235
236/// Returns true if this AST node represents an unconditional control-flow exit.
237///
238/// Only nodes that **directly exit** at the statement level qualify. The key
239/// restriction is that `die` inside `or` (a binary expression) does NOT count
240/// because the `or` branch is only taken when the left side is falsy — the
241/// overall statement does not always exit.
242///
243/// `StatementModifier` is explicitly `false`: `return if $cond` is conditional.
244fn is_unconditional_exit(node: &Node) -> bool {
245    match &node.kind {
246        // `return;` or `return $value;`
247        NodeKind::Return { .. } => true,
248
249        // Direct function call at statement level (not wrapped in ExpressionStatement)
250        NodeKind::FunctionCall { name, .. } => is_exit_function(name),
251
252        // `die "msg";` — the parser wraps bare function calls in ExpressionStatement
253        NodeKind::ExpressionStatement { expression } => is_unconditional_exit(expression),
254
255        // `last`, `next`, `redo` — exit the current loop iteration/block
256        NodeKind::LoopControl { op, .. } => matches!(op.as_str(), "last" | "next" | "redo"),
257
258        // `return if $cond` is CONDITIONAL — StatementModifier is never an unconditional exit
259        NodeKind::StatementModifier { .. } => false,
260
261        _ => false,
262    }
263}
264
265/// Returns true if the function name is one of the known unconditional-exit functions.
266fn is_exit_function(name: &str) -> bool {
267    matches!(name, "die" | "exit" | "croak" | "Carp::croak" | "confess" | "Carp::confess")
268}
perl_lsp_diagnostics/lints/unreachable_code.rs

perl_lsp_diagnostics/lints/
unreachable_code.rs