perl_lsp_diagnostics/lints/unreachable_code.rs
1//! Unreachable code detection (PL406)
2//!
3//! Identifies statements that cannot execute because they follow an unconditional
4//! control-flow exit (`return`, `die`, `exit`, `croak`, `confess`, `last`,
5//! `next`, `redo`).
6//!
7//! # Algorithm
8//!
9//! The lint uses **recursive statement-slice analysis** rather than a flat
10//! pre-order AST walk. This is the only correct approach: a pre-order visitor
11//! with a `reachable: bool` flag cannot distinguish "visiting a child of this
12//! node" from "visiting the next sibling", so a `return` inside a nested
13//! subroutine body would incorrectly poison sibling statements in the outer
14//! scope.
15//!
16//! The correct algorithm:
17//! 1. `check_unreachable_code` dispatches on the root, then calls `visit_node`
18//! for each statement in top-level lists.
19//! 2. `check_statement_list` iterates a `&[Node]` linearly. When an
20//! unconditional exit is found, all subsequent siblings get a PL406
21//! diagnostic. Nested blocks are recursed into freshly.
22//! 3. Subroutine and method bodies (`Subroutine`, `Method`) trigger a fresh
23//! call to `visit_node`, so a `return` in an inner sub never affects the
24//! outer statement list.
25//! 4. `eval { }` blocks are intentionally **not** recursed into: `die` inside
26//! `eval { }` is caught and does not exit the outer scope.
27//!
28//! # Scope of detection
29//!
30//! | Unconditional exit | Detected? |
31//! |--------------------|-----------|
32//! | `return` | Yes |
33//! | `die "msg"` | Yes (direct FunctionCall at statement level) |
34//! | `exit $code` | Yes |
35//! | `croak "msg"` | Yes |
36//! | `Carp::croak "msg"` | Yes |
37//! | `confess "msg"` | Yes |
38//! | `Carp::confess "msg"` | Yes |
39//! | `last` in loop body | Yes |
40//! | `next` in loop body | Yes |
41//! | `redo` in loop body | Yes |
42//! | `return if $cond` | No (conditional via StatementModifier) |
43//! | `die` inside `or` | No (right operand of Binary, not a direct statement) |
44//! | `die` inside `eval { }` | No (caught by eval) |
45
46use perl_diagnostics_codes::DiagnosticCode;
47use perl_lsp_diagnostic_types::{Diagnostic, DiagnosticSeverity, DiagnosticTag};
48use perl_parser_core::ast::{Node, NodeKind};
49
50/// Entry point for unreachable code detection.
51///
52/// Walk the AST and emit `PL406` diagnostics for any statements that cannot
53/// be reached due to a preceding unconditional control-flow exit.
54pub fn check_unreachable_code(root: &Node, diagnostics: &mut Vec<Diagnostic>) {
55 visit_node(root, diagnostics);
56}
57
58/// Dispatch on a single node: recurse into any block-like children using fresh
59/// reachability state, and process statement lists as slices.
60fn visit_node(node: &Node, diagnostics: &mut Vec<Diagnostic>) {
61 match &node.kind {
62 // Top-level program: walk all top-level statements as a slice
63 NodeKind::Program { statements } => {
64 check_statement_list(statements, diagnostics);
65 }
66
67 // Subroutine body: fresh reachability scope — return here does not
68 // affect the outer statement list
69 NodeKind::Subroutine { body, .. } | NodeKind::Method { body, .. } => {
70 visit_node(body, diagnostics);
71 }
72
73 // Plain block: walk its statements as a slice
74 NodeKind::Block { statements } => {
75 check_statement_list(statements, diagnostics);
76 }
77
78 // If/unless: each branch is an independent scope
79 NodeKind::If { then_branch, elsif_branches, else_branch, .. } => {
80 visit_node(then_branch, diagnostics);
81 for (_, branch_body) in elsif_branches {
82 visit_node(branch_body, diagnostics);
83 }
84 if let Some(else_body) = else_branch {
85 visit_node(else_body, diagnostics);
86 }
87 }
88
89 // Loop bodies: each body is an independent scope
90 NodeKind::While { body, .. }
91 | NodeKind::For { body, .. }
92 | NodeKind::Foreach { body, .. } => {
93 visit_node(body, diagnostics);
94 }
95
96 // Given/when/default
97 NodeKind::Given { body, .. } | NodeKind::When { body, .. } | NodeKind::Default { body } => {
98 visit_node(body, diagnostics);
99 }
100
101 // PhaseBlock (BEGIN, END, etc.): walk its block
102 NodeKind::PhaseBlock { block, .. } => {
103 visit_node(block, diagnostics);
104 }
105
106 // Class body
107 NodeKind::Class { body, .. } => {
108 visit_node(body, diagnostics);
109 }
110
111 // Do block: fresh scope (do { ... })
112 NodeKind::Do { block } => {
113 visit_node(block, diagnostics);
114 }
115
116 // Try body and catch blocks: each is an independent scope
117 NodeKind::Try { body, catch_blocks, finally_block } => {
118 visit_node(body, diagnostics);
119 for (_, catch_body) in catch_blocks {
120 visit_node(catch_body, diagnostics);
121 }
122 if let Some(finally) = finally_block {
123 visit_node(finally, diagnostics);
124 }
125 }
126
127 // ExpressionStatement: recurse into the expression to catch nested
128 // subroutine literals (e.g., `my $f = sub { return 1; };`)
129 NodeKind::ExpressionStatement { expression } => {
130 visit_expr(expression, diagnostics);
131 }
132
133 // Variable declarations with initializers may contain anonymous subs
134 NodeKind::VariableDeclaration { initializer: Some(init), .. }
135 | NodeKind::VariableListDeclaration { initializer: Some(init), .. } => {
136 visit_expr(init, diagnostics);
137 }
138
139 // Eval: intentionally NOT recursed into.
140 // die inside eval { } is caught — the outer scope continues normally.
141 NodeKind::Eval { .. } => {}
142
143 // LabeledStatement: recurse into the inner statement
144 NodeKind::LabeledStatement { statement, .. } => {
145 visit_node(statement, diagnostics);
146 }
147
148 // All other nodes have no statement-list children
149 _ => {}
150 }
151}
152
153/// Recursively visit expression nodes looking for anonymous subroutine literals
154/// (so that `return` inside an anonymous sub does not appear to be a direct
155/// child of the outer statement list).
156fn visit_expr(expr: &Node, diagnostics: &mut Vec<Diagnostic>) {
157 match &expr.kind {
158 // Anonymous sub literal: fresh reachability scope
159 NodeKind::Subroutine { body, .. } => {
160 visit_node(body, diagnostics);
161 }
162
163 // Walk children of common expression forms
164 NodeKind::Assignment { lhs, rhs, .. } => {
165 visit_expr(lhs, diagnostics);
166 visit_expr(rhs, diagnostics);
167 }
168 NodeKind::Binary { left, right, .. } => {
169 visit_expr(left, diagnostics);
170 visit_expr(right, diagnostics);
171 }
172 NodeKind::Unary { operand, .. } => {
173 visit_expr(operand, diagnostics);
174 }
175 NodeKind::Ternary { condition, then_expr, else_expr } => {
176 visit_expr(condition, diagnostics);
177 visit_expr(then_expr, diagnostics);
178 visit_expr(else_expr, diagnostics);
179 }
180 NodeKind::FunctionCall { args, .. } | NodeKind::MethodCall { args, .. } => {
181 for arg in args {
182 visit_expr(arg, diagnostics);
183 }
184 }
185 NodeKind::ArrayLiteral { elements } => {
186 for elem in elements {
187 visit_expr(elem, diagnostics);
188 }
189 }
190 NodeKind::HashLiteral { pairs } => {
191 for (key, val) in pairs {
192 visit_expr(key, diagnostics);
193 visit_expr(val, diagnostics);
194 }
195 }
196 // Other expression forms don't contain sub literals; stop recursing
197 _ => {}
198 }
199}
200
201/// Walk a statement slice linearly. When an unconditional exit is found, emit
202/// PL406 for all remaining siblings in the same slice.
203///
204/// The key correctness property: after calling `check_statement_list`, nested
205/// blocks are always entered with a *fresh* call to `visit_node`, which starts
206/// with `found_exit = false`. This prevents a `return` in an inner sub from
207/// poisoning the outer statement list.
208fn check_statement_list(stmts: &[Node], diagnostics: &mut Vec<Diagnostic>) {
209 let mut found_exit = false;
210
211 for stmt in stmts {
212 if found_exit {
213 // Emit PL406 for this unreachable statement
214 diagnostics.push(Diagnostic {
215 range: (stmt.location.start, stmt.location.end),
216 severity: DiagnosticSeverity::Hint,
217 code: Some(DiagnosticCode::UnreachableCode.as_str().to_string()),
218 message: "Unreachable code: this statement cannot be executed".to_string(),
219 related_information: vec![],
220 tags: vec![DiagnosticTag::Unnecessary],
221 suggestion: Some("Remove unreachable code".to_string()),
222 });
223 // Still recurse into the unreachable node: nested subs deserve
224 // independent analysis even if their containing block is dead.
225 visit_node(stmt, diagnostics);
226 } else {
227 // Recurse first (to handle nested subs), then check for exit
228 visit_node(stmt, diagnostics);
229 if is_unconditional_exit(stmt) {
230 found_exit = true;
231 }
232 }
233 }
234}
235
236/// Returns true if this AST node represents an unconditional control-flow exit.
237///
238/// Only nodes that **directly exit** at the statement level qualify. The key
239/// restriction is that `die` inside `or` (a binary expression) does NOT count
240/// because the `or` branch is only taken when the left side is falsy — the
241/// overall statement does not always exit.
242///
243/// `StatementModifier` is explicitly `false`: `return if $cond` is conditional.
244fn is_unconditional_exit(node: &Node) -> bool {
245 match &node.kind {
246 // `return;` or `return $value;`
247 NodeKind::Return { .. } => true,
248
249 // Direct function call at statement level (not wrapped in ExpressionStatement)
250 NodeKind::FunctionCall { name, .. } => is_exit_function(name),
251
252 // `die "msg";` — the parser wraps bare function calls in ExpressionStatement
253 NodeKind::ExpressionStatement { expression } => is_unconditional_exit(expression),
254
255 // `last`, `next`, `redo` — exit the current loop iteration/block
256 NodeKind::LoopControl { op, .. } => matches!(op.as_str(), "last" | "next" | "redo"),
257
258 // `return if $cond` is CONDITIONAL — StatementModifier is never an unconditional exit
259 NodeKind::StatementModifier { .. } => false,
260
261 _ => false,
262 }
263}
264
265/// Returns true if the function name is one of the known unconditional-exit functions.
266fn is_exit_function(name: &str) -> bool {
267 matches!(name, "die" | "exit" | "croak" | "Carp::croak" | "confess" | "Carp::confess")
268}