Skip to main content

perl_semantic_analyzer/analysis/scope_analyzer/
mod.rs

1//! Scope analysis and variable tracking for Perl parsing workflows
2//!
3//! This module provides comprehensive scope analysis for Perl scripts, tracking
4//! variable declarations, usage patterns, and potential issues across different
5//! scopes within the LSP workflow stages.
6//!
7//! # LSP Workflow Integration
8//!
9//! Scope analysis supports semantic validation across LSP workflow stages:
10//! - **Parse**: Identify declarations and scopes during syntax analysis
11//! - **Index**: Provide scope metadata for symbol indexing
12//! - **Navigate**: Resolve references with scope-aware lookups
13//! - **Complete**: Filter completion items based on visible bindings
14//! - **Analyze**: Report unused, shadowed, and undeclared variables
15//!
16//! # Performance
17//!
18//! - **Time complexity**: O(n) over AST nodes with scoped hash lookups
19//! - **Space complexity**: O(n) for scope tables and variable maps (memory bounded)
//! - **Optimizations**: Variables are bucketed by sigil in a fixed-size array, so each lookup hashes only the bare name
21//! - **Benchmarks**: Typically <5ms for mid-sized files, low ms for large files
22//! - **Large file scaling**: Designed to scale across large file sets in workspaces
23//!
24//! # Usage Examples
25//!
26//! ```rust,ignore
27//! use perl_parser::scope_analyzer::{ScopeAnalyzer, IssueKind};
28//! use perl_parser::{Parser, ast::Node};
29//!
30//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
31//! // Analyze Perl script for scope issues
32//! let script = "my $var = 42; sub hello { print $var; }";
33//! let mut parser = Parser::new(script);
34//! let ast = parser.parse()?;
35//!
36//! let analyzer = ScopeAnalyzer::new();
37//! let pragma_map = vec![];
38//! let issues = analyzer.analyze(&ast, script, &pragma_map);
39//!
40//! // Check for common scope issues in Perl parsing code
41//! for issue in &issues {
42//!     match issue.kind {
43//!         IssueKind::UnusedVariable => println!("Unused variable: {}", issue.variable_name),
44//!         IssueKind::VariableShadowing => println!("Variable shadowing: {}", issue.variable_name),
45//!         _ => {}
46//!     }
47//! }
48//! # Ok(())
49//! # }
50//! ```
51
52mod calls_and_exprs;
53mod declarations;
54mod interpolation;
55mod scope_constructs;
56mod uses;
57
58use crate::ast::{Node, NodeKind};
59use crate::pragma_tracker::{PragmaQueryCursor, PragmaState};
60use perl_module::import::resolve_known_export_tag;
61use rustc_hash::FxHashMap;
62use std::cell::{Cell, RefCell};
63use std::collections::HashSet;
64use std::ops::Range;
65use std::rc::Rc;
66
/// Category of scope-related issue detected during analysis.
///
/// The enum is fieldless, so it is cheap to copy and can be used directly as a
/// key in hashed or ordered collections (for example to group or de-duplicate
/// findings by kind).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IssueKind {
    /// A variable declared in an inner scope shadows one in an outer scope.
    VariableShadowing,
    /// A declared variable is never read.
    UnusedVariable,
    /// A variable is used without a prior declaration (`my`/`our`/`local`).
    UndeclaredVariable,
    /// The same variable name is declared twice in the same scope.
    VariableRedeclaration,
    /// A subroutine parameter name appears more than once in the signature.
    DuplicateParameter,
    /// A parameter name shadows a package-level (`our`) variable.
    ParameterShadowsGlobal,
    /// A subroutine parameter is never used inside the body.
    UnusedParameter,
    /// A bareword was used where a string or identifier was expected.
    UnquotedBareword,
    /// A variable was accessed before any initializing assignment.
    UninitializedVariable,
    /// Capture variable (`$1`, `$2`, etc.) used with no preceding regex match in scope.
    CaptureVarWithoutRegexMatch,
}
91
/// A single scope-analysis finding with location and human-readable description.
///
/// Instances are produced by [`ScopeAnalyzer::analyze`]; all fields are public so
/// consumers can map them straight onto their own diagnostic type.
#[derive(Debug, Clone)]
pub struct ScopeIssue {
    /// The category of scope problem detected.
    pub kind: IssueKind,
    /// The bare variable name (without sigil) involved in the issue.
    // NOTE(review): the unused-variable iterator in this module builds names
    // *with* the sigil prepended — confirm which form producers store here.
    pub variable_name: String,
    /// Zero-based line number of the first token of the offending construct.
    // NOTE(review): `AnalysisContext::get_line` in this module yields 1 for
    // offset 0 — confirm whether producers convert before filling this field.
    pub line: usize,
    /// Byte offset range `(start, end)` of the offending construct.
    pub range: (usize, usize),
    /// Human-readable explanation of the issue.
    pub description: String,
}
106
/// Bookkeeping record for one variable declared in a [`Scope`].
///
/// The two flags use interior mutability because variables are reached through
/// shared (`Rc`) handles while the scope chain is being walked.
#[derive(Debug)]
struct Variable {
    /// Offset supplied at declaration time; handed back to the callback when the
    /// variable is reported as unused.
    declaration_offset: usize,
    /// Set to `true` the first time a lookup counts as a use. Starts out `true`
    /// for `our` variables, which are treated as always used.
    is_used: RefCell<bool>,
    /// Whether the variable was declared with `is_our` set; such variables are
    /// never reported as unused.
    is_our: bool,
    /// Whether the variable has been initialized, either at declaration or by a
    /// later initializing lookup.
    is_initialized: RefCell<bool>,
}
114
115/// Convert a Perl sigil to an array index for fast variable lookup.
116///
117/// Sigil indices:
118/// - `$` (scalar): 0
119/// - `@` (array): 1
120/// - `%` (hash): 2
121/// - `&` (subroutine): 3
122/// - `*` (glob): 4
123/// - Other: 5 (fallback)
124#[inline]
125pub(super) fn sigil_to_index(sigil: &str) -> usize {
126    // Use first byte for fast comparison - sigils are always single ASCII chars
127    match sigil.as_bytes().first() {
128        Some(b'$') => 0,
129        Some(b'@') => 1,
130        Some(b'%') => 2,
131        Some(b'&') => 3,
132        Some(b'*') => 4,
133        _ => 5,
134    }
135}
136
/// Inverse of the sigil-to-slot mapping: turn a table slot back into its sigil.
///
/// Slots 0 through 4 yield `$`, `@`, `%`, `&` and `*` respectively; every other
/// index (including the fallback slot 5) yields the empty string.
#[inline]
fn index_to_sigil(index: usize) -> &'static str {
    const SIGILS: [&str; 5] = ["$", "@", "%", "&", "*"];

    SIGILS.get(index).copied().unwrap_or("")
}
149
/// One lexical scope in the scope chain.
///
/// A scope owns the variables declared directly in it and holds a shared handle
/// to its parent, so lookups can walk outward until the root (which has no
/// parent) is reached.
#[derive(Debug)]
pub(super) struct Scope {
    // Outer key: sigil index, Inner key: name
    // Each per-sigil map is created lazily on the first declaration for that
    // sigil, so an untouched slot stays `None`.
    variables: RefCell<[Option<FxHashMap<String, Rc<Variable>>>; 6]>,
    /// Enclosing scope, or `None` for a root scope.
    parent: Option<Rc<Scope>>,
    /// Whether a regex match operation (`=~`, `m//`, `s///`) has been seen in this scope.
    has_regex_match: Cell<bool>,
}
158
159impl Scope {
160    fn new() -> Self {
161        let vars = std::array::from_fn(|_| None);
162        Self { variables: RefCell::new(vars), parent: None, has_regex_match: Cell::new(false) }
163    }
164
165    fn with_parent(parent: Rc<Scope>) -> Self {
166        let vars = std::array::from_fn(|_| None);
167        Self {
168            variables: RefCell::new(vars),
169            parent: Some(parent),
170            has_regex_match: Cell::new(false),
171        }
172    }
173
174    /// Returns true if this scope or any ancestor scope has seen a regex match operation.
175    fn regex_match_in_scope(&self) -> bool {
176        if self.has_regex_match.get() {
177            return true;
178        }
179        if let Some(ref parent) = self.parent { parent.regex_match_in_scope() } else { false }
180    }
181
182    fn declare_variable_parts(
183        &self,
184        sigil: &str,
185        name: &str,
186        offset: usize,
187        is_our: bool,
188        is_initialized: bool,
189    ) -> Option<IssueKind> {
190        let idx = sigil_to_index(sigil);
191
192        // First check if already declared in this scope
193        {
194            let vars = self.variables.borrow();
195            if let Some(map) = &vars[idx] {
196                if map.contains_key(name) {
197                    return Some(IssueKind::VariableRedeclaration);
198                }
199            }
200        }
201
202        // Check if it shadows a parent scope variable
203        let shadows = if let Some(ref parent) = self.parent {
204            parent.has_variable_parts(sigil, name)
205        } else {
206            false
207        };
208
209        // Now insert the variable
210        let mut vars = self.variables.borrow_mut();
211        let inner = vars[idx].get_or_insert_with(FxHashMap::default);
212
213        inner.insert(
214            name.to_string(),
215            Rc::new(Variable {
216                declaration_offset: offset,
217                is_used: RefCell::new(is_our), // 'our' variables are considered used
218                is_our,
219                is_initialized: RefCell::new(is_initialized),
220            }),
221        );
222
223        if shadows { Some(IssueKind::VariableShadowing) } else { None }
224    }
225
226    fn has_variable_parts(&self, sigil: &str, name: &str) -> bool {
227        let idx = sigil_to_index(sigil);
228        let mut current_scope = self;
229
230        loop {
231            {
232                let vars = current_scope.variables.borrow();
233                if let Some(map) = &vars[idx] {
234                    if map.contains_key(name) {
235                        return true;
236                    }
237                }
238            }
239            if let Some(ref parent) = current_scope.parent {
240                current_scope = parent;
241            } else {
242                return false;
243            }
244        }
245    }
246
247    fn use_variable_parts(&self, sigil: &str, name: &str) -> (bool, bool) {
248        let idx = sigil_to_index(sigil);
249        let mut current_scope = self;
250
251        loop {
252            {
253                let vars = current_scope.variables.borrow();
254                if let Some(map) = &vars[idx] {
255                    if let Some(var) = map.get(name) {
256                        *var.is_used.borrow_mut() = true;
257                        return (true, *var.is_initialized.borrow());
258                    }
259                }
260            }
261
262            if let Some(ref parent) = current_scope.parent {
263                current_scope = parent;
264            } else {
265                return (false, false);
266            }
267        }
268    }
269
270    fn initialize_variable_parts(&self, sigil: &str, name: &str) {
271        let idx = sigil_to_index(sigil);
272        let mut current_scope = self;
273
274        loop {
275            {
276                let vars = current_scope.variables.borrow();
277                if let Some(map) = &vars[idx] {
278                    if let Some(var) = map.get(name) {
279                        *var.is_initialized.borrow_mut() = true;
280                        return;
281                    }
282                }
283            }
284
285            if let Some(ref parent) = current_scope.parent {
286                current_scope = parent;
287            } else {
288                return;
289            }
290        }
291    }
292
293    /// Optimized method to mark a variable as initialized AND used in one lookup.
294    /// Returns true if the variable was found and updated.
295    fn initialize_and_use_variable_parts(&self, sigil: &str, name: &str) -> bool {
296        let idx = sigil_to_index(sigil);
297        let mut current_scope = self;
298
299        loop {
300            {
301                let vars = current_scope.variables.borrow();
302                if let Some(map) = &vars[idx] {
303                    if let Some(var) = map.get(name) {
304                        *var.is_used.borrow_mut() = true;
305                        *var.is_initialized.borrow_mut() = true;
306                        return true;
307                    }
308                }
309            }
310
311            if let Some(ref parent) = current_scope.parent {
312                current_scope = parent;
313            } else {
314                return false;
315            }
316        }
317    }
318
319    /// Iterate over unused variables that should be reported as diagnostics.
320    /// Filters out underscore-prefixed variables (intentionally unused) before allocation.
321    fn for_each_reportable_unused_variable<F>(&self, mut f: F)
322    where
323        F: FnMut(String, usize),
324    {
325        for (idx, inner_opt) in self.variables.borrow().iter().enumerate() {
326            if let Some(inner) = inner_opt {
327                for (name, var) in inner {
328                    if !*var.is_used.borrow() && !var.is_our {
329                        // Optimization: Check for underscore prefix before allocation
330                        if name.starts_with('_') {
331                            continue;
332                        }
333                        let full_name = format!("{}{}", index_to_sigil(idx), name);
334                        f(full_name, var.declaration_offset);
335                    }
336                }
337            }
338        }
339    }
340}
341
342/// Helper to split a full variable name into sigil and name parts.
343pub(super) fn split_variable_name(full_name: &str) -> (&str, &str) {
344    if !full_name.is_empty() {
345        let c = full_name.as_bytes()[0];
346        if c == b'$' || c == b'@' || c == b'%' || c == b'&' || c == b'*' {
347            return (&full_name[0..1], &full_name[1..]);
348        }
349    }
350    ("", full_name)
351}
352
/// Whether `byte` is accepted as the first character of an interpolated variable
/// name: an ASCII letter or an underscore.
fn is_interpolated_var_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
356
/// Whether `byte` is accepted after the first character of an interpolated
/// variable name: an ASCII letter or digit, an underscore, or a colon.
fn is_interpolated_var_continue(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b':')
}
360
/// Whether the byte at `index` is escaped by the backslashes in front of it.
///
/// Counts the unbroken run of `\` bytes that ends immediately before `index`.
/// An odd run means the last backslash escapes the byte at `index`; an even run
/// (including zero) means the backslashes pair up with each other.
///
/// `index` must not exceed `bytes.len()`.
fn has_escaped_interpolation_marker(bytes: &[u8], index: usize) -> bool {
    let preceding_backslashes =
        bytes[..index].iter().rev().take_while(|&&byte| byte == b'\\').count();

    preceding_backslashes % 2 == 1
}
375
/// A variable name in one of two representations.
///
/// Both variants can be viewed as `(sigil, name)` parts or rendered as a single
/// string through the type's methods.
pub(super) enum ExtractedName<'a> {
    /// Sigil and bare name held as two separate borrowed slices.
    Parts(&'a str, &'a str),
    /// An owned full name; it is split into sigil and name on demand.
    Full(String),
}
380
/// Read-mostly state shared by every step of a single analysis run.
///
/// Fields that change during traversal are wrapped in `RefCell` so the context
/// can be passed around by shared reference.
pub(super) struct AnalysisContext<'a> {
    /// Source text being analyzed.
    code: &'a str,
    /// Byte ranges paired with the pragma state queried for offsets inside them.
    pragma_map: &'a [(Range<usize>, PragmaState)],
    /// Cursor handed to every pragma-state query.
    // presumably caches the last position to speed up successive queries — verify
    // against `PragmaQueryCursor`
    pragma_cursor: RefCell<PragmaQueryCursor>,
    /// Bareword names collected from the AST once, when the context is built.
    imported_barewords: HashSet<String>,
    /// Lazily built table of line-start byte offsets; `None` until the first line lookup.
    line_starts: RefCell<Option<Vec<usize>>>,
    /// Current package name, updated as `package` statements are traversed.
    current_package: RefCell<String>,
}
390
391impl<'a> AnalysisContext<'a> {
392    fn new(ast: &Node, code: &'a str, pragma_map: &'a [(Range<usize>, PragmaState)]) -> Self {
393        Self {
394            code,
395            pragma_map,
396            pragma_cursor: RefCell::new(PragmaQueryCursor::new()),
397            imported_barewords: collect_imported_barewords(ast),
398            line_starts: RefCell::new(None),
399            current_package: RefCell::new("main".to_string()),
400        }
401    }
402
403    fn pragma_state_for_offset(&self, offset: usize) -> PragmaState {
404        self.pragma_cursor.borrow_mut().state_for_offset(self.pragma_map, offset)
405    }
406
407    fn has_imported_bareword(&self, name: &str) -> bool {
408        self.imported_barewords.contains(name)
409    }
410
411    fn get_line(&self, offset: usize) -> usize {
412        let mut line_starts_guard = self.line_starts.borrow_mut();
413        let starts = line_starts_guard.get_or_insert_with(|| {
414            let mut indices = Vec::with_capacity(self.code.len() / 40); // Estimate
415            indices.push(0);
416            for (i, b) in self.code.bytes().enumerate() {
417                if b == b'\n' {
418                    indices.push(i + 1);
419                }
420            }
421            indices
422        });
423
424        // Find the line that contains the offset
425        match starts.binary_search(&offset) {
426            Ok(idx) => idx + 1,
427            Err(idx) => idx,
428        }
429    }
430
431    fn find_catch_variable_range(
432        &self,
433        catch_body_start: usize,
434        full_name: &str,
435    ) -> Option<(usize, usize)> {
436        if full_name.is_empty() || catch_body_start == 0 || catch_body_start > self.code.len() {
437            return None;
438        }
439
440        let window_start = catch_body_start.saturating_sub(256);
441        let window = self.code.get(window_start..catch_body_start)?;
442        let catch_start = window.rfind("catch")?;
443        let search_start = catch_start + "catch".len();
444        let var_offset = window[search_start..].rfind(full_name)? + search_start;
445        let start = window_start + var_offset;
446        let end = start + full_name.len();
447
448        Some((start, end))
449    }
450}
451
452impl<'a> ExtractedName<'a> {
453    fn as_string(&self) -> String {
454        match self {
455            ExtractedName::Parts(sigil, name) => format!("{}{}", sigil, name),
456            ExtractedName::Full(s) => s.clone(),
457        }
458    }
459
460    fn parts(&self) -> (&str, &str) {
461        match self {
462            ExtractedName::Parts(sigil, name) => (sigil, name),
463            ExtractedName::Full(s) => split_variable_name(s),
464        }
465    }
466
467    fn is_empty(&self) -> bool {
468        match self {
469            ExtractedName::Parts(sigil, name) => sigil.is_empty() && name.is_empty(),
470            ExtractedName::Full(s) => s.is_empty(),
471        }
472    }
473}
474
/// Analyzes an AST for scope-related issues such as unused variables and shadowing.
///
/// Produces a list of [`ScopeIssue`]s that can be surfaced as LSP diagnostics
/// or used by the refactoring engine.  The analyzer is stateless and may be
/// reused across multiple invocations.
///
/// Being a field-less unit type, it is trivially `Copy` and prints as
/// `ScopeAnalyzer` under `Debug`.
#[derive(Debug, Clone, Copy)]
pub struct ScopeAnalyzer;
481
482impl Default for ScopeAnalyzer {
483    fn default() -> Self {
484        Self::new()
485    }
486}
487
488impl ScopeAnalyzer {
489    /// Create a new scope analyzer instance.
490    pub fn new() -> Self {
491        Self
492    }
493
494    pub(super) fn package_variable_name(
495        &self,
496        name: &str,
497        context: &AnalysisContext<'_>,
498    ) -> Option<String> {
499        if name.is_empty() || name.contains("::") {
500            return None;
501        }
502
503        let current_package = context.current_package.borrow();
504        Some(format!("{}::{}", current_package.as_str(), name))
505    }
506
507    pub(super) fn declare_variable_parts_in_context(
508        &self,
509        scope: &Rc<Scope>,
510        sigil: &str,
511        name: &str,
512        offset: usize,
513        is_our: bool,
514        is_initialized: bool,
515        context: &AnalysisContext<'_>,
516    ) -> Option<IssueKind> {
517        if is_our && let Some(qualified_name) = self.package_variable_name(name, context) {
518            return scope.declare_variable_parts(
519                sigil,
520                &qualified_name,
521                offset,
522                is_our,
523                is_initialized,
524            );
525        }
526
527        scope.declare_variable_parts(sigil, name, offset, is_our, is_initialized)
528    }
529
530    pub(super) fn has_variable_parts_in_context(
531        &self,
532        scope: &Rc<Scope>,
533        sigil: &str,
534        name: &str,
535        context: &AnalysisContext<'_>,
536    ) -> bool {
537        if scope.has_variable_parts(sigil, name) {
538            return true;
539        }
540
541        self.package_variable_name(name, context)
542            .is_some_and(|qualified_name| scope.has_variable_parts(sigil, &qualified_name))
543    }
544
545    pub(super) fn use_variable_parts_in_context(
546        &self,
547        scope: &Rc<Scope>,
548        sigil: &str,
549        name: &str,
550        context: &AnalysisContext<'_>,
551    ) -> (bool, bool) {
552        let (found, initialized) = scope.use_variable_parts(sigil, name);
553        if found {
554            return (found, initialized);
555        }
556
557        self.package_variable_name(name, context).map_or((false, false), |qualified_name| {
558            scope.use_variable_parts(sigil, &qualified_name)
559        })
560    }
561
562    pub(super) fn initialize_variable_parts_in_context(
563        &self,
564        scope: &Rc<Scope>,
565        sigil: &str,
566        name: &str,
567        context: &AnalysisContext<'_>,
568    ) {
569        if scope.has_variable_parts(sigil, name) {
570            scope.initialize_variable_parts(sigil, name);
571            return;
572        }
573
574        if let Some(qualified_name) = self.package_variable_name(name, context) {
575            scope.initialize_variable_parts(sigil, &qualified_name);
576        }
577    }
578
579    pub(super) fn initialize_and_use_variable_parts_in_context(
580        &self,
581        scope: &Rc<Scope>,
582        sigil: &str,
583        name: &str,
584        context: &AnalysisContext<'_>,
585    ) -> bool {
586        if scope.initialize_and_use_variable_parts(sigil, name) {
587            return true;
588        }
589
590        self.package_variable_name(name, context).is_some_and(|qualified_name| {
591            scope.initialize_and_use_variable_parts(sigil, &qualified_name)
592        })
593    }
594
595    /// Analyze `ast` for scope issues, using `pragma_map` to honour `use strict` regions.
596    ///
597    /// Returns all detected issues sorted by byte offset.
598    pub fn analyze(
599        &self,
600        ast: &Node,
601        code: &str,
602        pragma_map: &[(Range<usize>, PragmaState)],
603    ) -> Vec<ScopeIssue> {
604        let mut issues = Vec::new();
605        let root_scope = Rc::new(Scope::new());
606
607        // Use a vector as a stack for ancestors to avoid O(N) HashMap allocation
608        let mut ancestors: Vec<&Node> = Vec::new();
609
610        let context = AnalysisContext::new(ast, code, pragma_map);
611
612        self.analyze_node(ast, &root_scope, &mut ancestors, &mut issues, &context);
613
614        // Collect all unused variables from all scopes
615        self.collect_unused_variables(&root_scope, &mut issues, &context);
616
617        issues
618    }
619
    /// Analyze one AST node and, through the handlers, its subtree.
    ///
    /// Dispatches on `node.kind` to the handler in the matching submodule
    /// (`declarations`, `uses`, `calls_and_exprs`, `interpolation`,
    /// `scope_constructs`). Node kinds without a dedicated arm are handled
    /// generically by recursing into their children.
    ///
    /// * `node` – the node to analyze.
    /// * `scope` – the scope in which `node` is evaluated.
    /// * `ancestors` – stack of enclosing nodes; the generic arm pushes `node`
    ///   before visiting children and pops it afterwards.
    /// * `issues` – output list that findings are appended to.
    /// * `context` – shared per-run state (source text, pragma map, current package).
    pub(super) fn analyze_node<'a>(
        &self,
        node: &'a Node,
        scope: &Rc<Scope>,
        ancestors: &mut Vec<&'a Node>,
        issues: &mut Vec<ScopeIssue>,
        context: &AnalysisContext<'a>,
    ) {
        // Get effective pragma state at this node's location
        let pragma_state = context.pragma_state_for_offset(node.location.start);
        // `signatures_strict` switches on both strict-vars and strict-subs checks.
        let strict_vars_mode = pragma_state.strict_vars || pragma_state.signatures_strict;
        let strict_subs_mode = pragma_state.strict_subs || pragma_state.signatures_strict;
        match &node.kind {
            NodeKind::VariableDeclaration { declarator, variable, initializer, .. } => {
                // The handler's return value is deliberately discarded here.
                let _ = declarations::handle_variable_declaration(
                    self,
                    node,
                    declarator,
                    variable,
                    initializer.as_deref(),
                    scope,
                    ancestors,
                    issues,
                    context,
                );
            }

            NodeKind::VariableListDeclaration { declarator, variables, initializer, .. } => {
                declarations::handle_variable_list_declaration(
                    self,
                    initializer.as_deref(),
                    declarator,
                    variables,
                    scope,
                    ancestors,
                    issues,
                    context,
                );
            }

            NodeKind::Use { module, args, .. } => {
                declarations::handle_use(self, node, module, args, scope, context);
            }
            NodeKind::Variable { sigil, name } => {
                // The handler's return value is deliberately discarded here.
                let _ = uses::handle_variable(
                    self,
                    node,
                    sigil,
                    name,
                    scope,
                    ancestors,
                    issues,
                    context,
                    strict_vars_mode,
                );
            }
            NodeKind::Typeglob { name } => {
                uses::handle_typeglob(self, node, name, scope, issues, context, strict_vars_mode);
            }
            // Only readline nodes that name a filehandle get a dedicated handler;
            // the others fall through to the generic arm at the bottom.
            NodeKind::Readline { filehandle: Some(filehandle) } => {
                uses::handle_readline(
                    self,
                    node,
                    filehandle,
                    scope,
                    issues,
                    context,
                    strict_vars_mode,
                );
            }
            NodeKind::FunctionCall { name, args } => {
                calls_and_exprs::handle_function_call(
                    self,
                    node,
                    name,
                    args,
                    scope,
                    ancestors,
                    issues,
                    context,
                    strict_vars_mode,
                );
            }
            NodeKind::MethodCall { object, method, args } => {
                calls_and_exprs::handle_method_call(
                    self,
                    node,
                    object,
                    method,
                    args,
                    scope,
                    ancestors,
                    issues,
                    context,
                    strict_vars_mode,
                );
            }
            NodeKind::Unary { op: _, operand } => {
                calls_and_exprs::handle_unary(
                    self, node, operand, scope, ancestors, issues, context,
                );
            }
            NodeKind::String { value, interpolated } => {
                interpolation::handle_string(self, value, *interpolated, scope, context);
            }
            NodeKind::Heredoc { content, interpolated, .. } => {
                interpolation::handle_heredoc(self, content, *interpolated, scope, context);
            }
            NodeKind::Assignment { lhs, rhs, op: _ } => {
                // The handler's return value is deliberately discarded here.
                let _ = uses::handle_assignment(
                    self, node, lhs, rhs, scope, ancestors, issues, context,
                );
            }

            NodeKind::Tie { variable, package, args } => {
                uses::handle_tie(
                    self, node, variable, package, args, scope, ancestors, issues, context,
                );
            }

            NodeKind::Untie { variable } => {
                uses::handle_untie(self, node, variable, scope, ancestors, issues, context);
            }

            NodeKind::Identifier { name } => {
                uses::handle_identifier(
                    self,
                    node,
                    name,
                    issues,
                    context,
                    ancestors,
                    &pragma_state,
                    strict_subs_mode,
                );
            }

            NodeKind::Binary { op: _, left, right } => {
                // All binary operations (including {} and [])
                // We don't need special handling for {} and [] here because NodeKind::Variable
                // will handle the context-sensitive lookup (checking ancestors).
                calls_and_exprs::handle_binary(
                    self, node, left, right, scope, ancestors, issues, context,
                );
            }

            NodeKind::ArrayLiteral { elements } => {
                calls_and_exprs::handle_array_literal(
                    self, node, elements, scope, ancestors, issues, context,
                );
            }

            NodeKind::Block { statements } => {
                scope_constructs::handle_block(
                    self, node, statements, scope, ancestors, issues, context,
                );
            }

            NodeKind::PhaseBlock { block, .. } => {
                scope_constructs::handle_phase_block(
                    self, node, block, scope, ancestors, issues, context,
                );
            }

            NodeKind::For { init, condition, update, body, .. } => {
                scope_constructs::handle_for(
                    self,
                    node,
                    init.as_deref(),
                    condition.as_deref(),
                    update.as_deref(),
                    body,
                    scope,
                    ancestors,
                    issues,
                    context,
                );
            }

            NodeKind::Foreach { variable, list, body, continue_block } => {
                scope_constructs::handle_foreach(
                    self,
                    node,
                    variable,
                    list,
                    body,
                    continue_block.as_deref(),
                    scope,
                    ancestors,
                    issues,
                    context,
                );
            }

            NodeKind::Subroutine { signature, body, .. } => {
                scope_constructs::handle_subroutine(
                    self,
                    node,
                    signature.as_deref(),
                    body,
                    scope,
                    ancestors,
                    issues,
                    context,
                );
            }

            NodeKind::Try { body, catch_blocks, finally_block } => {
                scope_constructs::handle_try(
                    self,
                    node,
                    body,
                    catch_blocks,
                    finally_block.as_deref(),
                    scope,
                    ancestors,
                    issues,
                    context,
                );
            }

            NodeKind::Package { name, block, .. } => {
                scope_constructs::handle_package(
                    self,
                    node,
                    name,
                    block.as_deref(),
                    scope,
                    ancestors,
                    issues,
                    context,
                );
            }

            // Regex match operations set capture variables ($1, $2, ...) in the current scope.
            NodeKind::Match { expr, .. } => {
                interpolation::handle_match(self, node, expr, scope, ancestors, issues, context);
            }

            NodeKind::Substitution { expr, .. } => {
                interpolation::handle_substitution(
                    self, node, expr, scope, ancestors, issues, context,
                );
            }

            // Standalone regex (m// matching against $_) also sets capture variables.
            NodeKind::Regex { .. } => {
                interpolation::handle_regex(scope);
            }

            _ => {
                // Recursively analyze children
                // `node` is on the ancestor stack only while its children are visited.
                ancestors.push(node);
                for child in node.children() {
                    self.analyze_node(child, scope, ancestors, issues, context);
                }
                ancestors.pop();
            }
        }
    }
880
881    /// Resolve the variable symbol that a syntax form should count as a use.
882    ///
883    /// This keeps explicit dereference syntax precise:
884    /// - `@$ref` and `%$ref` count as uses of `$ref`
885    /// - `$arr[0]` counts as a use of `@arr`
886    /// - `$hash{k}` counts as a use of `%hash`
887    /// - Arrow dereference forms stay on the scalar reference itself
888    pub(super) fn resolve_variable_use_target<'a>(
889        &self,
890        node: &'a Node,
891        ancestors: &[&'a Node],
892        context: &AnalysisContext<'_>,
893    ) -> Option<(&'a str, &'a str)> {
894        let NodeKind::Variable { sigil, name } = &node.kind else {
895            return None;
896        };
897
898        // Explicit scalar-reference dereference forms should count as uses of the
899        // underlying scalar lexical (`$ref`) rather than a container lexical of the
900        // same bare name. This covers compact and braced syntaxes such as:
901        // - `@$ref`, `%$ref`, `$$ref`
902        // - `@{$ref}`, `%{$ref}`, `${$ref}`
903        if (sigil == "@" || sigil == "%" || sigil == "$")
904            && context
905                .code
906                .get(node.location.start..node.location.end)
907                .is_some_and(is_explicit_scalar_reference_deref)
908        {
909            return Some(("$", normalize_scalar_deref_base_name(name)));
910        }
911
912        if (sigil == "@" || sigil == "%" || sigil == "$") && name.starts_with('$') && name.len() > 1
913        {
914            return Some(("$", &name[1..]));
915        }
916
917        if sigil == "$"
918            && let Some(parent) = ancestors.last()
919            && let NodeKind::Binary { op, left, right } = &parent.kind
920            && std::ptr::eq(left.as_ref(), node)
921        {
922            match op.as_str() {
923                "[]" => return Some(("@", name)),
924                "->[]" | "->{}" => return Some(("$", name)),
925                "{}" if self.is_dynamic_method_deref_rhs(right)
926                    || self.is_dynamic_method_deref_context(parent, ancestors)
927                    || self.is_braced_dynamic_method_call(parent, context) =>
928                {
929                    return Some(("$", name));
930                }
931                "{}" => return Some(("%", name)),
932                _ => {}
933            }
934        }
935
936        // Hash slice syntax (`@hash{...}`) reads from `%hash`, not a lexical `@hash`.
937        // Bridge this so strict-vars and usage tracking resolve against the declared hash.
938        if sigil == "@"
939            && let Some(parent) = ancestors.last()
940            && let NodeKind::Binary { op, left, .. } = &parent.kind
941            && op == "{}"
942            && std::ptr::eq(left.as_ref(), node)
943        {
944            return Some(("%", name));
945        }
946
947        // When the parser interprets `print $arr[0]` as indirect-object syntax, it produces
948        // `IndirectCall { object: Variable($, "arr"), args: [ArrayLiteral([0])] }`.
949        // Similarly, `print $hash{a}` produces
950        // `IndirectCall { object: Variable($, "hash"), args: [Block([a])] }`.
951        // Bridge the sigil so that `@arr` / `%hash` are marked as used, not `$arr` / `$hash`.
952        if sigil == "$"
953            && let Some(parent) = ancestors.last()
954            && let NodeKind::IndirectCall { object, args, .. } = &parent.kind
955            && std::ptr::eq(object.as_ref(), node)
956        {
957            if let Some(first_arg) = args.first() {
958                match &first_arg.kind {
959                    NodeKind::ArrayLiteral { .. } => return Some(("@", name)),
960                    NodeKind::Block { .. } => return Some(("%", name)),
961                    _ => {}
962                }
963            }
964        }
965
966        Some((sigil, name))
967    }
968
969    pub(super) fn extract_name_like_variable<'a>(
970        &self,
971        name: &'a str,
972    ) -> Option<(&'a str, &'a str)> {
973        let (sigil, var_name) = split_variable_name(name);
974        if sigil.is_empty()
975            || var_name.is_empty()
976            || var_name.contains("::")
977            || !self.looks_like_variable_name(var_name)
978        {
979            return None;
980        }
981        Some((sigil, var_name))
982    }
983
984    pub(super) fn extract_method_name_variable<'a>(
985        &self,
986        method: &'a str,
987    ) -> Option<(&'a str, &'a str)> {
988        self.extract_name_like_variable(method).or_else(|| {
989            let inner = method.strip_prefix("${")?.strip_suffix('}')?;
990            if inner.contains("::") || !self.looks_like_variable_name(inner) {
991                return None;
992            }
993            Some(("$", inner))
994        })
995    }
996
997    pub(super) fn looks_like_variable_name(&self, name: &str) -> bool {
998        matches!(
999            name.chars().next(),
1000            Some('A'..='Z' | 'a'..='z' | '_' | '$' | '@' | '%' | '&' | '*' | '^' | '#' | '!' | '?')
1001        )
1002    }
1003
1004    pub(super) fn is_dynamic_method_deref_rhs(&self, node: &Node) -> bool {
1005        matches!(
1006            &node.kind,
1007            NodeKind::Unary { op, operand }
1008                if op == "\\"
1009                    && matches!(
1010                        &operand.kind,
1011                        NodeKind::String { .. } | NodeKind::Identifier { .. }
1012                    )
1013        )
1014    }
1015
1016    pub(super) fn is_dynamic_method_deref_context<'a>(
1017        &self,
1018        node: &'a Node,
1019        ancestors: &[&'a Node],
1020    ) -> bool {
1021        let Some(grandparent) = ancestors.iter().rev().nth(1).copied() else {
1022            return false;
1023        };
1024
1025        match &grandparent.kind {
1026            NodeKind::MethodCall { object, .. } => std::ptr::eq(object.as_ref(), node),
1027            NodeKind::FunctionCall { name, args } if name == "->()" => {
1028                args.first().is_some_and(|arg| std::ptr::eq(arg, node))
1029            }
1030            _ => false,
1031        }
1032    }
1033
1034    pub(super) fn is_braced_dynamic_method_call(
1035        &self,
1036        node: &Node,
1037        context: &AnalysisContext<'_>,
1038    ) -> bool {
1039        let Some(selector_text) = context.code.get(node.location.start..node.location.end) else {
1040            return false;
1041        };
1042        if !selector_text.contains("->${") {
1043            return false;
1044        }
1045
1046        let Some(suffix) = context.code.get(node.location.end..) else {
1047            return false;
1048        };
1049        suffix.trim_start().starts_with("()")
1050    }
1051
1052    pub(super) fn record_variable_use(
1053        &self,
1054        scope: &Rc<Scope>,
1055        strict_vars_mode: bool,
1056        context: &AnalysisContext<'_>,
1057        issues: &mut Vec<ScopeIssue>,
1058        node: &Node,
1059        sigil: &str,
1060        name: &str,
1061    ) {
1062        let (variable_used, is_initialized) =
1063            self.use_variable_parts_in_context(scope, sigil, name, context);
1064        if !variable_used {
1065            if strict_vars_mode {
1066                self.push_undeclared_variable_issue(issues, context, node, sigil, name);
1067            }
1068        } else if !is_initialized {
1069            self.push_uninitialized_variable_issue(issues, context, node, sigil, name);
1070        }
1071    }
1072
1073    pub(super) fn push_undeclared_variable_issue(
1074        &self,
1075        issues: &mut Vec<ScopeIssue>,
1076        context: &AnalysisContext<'_>,
1077        node: &Node,
1078        sigil: &str,
1079        name: &str,
1080    ) {
1081        let full_name = format!("{}{}", sigil, name);
1082        issues.push(ScopeIssue {
1083            kind: IssueKind::UndeclaredVariable,
1084            variable_name: full_name.clone(),
1085            line: context.get_line(node.location.start),
1086            range: (node.location.start, node.location.end),
1087            description: format!("Variable '{}' is used but not declared", full_name),
1088        });
1089    }
1090
1091    pub(super) fn push_uninitialized_variable_issue(
1092        &self,
1093        issues: &mut Vec<ScopeIssue>,
1094        context: &AnalysisContext<'_>,
1095        node: &Node,
1096        sigil: &str,
1097        name: &str,
1098    ) {
1099        let full_name = format!("{}{}", sigil, name);
1100        issues.push(ScopeIssue {
1101            kind: IssueKind::UninitializedVariable,
1102            variable_name: full_name.clone(),
1103            line: context.get_line(node.location.start),
1104            range: (node.location.start, node.location.end),
1105            description: format!("Variable '{}' is used before being initialized", full_name),
1106        });
1107    }
1108
1109    /// Marks variables as initialized when they appear on the left-hand side of an assignment.
1110    /// Handles scalar variables, list assignments like `($x, $y) = ...`, and nested structures.
1111    pub(super) fn mark_initialized(
1112        &self,
1113        node: &Node,
1114        scope: &Rc<Scope>,
1115        context: &AnalysisContext<'_>,
1116    ) {
1117        match &node.kind {
1118            NodeKind::Variable { sigil, name } => {
1119                if !name.contains("::") {
1120                    self.initialize_variable_parts_in_context(scope, sigil, name, context);
1121                }
1122            }
1123            // For all other node types (parens, lists, etc.), recurse into children
1124            // to find any nested variables that should be marked as initialized
1125            _ => {
1126                for child in node.children() {
1127                    self.mark_initialized(child, scope, context);
1128                }
1129            }
1130        }
1131    }
1132
1133    pub(super) fn analyze_block_with_scope<'a>(
1134        &self,
1135        node: &'a Node,
1136        scope: &Rc<Scope>,
1137        ancestors: &mut Vec<&'a Node>,
1138        issues: &mut Vec<ScopeIssue>,
1139        context: &AnalysisContext<'a>,
1140    ) {
1141        if let NodeKind::Block { statements } = &node.kind {
1142            ancestors.push(node);
1143            for stmt in statements {
1144                self.analyze_node(stmt, scope, ancestors, issues, context);
1145            }
1146            ancestors.pop();
1147        } else {
1148            self.analyze_node(node, scope, ancestors, issues, context);
1149        }
1150    }
1151
1152    pub(super) fn mark_builtin_declaration_arg_consumed(
1153        &self,
1154        node: &Node,
1155        scope: &Rc<Scope>,
1156        context: &AnalysisContext<'_>,
1157    ) {
1158        match &node.kind {
1159            NodeKind::VariableDeclaration { variable, .. } => {
1160                let extracted = self.extract_variable_name(variable);
1161                let (sigil, name) = extracted.parts();
1162                if !sigil.is_empty() && !name.is_empty() && !name.contains("::") {
1163                    let _ = self
1164                        .initialize_and_use_variable_parts_in_context(scope, sigil, name, context);
1165                }
1166            }
1167            NodeKind::VariableListDeclaration { variables, .. } => {
1168                for variable in variables {
1169                    self.mark_builtin_declaration_arg_consumed(variable, scope, context);
1170                }
1171            }
1172            NodeKind::VariableWithAttributes { variable, .. } => {
1173                self.mark_builtin_declaration_arg_consumed(variable, scope, context);
1174            }
1175            _ => {}
1176        }
1177    }
1178
1179    pub(super) fn mark_interpolated_variables_used(
1180        &self,
1181        content: &str,
1182        scope: &Rc<Scope>,
1183        context: &AnalysisContext<'_>,
1184    ) {
1185        let bytes = content.as_bytes();
1186        let mut index = 0;
1187
1188        while index < bytes.len() {
1189            let sigil = match bytes[index] {
1190                b'$' => "$",
1191                b'@' => "@",
1192                _ => {
1193                    index += 1;
1194                    continue;
1195                }
1196            };
1197
1198            if has_escaped_interpolation_marker(bytes, index) {
1199                index += 1;
1200                continue;
1201            }
1202
1203            if index + 1 >= bytes.len() {
1204                break;
1205            }
1206
1207            let (start, requires_closing_brace) =
1208                if bytes[index + 1] == b'{' { (index + 2, true) } else { (index + 1, false) };
1209
1210            if start >= bytes.len() || !is_interpolated_var_start(bytes[start]) {
1211                index += 1;
1212                continue;
1213            }
1214
1215            let mut end = start + 1;
1216            while end < bytes.len() && is_interpolated_var_continue(bytes[end]) {
1217                end += 1;
1218            }
1219
1220            if requires_closing_brace && (end >= bytes.len() || bytes[end] != b'}') {
1221                index += 1;
1222                continue;
1223            }
1224
1225            if let Some(name) = content.get(start..end) {
1226                if !name.contains("::") {
1227                    let _ = self.use_variable_parts_in_context(scope, sigil, name, context);
1228                }
1229            }
1230
1231            index = if requires_closing_brace { end + 1 } else { end };
1232        }
1233    }
1234
1235    pub(super) fn collect_unused_variables(
1236        &self,
1237        scope: &Rc<Scope>,
1238        issues: &mut Vec<ScopeIssue>,
1239        context: &AnalysisContext<'_>,
1240    ) {
1241        scope.for_each_reportable_unused_variable(|var_name, offset| {
1242            let start = offset.min(context.code.len());
1243            let end = (start + var_name.len()).min(context.code.len());
1244
1245            // Optimization: Generate description using the string reference before moving it
1246            let description = format!("Variable '{}' is declared but never used", var_name);
1247
1248            issues.push(ScopeIssue {
1249                kind: IssueKind::UnusedVariable,
1250                variable_name: var_name, // Move: Avoids cloning the string
1251                line: context.get_line(offset),
1252                range: (start, end),
1253                description,
1254            });
1255        });
1256    }
1257
1258    pub(super) fn extract_variable_name<'a>(&self, node: &'a Node) -> ExtractedName<'a> {
1259        match &node.kind {
1260            NodeKind::Variable { sigil, name } => ExtractedName::Parts(sigil, name),
1261            NodeKind::MandatoryParameter { variable }
1262            | NodeKind::OptionalParameter { variable, .. }
1263            | NodeKind::SlurpyParameter { variable }
1264            | NodeKind::NamedParameter { variable } => self.extract_variable_name(variable),
1265            NodeKind::ArrayLiteral { elements } => {
1266                // Handle array reference patterns like @{$ref}
1267                if elements.len() == 1 {
1268                    if let Some(first) = elements.first() {
1269                        return self.extract_variable_name(first);
1270                    }
1271                }
1272                ExtractedName::Full(String::new())
1273            }
1274            NodeKind::Binary { op, left, .. } if op == "->" => {
1275                // Handle method call patterns on variables
1276                self.extract_variable_name(left)
1277            }
1278            _ => {
1279                if let Some(child) = node.first_child() {
1280                    self.extract_variable_name(child)
1281                } else {
1282                    ExtractedName::Full(String::new())
1283                }
1284            }
1285        }
1286    }
1287
1288    /// Determines if a node is in a hash key context, where barewords are legitimate.
1289    ///
1290    /// This method efficiently detects various hash key contexts to avoid false positives
1291    /// in strict mode bareword detection. It handles:
1292    ///
1293    /// # Hash Key Contexts Detected:
1294    /// - **Hash subscripts**: `$hash{bareword_key}` or `%hash{bareword_key}`
1295    /// - **Hash literals**: `{ key => value, another_key => value2 }`
1296    /// - **Hash slices**: `@hash{key1, key2, key3}` where keys are in an array
1297    /// - **Nested hash structures**: Complex nested hash access patterns
1298    ///
1299    /// # Performance Characteristics:
1300    /// - Early termination on first positive match
1301    /// - Efficient pointer-based parent traversal
1302    /// - O(depth) complexity where depth is AST nesting level
1303    /// - Typical case: 1-3 parent checks for hash contexts
1304    ///
1305    /// # Examples:
1306    /// ```perl
1307    /// use strict;
1308    /// my %hash = (key1 => 'value1');        # key1 is in hash key context
1309    /// my $val = $hash{bareword_key};         # bareword_key is in hash key context  
1310    /// my @vals = @hash{key1, key2};          # key1, key2 are in hash key context
1311    /// print INVALID_BAREWORD;                # NOT in hash key context - should warn
1312    /// ```
1313    pub(super) fn is_in_hash_key_context(
1314        &self,
1315        node: &Node,
1316        ancestors: &[&Node],
1317        max_depth: usize,
1318    ) -> bool {
1319        let mut current = node;
1320
1321        // Traverse up the AST to find hash key contexts
1322        // Limit traversal depth to prevent excessive searching
1323        // Iterate ancestors in reverse (from immediate parent up)
1324        let len = ancestors.len();
1325
1326        for i in (0..len).rev() {
1327            if len - i > max_depth {
1328                break;
1329            }
1330
1331            let parent = ancestors[i];
1332
1333            match &parent.kind {
1334                // Method call: Class->method (Class is bareword)
1335                NodeKind::Binary { op, left, right: _ } if op == "->" => {
1336                    // Check if current node is the class name (left side of the -> operation)
1337                    if std::ptr::eq(left.as_ref(), current) {
1338                        return true;
1339                    }
1340                }
1341                NodeKind::MethodCall { object, .. } => {
1342                    // Check if current node is the class name (object)
1343                    if std::ptr::eq(object.as_ref(), current) {
1344                        return true;
1345                    }
1346                }
1347                // Hash subscript: $hash{key} or %hash{key}
1348                NodeKind::Binary { op, left: _, right } if op == "{}" => {
1349                    // Check if current node is the key (right side of the {} operation)
1350                    if std::ptr::eq(right.as_ref(), current) {
1351                        return true;
1352                    }
1353                }
1354                NodeKind::HashLiteral { pairs } => {
1355                    // Check if current node is a key in any of the pairs
1356                    for (key, _value) in pairs {
1357                        if std::ptr::eq(key, current) {
1358                            return true;
1359                        }
1360                    }
1361                }
1362                NodeKind::ArrayLiteral { .. } => {
1363                    // Check grandparent
1364                    if i > 0 {
1365                        let grandparent = ancestors[i - 1];
1366                        if let NodeKind::Binary { op, right, .. } = &grandparent.kind {
1367                            if op == "{}" && std::ptr::eq(right.as_ref(), parent) {
1368                                return true;
1369                            }
1370                        }
1371                    }
1372                }
1373                // Handle IndirectCall which parser sometimes produces for $hash{key} in print statements
1374                NodeKind::IndirectCall { object, args, .. } => {
1375                    // Check if current is one of the arguments
1376                    for arg in args {
1377                        if std::ptr::eq(arg, current) {
1378                            // Check if object is a variable that looks like a hash
1379                            if let NodeKind::Variable { sigil, .. } = &object.kind {
1380                                if sigil == "$" {
1381                                    return true;
1382                                }
1383                            }
1384                        }
1385                    }
1386                }
1387                _ => {}
1388            }
1389
1390            current = parent;
1391        }
1392
1393        false
1394    }
1395
1396    /// Return one human-readable fix suggestion per issue.
1397    pub fn get_suggestions(&self, issues: &[ScopeIssue]) -> Vec<String> {
1398        issues
1399            .iter()
1400            .map(|issue| match issue.kind {
1401                IssueKind::VariableShadowing => {
1402                    format!("Consider rename '{}' to avoid shadowing", issue.variable_name)
1403                }
1404                IssueKind::UnusedVariable => {
1405                    format!(
1406                        "Remove unused variable '{}' or prefix with underscore",
1407                        issue.variable_name
1408                    )
1409                }
1410                IssueKind::UndeclaredVariable => {
1411                    format!("Declare '{}' with 'my', 'our', or 'local'", issue.variable_name)
1412                }
1413                IssueKind::VariableRedeclaration => {
1414                    format!("Remove duplicate declaration of '{}'", issue.variable_name)
1415                }
1416                IssueKind::DuplicateParameter => {
1417                    format!("Remove or rename duplicate parameter '{}'", issue.variable_name)
1418                }
1419                IssueKind::ParameterShadowsGlobal => {
1420                    format!("Rename parameter '{}' to avoid shadowing", issue.variable_name)
1421                }
1422                IssueKind::UnusedParameter => {
1423                    format!("Rename '{}' with underscore or add comment", issue.variable_name)
1424                }
1425                IssueKind::UnquotedBareword => {
1426                    format!("Quote bareword '{}' or declare as filehandle", issue.variable_name)
1427                }
1428                IssueKind::UninitializedVariable => {
1429                    format!("Initialize '{}' before use", issue.variable_name)
1430                }
1431                IssueKind::CaptureVarWithoutRegexMatch => {
1432                    format!(
1433                        "Perform a regex match (=~ /.../) before using capture variable '{}'",
1434                        issue.variable_name
1435                    )
1436                }
1437            })
1438            .collect()
1439    }
1440}
1441
1442fn collect_imported_barewords(ast: &Node) -> HashSet<String> {
1443    fn push_symbol(imported: &mut HashSet<String>, module: &str, token: &str) {
1444        let symbol = token.trim().trim_matches('\'').trim_matches('"').trim();
1445        if symbol.is_empty() || symbol == "," {
1446            return;
1447        }
1448
1449        if symbol.starts_with(':') {
1450            if let Some(expanded) = resolve_known_export_tag(module, symbol) {
1451                imported.extend(expanded.iter().map(|name| (*name).to_string()));
1452            }
1453            return;
1454        }
1455
1456        let is_bareword = symbol.bytes().all(|byte| byte.is_ascii_alphanumeric() || byte == b'_')
1457            && symbol
1458                .as_bytes()
1459                .first()
1460                .is_some_and(|first| first.is_ascii_alphabetic() || *first == b'_');
1461        if is_bareword {
1462            imported.insert(symbol.to_string());
1463        }
1464    }
1465
1466    fn require_module_name(node: &Node) -> Option<String> {
1467        let NodeKind::FunctionCall { name, args } = &node.kind else {
1468            return None;
1469        };
1470        if name != "require" {
1471            return None;
1472        }
1473        let first = args.first()?;
1474        match &first.kind {
1475            NodeKind::Identifier { name } => Some(name.clone()),
1476            NodeKind::String { value, .. } => {
1477                let cleaned = value.trim_matches('\'').trim_matches('"').trim();
1478                if cleaned.is_empty() {
1479                    return None;
1480                }
1481                Some(cleaned.trim_end_matches(".pm").replace('/', "::"))
1482            }
1483            _ => None,
1484        }
1485    }
1486
1487    fn require_variable_name(node: &Node) -> Option<String> {
1488        let NodeKind::FunctionCall { name, args } = &node.kind else {
1489            return None;
1490        };
1491        if name != "require" {
1492            return None;
1493        }
1494        let first = args.first()?;
1495        let NodeKind::Variable { sigil, name } = &first.kind else {
1496            return None;
1497        };
1498        (sigil == "$" && !name.contains("::")).then(|| name.clone())
1499    }
1500
1501    fn maybe_record_manual_imports(
1502        node: &Node,
1503        required_modules: &HashSet<String>,
1504        imported: &mut HashSet<String>,
1505    ) {
1506        let NodeKind::MethodCall { object, method, args } = &node.kind else {
1507            return;
1508        };
1509        if method != "import" {
1510            return;
1511        }
1512        let NodeKind::Identifier { name: module } = &object.kind else {
1513            return;
1514        };
1515        if !required_modules.contains(module) {
1516            return;
1517        }
1518        for arg in args {
1519            match &arg.kind {
1520                NodeKind::String { value, .. } => push_symbol(imported, module, value),
1521                NodeKind::Identifier { name } => {
1522                    if name.starts_with("qw") {
1523                        let content = name
1524                            .trim_start_matches("qw")
1525                            .trim_start_matches(|c: char| "([{/<|!".contains(c))
1526                            .trim_end_matches(|c: char| ")]}/|!>".contains(c));
1527                        for token in content.split_whitespace() {
1528                            push_symbol(imported, module, token);
1529                        }
1530                    } else {
1531                        push_symbol(imported, module, name);
1532                    }
1533                }
1534                NodeKind::ArrayLiteral { elements } => {
1535                    for el in elements {
1536                        if let NodeKind::String { value, .. } = &el.kind {
1537                            push_symbol(imported, module, value);
1538                        }
1539                    }
1540                }
1541                _ => {}
1542            }
1543        }
1544    }
1545
1546    fn maybe_record_dynamic_manual_imports(
1547        node: &Node,
1548        dynamic_require_vars: &HashSet<String>,
1549        imported: &mut HashSet<String>,
1550    ) {
1551        let NodeKind::MethodCall { object, method, args } = &node.kind else {
1552            return;
1553        };
1554        if method != "import" {
1555            return;
1556        }
1557        let NodeKind::Variable { sigil, name } = &object.kind else {
1558            return;
1559        };
1560        if sigil != "$" || !dynamic_require_vars.contains(name) {
1561            return;
1562        }
1563
1564        for arg in args {
1565            match &arg.kind {
1566                NodeKind::String { value, .. } => push_symbol(imported, "", value),
1567                NodeKind::Identifier { name } => {
1568                    if name.starts_with("qw") {
1569                        let content = name
1570                            .trim_start_matches("qw")
1571                            .trim_start_matches(|c: char| "([{/<|!".contains(c))
1572                            .trim_end_matches(|c: char| ")]}/|!>".contains(c));
1573                        for token in content.split_whitespace() {
1574                            push_symbol(imported, "", token);
1575                        }
1576                    } else {
1577                        push_symbol(imported, "", name);
1578                    }
1579                }
1580                NodeKind::ArrayLiteral { elements } => {
1581                    for el in elements {
1582                        if let NodeKind::String { value, .. } = &el.kind {
1583                            push_symbol(imported, "", value);
1584                        }
1585                    }
1586                }
1587                _ => {}
1588            }
1589        }
1590    }
1591
1592    /// Unwrap an `ExpressionStatement` node to its inner expression, or return
1593    /// the node itself if it is not an expression statement.
1594    fn inner_node(stmt: &Node) -> &Node {
1595        if let NodeKind::ExpressionStatement { expression } = &stmt.kind {
1596            expression.as_ref()
1597        } else {
1598            stmt
1599        }
1600    }
1601
1602    // `in_eval` — when true we are inside a runtime `eval { }` block and
1603    // `require` statements are no longer static; skip the require+import
1604    // suppression analysis for the current block.
1605    fn visit(node: &Node, imported: &mut HashSet<String>, in_eval: bool) {
1606        if let NodeKind::Use { module, args, .. } = &node.kind {
1607            for arg in args {
1608                if arg.starts_with("qw") {
1609                    let content = arg
1610                        .trim_start_matches("qw")
1611                        .trim_start_matches(|c: char| "([{/<|!".contains(c))
1612                        .trim_end_matches(|c: char| ")]}/|!>".contains(c));
1613                    for token in content.split_whitespace() {
1614                        push_symbol(imported, module, token);
1615                    }
1616                } else {
1617                    push_symbol(imported, module, arg);
1618                }
1619            }
1620        } else if !in_eval {
1621            if let NodeKind::Program { statements } | NodeKind::Block { statements } = &node.kind {
1622                let required_modules: HashSet<String> = statements
1623                    .iter()
1624                    .filter_map(|stmt| require_module_name(inner_node(stmt)))
1625                    .collect();
1626                let dynamic_require_vars: HashSet<String> = statements
1627                    .iter()
1628                    .filter_map(|stmt| require_variable_name(inner_node(stmt)))
1629                    .collect();
1630                if !required_modules.is_empty() || !dynamic_require_vars.is_empty() {
1631                    for stmt in statements {
1632                        let inner = inner_node(stmt);
1633                        maybe_record_manual_imports(inner, &required_modules, imported);
1634                        maybe_record_dynamic_manual_imports(inner, &dynamic_require_vars, imported);
1635                    }
1636                }
1637            }
1638        }
1639
1640        // Propagate eval context: children of an Eval block are runtime.
1641        let child_in_eval = in_eval || matches!(&node.kind, NodeKind::Eval { .. });
1642        for child in node.children() {
1643            visit(child, imported, child_in_eval);
1644        }
1645    }
1646
1647    let mut imported = HashSet::new();
1648    visit(ast, &mut imported, false);
1649    imported
1650}
1651
1652/// Returns true if `name` (without sigil) is a numbered capture variable.
1653///
1654/// Capture variables are `$1`, `$2`, ..., `$9`, `$10`, `$11`, etc.
1655/// `$0` is the program name and is NOT a capture variable.
1656#[inline]
1657pub(super) fn is_capture_variable(name: &str) -> bool {
1658    // Must be non-empty, all digits, and not "0" (which is $0 = program name)
1659    !name.is_empty() && name != "0" && name.as_bytes().iter().all(|c| c.is_ascii_digit())
1660}
1661
1662/// Check if a variable is a built-in Perl global variable
1663pub(super) fn is_builtin_global(sigil: &str, name: &str) -> bool {
1664    // Fast path: most user variables start with lowercase and are not built-ins
1665    // Exception: $a and $b are built-in sort variables
1666    if !name.is_empty() {
1667        let first = name.as_bytes()[0];
1668        if first.is_ascii_lowercase() {
1669            // Optimization: Combine length and byte check to avoid multiple comparisons
1670            if name.len() > 1 || (first != b'a' && first != b'b') {
1671                return false;
1672            }
1673        }
1674    }
1675
1676    let sigil_byte = match sigil.as_bytes().first() {
1677        Some(b) => *b,
1678        None => {
1679            return match name {
1680                // Filehandles (no sigil)
1681                "STDIN" | "STDOUT" | "STDERR" | "DATA" | "ARGVOUT" => true,
1682                _ => false,
1683            };
1684        }
1685    };
1686
1687    match sigil_byte {
1688        b'$' => match name {
1689            // Special variables
1690            "_" | "!" | "@" | "?" | "^" | "$" | "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8"
1691            | "9" | "." | "," | "/" | "\\" | "\"" | ";" | "%" | "=" | "-" | "~" | "|" | "&"
1692            | "`" | "'" | "+" | "[" | "]" | "^A" | "^C" | "^D" | "^E" | "^F" | "^H" | "^I" | "^L"
1693            | "^M" | "^N" | "^O" | "^P" | "^R" | "^S" | "^T" | "^V" | "^W" | "^X" |
1694            // Common globals
1695            "ARGV" | "VERSION" | "AUTOLOAD" |
1696            // Sort variables
1697            "a" | "b" |
1698            // Error variables
1699            "EVAL_ERROR" | "ERRNO" | "EXTENDED_OS_ERROR" | "CHILD_ERROR" |
1700            "PROCESS_ID" | "PROGRAM_NAME" |
1701            // Perl version variables
1702            "PERL_VERSION" | "OLD_PERL_VERSION" |
1703            // Perl internal special values (perlguts/perlapi) — used in XS and introspection code
1704            "PL_sv_yes" | "PL_sv_no" | "PL_sv_undef" => true,
1705            _ => {
1706                // Check patterns
1707                // $^X (single-char) control variables — lexer produces name `^X`.
1708                // ${^NAME} (multi-char) control variables — lexer produces name `{^NAME}`.
1709                // Both should be treated as built-ins.
1710                //
1711                // Form 1: `^` followed by one or more ASCII uppercase letters or underscores.
1712                //   Examples: `^A`, `^W`, `^MATCH`, `^PREMATCH`, `^POSTMATCH`.
1713                // Form 2: `{^NAME}` — same but wrapped in braces by the lexer.
1714                //   Examples: `{^MATCH}`, `{^PREMATCH}`, `{^POSTMATCH}`.
1715                let caret_name = if let Some(inner) = name
1716                    .strip_prefix('{')
1717                    .and_then(|s| s.strip_suffix('}'))
1718                {
1719                    inner
1720                } else {
1721                    name
1722                };
1723                if let Some(rest) = caret_name.strip_prefix('^') {
1724                    if !rest.is_empty()
1725                        && rest
1726                            .as_bytes()
1727                            .iter()
1728                            .all(|c| c.is_ascii_uppercase() || *c == b'_')
1729                    {
1730                        return true;
1731                    }
1732                }
1733
1734                // Numbered capture variables ($1, $2, etc.)
1735                // Note: $0-$9 are already handled in the match above, but this covers $10+
1736                // Optimization: use byte check to avoid utf-8 decoding
1737                if !name.is_empty() && name.as_bytes().iter().all(|c| c.is_ascii_digit()) {
1738                    return true;
1739                }
1740
1741                false
1742            }
1743        },
1744        b'@' => matches!(name, "_" | "+" | "-" | "INC" | "ARGV" | "EXPORT" | "EXPORT_OK" | "ISA"),
1745        b'%' => matches!(name, "_" | "+" | "-" | "!" | "ENV" | "INC" | "SIG" | "EXPORT_TAGS"),
1746        _ => false,
1747    }
1748}
1749
1750/// Check if an identifier is a known Perl built-in function
1751pub(super) fn is_known_function(name: &str) -> bool {
1752    if name.is_empty() {
1753        return false;
1754    }
1755    if matches!(name, "PL_sv_yes" | "PL_sv_no" | "PL_sv_undef") {
1756        return true;
1757    }
1758    // Optimization: All known functions are lowercase or start with non-uppercase chars
1759    if name.as_bytes()[0].is_ascii_uppercase() {
1760        return false;
1761    }
1762
1763    match name {
1764        // I/O functions
1765        "print" | "printf" | "say" | "open" | "close" | "read" | "write" | "seek" | "tell"
1766        | "eof" | "fileno" | "binmode" | "sysopen" | "sysread" | "syswrite" | "sysclose"
1767        | "select" |
1768        // String functions
1769        "chomp" | "chop" | "chr" | "crypt" | "fc" | "hex" | "index" | "lc" | "lcfirst" | "length"
1770        | "oct" | "ord" | "pack" | "q" | "qq" | "qr" | "quotemeta" | "qw" | "qx" | "reverse"
1771        | "rindex" | "sprintf" | "substr" | "tr" | "uc" | "ucfirst" | "unpack" |
1772        // Array/List functions
1773        "pop" | "push" | "shift" | "unshift" | "splice" | "split" | "join" | "grep" | "map"
1774        | "sort" |
1775        // Hash functions
1776        "delete" | "each" | "exists" | "keys" | "values" |
1777        // Control flow
1778        "die" | "exit" | "return" | "goto" | "last" | "next" | "redo" | "continue" | "break"
1779        | "given" | "when" | "default" |
1780        // File test operators
1781        "stat" | "lstat" | "-r" | "-w" | "-x" | "-o" | "-R" | "-W" | "-X" | "-O" | "-e" | "-z"
1782        | "-s" | "-f" | "-d" | "-l" | "-p" | "-S" | "-b" | "-c" | "-t" | "-u" | "-g" | "-k"
1783        | "-T" | "-B" | "-M" | "-A" | "-C" |
1784        // System functions
1785        "system" | "exec" | "fork" | "wait" | "waitpid" | "kill" | "sleep" | "alarm"
1786        | "getpgrp" | "getppid" | "getpriority" | "setpgrp" | "setpriority" | "time" | "times"
1787        | "localtime" | "gmtime" |
1788        // Math functions
1789        "abs" | "atan2" | "cos" | "exp" | "int" | "log" | "rand" | "sin" | "sqrt" | "srand" |
1790        // Misc functions
1791        "defined" | "undef" | "ref" | "bless" | "tie" | "tied" | "untie" | "eval" | "caller"
1792        | "import" | "require" | "use" | "do" | "package" | "sub" | "my" | "our" | "local"
1793        | "state" | "scalar" | "wantarray" | "warn" => true,
1794        _ => false,
1795    }
1796}
1797
1798/// Builtins whose declaration-capable arguments are all consumed by the builtin itself.
1799///
1800/// Keep this list explicit and conservative. Only include builtins where the parser already
1801/// emits declaration nodes for the relevant argument, and where treating that declaration as
1802/// used avoids false diagnostics after the call.
1803///
1804/// Position semantics:
1805/// - Position 0: `open`, `opendir`, `sysopen`, `socket`, `accept`, `dbmopen`
1806/// - Position 1: `read`, `sysread`, `recv`, `shmread`
1807/// - Positions 0 and 1: `pipe`, `socketpair`
1808pub(super) fn builtin_declaration_arg_positions(name: &str) -> &'static [usize] {
1809    match name {
1810        // Position 0: the first argument is the new handle/socket
1811        "open" | "opendir" | "sysopen" | "socket" | "accept" | "dbmopen" => &[0],
1812        // Position 1: the second argument is the buffer (first is an existing handle)
1813        "read" | "sysread" | "recv" | "shmread" => &[1],
1814        // pipe: both first arguments are new handles
1815        "pipe" => &[0, 1],
1816        // socketpair: both first arguments are new sockets
1817        "socketpair" => &[0, 1],
1818        _ => &[],
1819    }
1820}
1821
1822/// Builtins that operate on `$_` by default when called with zero arguments.
1823///
1824/// When any of these is invoked as a bare call (no args), Perl implicitly reads
1825/// (and in some cases modifies) `$_`. Marking `$_` as used at call sites prevents
1826/// false "unused" or "uninitialized" diagnostics for lexically-scoped `my $_`.
1827pub(super) fn is_topic_defaulting_builtin(name: &str) -> bool {
1828    matches!(
1829        name,
1830        "chomp"
1831            | "chop"
1832            | "chr"
1833            | "hex"
1834            | "lc"
1835            | "lcfirst"
1836            | "length"
1837            | "oct"
1838            | "ord"
1839            | "uc"
1840            | "ucfirst"
1841            | "abs"
1842            | "int"
1843            | "log"
1844            | "sqrt"
1845            | "cos"
1846            | "sin"
1847            | "exp"
1848            | "print"
1849            | "say"
1850    )
1851}
1852
1853/// Topic-defaulting builtins that also modify `$_` when called without args.
1854pub(super) fn is_topic_modifying_builtin(name: &str) -> bool {
1855    matches!(name, "chomp" | "chop")
1856}
1857
/// Reports whether `source` begins with an explicit dereference of a scalar reference:
/// a `@`, `%`, or `$` sigil followed directly by `$name` or by a `{$name...` block.
fn is_explicit_scalar_reference_deref(source: &str) -> bool {
    // Every accepted opening, in both the bare and the brace-wrapped spelling.
    const DEREF_PREFIXES: [&str; 6] = ["@$", "%$", "$$", "@{$", "%{$", "${$"];

    DEREF_PREFIXES.iter().any(|prefix| source.starts_with(*prefix))
}
1866
/// Reduces a dereference base name to the bare variable name.
///
/// A surrounding `{`…`}` pair is removed only when both braces are present; after that,
/// one leading `$` is removed if there is one. Anything else is returned unchanged.
fn normalize_scalar_deref_base_name(name: &str) -> &str {
    // Step 1: peel a complete brace wrapper, otherwise keep the input as-is.
    let without_braces = match name.strip_prefix('{') {
        Some(after_open) => match after_open.strip_suffix('}') {
            Some(inner) => inner,
            None => name,
        },
        None => name,
    };

    // Step 2: drop a single leading scalar sigil.
    match without_braces.strip_prefix('$') {
        Some(bare) => bare,
        None => without_braces,
    }
}
1873
/// Check if an identifier is treated as a filehandle.
///
/// Accepts the well-known names listed below (standard handles plus the `__NAME__`
/// tokens) and, as a fallback, any non-empty identifier made up solely of ASCII
/// uppercase letters and underscores (the common filehandle naming convention).
#[allow(dead_code)]
fn is_filehandle(name: &str) -> bool {
    const WELL_KNOWN: [&str; 13] = [
        "STDIN",
        "STDOUT",
        "STDERR",
        "ARGV",
        "ARGVOUT",
        "DATA",
        "STDHANDLE",
        "__PACKAGE__",
        "__FILE__",
        "__LINE__",
        "__SUB__",
        "__END__",
        "__DATA__",
    ];

    if WELL_KNOWN.contains(&name) {
        return true;
    }

    // Convention fallback. Scanning bytes is equivalent to scanning chars here: every
    // byte of a non-ASCII character fails both tests, just as the character itself would.
    !name.is_empty() && name.bytes().all(|byte| byte.is_ascii_uppercase() || byte == b'_')
}