Skip to main content

perl_semantic_analyzer/analysis/
scope_analyzer.rs

1//! Scope analysis and variable tracking for Perl parsing workflows
2//!
3//! This module provides comprehensive scope analysis for Perl scripts, tracking
4//! variable declarations, usage patterns, and potential issues across different
5//! scopes within the LSP workflow stages.
6//!
7//! # LSP Workflow Integration
8//!
9//! Scope analysis supports semantic validation across LSP workflow stages:
10//! - **Parse**: Identify declarations and scopes during syntax analysis
11//! - **Index**: Provide scope metadata for symbol indexing
12//! - **Navigate**: Resolve references with scope-aware lookups
13//! - **Complete**: Filter completion items based on visible bindings
14//! - **Analyze**: Report unused, shadowed, and undeclared variables
15//!
16//! # Performance
17//!
18//! - **Time complexity**: O(n) over AST nodes with scoped hash lookups
19//! - **Space complexity**: O(n) for scope tables and variable maps (memory bounded)
20//! - **Optimizations**: Fast sigil indexing to keep performance stable
21//! - **Benchmarks**: Typically <5ms for mid-sized files, low ms for large files
22//! - **Large file scaling**: Designed to scale across large file sets in workspaces
23//!
24//! # Usage Examples
25//!
26//! ```rust,ignore
27//! use perl_parser::scope_analyzer::{ScopeAnalyzer, IssueKind};
28//! use perl_parser::{Parser, ast::Node};
29//!
30//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
31//! // Analyze Perl script for scope issues
32//! let script = "my $var = 42; sub hello { print $var; }";
33//! let mut parser = Parser::new(script);
34//! let ast = parser.parse()?;
35//!
36//! let analyzer = ScopeAnalyzer::new();
37//! let pragma_map = vec![];
38//! let issues = analyzer.analyze(&ast, script, &pragma_map);
39//!
40//! // Check for common scope issues in Perl parsing code
41//! for issue in &issues {
42//!     match issue.kind {
43//!         IssueKind::UnusedVariable => println!("Unused variable: {}", issue.variable_name),
44//!         IssueKind::VariableShadowing => println!("Variable shadowing: {}", issue.variable_name),
45//!         _ => {}
46//!     }
47//! }
48//! # Ok(())
49//! # }
50//! ```
51
52use crate::ast::{Node, NodeKind};
53use crate::pragma_tracker::{PragmaState, PragmaTracker};
54use rustc_hash::FxHashMap;
55use std::cell::RefCell;
56use std::ops::Range;
57use std::rc::Rc;
58
/// Category of a diagnostic produced by scope analysis.
///
/// The enum is fieldless, so it is `Copy` and can be used directly as a
/// `HashMap`/`HashSet` key (for example to group or de-duplicate issues).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IssueKind {
    /// A declaration hides a variable with the same sigil and name that is
    /// visible from an enclosing scope.
    VariableShadowing,
    /// A declared variable was never used.
    UnusedVariable,
    /// A variable is used without a visible declaration; only reported when the
    /// strict pragma state is active at the use site.
    UndeclaredVariable,
    /// A variable is declared again in the scope that already declares it.
    VariableRedeclaration,
    /// A subroutine parameter name is repeated.
    /// NOTE(review): reporting site is outside the reviewed portion of the file — confirm.
    DuplicateParameter,
    /// A subroutine parameter hides a global variable.
    /// NOTE(review): reporting site is outside the reviewed portion of the file — confirm.
    ParameterShadowsGlobal,
    /// A subroutine parameter was never used.
    /// NOTE(review): reporting site is outside the reviewed portion of the file — confirm.
    UnusedParameter,
    /// A bareword identifier appears where it is not allowed under `use strict`.
    UnquotedBareword,
    /// A declared variable is read before it has been initialized.
    UninitializedVariable,
}
71
/// A single diagnostic reported by the scope analyzer.
#[derive(Debug, Clone)]
pub struct ScopeIssue {
    /// Category of the problem.
    pub kind: IssueKind,
    /// Name the issue refers to: sigil plus identifier for variable issues,
    /// or the bare identifier for `IssueKind::UnquotedBareword`.
    pub variable_name: String,
    /// 1-based line number derived from the offending node's start offset.
    pub line: usize,
    /// `(start, end)` offsets copied from the offending node's location.
    pub range: (usize, usize),
    /// Human-readable message describing the issue.
    pub description: String,
}
80
/// Book-keeping for one variable declared in a `Scope`.
#[derive(Debug)]
struct Variable {
    // Start offset of the declaring node, as handed to `Scope::declare_variable_parts`.
    declaration_offset: usize,
    // Flipped to `true` when a lookup through the scope chain finds this variable;
    // starts out `true` for `our` declarations so they are never reported as unused.
    is_used: RefCell<bool>,
    // Whether the variable was declared as a package global (`our` / `use vars`).
    is_our: bool,
    // Whether the variable has received a value (at declaration or by a later assignment).
    is_initialized: RefCell<bool>,
}
88
/// Map a Perl sigil onto its slot in the per-scope variable tables.
///
/// Slot assignment:
/// - `$` (scalar): 0
/// - `@` (array): 1
/// - `%` (hash): 2
/// - `&` (subroutine): 3
/// - `*` (glob): 4
/// - anything else, including the empty string: 5 (fallback)
#[inline]
fn sigil_to_index(sigil: &str) -> usize {
    // Position in this table is the slot number; only the leading byte of
    // `sigil` is inspected because every sigil is a single ASCII character.
    const KNOWN_SIGILS: &[u8] = b"$@%&*";
    const FALLBACK_SLOT: usize = KNOWN_SIGILS.len();

    sigil
        .bytes()
        .next()
        .and_then(|lead| KNOWN_SIGILS.iter().position(|&known| known == lead))
        .unwrap_or(FALLBACK_SLOT)
}
110
/// Inverse of `sigil_to_index`: turn a table slot back into its sigil.
///
/// Slots outside `0..=4` (including the fallback slot 5) yield an empty string.
#[inline]
fn index_to_sigil(index: usize) -> &'static str {
    const SIGILS: [&str; 5] = ["$", "@", "%", "&", "*"];
    SIGILS.get(index).copied().unwrap_or("")
}
123
/// A single lexical scope: the variables declared directly in it plus a link
/// to the enclosing scope.
#[derive(Debug)]
struct Scope {
    // Outer key: sigil index (see `sigil_to_index`), Inner key: name without the sigil.
    // A slot stays `None` until the first variable with that sigil is declared here.
    variables: RefCell<[Option<FxHashMap<String, Rc<Variable>>>; 6]>,
    // Enclosing scope consulted when a lookup misses here; `None` for a scope
    // created with `Scope::new`.
    parent: Option<Rc<Scope>>,
}
130
131impl Scope {
132    fn new() -> Self {
133        let vars = std::array::from_fn(|_| None);
134        Self { variables: RefCell::new(vars), parent: None }
135    }
136
137    fn with_parent(parent: Rc<Scope>) -> Self {
138        let vars = std::array::from_fn(|_| None);
139        Self { variables: RefCell::new(vars), parent: Some(parent) }
140    }
141
142    fn declare_variable_parts(
143        &self,
144        sigil: &str,
145        name: &str,
146        offset: usize,
147        is_our: bool,
148        is_initialized: bool,
149    ) -> Option<IssueKind> {
150        let idx = sigil_to_index(sigil);
151
152        // First check if already declared in this scope
153        {
154            let vars = self.variables.borrow();
155            if let Some(map) = &vars[idx] {
156                if map.contains_key(name) {
157                    return Some(IssueKind::VariableRedeclaration);
158                }
159            }
160        }
161
162        // Check if it shadows a parent scope variable
163        let shadows = if let Some(ref parent) = self.parent {
164            parent.has_variable_parts(sigil, name)
165        } else {
166            false
167        };
168
169        // Now insert the variable
170        let mut vars = self.variables.borrow_mut();
171        let inner = vars[idx].get_or_insert_with(FxHashMap::default);
172
173        inner.insert(
174            name.to_string(),
175            Rc::new(Variable {
176                declaration_offset: offset,
177                is_used: RefCell::new(is_our), // 'our' variables are considered used
178                is_our,
179                is_initialized: RefCell::new(is_initialized),
180            }),
181        );
182
183        if shadows { Some(IssueKind::VariableShadowing) } else { None }
184    }
185
186    fn has_variable_parts(&self, sigil: &str, name: &str) -> bool {
187        let idx = sigil_to_index(sigil);
188        let mut current_scope = self;
189
190        loop {
191            {
192                let vars = current_scope.variables.borrow();
193                if let Some(map) = &vars[idx] {
194                    if map.contains_key(name) {
195                        return true;
196                    }
197                }
198            }
199            if let Some(ref parent) = current_scope.parent {
200                current_scope = parent;
201            } else {
202                return false;
203            }
204        }
205    }
206
207    fn use_variable_parts(&self, sigil: &str, name: &str) -> (bool, bool) {
208        let idx = sigil_to_index(sigil);
209        let mut current_scope = self;
210
211        loop {
212            {
213                let vars = current_scope.variables.borrow();
214                if let Some(map) = &vars[idx] {
215                    if let Some(var) = map.get(name) {
216                        *var.is_used.borrow_mut() = true;
217                        return (true, *var.is_initialized.borrow());
218                    }
219                }
220            }
221
222            if let Some(ref parent) = current_scope.parent {
223                current_scope = parent;
224            } else {
225                return (false, false);
226            }
227        }
228    }
229
230    fn initialize_variable_parts(&self, sigil: &str, name: &str) {
231        let idx = sigil_to_index(sigil);
232        let mut current_scope = self;
233
234        loop {
235            {
236                let vars = current_scope.variables.borrow();
237                if let Some(map) = &vars[idx] {
238                    if let Some(var) = map.get(name) {
239                        *var.is_initialized.borrow_mut() = true;
240                        return;
241                    }
242                }
243            }
244
245            if let Some(ref parent) = current_scope.parent {
246                current_scope = parent;
247            } else {
248                return;
249            }
250        }
251    }
252
253    /// Optimized method to mark a variable as initialized AND used in one lookup.
254    /// Returns true if the variable was found and updated.
255    fn initialize_and_use_variable_parts(&self, sigil: &str, name: &str) -> bool {
256        let idx = sigil_to_index(sigil);
257        let mut current_scope = self;
258
259        loop {
260            {
261                let vars = current_scope.variables.borrow();
262                if let Some(map) = &vars[idx] {
263                    if let Some(var) = map.get(name) {
264                        *var.is_used.borrow_mut() = true;
265                        *var.is_initialized.borrow_mut() = true;
266                        return true;
267                    }
268                }
269            }
270
271            if let Some(ref parent) = current_scope.parent {
272                current_scope = parent;
273            } else {
274                return false;
275            }
276        }
277    }
278
279    /// Iterate over unused variables that should be reported as diagnostics.
280    /// Filters out underscore-prefixed variables (intentionally unused) before allocation.
281    fn for_each_reportable_unused_variable<F>(&self, mut f: F)
282    where
283        F: FnMut(String, usize),
284    {
285        for (idx, inner_opt) in self.variables.borrow().iter().enumerate() {
286            if let Some(inner) = inner_opt {
287                for (name, var) in inner {
288                    if !*var.is_used.borrow() && !var.is_our {
289                        // Optimization: Check for underscore prefix before allocation
290                        if name.starts_with('_') {
291                            continue;
292                        }
293                        let full_name = format!("{}{}", index_to_sigil(idx), name);
294                        f(full_name, var.declaration_offset);
295                    }
296                }
297            }
298        }
299    }
300}
301
/// Split a full variable name into `(sigil, name)`.
///
/// When the text does not start with one of `$ @ % & *` (or is empty) the
/// sigil part is `""` and the whole input is returned as the name.
fn split_variable_name(full_name: &str) -> (&str, &str) {
    match full_name.as_bytes().first() {
        // Every recognised sigil is one ASCII byte, so offset 1 is a char boundary.
        Some(b'$' | b'@' | b'%' | b'&' | b'*') => full_name.split_at(1),
        _ => ("", full_name),
    }
}
312
/// Whether `byte` may begin an interpolated variable name: an ASCII letter or `_`.
fn is_interpolated_var_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
316
/// Whether `byte` may continue an interpolated variable name: an ASCII letter
/// or digit, `_`, or `:` (so package separators stay part of the name).
fn is_interpolated_var_continue(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b':')
}
320
/// Whether the byte at `index` is escaped, i.e. immediately preceded by an odd
/// number of consecutive backslashes.
///
/// An even run (including zero) means the backslashes escape each other and the
/// marker itself is live.
///
/// # Panics
///
/// Panics if `index` is greater than `bytes.len()`.
fn has_escaped_interpolation_marker(bytes: &[u8], index: usize) -> bool {
    let preceding_backslashes =
        bytes[..index].iter().rev().take_while(|&&byte| byte == b'\\').count();

    preceding_backslashes % 2 != 0
}
335
/// A variable name pulled out of an AST node, either already split or as one string.
enum ExtractedName<'a> {
    // Borrowed `(sigil, name)` pair; no allocation is needed to inspect it.
    Parts(&'a str, &'a str),
    // Owned full name; it is split into sigil and name on demand via `split_variable_name`.
    Full(String),
}
340
/// Read-only inputs shared by a single analysis run, plus a lazily built line index.
struct AnalysisContext<'a> {
    // Source text being analyzed; used to map byte offsets to line numbers.
    code: &'a str,
    // Pragma states keyed by the source range they apply to.
    pragma_map: &'a [(Range<usize>, PragmaState)],
    // Byte offsets at which each line starts; `None` until `get_line` is first called.
    line_starts: RefCell<Option<Vec<usize>>>,
}
346
347impl<'a> AnalysisContext<'a> {
348    fn new(code: &'a str, pragma_map: &'a [(Range<usize>, PragmaState)]) -> Self {
349        Self { code, pragma_map, line_starts: RefCell::new(None) }
350    }
351
352    fn get_line(&self, offset: usize) -> usize {
353        let mut line_starts_guard = self.line_starts.borrow_mut();
354        let starts = line_starts_guard.get_or_insert_with(|| {
355            let mut indices = Vec::with_capacity(self.code.len() / 40); // Estimate
356            indices.push(0);
357            for (i, b) in self.code.bytes().enumerate() {
358                if b == b'\n' {
359                    indices.push(i + 1);
360                }
361            }
362            indices
363        });
364
365        // Find the line that contains the offset
366        match starts.binary_search(&offset) {
367            Ok(idx) => idx + 1,
368            Err(idx) => idx,
369        }
370    }
371}
372
373impl<'a> ExtractedName<'a> {
374    fn as_string(&self) -> String {
375        match self {
376            ExtractedName::Parts(sigil, name) => format!("{}{}", sigil, name),
377            ExtractedName::Full(s) => s.clone(),
378        }
379    }
380
381    fn parts(&self) -> (&str, &str) {
382        match self {
383            ExtractedName::Parts(sigil, name) => (sigil, name),
384            ExtractedName::Full(s) => split_variable_name(s),
385        }
386    }
387
388    fn is_empty(&self) -> bool {
389        match self {
390            ExtractedName::Parts(sigil, name) => sigil.is_empty() && name.is_empty(),
391            ExtractedName::Full(s) => s.is_empty(),
392        }
393    }
394}
395
/// Entry point for scope analysis. This is a unit struct: it holds no state of its own.
pub struct ScopeAnalyzer;

/// `ScopeAnalyzer::default()` is the same as `ScopeAnalyzer::new()`.
impl Default for ScopeAnalyzer {
    fn default() -> Self {
        Self::new()
    }
}
403
404impl ScopeAnalyzer {
    /// Create a new analyzer. The type is a unit struct, so no state is set up here.
    pub fn new() -> Self {
        Self
    }
408
409    pub fn analyze(
410        &self,
411        ast: &Node,
412        code: &str,
413        pragma_map: &[(Range<usize>, PragmaState)],
414    ) -> Vec<ScopeIssue> {
415        let mut issues = Vec::new();
416        let root_scope = Rc::new(Scope::new());
417
418        // Use a vector as a stack for ancestors to avoid O(N) HashMap allocation
419        let mut ancestors: Vec<&Node> = Vec::new();
420
421        let context = AnalysisContext::new(code, pragma_map);
422
423        self.analyze_node(ast, &root_scope, &mut ancestors, &mut issues, &context);
424
425        // Collect all unused variables from all scopes
426        self.collect_unused_variables(&root_scope, &mut issues, &context);
427
428        issues
429    }
430
431    fn analyze_node<'a>(
432        &self,
433        node: &'a Node,
434        scope: &Rc<Scope>,
435        ancestors: &mut Vec<&'a Node>,
436        issues: &mut Vec<ScopeIssue>,
437        context: &AnalysisContext<'a>,
438    ) {
439        // Get effective pragma state at this node's location
440        let pragma_state = PragmaTracker::state_for_offset(context.pragma_map, node.location.start);
441        let strict_mode = pragma_state.strict_subs;
442        match &node.kind {
443            NodeKind::VariableDeclaration { declarator, variable, initializer, .. } => {
444                let extracted = self.extract_variable_name(variable);
445                let (sigil, var_name_part) = extracted.parts();
446
447                let is_our = declarator == "our";
448                let is_initialized = initializer.is_some();
449
450                // If checking initializer first (e.g. my $x = $x), we need to analyze initializer in
451                // current scope BEFORE declaring the variable (standard Perl behavior)
452                // Actually Perl evaluates RHS before LHS assignment, so usages in initializer refer to OUTER scope.
453                // So we analyze initializer first.
454                if let Some(init) = initializer {
455                    self.analyze_node(init, scope, ancestors, issues, context);
456                }
457
458                if let Some(issue_kind) = scope.declare_variable_parts(
459                    sigil,
460                    var_name_part,
461                    variable.location.start,
462                    is_our,
463                    is_initialized,
464                ) {
465                    let line = context.get_line(variable.location.start);
466                    // Optimization: Only allocate full name string when we actually have an issue to report
467                    let full_name = extracted.as_string();
468                    // Build description first (borrows full_name), then move full_name into struct
469                    let description = match issue_kind {
470                        IssueKind::VariableShadowing => {
471                            format!("Variable '{}' shadows a variable in outer scope", full_name)
472                        }
473                        IssueKind::VariableRedeclaration => {
474                            format!("Variable '{}' is already declared in this scope", full_name)
475                        }
476                        _ => String::new(),
477                    };
478                    issues.push(ScopeIssue {
479                        kind: issue_kind,
480                        variable_name: full_name,
481                        line,
482                        range: (variable.location.start, variable.location.end),
483                        description,
484                    });
485                }
486            }
487
488            NodeKind::VariableListDeclaration { declarator, variables, initializer, .. } => {
489                let is_our = declarator == "our";
490                let is_initialized = initializer.is_some();
491
492                // Analyze initializer first
493                if let Some(init) = initializer {
494                    self.analyze_node(init, scope, ancestors, issues, context);
495                }
496
497                for variable in variables {
498                    let extracted = self.extract_variable_name(variable);
499                    let (sigil, var_name_part) = extracted.parts();
500
501                    if let Some(issue_kind) = scope.declare_variable_parts(
502                        sigil,
503                        var_name_part,
504                        variable.location.start,
505                        is_our,
506                        is_initialized,
507                    ) {
508                        let line = context.get_line(variable.location.start);
509                        // Optimization: Only allocate full name string when we actually have an issue to report
510                        let full_name = extracted.as_string();
511                        // Build description first (borrows full_name), then move full_name into struct
512                        let description = match issue_kind {
513                            IssueKind::VariableShadowing => {
514                                format!(
515                                    "Variable '{}' shadows a variable in outer scope",
516                                    full_name
517                                )
518                            }
519                            IssueKind::VariableRedeclaration => {
520                                format!(
521                                    "Variable '{}' is already declared in this scope",
522                                    full_name
523                                )
524                            }
525                            _ => String::new(),
526                        };
527                        issues.push(ScopeIssue {
528                            kind: issue_kind,
529                            variable_name: full_name,
530                            line,
531                            range: (variable.location.start, variable.location.end),
532                            description,
533                        });
534                    }
535                }
536            }
537
538            NodeKind::Use { module, args, .. } => {
539                // Handle 'use vars' pragma for global variable declarations
540                if module == "vars" {
541                    for arg in args {
542                        // Parse qw() style arguments to extract individual variable names
543                        if arg.starts_with("qw(") && arg.ends_with(")") {
544                            let content = &arg[3..arg.len() - 1]; // Remove qw( and )
545                            for var_name in content.split_whitespace() {
546                                if !var_name.is_empty() {
547                                    let (sigil, name) = split_variable_name(var_name);
548                                    if !sigil.is_empty() {
549                                        // Declare these variables as globals in the current scope
550                                        scope.declare_variable_parts(
551                                            sigil,
552                                            name,
553                                            node.location.start,
554                                            true,
555                                            true,
556                                        ); // true = is_our (global), true = initialized (assumed)
557                                    }
558                                }
559                            }
560                        } else {
561                            // Handle regular variable names (not in qw())
562                            let var_name = arg.trim();
563                            if !var_name.is_empty() {
564                                let (sigil, name) = split_variable_name(var_name);
565                                if !sigil.is_empty() {
566                                    scope.declare_variable_parts(
567                                        sigil,
568                                        name,
569                                        node.location.start,
570                                        true,
571                                        true,
572                                    );
573                                }
574                            }
575                        }
576                    }
577                }
578            }
579            NodeKind::Variable { sigil, name } => {
580                // Skip built-in global variables
581                // Optimization: Check built-ins first to avoid string scan for "::" on common globals
582                if is_builtin_global(sigil, name) {
583                    return;
584                }
585
586                // Skip package-qualified variables
587                if name.contains("::") {
588                    return;
589                }
590
591                // Try to use the variable - allocation free!
592                let (mut variable_used, mut is_initialized) = scope.use_variable_parts(sigil, name);
593
594                // If not found as simple variable, check if this is part of a hash/array access pattern
595                if !variable_used && (sigil == "$" || sigil == "@") {
596                    // Check parent for hash/array access context
597                    if let Some(parent) = ancestors.last() {
598                        if let NodeKind::Binary { op, left, .. } = &parent.kind {
599                            // Only check if this node is the LEFT side of the access
600                            if std::ptr::eq(left.as_ref(), node) {
601                                if op == "{}" || op == "->{}" {
602                                    // Check if the corresponding hash exists
603                                    let (hash_used, hash_init) =
604                                        scope.use_variable_parts("%", name);
605                                    if hash_used {
606                                        variable_used = true;
607                                        is_initialized = hash_init;
608                                    }
609                                } else if op == "[]" || op == "->[]" {
610                                    // Check if the corresponding array exists
611                                    let (array_used, array_init) =
612                                        scope.use_variable_parts("@", name);
613                                    if array_used {
614                                        variable_used = true;
615                                        is_initialized = array_init;
616                                    }
617                                }
618                            }
619                        }
620                    }
621                }
622
623                // Variable not found - check if we should report it
624                if !variable_used {
625                    if strict_mode {
626                        let full_name = format!("{}{}", sigil, name);
627                        issues.push(ScopeIssue {
628                            kind: IssueKind::UndeclaredVariable,
629                            variable_name: full_name.clone(),
630                            line: context.get_line(node.location.start),
631                            range: (node.location.start, node.location.end),
632                            description: format!(
633                                "Variable '{}' is used but not declared",
634                                full_name
635                            ),
636                        });
637                    }
638                } else if !is_initialized {
639                    // Variable found but used before initialization
640                    let full_name = format!("{}{}", sigil, name);
641                    issues.push(ScopeIssue {
642                        kind: IssueKind::UninitializedVariable,
643                        variable_name: full_name.clone(),
644                        line: context.get_line(node.location.start),
645                        range: (node.location.start, node.location.end),
646                        description: format!(
647                            "Variable '{}' is used before being initialized",
648                            full_name
649                        ),
650                    });
651                }
652            }
653            NodeKind::String { value, interpolated } => {
654                if *interpolated
655                    || value.starts_with('"')
656                    || value.starts_with('`')
657                    || value.starts_with("qq")
658                    || value.starts_with("qx")
659                {
660                    self.mark_interpolated_variables_used(value, scope);
661                }
662            }
663            NodeKind::Heredoc { content, interpolated, .. } => {
664                if *interpolated {
665                    self.mark_interpolated_variables_used(content, scope);
666                }
667            }
668            NodeKind::Assignment { lhs, rhs, op: _ } => {
669                // Handle assignment: LHS variable becomes initialized
670                // First analyze RHS (usages)
671                self.analyze_node(rhs, scope, ancestors, issues, context);
672
673                // Optimization: Handle simple scalar assignment directly to avoid double lookup
674                // (mark_initialized + analyze_node both perform lookups)
675                if let NodeKind::Variable { sigil, name } = &lhs.kind {
676                    if !name.contains("::") && !is_builtin_global(sigil, name) {
677                        if scope.initialize_and_use_variable_parts(sigil, name) {
678                            return;
679                        }
680                    }
681                }
682
683                // Then analyze LHS
684                // We need to recursively mark variables as initialized in the LHS structure
685                // This handles scalars ($x = 1) and lists (($x, $y) = (1, 2))
686                self.mark_initialized(lhs, scope);
687
688                // Recurse into LHS to trigger UndeclaredVariable checks
689                // Note: 'use_variable' marks as used, which is technically correct for assignment too (write usage)
690                self.analyze_node(lhs, scope, ancestors, issues, context);
691            }
692
693            NodeKind::Tie { variable, package, args } => {
694                ancestors.push(node);
695                // Analyze arguments first
696                self.analyze_node(package, scope, ancestors, issues, context);
697                for arg in args {
698                    self.analyze_node(arg, scope, ancestors, issues, context);
699                }
700
701                if let NodeKind::VariableDeclaration { .. } = variable.kind {
702                    // Must analyze declaration FIRST to declare it, then mark initialized
703                    self.analyze_node(variable, scope, ancestors, issues, context);
704                    self.mark_initialized(variable, scope);
705                } else {
706                    // For existing variables, mark initialized then analyze (usage)
707                    self.mark_initialized(variable, scope);
708                    self.analyze_node(variable, scope, ancestors, issues, context);
709                }
710
711                ancestors.pop();
712            }
713
714            NodeKind::Untie { variable } => {
715                ancestors.push(node);
716                self.analyze_node(variable, scope, ancestors, issues, context);
717                ancestors.pop();
718            }
719
720            NodeKind::Identifier { name } => {
721                // Check for barewords under strict mode, excluding hash keys
722                // Hybrid check: Fast path for immediate hash keys (depth 1), then known functions, then deep check
723                if strict_mode
724                    && !self.is_in_hash_key_context(node, ancestors, 1)
725                    && !is_known_function(name)
726                    && !self.is_in_hash_key_context(node, ancestors, 10)
727                {
728                    issues.push(ScopeIssue {
729                        kind: IssueKind::UnquotedBareword,
730                        variable_name: name.clone(),
731                        line: context.get_line(node.location.start),
732                        range: (node.location.start, node.location.end),
733                        description: format!("Bareword '{}' not allowed under 'use strict'", name),
734                    });
735                }
736            }
737
738            NodeKind::Binary { op: _, left, right } => {
739                // All binary operations (including {} and [])
740                // We don't need special handling for {} and [] here because NodeKind::Variable
741                // will handle the context-sensitive lookup (checking ancestors).
742                ancestors.push(node);
743                self.analyze_node(left, scope, ancestors, issues, context);
744                self.analyze_node(right, scope, ancestors, issues, context);
745                ancestors.pop();
746            }
747
748            NodeKind::ArrayLiteral { elements } => {
749                ancestors.push(node);
750                for element in elements {
751                    self.analyze_node(element, scope, ancestors, issues, context);
752                }
753                ancestors.pop();
754            }
755
756            NodeKind::Block { statements } => {
757                let block_scope = Rc::new(Scope::with_parent(scope.clone()));
758                ancestors.push(node);
759                for stmt in statements {
760                    self.analyze_node(stmt, &block_scope, ancestors, issues, context);
761                }
762                ancestors.pop();
763                self.collect_unused_variables(&block_scope, issues, context);
764            }
765
766            NodeKind::For { init, condition, update, body, .. } => {
767                let loop_scope = Rc::new(Scope::with_parent(scope.clone()));
768
769                ancestors.push(node);
770
771                if let Some(init_node) = init {
772                    self.analyze_node(init_node, &loop_scope, ancestors, issues, context);
773                }
774                if let Some(cond) = condition {
775                    self.analyze_node(cond, &loop_scope, ancestors, issues, context);
776                }
777                if let Some(upd) = update {
778                    self.analyze_node(upd, &loop_scope, ancestors, issues, context);
779                }
780                self.analyze_node(body, &loop_scope, ancestors, issues, context);
781
782                ancestors.pop();
783
784                self.collect_unused_variables(&loop_scope, issues, context);
785            }
786
787            NodeKind::Foreach { variable, list, body, continue_block } => {
788                let loop_scope = Rc::new(Scope::with_parent(scope.clone()));
789
790                ancestors.push(node);
791
792                // Declare the loop variable
793                self.analyze_node(variable, &loop_scope, ancestors, issues, context);
794                self.analyze_node(list, &loop_scope, ancestors, issues, context);
795                self.analyze_node(body, &loop_scope, ancestors, issues, context);
796                if let Some(cb) = continue_block {
797                    self.analyze_node(cb, &loop_scope, ancestors, issues, context);
798                }
799
800                ancestors.pop();
801
802                self.collect_unused_variables(&loop_scope, issues, context);
803            }
804
805            NodeKind::Subroutine { signature, body, .. } => {
806                let sub_scope = Rc::new(Scope::with_parent(scope.clone()));
807
808                // Check for duplicate parameters and shadowing
809                let mut param_names = std::collections::HashSet::new();
810
811                // Extract parameters from signature if present
812                // Optimization: Use slice to avoid cloning the parameters vector (deep copy of AST nodes)
813                let params_to_check: &[Node] = if let Some(sig) = signature {
814                    match &sig.kind {
815                        NodeKind::Signature { parameters } => parameters.as_slice(),
816                        _ => &[],
817                    }
818                } else {
819                    &[]
820                };
821
822                for param in params_to_check {
823                    let extracted = self.extract_variable_name(param);
824                    if !extracted.is_empty() {
825                        let full_name = extracted.as_string();
826                        let (sigil, name) = extracted.parts();
827
828                        // Check for duplicate parameters
829                        if !param_names.insert(full_name.clone()) {
830                            issues.push(ScopeIssue {
831                                kind: IssueKind::DuplicateParameter,
832                                variable_name: full_name.clone(),
833                                line: context.get_line(param.location.start),
834                                range: (param.location.start, param.location.end),
835                                description: format!(
836                                    "Duplicate parameter '{}' in subroutine signature",
837                                    full_name
838                                ),
839                            });
840                        }
841
842                        // Check if parameter shadows a global or parent scope variable
843                        if scope.has_variable_parts(sigil, name) {
844                            issues.push(ScopeIssue {
845                                kind: IssueKind::ParameterShadowsGlobal,
846                                variable_name: full_name.clone(),
847                                line: context.get_line(param.location.start),
848                                range: (param.location.start, param.location.end),
849                                description: format!(
850                                    "Parameter '{}' shadows a variable from outer scope",
851                                    full_name
852                                ),
853                            });
854                        }
855
856                        // Declare the parameter in subroutine scope
857                        sub_scope.declare_variable_parts(
858                            sigil,
859                            name,
860                            param.location.start,
861                            false,
862                            true,
863                        ); // Parameters are initialized
864                        // Don't mark parameters as automatically used yet - track their actual usage
865                    }
866                }
867
868                ancestors.push(node);
869                self.analyze_node(body, &sub_scope, ancestors, issues, context);
870                ancestors.pop();
871
872                // Check for unused parameters
873                if let Some(sig) = signature {
874                    if let NodeKind::Signature { parameters } = &sig.kind {
875                        for param in parameters {
876                            let extracted = self.extract_variable_name(param);
877                            if !extracted.is_empty() {
878                                let (sigil, name) = extracted.parts();
879                                let full_name = extracted.as_string();
880
881                                // Skip parameters starting with underscore (intentionally unused)
882                                if name.starts_with('_') {
883                                    continue;
884                                }
885
886                                // Optimization: Access variable directly from current scope to avoid Rc clone
887                                let idx = sigil_to_index(sigil);
888                                let vars = sub_scope.variables.borrow();
889                                if let Some(map) = vars[idx].as_ref() {
890                                    if let Some(var) = map.get(name) {
891                                        if !*var.is_used.borrow() {
892                                            issues.push(ScopeIssue {
893                                                kind: IssueKind::UnusedParameter,
894                                                variable_name: full_name.clone(),
895                                                line: context.get_line(param.location.start),
896                                                range: (param.location.start, param.location.end),
897                                                description: format!(
898                                                    "Parameter '{}' is declared but never used",
899                                                    full_name
900                                                ),
901                                            });
902                                            // Mark as used to prevent double reporting
903                                            *var.is_used.borrow_mut() = true;
904                                        }
905                                    }
906                                }
907                            }
908                        }
909                    }
910                }
911
912                self.collect_unused_variables(&sub_scope, issues, context);
913            }
914
915            NodeKind::FunctionCall { args, .. } => {
916                // Handle function arguments, which may contain complex variable patterns
917                ancestors.push(node);
918                for arg in args {
919                    self.analyze_node(arg, scope, ancestors, issues, context);
920                }
921                ancestors.pop();
922            }
923
924            _ => {
925                // Recursively analyze children
926                ancestors.push(node);
927                for child in node.children() {
928                    self.analyze_node(child, scope, ancestors, issues, context);
929                }
930                ancestors.pop();
931            }
932        }
933    }
934
935    /// Marks variables as initialized when they appear on the left-hand side of an assignment.
936    /// Handles scalar variables, list assignments like `($x, $y) = ...`, and nested structures.
937    fn mark_initialized(&self, node: &Node, scope: &Rc<Scope>) {
938        match &node.kind {
939            NodeKind::Variable { sigil, name } => {
940                if !name.contains("::") {
941                    scope.initialize_variable_parts(sigil, name);
942                }
943            }
944            // For all other node types (parens, lists, etc.), recurse into children
945            // to find any nested variables that should be marked as initialized
946            _ => {
947                for child in node.children() {
948                    self.mark_initialized(child, scope);
949                }
950            }
951        }
952    }
953
954    fn mark_interpolated_variables_used(&self, content: &str, scope: &Rc<Scope>) {
955        let bytes = content.as_bytes();
956        let mut index = 0;
957
958        while index < bytes.len() {
959            let sigil = match bytes[index] {
960                b'$' => "$",
961                b'@' => "@",
962                _ => {
963                    index += 1;
964                    continue;
965                }
966            };
967
968            if has_escaped_interpolation_marker(bytes, index) {
969                index += 1;
970                continue;
971            }
972
973            if index + 1 >= bytes.len() {
974                break;
975            }
976
977            let (start, requires_closing_brace) =
978                if bytes[index + 1] == b'{' { (index + 2, true) } else { (index + 1, false) };
979
980            if start >= bytes.len() || !is_interpolated_var_start(bytes[start]) {
981                index += 1;
982                continue;
983            }
984
985            let mut end = start + 1;
986            while end < bytes.len() && is_interpolated_var_continue(bytes[end]) {
987                end += 1;
988            }
989
990            if requires_closing_brace && (end >= bytes.len() || bytes[end] != b'}') {
991                index += 1;
992                continue;
993            }
994
995            if let Some(name) = content.get(start..end) {
996                if !name.contains("::") {
997                    let _ = scope.use_variable_parts(sigil, name);
998                }
999            }
1000
1001            index = if requires_closing_brace { end + 1 } else { end };
1002        }
1003    }
1004
1005    fn collect_unused_variables(
1006        &self,
1007        scope: &Rc<Scope>,
1008        issues: &mut Vec<ScopeIssue>,
1009        context: &AnalysisContext<'_>,
1010    ) {
1011        scope.for_each_reportable_unused_variable(|var_name, offset| {
1012            let start = offset.min(context.code.len());
1013            let end = (start + var_name.len()).min(context.code.len());
1014
1015            // Optimization: Generate description using the string reference before moving it
1016            let description = format!("Variable '{}' is declared but never used", var_name);
1017
1018            issues.push(ScopeIssue {
1019                kind: IssueKind::UnusedVariable,
1020                variable_name: var_name, // Move: Avoids cloning the string
1021                line: context.get_line(offset),
1022                range: (start, end),
1023                description,
1024            });
1025        });
1026    }
1027
1028    fn extract_variable_name<'a>(&self, node: &'a Node) -> ExtractedName<'a> {
1029        match &node.kind {
1030            NodeKind::Variable { sigil, name } => ExtractedName::Parts(sigil, name),
1031            NodeKind::MandatoryParameter { variable }
1032            | NodeKind::OptionalParameter { variable, .. }
1033            | NodeKind::SlurpyParameter { variable }
1034            | NodeKind::NamedParameter { variable } => self.extract_variable_name(variable),
1035            NodeKind::ArrayLiteral { elements } => {
1036                // Handle array reference patterns like @{$ref}
1037                if elements.len() == 1 {
1038                    if let Some(first) = elements.first() {
1039                        return self.extract_variable_name(first);
1040                    }
1041                }
1042                ExtractedName::Full(String::new())
1043            }
1044            NodeKind::Binary { op, left, .. } if op == "->" => {
1045                // Handle method call patterns on variables
1046                self.extract_variable_name(left)
1047            }
1048            _ => {
1049                if let Some(child) = node.first_child() {
1050                    self.extract_variable_name(child)
1051                } else {
1052                    ExtractedName::Full(String::new())
1053                }
1054            }
1055        }
1056    }
1057
1058    /// Determines if a node is in a hash key context, where barewords are legitimate.
1059    ///
1060    /// This method efficiently detects various hash key contexts to avoid false positives
1061    /// in strict mode bareword detection. It handles:
1062    ///
1063    /// # Hash Key Contexts Detected:
1064    /// - **Hash subscripts**: `$hash{bareword_key}` or `%hash{bareword_key}`
1065    /// - **Hash literals**: `{ key => value, another_key => value2 }`
1066    /// - **Hash slices**: `@hash{key1, key2, key3}` where keys are in an array
1067    /// - **Nested hash structures**: Complex nested hash access patterns
1068    ///
1069    /// # Performance Characteristics:
1070    /// - Early termination on first positive match
1071    /// - Efficient pointer-based parent traversal
1072    /// - O(depth) complexity where depth is AST nesting level
1073    /// - Typical case: 1-3 parent checks for hash contexts
1074    ///
1075    /// # Examples:
1076    /// ```perl
1077    /// use strict;
1078    /// my %hash = (key1 => 'value1');        # key1 is in hash key context
1079    /// my $val = $hash{bareword_key};         # bareword_key is in hash key context  
1080    /// my @vals = @hash{key1, key2};          # key1, key2 are in hash key context
1081    /// print INVALID_BAREWORD;                # NOT in hash key context - should warn
1082    /// ```
1083    fn is_in_hash_key_context(&self, node: &Node, ancestors: &[&Node], max_depth: usize) -> bool {
1084        let mut current = node;
1085
1086        // Traverse up the AST to find hash key contexts
1087        // Limit traversal depth to prevent excessive searching
1088        // Iterate ancestors in reverse (from immediate parent up)
1089        let len = ancestors.len();
1090
1091        for i in (0..len).rev() {
1092            if len - i > max_depth {
1093                break;
1094            }
1095
1096            let parent = ancestors[i];
1097
1098            match &parent.kind {
1099                // Method call: Class->method (Class is bareword)
1100                NodeKind::Binary { op, left, right: _ } if op == "->" => {
1101                    // Check if current node is the class name (left side of the -> operation)
1102                    if std::ptr::eq(left.as_ref(), current) {
1103                        return true;
1104                    }
1105                }
1106                NodeKind::MethodCall { object, .. } => {
1107                    // Check if current node is the class name (object)
1108                    if std::ptr::eq(object.as_ref(), current) {
1109                        return true;
1110                    }
1111                }
1112                // Hash subscript: $hash{key} or %hash{key}
1113                NodeKind::Binary { op, left: _, right } if op == "{}" => {
1114                    // Check if current node is the key (right side of the {} operation)
1115                    if std::ptr::eq(right.as_ref(), current) {
1116                        return true;
1117                    }
1118                }
1119                NodeKind::HashLiteral { pairs } => {
1120                    // Check if current node is a key in any of the pairs
1121                    for (key, _value) in pairs {
1122                        if std::ptr::eq(key, current) {
1123                            return true;
1124                        }
1125                    }
1126                }
1127                NodeKind::ArrayLiteral { .. } => {
1128                    // Check grandparent
1129                    if i > 0 {
1130                        let grandparent = ancestors[i - 1];
1131                        if let NodeKind::Binary { op, right, .. } = &grandparent.kind {
1132                            if op == "{}" && std::ptr::eq(right.as_ref(), parent) {
1133                                return true;
1134                            }
1135                        }
1136                    }
1137                }
1138                // Handle IndirectCall which parser sometimes produces for $hash{key} in print statements
1139                NodeKind::IndirectCall { object, args, .. } => {
1140                    // Check if current is one of the arguments
1141                    for arg in args {
1142                        if std::ptr::eq(arg, current) {
1143                            // Check if object is a variable that looks like a hash
1144                            if let NodeKind::Variable { sigil, .. } = &object.kind {
1145                                if sigil == "$" {
1146                                    return true;
1147                                }
1148                            }
1149                        }
1150                    }
1151                }
1152                _ => {}
1153            }
1154
1155            current = parent;
1156        }
1157
1158        false
1159    }
1160
1161    pub fn get_suggestions(&self, issues: &[ScopeIssue]) -> Vec<String> {
1162        issues
1163            .iter()
1164            .map(|issue| match issue.kind {
1165                IssueKind::VariableShadowing => {
1166                    format!("Consider rename '{}' to avoid shadowing", issue.variable_name)
1167                }
1168                IssueKind::UnusedVariable => {
1169                    format!(
1170                        "Remove unused variable '{}' or prefix with underscore",
1171                        issue.variable_name
1172                    )
1173                }
1174                IssueKind::UndeclaredVariable => {
1175                    format!("Declare '{}' with 'my', 'our', or 'local'", issue.variable_name)
1176                }
1177                IssueKind::VariableRedeclaration => {
1178                    format!("Remove duplicate declaration of '{}'", issue.variable_name)
1179                }
1180                IssueKind::DuplicateParameter => {
1181                    format!("Remove or rename duplicate parameter '{}'", issue.variable_name)
1182                }
1183                IssueKind::ParameterShadowsGlobal => {
1184                    format!("Rename parameter '{}' to avoid shadowing", issue.variable_name)
1185                }
1186                IssueKind::UnusedParameter => {
1187                    format!("Rename '{}' with underscore or add comment", issue.variable_name)
1188                }
1189                IssueKind::UnquotedBareword => {
1190                    format!("Quote bareword '{}' or declare as filehandle", issue.variable_name)
1191                }
1192                IssueKind::UninitializedVariable => {
1193                    format!("Initialize '{}' before use", issue.variable_name)
1194                }
1195            })
1196            .collect()
1197    }
1198}
1199
/// Reports whether `sigil` + `name` denotes one of the built-in Perl globals
/// known to this analyzer.
///
/// Only the first byte of `sigil` is inspected. An empty sigil selects the
/// built-in filehandle names. For `$`, besides the fixed tables, any `^X`
/// name (caret plus one ASCII uppercase letter) and any all-digit name
/// (capture variables `$0`, `$1`, `$10`, ...) is accepted.
fn is_builtin_global(sigil: &str, name: &str) -> bool {
    /// Punctuation scalars such as `$_`, `$!`, `$@`.
    const PUNCTUATION_SCALARS: &[&str] = &[
        "_", "!", "@", "?", "^", "$", ".", ",", "/", "\\", "\"", ";", "%", "=", "-", "~", "|",
        "&", "`", "'", "+", "[", "]",
    ];
    /// Named scalars: common globals, sort variables, `English`-style aliases.
    const NAMED_SCALARS: &[&str] = &[
        "ARGV",
        "VERSION",
        "AUTOLOAD",
        "a",
        "b",
        "EVAL_ERROR",
        "ERRNO",
        "EXTENDED_OS_ERROR",
        "CHILD_ERROR",
        "PROCESS_ID",
        "PROGRAM_NAME",
        "PERL_VERSION",
        "OLD_PERL_VERSION",
    ];

    // Fast rejection: no built-in starts with a lowercase letter, except the
    // single-letter sort variables `a` and `b`.
    if let Some(&first) = name.as_bytes().first() {
        let is_sort_variable = name.len() == 1 && (first == b'a' || first == b'b');
        if first.is_ascii_lowercase() && !is_sort_variable {
            return false;
        }
    }

    match sigil.as_bytes().first().copied() {
        // Filehandles (no sigil)
        None => matches!(name, "STDIN" | "STDOUT" | "STDERR" | "DATA" | "ARGVOUT"),
        Some(b'$') => {
            // `$^X` style variables: a caret followed by exactly one uppercase letter.
            let is_caret_variable = matches!(
                name.strip_prefix('^').map(str::as_bytes),
                Some([letter]) if letter.is_ascii_uppercase()
            );
            // Numbered capture variables, including `$0`-`$9` and `$10`+.
            let is_numbered = !name.is_empty() && name.bytes().all(|b| b.is_ascii_digit());

            PUNCTUATION_SCALARS.contains(&name)
                || NAMED_SCALARS.contains(&name)
                || is_caret_variable
                || is_numbered
        }
        Some(b'@') => {
            matches!(name, "_" | "+" | "INC" | "ARGV" | "EXPORT" | "EXPORT_OK" | "ISA")
        }
        Some(b'%') => matches!(name, "_" | "+" | "ENV" | "INC" | "SIG" | "EXPORT_TAGS"),
        Some(_) => false,
    }
}
1267
/// Reports whether `name` is one of the Perl built-in functions, keywords or
/// file-test operators known to this analyzer.
///
/// Empty names and names starting with an ASCII uppercase letter are rejected
/// up front, since no entry in the tables has that shape.
fn is_known_function(name: &str) -> bool {
    const IO: &[&str] = &[
        "print", "printf", "say", "open", "close", "read", "write", "seek", "tell", "eof",
        "fileno", "binmode", "sysopen", "sysread", "syswrite", "sysclose", "select",
    ];
    const STRING: &[&str] = &[
        "chomp", "chop", "chr", "crypt", "fc", "hex", "index", "lc", "lcfirst", "length", "oct",
        "ord", "pack", "q", "qq", "qr", "quotemeta", "qw", "qx", "reverse", "rindex", "sprintf",
        "substr", "tr", "uc", "ucfirst", "unpack",
    ];
    const LIST: &[&str] =
        &["pop", "push", "shift", "unshift", "splice", "split", "join", "grep", "map", "sort"];
    const HASH: &[&str] = &["delete", "each", "exists", "keys", "values"];
    const CONTROL_FLOW: &[&str] = &[
        "die", "exit", "return", "goto", "last", "next", "redo", "continue", "break", "given",
        "when", "default",
    ];
    const FILE_STAT: &[&str] = &["stat", "lstat"];
    const SYSTEM: &[&str] = &[
        "system", "exec", "fork", "wait", "waitpid", "kill", "sleep", "alarm", "getpgrp",
        "getppid", "getpriority", "setpgrp", "setpriority", "time", "times", "localtime",
        "gmtime",
    ];
    const MATH: &[&str] =
        &["abs", "atan2", "cos", "exp", "int", "log", "rand", "sin", "sqrt", "srand"];
    const MISC: &[&str] = &[
        "defined", "undef", "ref", "bless", "tie", "tied", "untie", "eval", "caller", "import",
        "require", "use", "do", "package", "sub", "my", "our", "local", "state", "scalar",
        "wantarray", "warn",
    ];
    /// Letters accepted after `-` for file-test operators (`-r`, `-e`, `-M`, ...).
    const FILE_TEST_LETTERS: &[u8] = b"rwxoRWXOezsfdlpSbctugkTBMAC";

    match name.as_bytes().first() {
        None => return false,
        Some(first) if first.is_ascii_uppercase() => return false,
        Some(_) => {}
    }

    // File test operators are the only entries starting with `-`: a dash
    // followed by exactly one of the known letters.
    if let Some(flag) = name.strip_prefix('-') {
        return matches!(flag.as_bytes(), [letter] if FILE_TEST_LETTERS.contains(letter));
    }

    [IO, STRING, LIST, HASH, CONTROL_FLOW, FILE_STAT, SYSTEM, MATH, MISC]
        .iter()
        .any(|group| group.contains(&name))
}
1312
/// Reports whether `name` looks like a filehandle or a special all-caps token.
///
/// A fixed list of well-known names is accepted, and beyond that any non-empty
/// name made up solely of ASCII uppercase letters and underscores (the common
/// naming convention for bareword filehandles).
// Currently has no callers in this file, hence the lint suppression.
#[allow(dead_code)]
fn is_filehandle(name: &str) -> bool {
    let is_well_known = matches!(
        name,
        "STDIN"
            | "STDOUT"
            | "STDERR"
            | "ARGV"
            | "ARGVOUT"
            | "DATA"
            | "STDHANDLE"
            | "__PACKAGE__"
            | "__FILE__"
            | "__LINE__"
            | "__SUB__"
            | "__END__"
            | "__DATA__"
    );
    if is_well_known {
        return true;
    }

    // All uppercase/underscore is the conventional filehandle spelling.
    !name.is_empty() && name.bytes().all(|b| b.is_ascii_uppercase() || b == b'_')
}