mod calls_and_exprs;
mod declarations;
mod interpolation;
mod scope_constructs;
mod uses;
use crate::ast::{Node, NodeKind};
use crate::pragma_tracker::{PragmaQueryCursor, PragmaState};
use perl_module::import::resolve_known_export_tag;
use rustc_hash::FxHashMap;
use std::cell::{Cell, RefCell};
use std::collections::HashSet;
use std::ops::Range;
use std::rc::Rc;
/// Category of a diagnostic produced by the scope analysis.
///
/// Only some variants are raised in this part of the file; the meaning of the
/// others is taken from their name and from the text in
/// `ScopeAnalyzer::get_suggestions` and should be confirmed against the code
/// that emits them.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum IssueKind {
/// A declaration whose name already exists in an enclosing scope.
VariableShadowing,
/// A variable that is declared but never used.
UnusedVariable,
/// A variable that is used but not declared.
UndeclaredVariable,
/// A declaration whose name already exists in the same scope.
VariableRedeclaration,
/// Presumably a parameter name that appears more than once — raised elsewhere, confirm.
DuplicateParameter,
/// Presumably a parameter hiding a global — raised elsewhere, confirm.
ParameterShadowsGlobal,
/// Presumably a parameter that is never used — raised elsewhere, confirm.
UnusedParameter,
/// Presumably a bareword that is neither quoted nor known — raised elsewhere, confirm.
UnquotedBareword,
/// A variable that is used before being initialized.
UninitializedVariable,
/// Presumably a capture variable used without a prior regex match — raised elsewhere, confirm.
CaptureVarWithoutRegexMatch,
}
/// One diagnostic reported by [`ScopeAnalyzer::analyze`].
#[derive(Debug, Clone)]
pub struct ScopeIssue {
/// Category of the problem.
pub kind: IssueKind,
/// Name of the affected variable; the issues built in this file store the
/// sigil followed by the name (for example `$foo`).
pub variable_name: String,
/// Line number as computed by `AnalysisContext::get_line` (first line is 1).
pub line: usize,
/// `(start, end)` offsets into the analyzed source text.
pub range: (usize, usize),
/// Human-readable explanation of the problem.
pub description: String,
}
/// Bookkeeping record for one variable declared in a [`Scope`].
#[derive(Debug)]
struct Variable {
/// Offset passed in when the variable was declared; reported as the start
/// of the range for "unused variable" issues.
declaration_offset: usize,
/// Set to `true` once the variable is looked up through one of the
/// `use_*` methods; starts out equal to `is_our`.
is_used: RefCell<bool>,
/// `true` when the declaration was flagged as `our`; such variables are
/// never reported as unused.
is_our: bool,
/// Whether the variable has been given a value so far.
is_initialized: RefCell<bool>,
}
/// Maps a sigil string to the slot used for it in [`Scope`]'s variable tables.
///
/// Only the first byte is inspected: `$`, `@`, `%`, `&` and `*` map to slots
/// `0..=4`; anything else (including an empty string) maps to slot `5`.
#[inline]
pub(super) fn sigil_to_index(sigil: &str) -> usize {
    const KNOWN_SIGILS: [u8; 5] = [b'$', b'@', b'%', b'&', b'*'];
    sigil
        .bytes()
        .next()
        .and_then(|lead| KNOWN_SIGILS.iter().position(|&known| known == lead))
        .unwrap_or(KNOWN_SIGILS.len())
}
/// Inverse of `sigil_to_index`: returns the sigil for a table slot, or an
/// empty string for any slot outside `0..=4`.
#[inline]
fn index_to_sigil(index: usize) -> &'static str {
    const SIGILS: [&str; 5] = ["$", "@", "%", "&", "*"];
    SIGILS.get(index).copied().unwrap_or("")
}
/// One lexical scope: the variables declared in it plus a link to the
/// enclosing scope.
#[derive(Debug)]
pub(super) struct Scope {
/// One optional name -> variable table per sigil slot (see
/// `sigil_to_index`); a table is only created on first declaration.
variables: RefCell<[Option<FxHashMap<String, Rc<Variable>>>; 6]>,
/// Enclosing scope, or `None` for a scope created with `Scope::new`.
parent: Option<Rc<Scope>>,
/// Flag read by `regex_match_in_scope`; it is set by code outside this
/// part of the file.
has_regex_match: Cell<bool>,
}
impl Scope {
fn new() -> Self {
let vars = std::array::from_fn(|_| None);
Self { variables: RefCell::new(vars), parent: None, has_regex_match: Cell::new(false) }
}
fn with_parent(parent: Rc<Scope>) -> Self {
let vars = std::array::from_fn(|_| None);
Self {
variables: RefCell::new(vars),
parent: Some(parent),
has_regex_match: Cell::new(false),
}
}
fn regex_match_in_scope(&self) -> bool {
if self.has_regex_match.get() {
return true;
}
if let Some(ref parent) = self.parent { parent.regex_match_in_scope() } else { false }
}
fn declare_variable_parts(
&self,
sigil: &str,
name: &str,
offset: usize,
is_our: bool,
is_initialized: bool,
) -> Option<IssueKind> {
let idx = sigil_to_index(sigil);
{
let vars = self.variables.borrow();
if let Some(map) = &vars[idx] {
if map.contains_key(name) {
return Some(IssueKind::VariableRedeclaration);
}
}
}
let shadows = if let Some(ref parent) = self.parent {
parent.has_variable_parts(sigil, name)
} else {
false
};
let mut vars = self.variables.borrow_mut();
let inner = vars[idx].get_or_insert_with(FxHashMap::default);
inner.insert(
name.to_string(),
Rc::new(Variable {
declaration_offset: offset,
is_used: RefCell::new(is_our), is_our,
is_initialized: RefCell::new(is_initialized),
}),
);
if shadows { Some(IssueKind::VariableShadowing) } else { None }
}
fn has_variable_parts(&self, sigil: &str, name: &str) -> bool {
let idx = sigil_to_index(sigil);
let mut current_scope = self;
loop {
{
let vars = current_scope.variables.borrow();
if let Some(map) = &vars[idx] {
if map.contains_key(name) {
return true;
}
}
}
if let Some(ref parent) = current_scope.parent {
current_scope = parent;
} else {
return false;
}
}
}
fn use_variable_parts(&self, sigil: &str, name: &str) -> (bool, bool) {
let idx = sigil_to_index(sigil);
let mut current_scope = self;
loop {
{
let vars = current_scope.variables.borrow();
if let Some(map) = &vars[idx] {
if let Some(var) = map.get(name) {
*var.is_used.borrow_mut() = true;
return (true, *var.is_initialized.borrow());
}
}
}
if let Some(ref parent) = current_scope.parent {
current_scope = parent;
} else {
return (false, false);
}
}
}
fn initialize_variable_parts(&self, sigil: &str, name: &str) {
let idx = sigil_to_index(sigil);
let mut current_scope = self;
loop {
{
let vars = current_scope.variables.borrow();
if let Some(map) = &vars[idx] {
if let Some(var) = map.get(name) {
*var.is_initialized.borrow_mut() = true;
return;
}
}
}
if let Some(ref parent) = current_scope.parent {
current_scope = parent;
} else {
return;
}
}
}
fn initialize_and_use_variable_parts(&self, sigil: &str, name: &str) -> bool {
let idx = sigil_to_index(sigil);
let mut current_scope = self;
loop {
{
let vars = current_scope.variables.borrow();
if let Some(map) = &vars[idx] {
if let Some(var) = map.get(name) {
*var.is_used.borrow_mut() = true;
*var.is_initialized.borrow_mut() = true;
return true;
}
}
}
if let Some(ref parent) = current_scope.parent {
current_scope = parent;
} else {
return false;
}
}
}
fn for_each_reportable_unused_variable<F>(&self, mut f: F)
where
F: FnMut(String, usize),
{
for (idx, inner_opt) in self.variables.borrow().iter().enumerate() {
if let Some(inner) = inner_opt {
for (name, var) in inner {
if !*var.is_used.borrow() && !var.is_our {
if name.starts_with('_') {
continue;
}
let full_name = format!("{}{}", index_to_sigil(idx), name);
f(full_name, var.declaration_offset);
}
}
}
}
}
}
/// Splits a full variable name into `(sigil, bare_name)`.
///
/// The sigil is the first character when it is one of `$ @ % & *`; otherwise
/// the sigil is empty and the whole input is returned as the name.
pub(super) fn split_variable_name(full_name: &str) -> (&str, &str) {
    match full_name.as_bytes().first() {
        // All recognised sigils are single ASCII bytes, so index 1 is a char boundary.
        Some(b'$' | b'@' | b'%' | b'&' | b'*') => full_name.split_at(1),
        _ => ("", full_name),
    }
}
/// Returns `true` for bytes that may begin a variable name inside an
/// interpolated string: ASCII letters and `_`.
fn is_interpolated_var_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
/// Returns `true` for bytes that may follow the first byte of a variable name
/// inside an interpolated string: ASCII letters, digits, `_` and `:`.
fn is_interpolated_var_continue(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b':')
}
/// Reports whether the byte at `index` is escaped, i.e. preceded by an odd
/// number of consecutive backslashes.
fn has_escaped_interpolation_marker(bytes: &[u8], index: usize) -> bool {
    let preceding_backslashes =
        bytes[..index].iter().rev().take_while(|&&byte| byte == b'\\').count();
    preceding_backslashes % 2 == 1
}
/// A variable name obtained from an AST node, either borrowed in two pieces
/// or owned as one string.
pub(super) enum ExtractedName<'a> {
/// Borrowed `(sigil, name)` pair.
Parts(&'a str, &'a str),
/// Owned full name; `parts()` splits it with `split_variable_name`. An empty
/// string is used when no name could be extracted.
Full(String),
}
/// Read-mostly state shared by all handlers during one analysis run.
pub(super) struct AnalysisContext<'a> {
/// Source text being analyzed; offsets and ranges index into it.
code: &'a str,
/// Ranges paired with the pragma state for them, queried through
/// `pragma_cursor`.
pragma_map: &'a [(Range<usize>, PragmaState)],
/// Cursor object used by `pragma_state_for_offset` to look up `pragma_map`.
pragma_cursor: RefCell<PragmaQueryCursor>,
/// Barewords gathered up front by `collect_imported_barewords`.
imported_barewords: HashSet<String>,
/// Cache of the offset at which each line starts; built on the first call
/// to `get_line`.
line_starts: RefCell<Option<Vec<usize>>>,
/// Package name used to qualify unqualified names; starts as `"main"`.
current_package: RefCell<String>,
}
impl<'a> AnalysisContext<'a> {
    /// Builds the context for one analysis run over `code`.
    ///
    /// Imported barewords are collected from `ast` immediately; the line
    /// index is built lazily, and the current package starts as `"main"`.
    fn new(ast: &Node, code: &'a str, pragma_map: &'a [(Range<usize>, PragmaState)]) -> Self {
        Self {
            code,
            pragma_map,
            pragma_cursor: RefCell::new(PragmaQueryCursor::new()),
            imported_barewords: collect_imported_barewords(ast),
            line_starts: RefCell::new(None),
            current_package: RefCell::new("main".to_string()),
        }
    }

    /// Returns the pragma state that applies at `offset`.
    fn pragma_state_for_offset(&self, offset: usize) -> PragmaState {
        self.pragma_cursor.borrow_mut().state_for_offset(self.pragma_map, offset)
    }

    /// Reports whether `name` was collected as an imported bareword.
    fn has_imported_bareword(&self, name: &str) -> bool {
        self.imported_barewords.contains(name)
    }

    /// Converts a byte offset into a 1-based line number.
    ///
    /// The table of line-start offsets is computed on first use and cached.
    fn get_line(&self, offset: usize) -> usize {
        let mut line_starts_guard = self.line_starts.borrow_mut();
        let starts = line_starts_guard.get_or_insert_with(|| {
            // Rough capacity guess of one line per 40 bytes of source.
            let mut indices = Vec::with_capacity(self.code.len() / 40);
            indices.push(0);
            for (i, b) in self.code.bytes().enumerate() {
                if b == b'\n' {
                    indices.push(i + 1);
                }
            }
            indices
        });
        match starts.binary_search(&offset) {
            // Exactly at a line start: that line, converted to 1-based.
            Ok(idx) => idx + 1,
            // Inside a line: the insertion point equals the 1-based line number.
            Err(idx) => idx,
        }
    }

    /// Locates `full_name` in the source text just before a catch body.
    ///
    /// Looks at (up to) the 256 bytes preceding `catch_body_start`, finds the
    /// last `catch` keyword in that window and then the last occurrence of
    /// `full_name` after it. Returns the `(start, end)` byte range of that
    /// occurrence, or `None` when the inputs are out of range or nothing is
    /// found.
    fn find_catch_variable_range(
        &self,
        catch_body_start: usize,
        full_name: &str,
    ) -> Option<(usize, usize)> {
        if full_name.is_empty() || catch_body_start == 0 || catch_body_start > self.code.len() {
            return None;
        }
        let mut window_start = catch_body_start.saturating_sub(256);
        // The raw subtraction may land inside a multi-byte character, which
        // would make `str::get` fail and hide the variable. Move forward to
        // the next char boundary instead (at most three bytes; the end of the
        // string is always a boundary, so this terminates).
        while !self.code.is_char_boundary(window_start) {
            window_start += 1;
        }
        let window = self.code.get(window_start..catch_body_start)?;
        let catch_start = window.rfind("catch")?;
        let search_start = catch_start + "catch".len();
        let var_offset = window[search_start..].rfind(full_name)? + search_start;
        let start = window_start + var_offset;
        let end = start + full_name.len();
        Some((start, end))
    }
}
impl<'a> ExtractedName<'a> {
    /// Returns the full name (sigil followed by name) as an owned string.
    fn as_string(&self) -> String {
        match self {
            Self::Full(whole) => whole.to_owned(),
            Self::Parts(sigil, name) => [*sigil, *name].concat(),
        }
    }

    /// Returns the name as a `(sigil, name)` pair, splitting an owned full
    /// name with `split_variable_name`.
    fn parts(&self) -> (&str, &str) {
        match self {
            Self::Full(whole) => split_variable_name(whole),
            Self::Parts(sigil, name) => (sigil, name),
        }
    }

    /// Returns `true` when there is neither a sigil nor a name.
    fn is_empty(&self) -> bool {
        match self {
            Self::Full(whole) => whole.is_empty(),
            Self::Parts(sigil, name) => sigil.len() + name.len() == 0,
        }
    }
}
/// Stateless entry point for scope analysis; all per-run state lives in the
/// scope and context values created by `analyze`.
pub struct ScopeAnalyzer;
impl Default for ScopeAnalyzer {
fn default() -> Self {
Self::new()
}
}
impl ScopeAnalyzer {
pub fn new() -> Self {
Self
}
/// Qualifies `name` with the context's current package (`Package::name`).
///
/// Returns `None` when `name` is empty or already contains `::`.
pub(super) fn package_variable_name(
    &self,
    name: &str,
    context: &AnalysisContext<'_>,
) -> Option<String> {
    let can_qualify = !name.is_empty() && !name.contains("::");
    can_qualify.then(|| {
        let package = context.current_package.borrow();
        [package.as_str(), "::", name].concat()
    })
}
/// Declares a variable in `scope`, storing `our` declarations under their
/// package-qualified name when one can be formed.
///
/// Returns whatever `Scope::declare_variable_parts` reports for the name
/// that was actually stored.
pub(super) fn declare_variable_parts_in_context(
    &self,
    scope: &Rc<Scope>,
    sigil: &str,
    name: &str,
    offset: usize,
    is_our: bool,
    is_initialized: bool,
    context: &AnalysisContext<'_>,
) -> Option<IssueKind> {
    // Only `our` declarations are qualified; everything else keeps its plain name.
    let qualified = if is_our { self.package_variable_name(name, context) } else { None };
    let stored_name = qualified.as_deref().unwrap_or(name);
    scope.declare_variable_parts(sigil, stored_name, offset, is_our, is_initialized)
}
/// Reports whether the variable is visible from `scope`, first under its
/// plain name and then under its package-qualified name.
pub(super) fn has_variable_parts_in_context(
    &self,
    scope: &Rc<Scope>,
    sigil: &str,
    name: &str,
    context: &AnalysisContext<'_>,
) -> bool {
    if scope.has_variable_parts(sigil, name) {
        return true;
    }
    match self.package_variable_name(name, context) {
        Some(qualified_name) => scope.has_variable_parts(sigil, &qualified_name),
        None => false,
    }
}
/// Marks the variable as used, trying the plain name first and the
/// package-qualified name second.
///
/// Returns `(found, initialized)` from whichever lookup succeeded, or
/// `(false, false)` when neither did.
pub(super) fn use_variable_parts_in_context(
    &self,
    scope: &Rc<Scope>,
    sigil: &str,
    name: &str,
    context: &AnalysisContext<'_>,
) -> (bool, bool) {
    match scope.use_variable_parts(sigil, name) {
        hit @ (true, _) => hit,
        _ => match self.package_variable_name(name, context) {
            Some(qualified_name) => scope.use_variable_parts(sigil, &qualified_name),
            None => (false, false),
        },
    }
}
/// Marks the variable as initialized, preferring a declaration under the
/// plain name and falling back to the package-qualified name.
pub(super) fn initialize_variable_parts_in_context(
    &self,
    scope: &Rc<Scope>,
    sigil: &str,
    name: &str,
    context: &AnalysisContext<'_>,
) {
    if scope.has_variable_parts(sigil, name) {
        scope.initialize_variable_parts(sigil, name);
    } else if let Some(qualified_name) = self.package_variable_name(name, context) {
        scope.initialize_variable_parts(sigil, &qualified_name);
    }
}
/// Marks the variable as both used and initialized, trying the plain name
/// first and the package-qualified name second.
///
/// Returns whether a declaration was found.
pub(super) fn initialize_and_use_variable_parts_in_context(
    &self,
    scope: &Rc<Scope>,
    sigil: &str,
    name: &str,
    context: &AnalysisContext<'_>,
) -> bool {
    scope.initialize_and_use_variable_parts(sigil, name)
        || match self.package_variable_name(name, context) {
            Some(qualified_name) => {
                scope.initialize_and_use_variable_parts(sigil, &qualified_name)
            }
            None => false,
        }
}
/// Runs the scope analysis over `ast` and returns every issue found.
///
/// `code` is the source text the AST was built from and `pragma_map`
/// supplies the pragma state per source range. After the tree walk, unused
/// variables of the root scope are appended to the result.
pub fn analyze(
    &self,
    ast: &Node,
    code: &str,
    pragma_map: &[(Range<usize>, PragmaState)],
) -> Vec<ScopeIssue> {
    let context = AnalysisContext::new(ast, code, pragma_map);
    let root_scope = Rc::new(Scope::new());
    let mut found: Vec<ScopeIssue> = Vec::new();
    let mut ancestors: Vec<&Node> = Vec::new();

    self.analyze_node(ast, &root_scope, &mut ancestors, &mut found, &context);
    self.collect_unused_variables(&root_scope, &mut found, &context);
    found
}
/// Analyzes one AST node by dispatching on its kind.
///
/// Each recognised kind is forwarded to a handler in one of the sibling
/// modules (`declarations`, `uses`, `calls_and_exprs`, `interpolation`,
/// `scope_constructs`). Any other kind is handled generically: the node is
/// pushed onto `ancestors`, every child is analyzed recursively in the same
/// `scope`, and the node is popped again.
///
/// `ancestors` is the stack of enclosing nodes (outermost first) and `issues`
/// receives the diagnostics.
pub(super) fn analyze_node<'a>(
&self,
node: &'a Node,
scope: &Rc<Scope>,
ancestors: &mut Vec<&'a Node>,
issues: &mut Vec<ScopeIssue>,
context: &AnalysisContext<'a>,
) {
// Pragma state is looked up at the node's start offset; either the
// dedicated strict flag or `signatures_strict` turns each strict mode on.
let pragma_state = context.pragma_state_for_offset(node.location.start);
let strict_vars_mode = pragma_state.strict_vars || pragma_state.signatures_strict;
let strict_subs_mode = pragma_state.strict_subs || pragma_state.signatures_strict;
match &node.kind {
NodeKind::VariableDeclaration { declarator, variable, initializer, .. } => {
let _ = declarations::handle_variable_declaration(
self,
node,
declarator,
variable,
initializer.as_deref(),
scope,
ancestors,
issues,
context,
);
}
NodeKind::VariableListDeclaration { declarator, variables, initializer, .. } => {
declarations::handle_variable_list_declaration(
self,
initializer.as_deref(),
declarator,
variables,
scope,
ancestors,
issues,
context,
);
}
NodeKind::Use { module, args, .. } => {
declarations::handle_use(self, node, module, args, scope, context);
}
NodeKind::Variable { sigil, name } => {
let _ = uses::handle_variable(
self,
node,
sigil,
name,
scope,
ancestors,
issues,
context,
strict_vars_mode,
);
}
NodeKind::Typeglob { name } => {
uses::handle_typeglob(self, node, name, scope, issues, context, strict_vars_mode);
}
// Only readlines with an explicit filehandle are handled here; the
// rest fall through to the generic arm at the bottom.
NodeKind::Readline { filehandle: Some(filehandle) } => {
uses::handle_readline(
self,
node,
filehandle,
scope,
issues,
context,
strict_vars_mode,
);
}
NodeKind::FunctionCall { name, args } => {
calls_and_exprs::handle_function_call(
self,
node,
name,
args,
scope,
ancestors,
issues,
context,
strict_vars_mode,
);
}
NodeKind::MethodCall { object, method, args } => {
calls_and_exprs::handle_method_call(
self,
node,
object,
method,
args,
scope,
ancestors,
issues,
context,
strict_vars_mode,
);
}
NodeKind::Unary { op: _, operand } => {
calls_and_exprs::handle_unary(
self, node, operand, scope, ancestors, issues, context,
);
}
NodeKind::String { value, interpolated } => {
interpolation::handle_string(self, value, *interpolated, scope, context);
}
NodeKind::Heredoc { content, interpolated, .. } => {
interpolation::handle_heredoc(self, content, *interpolated, scope, context);
}
NodeKind::Assignment { lhs, rhs, op: _ } => {
let _ = uses::handle_assignment(
self, node, lhs, rhs, scope, ancestors, issues, context,
);
}
NodeKind::Tie { variable, package, args } => {
uses::handle_tie(
self, node, variable, package, args, scope, ancestors, issues, context,
);
}
NodeKind::Untie { variable } => {
uses::handle_untie(self, node, variable, scope, ancestors, issues, context);
}
NodeKind::Identifier { name } => {
uses::handle_identifier(
self,
node,
name,
issues,
context,
ancestors,
&pragma_state,
strict_subs_mode,
);
}
NodeKind::Binary { op: _, left, right } => {
calls_and_exprs::handle_binary(
self, node, left, right, scope, ancestors, issues, context,
);
}
NodeKind::ArrayLiteral { elements } => {
calls_and_exprs::handle_array_literal(
self, node, elements, scope, ancestors, issues, context,
);
}
NodeKind::Block { statements } => {
scope_constructs::handle_block(
self, node, statements, scope, ancestors, issues, context,
);
}
NodeKind::PhaseBlock { block, .. } => {
scope_constructs::handle_phase_block(
self, node, block, scope, ancestors, issues, context,
);
}
NodeKind::For { init, condition, update, body, .. } => {
scope_constructs::handle_for(
self,
node,
init.as_deref(),
condition.as_deref(),
update.as_deref(),
body,
scope,
ancestors,
issues,
context,
);
}
NodeKind::Foreach { variable, list, body, continue_block } => {
scope_constructs::handle_foreach(
self,
node,
variable,
list,
body,
continue_block.as_deref(),
scope,
ancestors,
issues,
context,
);
}
NodeKind::Subroutine { signature, body, .. } => {
scope_constructs::handle_subroutine(
self,
node,
signature.as_deref(),
body,
scope,
ancestors,
issues,
context,
);
}
NodeKind::Try { body, catch_blocks, finally_block } => {
scope_constructs::handle_try(
self,
node,
body,
catch_blocks,
finally_block.as_deref(),
scope,
ancestors,
issues,
context,
);
}
NodeKind::Package { name, block, .. } => {
scope_constructs::handle_package(
self,
node,
name,
block.as_deref(),
scope,
ancestors,
issues,
context,
);
}
NodeKind::Match { expr, .. } => {
interpolation::handle_match(self, node, expr, scope, ancestors, issues, context);
}
NodeKind::Substitution { expr, .. } => {
interpolation::handle_substitution(
self, node, expr, scope, ancestors, issues, context,
);
}
NodeKind::Regex { .. } => {
interpolation::handle_regex(scope);
}
// Generic traversal: keep the ancestor stack in sync while visiting
// the children in the current scope.
_ => {
ancestors.push(node);
for child in node.children() {
self.analyze_node(child, scope, ancestors, issues, context);
}
ancestors.pop();
}
}
}
/// Works out which declared variable a `Variable` node actually refers to,
/// returned as `(sigil, name)`.
///
/// Returns `None` when `node` is not a `Variable`. Otherwise the rules are
/// tried in this order, and the first match wins:
///
/// 1. Sigil `@`, `%` or `$` and the node's source text satisfies
///    `is_explicit_scalar_reference_deref`: the scalar named by
///    `normalize_scalar_deref_base_name(name)`. (Both helpers are defined
///    outside this part of the file.)
/// 2. Sigil `@`, `%` or `$` and the name itself starts with `$` followed by
///    more text: the scalar named by the rest.
/// 3. A `$` variable that is the left operand of its parent `Binary` node:
///    `[]` maps to the array, `->[]` / `->{}` to the scalar, `{}` to the
///    scalar when one of the dynamic-method checks matches and to the hash
///    otherwise.
/// 4. An `@` variable that is the left operand of a `{}` parent: the hash.
/// 5. A `$` variable that is the object of its parent `IndirectCall`: the
///    array when the first argument is an `ArrayLiteral`, the hash when it is
///    a `Block`.
/// 6. Otherwise the node's own sigil and name.
///
/// "Parent" is the last entry of `ancestors`; operands are compared by
/// pointer identity.
pub(super) fn resolve_variable_use_target<'a>(
&self,
node: &'a Node,
ancestors: &[&'a Node],
context: &AnalysisContext<'_>,
) -> Option<(&'a str, &'a str)> {
let NodeKind::Variable { sigil, name } = &node.kind else {
return None;
};
// Rule 1: decided from the raw source text covered by the node.
if (sigil == "@" || sigil == "%" || sigil == "$")
&& context
.code
.get(node.location.start..node.location.end)
.is_some_and(is_explicit_scalar_reference_deref)
{
return Some(("$", normalize_scalar_deref_base_name(name)));
}
// Rule 2: the stored name carries a leading `$` of its own.
if (sigil == "@" || sigil == "%" || sigil == "$") && name.starts_with('$') && name.len() > 1
{
return Some(("$", &name[1..]));
}
// Rule 3: `$name` used as the left side of an indexing/subscript operator.
if sigil == "$"
&& let Some(parent) = ancestors.last()
&& let NodeKind::Binary { op, left, right } = &parent.kind
&& std::ptr::eq(left.as_ref(), node)
{
match op.as_str() {
"[]" => return Some(("@", name)),
"->[]" | "->{}" => return Some(("$", name)),
"{}" if self.is_dynamic_method_deref_rhs(right)
|| self.is_dynamic_method_deref_context(parent, ancestors)
|| self.is_braced_dynamic_method_call(parent, context) =>
{
return Some(("$", name));
}
"{}" => return Some(("%", name)),
_ => {}
}
}
// Rule 4: `@name{...}` refers to the hash of the same name.
if sigil == "@"
&& let Some(parent) = ancestors.last()
&& let NodeKind::Binary { op, left, .. } = &parent.kind
&& op == "{}"
&& std::ptr::eq(left.as_ref(), node)
{
return Some(("%", name));
}
// Rule 5: `$name` as the object of an indirect call; the kind of the first
// argument decides between array and hash.
if sigil == "$"
&& let Some(parent) = ancestors.last()
&& let NodeKind::IndirectCall { object, args, .. } = &parent.kind
&& std::ptr::eq(object.as_ref(), node)
{
if let Some(first_arg) = args.first() {
match &first_arg.kind {
NodeKind::ArrayLiteral { .. } => return Some(("@", name)),
NodeKind::Block { .. } => return Some(("%", name)),
_ => {}
}
}
}
// Rule 6: nothing special applies.
Some((sigil, name))
}
/// Interprets `name` as a sigil-prefixed variable.
///
/// Returns `(sigil, bare_name)` only when a sigil is present, the bare name
/// is non-empty and unqualified (no `::`), and it passes
/// `looks_like_variable_name`.
pub(super) fn extract_name_like_variable<'a>(
    &self,
    name: &'a str,
) -> Option<(&'a str, &'a str)> {
    let (sigil, var_name) = split_variable_name(name);
    let acceptable = !sigil.is_empty()
        && !var_name.is_empty()
        && !var_name.contains("::")
        && self.looks_like_variable_name(var_name);
    if acceptable { Some((sigil, var_name)) } else { None }
}
/// Extracts the variable behind a dynamic method name.
///
/// Accepts either a plain sigil-prefixed name (see
/// `extract_name_like_variable`) or the braced scalar form `${name}`, which
/// yields `("$", name)`. Qualified or implausible names give `None`.
pub(super) fn extract_method_name_variable<'a>(
    &self,
    method: &'a str,
) -> Option<(&'a str, &'a str)> {
    if let Some(direct) = self.extract_name_like_variable(method) {
        return Some(direct);
    }
    let inner = method.strip_prefix("${")?.strip_suffix('}')?;
    let plausible = !inner.contains("::") && self.looks_like_variable_name(inner);
    if plausible { Some(("$", inner)) } else { None }
}
/// Cheap plausibility check on a variable name: the first character must be
/// an ASCII letter, `_`, or one of `$ @ % & * ^ # ! ?`. Empty names fail.
pub(super) fn looks_like_variable_name(&self, name: &str) -> bool {
    name.chars().next().is_some_and(|lead| {
        lead.is_ascii_alphabetic()
            || matches!(lead, '_' | '$' | '@' | '%' | '&' | '*' | '^' | '#' | '!' | '?')
    })
}
/// Returns `true` when `node` is a unary `\` applied to a string or an
/// identifier.
pub(super) fn is_dynamic_method_deref_rhs(&self, node: &Node) -> bool {
    let NodeKind::Unary { op, operand } = &node.kind else {
        return false;
    };
    op == "\\" && matches!(&operand.kind, NodeKind::String { .. } | NodeKind::Identifier { .. })
}
/// Checks the second-to-last entry of `ancestors` for a call that uses
/// `node` as its target.
///
/// Returns `true` when that ancestor is a `MethodCall` whose object is
/// `node`, or a `FunctionCall` named `->()` whose first argument is `node`
/// (both compared by pointer identity). Returns `false` when fewer than two
/// ancestors exist.
pub(super) fn is_dynamic_method_deref_context<'a>(
    &self,
    node: &'a Node,
    ancestors: &[&'a Node],
) -> bool {
    let Some(&grandparent) = ancestors.iter().nth_back(1) else {
        return false;
    };
    match &grandparent.kind {
        NodeKind::FunctionCall { name, args } if name == "->()" => {
            matches!(args.first(), Some(first) if std::ptr::eq(first, node))
        }
        NodeKind::MethodCall { object, .. } => std::ptr::eq(object.as_ref(), node),
        _ => false,
    }
}
/// Textual check for a call of the form `...->${...}()`.
///
/// Returns `true` when the source text covered by `node` contains `->${` and
/// the text after the node, ignoring leading whitespace, starts with `()`.
/// Offsets that do not form a valid slice of the source give `false`.
pub(super) fn is_braced_dynamic_method_call(
    &self,
    node: &Node,
    context: &AnalysisContext<'_>,
) -> bool {
    let source = context.code;
    let node_end = node.location.end;
    let has_braced_selector = source
        .get(node.location.start..node_end)
        .is_some_and(|selector_text| selector_text.contains("->${"));
    has_braced_selector
        && source.get(node_end..).is_some_and(|rest| rest.trim_start().starts_with("()"))
}
/// Records a use of `sigil`+`name` and reports problems with it.
///
/// An unknown variable is reported as undeclared only when
/// `strict_vars_mode` is on; a known but uninitialized variable is always
/// reported as uninitialized.
pub(super) fn record_variable_use(
    &self,
    scope: &Rc<Scope>,
    strict_vars_mode: bool,
    context: &AnalysisContext<'_>,
    issues: &mut Vec<ScopeIssue>,
    node: &Node,
    sigil: &str,
    name: &str,
) {
    match self.use_variable_parts_in_context(scope, sigil, name, context) {
        (false, _) if strict_vars_mode => {
            self.push_undeclared_variable_issue(issues, context, node, sigil, name);
        }
        (true, false) => {
            self.push_uninitialized_variable_issue(issues, context, node, sigil, name);
        }
        _ => {}
    }
}
/// Appends an `UndeclaredVariable` issue for `sigil`+`name`, located at `node`.
pub(super) fn push_undeclared_variable_issue(
    &self,
    issues: &mut Vec<ScopeIssue>,
    context: &AnalysisContext<'_>,
    node: &Node,
    sigil: &str,
    name: &str,
) {
    let span = (node.location.start, node.location.end);
    let full_name = [sigil, name].concat();
    let description = format!("Variable '{}' is used but not declared", full_name);
    issues.push(ScopeIssue {
        kind: IssueKind::UndeclaredVariable,
        variable_name: full_name,
        line: context.get_line(span.0),
        range: span,
        description,
    });
}
/// Appends an `UninitializedVariable` issue for `sigil`+`name`, located at `node`.
pub(super) fn push_uninitialized_variable_issue(
    &self,
    issues: &mut Vec<ScopeIssue>,
    context: &AnalysisContext<'_>,
    node: &Node,
    sigil: &str,
    name: &str,
) {
    let span = (node.location.start, node.location.end);
    let full_name = [sigil, name].concat();
    let description = format!("Variable '{}' is used before being initialized", full_name);
    issues.push(ScopeIssue {
        kind: IssueKind::UninitializedVariable,
        variable_name: full_name,
        line: context.get_line(span.0),
        range: span,
        description,
    });
}
/// Marks every unqualified variable found in the subtree rooted at `node`
/// as initialized.
///
/// The walk stops at `Variable` nodes; package-qualified names (containing
/// `::`) are left alone.
pub(super) fn mark_initialized(
    &self,
    node: &Node,
    scope: &Rc<Scope>,
    context: &AnalysisContext<'_>,
) {
    if let NodeKind::Variable { sigil, name } = &node.kind {
        if !name.contains("::") {
            self.initialize_variable_parts_in_context(scope, sigil, name, context);
        }
        return;
    }
    for child in node.children() {
        self.mark_initialized(child, scope, context);
    }
}
/// Analyzes the statements of a block directly in the given `scope`.
///
/// For a `Block` node its statements are analyzed one by one with the block
/// pushed on `ancestors`, bypassing the `Block` arm of `analyze_node`. Any
/// other node is simply handed to `analyze_node`.
pub(super) fn analyze_block_with_scope<'a>(
    &self,
    node: &'a Node,
    scope: &Rc<Scope>,
    ancestors: &mut Vec<&'a Node>,
    issues: &mut Vec<ScopeIssue>,
    context: &AnalysisContext<'a>,
) {
    let NodeKind::Block { statements } = &node.kind else {
        self.analyze_node(node, scope, ancestors, issues, context);
        return;
    };
    ancestors.push(node);
    for statement in statements {
        self.analyze_node(statement, scope, ancestors, issues, context);
    }
    ancestors.pop();
}
/// Marks variables declared inside `node` as used and initialized.
///
/// Handles a single declaration, each element of a list declaration, and
/// the variable wrapped by `VariableWithAttributes`; any other node kind is
/// ignored. Names without a sigil, without a name, or containing `::` are
/// skipped.
pub(super) fn mark_builtin_declaration_arg_consumed(
    &self,
    node: &Node,
    scope: &Rc<Scope>,
    context: &AnalysisContext<'_>,
) {
    match &node.kind {
        NodeKind::VariableWithAttributes { variable, .. } => {
            self.mark_builtin_declaration_arg_consumed(variable, scope, context);
        }
        NodeKind::VariableListDeclaration { variables, .. } => {
            for declared in variables {
                self.mark_builtin_declaration_arg_consumed(declared, scope, context);
            }
        }
        NodeKind::VariableDeclaration { variable, .. } => {
            let extracted = self.extract_variable_name(variable);
            let (sigil, name) = extracted.parts();
            let skip = sigil.is_empty() || name.is_empty() || name.contains("::");
            if !skip {
                let _ =
                    self.initialize_and_use_variable_parts_in_context(scope, sigil, name, context);
            }
        }
        _ => {}
    }
}
/// Scans the text of an interpolating string and marks every `$name`,
/// `@name`, `${name}` and `@{name}` it finds as used.
///
/// A sigil preceded by an odd number of backslashes is treated as escaped
/// and skipped. Package-qualified names (containing `::`) are recognised but
/// not looked up. A single `:` ends the name, so in `"$count: done"` the
/// variable is `$count`; only the two-character separator `::` continues a
/// name.
pub(super) fn mark_interpolated_variables_used(
    &self,
    content: &str,
    scope: &Rc<Scope>,
    context: &AnalysisContext<'_>,
) {
    let bytes = content.as_bytes();
    let mut index = 0;
    while index < bytes.len() {
        let sigil = match bytes[index] {
            b'$' => "$",
            b'@' => "@",
            _ => {
                index += 1;
                continue;
            }
        };
        if has_escaped_interpolation_marker(bytes, index) {
            index += 1;
            continue;
        }
        // A sigil as the very last byte cannot start a variable.
        if index + 1 >= bytes.len() {
            break;
        }
        let (start, requires_closing_brace) =
            if bytes[index + 1] == b'{' { (index + 2, true) } else { (index + 1, false) };
        if start >= bytes.len() || !is_interpolated_var_start(bytes[start]) {
            index += 1;
            continue;
        }
        let mut end = start + 1;
        while end < bytes.len() {
            match bytes[end] {
                // `::` is a package separator and belongs to the name.
                b':' if bytes.get(end + 1) == Some(&b':') => end += 2,
                // A lone `:` is ordinary text following the variable.
                b':' => break,
                byte if is_interpolated_var_continue(byte) => end += 1,
                _ => break,
            }
        }
        if requires_closing_brace && (end >= bytes.len() || bytes[end] != b'}') {
            index += 1;
            continue;
        }
        if let Some(name) = content.get(start..end) {
            if !name.contains("::") {
                let _ = self.use_variable_parts_in_context(scope, sigil, name, context);
            }
        }
        index = if requires_closing_brace { end + 1 } else { end };
    }
}
/// Appends an `UnusedVariable` issue for each reportable unused variable
/// declared directly in `scope`.
///
/// The reported range starts at the declaration offset and spans the length
/// of the full variable name, clamped to the end of the source text.
pub(super) fn collect_unused_variables(
    &self,
    scope: &Rc<Scope>,
    issues: &mut Vec<ScopeIssue>,
    context: &AnalysisContext<'_>,
) {
    let source_len = context.code.len();
    scope.for_each_reportable_unused_variable(|var_name, offset| {
        let start = offset.min(source_len);
        let end = source_len.min(start + var_name.len());
        issues.push(ScopeIssue {
            kind: IssueKind::UnusedVariable,
            line: context.get_line(offset),
            range: (start, end),
            description: format!("Variable '{}' is declared but never used", var_name),
            variable_name: var_name,
        });
    });
}
/// Finds the variable name carried by `node`.
///
/// A `Variable` yields its sigil and name directly. Parameter wrappers, a
/// one-element array literal and the left side of a `->` binary are
/// unwrapped recursively; any other node is searched through its first
/// child. When nothing is found the result is an empty `Full` name.
pub(super) fn extract_variable_name<'a>(&self, node: &'a Node) -> ExtractedName<'a> {
    match &node.kind {
        NodeKind::Variable { sigil, name } => ExtractedName::Parts(sigil, name),
        NodeKind::MandatoryParameter { variable }
        | NodeKind::OptionalParameter { variable, .. }
        | NodeKind::SlurpyParameter { variable }
        | NodeKind::NamedParameter { variable } => self.extract_variable_name(variable),
        NodeKind::ArrayLiteral { elements } => match &elements[..] {
            [only] => self.extract_variable_name(only),
            _ => ExtractedName::Full(String::new()),
        },
        NodeKind::Binary { op, left, .. } if op == "->" => self.extract_variable_name(left),
        _ => match node.first_child() {
            Some(child) => self.extract_variable_name(child),
            None => ExtractedName::Full(String::new()),
        },
    }
}
/// Walks up the ancestor chain looking for a position in which `node` (or
/// the ancestor the walk has currently reached) is exempt as a "hash key"
/// style operand.
///
/// Ancestors are inspected from the innermost outwards, at most `max_depth`
/// of them. At each level `current` is the child the walk came from (compared
/// by pointer identity) and the function returns `true` when:
///
/// * the ancestor is a `->` binary and `current` is its left operand;
/// * the ancestor is a method call and `current` is its object;
/// * the ancestor is a `{}` binary and `current` is its right operand;
/// * the ancestor is a hash literal and `current` is one of its keys;
/// * the ancestor is an array literal that is itself the right operand of a
///   `{}` binary;
/// * the ancestor is an indirect call, `current` is one of its arguments and
///   the call's object is a `$` variable.
///
/// Returns `false` when no level matches.
pub(super) fn is_in_hash_key_context(
&self,
node: &Node,
ancestors: &[&Node],
max_depth: usize,
) -> bool {
let mut current = node;
let len = ancestors.len();
for i in (0..len).rev() {
// `len - i` is the 1-based distance from `node`; stop past the limit.
if len - i > max_depth {
break;
}
let parent = ancestors[i];
match &parent.kind {
NodeKind::Binary { op, left, right: _ } if op == "->" => {
if std::ptr::eq(left.as_ref(), current) {
return true;
}
}
NodeKind::MethodCall { object, .. } => {
if std::ptr::eq(object.as_ref(), current) {
return true;
}
}
NodeKind::Binary { op, left: _, right } if op == "{}" => {
if std::ptr::eq(right.as_ref(), current) {
return true;
}
}
NodeKind::HashLiteral { pairs } => {
for (key, _value) in pairs {
if std::ptr::eq(key, current) {
return true;
}
}
}
NodeKind::ArrayLiteral { .. } => {
// Look one level further up: the literal must be the subscript
// (right operand) of a `{}` binary.
if i > 0 {
let grandparent = ancestors[i - 1];
if let NodeKind::Binary { op, right, .. } = &grandparent.kind {
if op == "{}" && std::ptr::eq(right.as_ref(), parent) {
return true;
}
}
}
}
NodeKind::IndirectCall { object, args, .. } => {
for arg in args {
if std::ptr::eq(arg, current) {
if let NodeKind::Variable { sigil, .. } = &object.kind {
if sigil == "$" {
return true;
}
}
}
}
}
_ => {}
}
// Move one level up: the ancestor becomes the child for the next round.
current = parent;
}
false
}
/// Produces one human-readable fix suggestion per issue, in the same order
/// as `issues`.
pub fn get_suggestions(&self, issues: &[ScopeIssue]) -> Vec<String> {
    let suggest = |issue: &ScopeIssue| -> String {
        let variable = &issue.variable_name;
        match issue.kind {
            IssueKind::VariableShadowing => {
                format!("Consider rename '{}' to avoid shadowing", variable)
            }
            IssueKind::UnusedVariable => {
                format!("Remove unused variable '{}' or prefix with underscore", variable)
            }
            IssueKind::UndeclaredVariable => {
                format!("Declare '{}' with 'my', 'our', or 'local'", variable)
            }
            IssueKind::VariableRedeclaration => {
                format!("Remove duplicate declaration of '{}'", variable)
            }
            IssueKind::DuplicateParameter => {
                format!("Remove or rename duplicate parameter '{}'", variable)
            }
            IssueKind::ParameterShadowsGlobal => {
                format!("Rename parameter '{}' to avoid shadowing", variable)
            }
            IssueKind::UnusedParameter => {
                format!("Rename '{}' with underscore or add comment", variable)
            }
            IssueKind::UnquotedBareword => {
                format!("Quote bareword '{}' or declare as filehandle", variable)
            }
            IssueKind::UninitializedVariable => {
                format!("Initialize '{}' before use", variable)
            }
            IssueKind::CaptureVarWithoutRegexMatch => {
                format!(
                    "Perform a regex match (=~ /.../) before using capture variable '{}'",
                    variable
                )
            }
        }
    };

    let mut suggestions = Vec::with_capacity(issues.len());
    for issue in issues {
        suggestions.push(suggest(issue));
    }
    suggestions
}
}
/// Collects the barewords a program imports, so they are not mistaken for
/// unknown identifiers.
///
/// Two sources are considered while walking the whole tree:
///
/// * the arguments of every `use` statement, and
/// * `X->import(...)` calls that sit in the same statement list as a
///   `require X` (or `$var->import(...)` next to `require $var`), unless the
///   statement list is inside an `eval`.
///
/// Arguments may be quoted symbols, `qw` lists, or export tags (`:tag`),
/// which are expanded through `resolve_known_export_tag`.
fn collect_imported_barewords(ast: &Node) -> HashSet<String> {
    /// Records a single import token: expands `:tag` tokens and keeps tokens
    /// that look like plain identifiers.
    fn push_symbol(imported: &mut HashSet<String>, module: &str, token: &str) {
        let symbol = token.trim().trim_matches('\'').trim_matches('"').trim();
        if symbol.is_empty() || symbol == "," {
            return;
        }
        if symbol.starts_with(':') {
            if let Some(expanded) = resolve_known_export_tag(module, symbol) {
                imported.extend(expanded.iter().map(|name| (*name).to_string()));
            }
            return;
        }
        let is_bareword = symbol.bytes().all(|byte| byte.is_ascii_alphanumeric() || byte == b'_')
            && symbol
                .as_bytes()
                .first()
                .is_some_and(|first| first.is_ascii_alphabetic() || *first == b'_');
        if is_bareword {
            imported.insert(symbol.to_string());
        }
    }

    /// Records `raw` either as a `qw` word list or as a single symbol.
    ///
    /// `raw` counts as a `qw` list only when `qw` is not followed by an
    /// identifier character, so a bareword such as `qwerty` is kept whole.
    fn push_symbol_or_qw_list(imported: &mut HashSet<String>, module: &str, raw: &str) {
        let qw_body = raw
            .strip_prefix("qw")
            .filter(|rest| !rest.starts_with(|c: char| c.is_alphanumeric() || c == '_'));
        match qw_body {
            Some(body) => {
                let words = body
                    .trim_start_matches(|c: char| "([{/<|!".contains(c))
                    .trim_end_matches(|c: char| ")]}/|!>".contains(c));
                for token in words.split_whitespace() {
                    push_symbol(imported, module, token);
                }
            }
            None => push_symbol(imported, module, raw),
        }
    }

    /// Returns the module named by a `require Module` / `require "Mod/ule.pm"` call.
    fn require_module_name(node: &Node) -> Option<String> {
        let NodeKind::FunctionCall { name, args } = &node.kind else {
            return None;
        };
        if name != "require" {
            return None;
        }
        let first = args.first()?;
        match &first.kind {
            NodeKind::Identifier { name } => Some(name.clone()),
            NodeKind::String { value, .. } => {
                let cleaned = value.trim_matches('\'').trim_matches('"').trim();
                if cleaned.is_empty() {
                    return None;
                }
                // Turn a file path such as `Foo/Bar.pm` into `Foo::Bar`.
                Some(cleaned.trim_end_matches(".pm").replace('/', "::"))
            }
            _ => None,
        }
    }

    /// Returns the scalar name used in a `require $var` call.
    fn require_variable_name(node: &Node) -> Option<String> {
        let NodeKind::FunctionCall { name, args } = &node.kind else {
            return None;
        };
        if name != "require" {
            return None;
        }
        let first = args.first()?;
        let NodeKind::Variable { sigil, name } = &first.kind else {
            return None;
        };
        (sigil == "$" && !name.contains("::")).then(|| name.clone())
    }

    /// Records the arguments of an `->import(...)` call whose receiver is a
    /// required module or a scalar that was passed to `require`.
    ///
    /// For a scalar receiver the module is unknown, so export tags are
    /// resolved against an empty module name.
    fn record_manual_imports(
        node: &Node,
        required_modules: &HashSet<String>,
        dynamic_require_vars: &HashSet<String>,
        imported: &mut HashSet<String>,
    ) {
        let NodeKind::MethodCall { object, method, args } = &node.kind else {
            return;
        };
        if method != "import" {
            return;
        }
        let module: &str = match &object.kind {
            NodeKind::Identifier { name } if required_modules.contains(name) => name.as_str(),
            NodeKind::Variable { sigil, name }
                if sigil == "$" && dynamic_require_vars.contains(name) =>
            {
                ""
            }
            _ => return,
        };
        for arg in args {
            match &arg.kind {
                NodeKind::String { value, .. } => push_symbol(imported, module, value),
                NodeKind::Identifier { name } => push_symbol_or_qw_list(imported, module, name),
                NodeKind::ArrayLiteral { elements } => {
                    for el in elements {
                        if let NodeKind::String { value, .. } = &el.kind {
                            push_symbol(imported, module, value);
                        }
                    }
                }
                _ => {}
            }
        }
    }

    /// Unwraps an expression statement to the expression it contains.
    fn inner_node(stmt: &Node) -> &Node {
        if let NodeKind::ExpressionStatement { expression } = &stmt.kind {
            expression.as_ref()
        } else {
            stmt
        }
    }

    /// Recursive walk; `in_eval` is true for everything below an `Eval` node.
    fn visit(node: &Node, imported: &mut HashSet<String>, in_eval: bool) {
        if let NodeKind::Use { module, args, .. } = &node.kind {
            for arg in args {
                push_symbol_or_qw_list(imported, module, arg);
            }
        } else if !in_eval {
            if let NodeKind::Program { statements } | NodeKind::Block { statements } = &node.kind {
                let required_modules: HashSet<String> = statements
                    .iter()
                    .filter_map(|stmt| require_module_name(inner_node(stmt)))
                    .collect();
                let dynamic_require_vars: HashSet<String> = statements
                    .iter()
                    .filter_map(|stmt| require_variable_name(inner_node(stmt)))
                    .collect();
                if !required_modules.is_empty() || !dynamic_require_vars.is_empty() {
                    for stmt in statements {
                        record_manual_imports(
                            inner_node(stmt),
                            &required_modules,
                            &dynamic_require_vars,
                            imported,
                        );
                    }
                }
            }
        }
        let child_in_eval = in_eval || matches!(&node.kind, NodeKind::Eval { .. });
        for child in node.children() {
            visit(child, imported, child_in_eval);
        }
    }

    let mut imported = HashSet::new();
    visit(ast, &mut imported, false);
    imported
}
/// Returns `true` when `name` consists only of ASCII digits and is not
/// exactly `"0"` (so `1`, `2`, `10`, ... qualify, as do `00` and `01`).
#[inline]
pub(super) fn is_capture_variable(name: &str) -> bool {
    match name {
        "" | "0" => false,
        digits => digits.bytes().all(|byte| byte.is_ascii_digit()),
    }
}
/// Reports whether `sigil`+`name` is one of the built-in global names this
/// analyzer knows about.
///
/// * Names starting with a lowercase ASCII letter are rejected at once,
///   except for exactly `a` and `b`.
/// * With an empty sigil only the standard filehandle names are accepted.
/// * For `$`, a fixed list of names is accepted, plus caret names (`^NAME`
///   or `{^NAME}` with uppercase letters/underscores) and all-digit names.
/// * For `@` and `%`, fixed lists are accepted.
/// * Every other sigil gives `false`.
pub(super) fn is_builtin_global(sigil: &str, name: &str) -> bool {
    const SCALARS: &[&str] = &[
        "_", "!", "@", "?", "^", "$", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ".", ",",
        "/", "\\", "\"", ";", "%", "=", "-", "~", "|", "&", "`", "'", "+", "[", "]", "^A", "^C",
        "^D", "^E", "^F", "^H", "^I", "^L", "^M", "^N", "^O", "^P", "^R", "^S", "^T", "^V", "^W",
        "^X", "ARGV", "VERSION", "AUTOLOAD", "a", "b", "EVAL_ERROR", "ERRNO",
        "EXTENDED_OS_ERROR", "CHILD_ERROR", "PROCESS_ID", "PROGRAM_NAME", "PERL_VERSION",
        "OLD_PERL_VERSION", "PL_sv_yes", "PL_sv_no", "PL_sv_undef",
    ];
    const ARRAYS: &[&str] = &["_", "+", "-", "INC", "ARGV", "EXPORT", "EXPORT_OK", "ISA"];
    const HASHES: &[&str] = &["_", "+", "-", "!", "ENV", "INC", "SIG", "EXPORT_TAGS"];
    const FILEHANDLES: &[&str] = &["STDIN", "STDOUT", "STDERR", "DATA", "ARGVOUT"];

    // Quick rejection: the only lowercase-initial names that can match are `a` and `b`.
    let starts_lowercase = name.as_bytes().first().is_some_and(u8::is_ascii_lowercase);
    if starts_lowercase && name != "a" && name != "b" {
        return false;
    }

    let Some(&sigil_byte) = sigil.as_bytes().first() else {
        return FILEHANDLES.contains(&name);
    };

    match sigil_byte {
        b'$' => {
            if SCALARS.contains(&name) {
                return true;
            }
            // Accept both `^NAME` and the braced spelling `{^NAME}`.
            let unbraced =
                name.strip_prefix('{').and_then(|s| s.strip_suffix('}')).unwrap_or(name);
            let is_caret_name = unbraced.strip_prefix('^').is_some_and(|rest| {
                !rest.is_empty() && rest.bytes().all(|c| c.is_ascii_uppercase() || c == b'_')
            });
            let is_all_digits = !name.is_empty() && name.bytes().all(|c| c.is_ascii_digit());
            is_caret_name || is_all_digits
        }
        b'@' => ARRAYS.contains(&name),
        b'%' => HASHES.contains(&name),
        _ => false,
    }
}
/// Returns `true` when `name` appears in the fixed table of recognised
/// function-like names below.
///
/// The empty string is never known. The three `PL_sv_*` names are accepted
/// explicitly; every other name whose first byte is an ASCII uppercase letter
/// is rejected before the table is consulted.
pub(super) fn is_known_function(name: &str) -> bool {
let first = match name.bytes().next() {
Some(byte) => byte,
None => return false,
};
// Must be checked before the uppercase rejection, since these start with `P`.
if matches!(name, "PL_sv_yes" | "PL_sv_no" | "PL_sv_undef") {
return true;
}
if first.is_ascii_uppercase() {
return false;
}
matches!(
name,
// I/O
"print" | "printf" | "say" | "open" | "close" | "read" | "write" | "seek" | "tell"
| "eof" | "fileno" | "binmode" | "sysopen" | "sysread" | "syswrite" | "sysclose"
| "select"
// Strings and quote-like operators
| "chomp" | "chop" | "chr" | "crypt" | "fc" | "hex" | "index" | "lc" | "lcfirst"
| "length" | "oct" | "ord" | "pack" | "q" | "qq" | "qr" | "quotemeta" | "qw" | "qx"
| "reverse" | "rindex" | "sprintf" | "substr" | "tr" | "uc" | "ucfirst" | "unpack"
// Arrays and lists
| "pop" | "push" | "shift" | "unshift" | "splice" | "split" | "join" | "grep" | "map"
| "sort"
// Hashes
| "delete" | "each" | "exists" | "keys" | "values"
// Control flow
| "die" | "exit" | "return" | "goto" | "last" | "next" | "redo" | "continue" | "break"
| "given" | "when" | "default"
// File tests
| "stat" | "lstat" | "-r" | "-w" | "-x" | "-o" | "-R" | "-W" | "-X" | "-O" | "-e"
| "-z" | "-s" | "-f" | "-d" | "-l" | "-p" | "-S" | "-b" | "-c" | "-t" | "-u" | "-g"
| "-k" | "-T" | "-B" | "-M" | "-A" | "-C"
// Processes and time
| "system" | "exec" | "fork" | "wait" | "waitpid" | "kill" | "sleep" | "alarm"
| "getpgrp" | "getppid" | "getpriority" | "setpgrp" | "setpriority" | "time" | "times"
| "localtime" | "gmtime"
// Numeric
| "abs" | "atan2" | "cos" | "exp" | "int" | "log" | "rand" | "sin" | "sqrt" | "srand"
// Miscellaneous and declarators
| "defined" | "undef" | "ref" | "bless" | "tie" | "tied" | "untie" | "eval" | "caller"
| "import" | "require" | "use" | "do" | "package" | "sub" | "my" | "our" | "local"
| "state" | "scalar" | "wantarray" | "warn"
)
}
/// Maps a builtin's name to the zero-based argument positions singled out for
/// it in the table below; names not in the table map to an empty slice.
pub(super) fn builtin_declaration_arg_positions(name: &str) -> &'static [usize] {
const FIRST: &[usize] = &[0];
const SECOND: &[usize] = &[1];
const FIRST_AND_SECOND: &[usize] = &[0, 1];
const NONE: &[usize] = &[];
match name {
"pipe" | "socketpair" => FIRST_AND_SECOND,
"read" | "sysread" | "recv" | "shmread" => SECOND,
"open" | "opendir" | "sysopen" | "socket" | "accept" | "dbmopen" => FIRST,
_ => NONE,
}
}
/// Returns `true` when `name` is one of the builtins listed in the table
/// below (string, numeric and output functions).
pub(super) fn is_topic_defaulting_builtin(name: &str) -> bool {
const TOPIC_DEFAULTING: [&str; 20] = [
// String functions
"chomp", "chop", "chr", "hex", "lc", "lcfirst", "length", "oct", "ord", "uc", "ucfirst",
// Numeric functions
"abs", "int", "log", "sqrt", "cos", "sin", "exp",
// Output
"print", "say",
];
TOPIC_DEFAULTING.iter().any(|candidate| *candidate == name)
}
/// Returns `true` only for `chomp` and `chop`.
pub(super) fn is_topic_modifying_builtin(name: &str) -> bool {
name == "chomp" || name == "chop"
}
/// Returns `true` when `source` begins with a sigil (`@`, `%` or `$`) applied
/// directly to a `$`-prefixed name, either bare (`@$`) or braced (`@{$`).
fn is_explicit_scalar_reference_deref(source: &str) -> bool {
const DEREF_PREFIXES: [&str; 6] = ["@$", "%$", "$$", "@{$", "%{$", "${$"];
DEREF_PREFIXES.iter().any(|prefix| source.starts_with(*prefix))
}
/// Strips one enclosing `{`…`}` pair (only when both braces are present) and
/// then one leading `$` from `name`, returning the remaining slice.
fn normalize_scalar_deref_base_name(name: &str) -> &str {
let mut base = name;
if let Some(after_open) = base.strip_prefix('{') {
if let Some(inside) = after_open.strip_suffix('}') {
base = inside;
}
}
match base.strip_prefix('$') {
Some(without_sigil) => without_sigil,
None => base,
}
}
/// Returns `true` for the explicitly listed handle and token names, and for
/// any non-empty name made up solely of ASCII uppercase letters and
/// underscores.
#[allow(dead_code)]
fn is_filehandle(name: &str) -> bool {
if matches!(
name,
"STDIN" | "STDOUT" | "STDERR" | "ARGV" | "ARGVOUT" | "DATA" | "STDHANDLE"
| "__PACKAGE__" | "__FILE__" | "__LINE__" | "__SUB__" | "__END__" | "__DATA__"
) {
return true;
}
!name.is_empty() && name.bytes().all(|b| b.is_ascii_uppercase() || b == b'_')
}