use regex::Regex;
use std::collections::HashSet;
/// Variable names that are treated as always defined.
///
/// Each name appears exactly once; the grouping comments are only a reading
/// aid and have no effect on behaviour.
const BUILTIN_VARIABLES: &[&str] = &[
    // General shell, terminal and mail/history settings.
    "HOME", "PATH", "PWD", "USER", "SHELL", "TERM", "LANG", "LC_ALL", "OLDPWD", "IFS",
    "OPTARG", "OPTIND", "PPID", "CDPATH", "MAILCHECK", "PS1", "PS2", "PS3", "PS4", "ENV",
    "FCEDIT", "HISTFILE", "HISTSIZE", "MAIL", "MAILPATH", "NLSPATH", "TMOUT", "COLUMNS", "LINES",
    // User and host identity.
    "EUID", "UID", "GROUPS", "HOSTNAME", "HOSTTYPE", "OSTYPE", "MACHTYPE",
    // Interpreter state.
    "BASH", "BASH_VERSION", "BASH_VERSINFO", "BASH_SUBSHELL", "BASHPID", "RANDOM", "SECONDS",
    "LINENO", "SHLVL", "REPLY", "EPOCHSECONDS", "EPOCHREALTIME", "SRANDOM",
    // Call stack, command and argument introspection.
    "FUNCNAME", "BASH_SOURCE", "BASH_LINENO", "FUNCNEST", "BASH_COMMAND",
    "BASH_EXECUTION_STRING", "BASH_ARGC", "BASH_ARGV", "BASH_ARGV0", "BASH_REMATCH", "MAPFILE",
    // Line editing and pipelines.
    "READLINE_LINE", "READLINE_POINT", "READLINE_MARK", "PIPESTATUS",
    // Programmable completion.
    "COMP_WORDS", "COMP_CWORD", "COMP_LINE", "COMP_POINT", "COMP_TYPE", "COMP_KEY", "COMPREPLY",
    // Shell options, history control and miscellaneous.
    "SHELLOPTS", "BASHOPTS", "BASH_COMPAT", "HISTCMD", "HISTCONTROL", "HISTIGNORE",
    "HISTTIMEFORMAT", "DIRSTACK", "COPROC",
    // Commonly exported session environment.
    "TMPDIR", "TEMP", "TMP", "EDITOR", "VISUAL", "PAGER", "BROWSER", "DISPLAY", "XAUTHORITY",
    "DBUS_SESSION_BUS_ADDRESS",
    "XDG_CONFIG_HOME", "XDG_DATA_HOME", "XDG_CACHE_HOME", "XDG_RUNTIME_DIR", "XDG_SESSION_TYPE",
    "XDG_CURRENT_DESKTOP",
    "LOGNAME", "HOSTFILE", "INPUTRC", "COLORTERM", "TERM_PROGRAM",
    // Locale categories.
    "LC_CTYPE", "LC_MESSAGES", "LC_NUMERIC", "LC_TIME", "LC_COLLATE", "LC_MONETARY",
];

/// Builds the set of variable names that never need an explicit assignment.
///
/// The returned set is what [`is_special_or_builtin`] and
/// [`find_undefined_variables`] expect as their `builtins` argument.
/// A fresh `HashSet` is allocated on every call, so callers that check many
/// names should build it once and reuse it.
pub fn get_builtins() -> HashSet<&'static str> {
    BUILTIN_VARIABLES.iter().copied().collect()
}
/// Reports whether `var_name` never needs an assignment in the script.
///
/// A name qualifies when any of the following holds:
/// * it is a member of `builtins`;
/// * it consists solely of ASCII digits (positional parameters such as `1`
///   or `10`) — note that the empty string also satisfies this check;
/// * it is one of the special parameters `@ * # ? $ ! 0 -`.
pub fn is_special_or_builtin(var_name: &str, builtins: &HashSet<&str>) -> bool {
    let digits_only = var_name.bytes().all(|b| b.is_ascii_digit());
    let special_parameter = matches!(var_name, "@" | "*" | "#" | "?" | "$" | "!" | "0" | "-");
    builtins.contains(var_name) || digits_only || special_parameter
}
/// Reports whether any non-comment line of `source` appears to source
/// another file.
///
/// A line counts when, after trimming, it either begins with `source ` or
/// `. `, or contains one of those commands chained after `;`, `&&` or `||`.
/// Lines whose first non-blank character is `#` are ignored. This is a
/// purely textual check; quoting is not taken into account.
pub fn has_source_commands(source: &str) -> bool {
    const LEADING: [&str; 2] = ["source ", ". "];
    const CHAINED: [&str; 6] = [
        "; source ",
        "; . ",
        "&& source ",
        "&& . ",
        "|| source ",
        "|| . ",
    ];

    source
        .lines()
        .map(str::trim)
        .filter(|stmt| !stmt.starts_with('#'))
        .any(|stmt| {
            LEADING.iter().any(|&keyword| stmt.starts_with(keyword))
                || CHAINED.iter().any(|&sequence| stmt.contains(sequence))
        })
}
/// Reports whether the first non-whitespace character of `line` is `#`.
pub fn is_comment_line(line: &str) -> bool {
    matches!(line.trim_start().chars().next(), Some('#'))
}
/// Reports whether `var_name` follows the all-uppercase naming convention.
///
/// Every character must be an uppercase letter, an ASCII digit or `_`, so
/// names such as `PATH`, `LOG_LEVEL` and `DB_HOST2` qualify while `Path` or
/// `db_host` do not. Digits are accepted because the variable names matched
/// elsewhere in this file (`[A-Za-z_][A-Za-z0-9_]*`) may contain them;
/// rejecting them would stop conventional names like `API_V2_URL` from being
/// recognised as uppercase.
///
/// The empty string and names made only of `_`/digits return `true`, since
/// no character violates the rule.
pub fn is_uppercase_var(var_name: &str) -> bool {
    var_name
        .chars()
        .all(|c| c.is_uppercase() || c.is_ascii_digit() || c == '_')
}
/// Reports whether the variable reference ending at byte offset `match_end`
/// of `line` is a brace expansion that continues with a default/alternate/
/// error operator (`:-`, `:=`, `:+`, `:?`, `-`, `=`, `+`, `?`).
///
/// `match_end` is expected to be the end of a `$NAME`, `${NAME` or `${NAME}`
/// match. An operator is only meaningful inside a still-open `${NAME`, so:
/// * a reference that already ended with `}` (`${VAR}-suffix`) is **not** an
///   operator expansion — the `-` is ordinary text;
/// * an unbraced reference (`$VAR-suffix`, `$KEY=value`) is **not** an
///   operator expansion either.
///
/// When the name is not preceded by `$` at all (for example when only the
/// tail of an expansion is passed in), the decision is based solely on the
/// text that follows `match_end`.
///
/// Returns `false` instead of panicking when `match_end` is out of range or
/// does not fall on a character boundary.
pub fn is_parameter_expansion_with_operator(line: &str, match_end: usize) -> bool {
    let (Some(matched), Some(remaining)) = (line.get(..match_end), line.get(match_end..)) else {
        return false;
    };

    // `${NAME}` is already closed: whatever follows is literal text.
    if matched.ends_with('}') {
        return false;
    }

    // Step back over the variable name to see how the reference was opened.
    let before_name = matched.trim_end_matches(|c: char| c.is_ascii_alphanumeric() || c == '_');
    if before_name.ends_with('$') {
        // Plain `$NAME`: operators do not apply without braces.
        return false;
    }

    // The operator may be written with or without a leading colon.
    let operator = remaining.strip_prefix(':').unwrap_or(remaining);
    operator.starts_with(|c: char| matches!(c, '-' | '=' | '+' | '?'))
}
/// Reports whether an already-trimmed line closes a `case` statement:
/// exactly `esac`, or `esac` immediately followed by `;` or a space.
fn is_esac_line(trimmed: &str) -> bool {
    match trimmed.strip_prefix("esac") {
        Some(rest) => rest.is_empty() || rest.starts_with(';') || rest.starts_with(' '),
        None => false,
    }
}
/// Collects the variable names assigned inside one `case … esac` block.
///
/// Nothing is returned unless some line of the block looks like a default
/// arm (trimmed text starts with `* )` or contains `*)`). Lines that end in
/// `)` and contain no `=` are treated as bare pattern labels and skipped.
/// On every other line, capture group 1 of each `assign_pattern` match is
/// recorded; names may repeat in the result.
fn extract_case_block_vars(case_block: &[&str], assign_pattern: &Regex) -> Vec<String> {
    let default_arm_present = case_block
        .iter()
        .map(|raw| raw.trim())
        // `starts_with("*)")` is implied by `contains("*)")`.
        .any(|arm| arm.contains("*)") || arm.starts_with("* )"));
    if !default_arm_present {
        return Vec::new();
    }

    let mut names = Vec::new();
    for raw in case_block.iter().copied() {
        let arm = raw.trim();
        let bare_pattern_label = arm.ends_with(')') && !arm.contains('=');
        if bare_pattern_label {
            continue;
        }
        names.extend(
            assign_pattern
                .captures_iter(raw)
                .filter_map(|caps| caps.get(1))
                .map(|name| name.as_str().to_string()),
        );
    }
    names
}
/// Gathers every variable assigned inside `case` statements that have a
/// default arm.
///
/// The source is scanned line by line. A trimmed line starting with `case `
/// and containing ` in` opens a block; a line accepted by `is_esac_line`
/// closes one. Nesting is tracked by depth, and only when the outermost
/// block closes is the whole span (from its `case` line through the matching
/// `esac`) handed to `extract_case_block_vars`.
///
/// # Panics
///
/// Only if the constant assignment regex fails to compile.
#[allow(clippy::expect_used)]
pub fn collect_case_statement_variables(source: &str) -> HashSet<String> {
    let assign_pattern =
        Regex::new(r"([A-Za-z_][A-Za-z0-9_]*)=").expect("valid assignment regex pattern");
    let lines: Vec<&str> = source.lines().collect();

    let mut collected: HashSet<String> = HashSet::new();
    // Index of the line that opened the current outermost block, if any.
    let mut outer_start: Option<usize> = None;
    let mut depth: usize = 0;

    for (idx, raw) in lines.iter().enumerate() {
        let text = raw.trim();

        if text.starts_with("case ") && text.contains(" in") {
            if outer_start.is_none() {
                outer_start = Some(idx);
            }
            depth += 1;
        }

        if !is_esac_line(text) {
            continue;
        }

        // A stray `esac` with no open block leaves the depth at zero.
        depth = depth.saturating_sub(1);
        if depth > 0 {
            continue;
        }
        if let Some(start) = outer_start.take() {
            collected.extend(extract_case_block_vars(&lines[start..=idx], &assign_pattern));
        }
    }

    collected
}
/// Reports whether `line`, once trimmed, opens a `case` statement: it must
/// start with `case ` and contain ` in` somewhere.
pub fn is_case_start(line: &str) -> bool {
    let stmt = line.trim();
    if !stmt.starts_with("case ") {
        return false;
    }
    stmt.contains(" in")
}
/// Reports whether `line`, once trimmed, closes a `case` statement: it is
/// exactly `esac`, or begins with `esac;` or `esac `.
pub fn is_case_end(line: &str) -> bool {
    let stmt = line.trim();
    stmt == "esac"
        || ["esac;", "esac "]
            .iter()
            .any(|&prefix| stmt.starts_with(prefix))
}
/// Reports whether any line of `block` looks like a default `case` arm:
/// after trimming it starts with `* )` or contains `*)` anywhere.
pub fn case_has_default(block: &[&str]) -> bool {
    for raw in block {
        let arm = raw.trim();
        // `starts_with("*)")` is implied by `contains("*)")`.
        if arm.contains("*)") || arm.starts_with("* )") {
            return true;
        }
    }
    false
}
/// Reports whether `line` is a bare `case` pattern label: once trimmed it
/// ends with `)` and contains no `=`.
pub fn is_case_pattern_line(line: &str) -> bool {
    let label = line.trim();
    if label.contains('=') {
        return false;
    }
    label.ends_with(')')
}
/// Returns the text following the first stand-alone `read ` keyword in
/// `line`, or `None` when there is none.
///
/// An occurrence that is the tail of a longer word (`spread `, `thread `)
/// is skipped.
fn read_command_arguments(line: &str) -> Option<&str> {
    line.match_indices("read ").find_map(|(pos, keyword)| {
        let inside_longer_word = line[..pos]
            .chars()
            .next_back()
            .is_some_and(|c| c.is_alphanumeric() || c == '_');
        if inside_longer_word {
            None
        } else {
            Some(&line[pos + keyword.len()..])
        }
    })
}

/// Splits `text` at whitespace that is outside single/double quotes and not
/// backslash-escaped. Quote characters are kept in the returned words.
fn split_unquoted_whitespace(text: &str) -> Vec<&str> {
    let mut words = Vec::new();
    let mut word_start: Option<usize> = None;
    let mut open_quote: Option<char> = None;
    let mut escaped = false;

    for (pos, ch) in text.char_indices() {
        if escaped {
            // The character after a backslash is always part of the word.
            escaped = false;
            continue;
        }
        match open_quote {
            Some(quote) => {
                if ch == quote {
                    open_quote = None;
                } else if ch == '\\' && quote == '"' {
                    escaped = true;
                }
            }
            None if ch.is_whitespace() => {
                if let Some(start) = word_start.take() {
                    words.push(&text[start..pos]);
                }
                continue;
            }
            None => match ch {
                '\\' => escaped = true,
                '"' | '\'' => open_quote = Some(ch),
                _ => {}
            },
        }
        if word_start.is_none() {
            word_start = Some(pos);
        }
    }
    if let Some(start) = word_start {
        words.push(&text[start..]);
    }
    words
}

/// Reports whether `candidate` is shaped like a variable name: a letter or
/// `_` followed by letters, digits or `_`.
fn is_read_target_name(candidate: &str) -> bool {
    let mut chars = candidate.chars();
    chars.next().is_some_and(|c| c.is_alphabetic() || c == '_')
        && chars.all(|c| c.is_alphanumeric() || c == '_')
}

/// Extracts the variable names that a `read` command on `line` assigns.
///
/// Parsing steps:
/// 1. Locate the first stand-alone `read ` keyword.
/// 2. Split the rest of the line into words, keeping quoted strings (such as
///    a `-p "Enter name: "` prompt) together.
/// 3. Skip leading option words. Flags may be combined (`-rp`); the flags
///    `a d i n N p t u` take a value, either attached (`-n1`) or as the next
///    word. The value of `-a` is the array being assigned, so it is recorded.
/// 4. Collect the following words while they look like variable names.
///    Trailing `;` is stripped, and a word that carried a `;` ends the
///    command, so `while read line; do` yields only `line`.
///
/// Returns an empty vector when the line contains no `read` command.
pub fn extract_read_variables(line: &str) -> Vec<String> {
    let Some(arguments) = read_command_arguments(line) else {
        return Vec::new();
    };
    let words = split_unquoted_whitespace(arguments);
    let mut vars = Vec::new();
    let mut next = 0;

    // Leading options.
    while let Some(flags) = words.get(next).and_then(|word| word.strip_prefix('-')) {
        next += 1;
        let value_flag = flags
            .char_indices()
            .find(|&(_, flag)| matches!(flag, 'a' | 'd' | 'i' | 'n' | 'N' | 'p' | 't' | 'u'));
        let Some((pos, flag)) = value_flag else {
            continue;
        };

        // Everything after a value-taking flag in the same word is its value;
        // otherwise the value is the next word.
        let attached = &flags[pos + flag.len_utf8()..];
        let value = if attached.is_empty() {
            let following = words.get(next).copied();
            next += 1;
            following
        } else {
            Some(attached)
        };

        if flag == 'a' {
            if let Some(token) = value {
                let name = token.trim_end_matches(';');
                if is_read_target_name(name) {
                    vars.push(name.to_string());
                }
                if name.len() != token.len() {
                    // `;` terminated the command right after the array name.
                    return vars;
                }
            }
        }
    }

    // Target variable names.
    for token in words.iter().skip(next) {
        let name = token.trim_end_matches(';');
        if !is_read_target_name(name) {
            break;
        }
        vars.push(name.to_string());
        if name.len() != token.len() {
            // A trailing `;` ends the read command.
            break;
        }
    }
    vars
}
/// Compiled regular expressions used when scanning a script for variable
/// definitions and uses. The descriptions below reflect the patterns built
/// by `create_patterns`; in each one, capture group 1 is the variable name.
pub struct Patterns {
    /// Assignment at the start of a line (`NAME=`), optionally preceded by
    /// `local`, `readonly`, `export`, `declare` or `typeset` and one
    /// `-flags` group.
    pub assign: Regex,
    /// Variable reference written as `$NAME`, `${NAME` or `${NAME}`.
    pub use_: Regex,
    /// Loop header of the form `for NAME in`.
    pub for_loop: Regex,
    /// C-style loop header of the form `for ((NAME =`.
    pub c_style_for: Regex,
    /// `case $NAME in` / `case ${NAME} in` header.
    pub case_expr: Regex,
}
/// Compiles the regular expressions used to track variable definitions and
/// uses, and bundles them into a [`Patterns`] value.
///
/// # Panics
///
/// Only if one of the constant patterns fails to compile.
#[allow(clippy::unwrap_used)]
pub fn create_patterns() -> Patterns {
    // All patterns are literals, so a compile failure is a programming error.
    let compile = |pattern: &str| Regex::new(pattern).unwrap();

    let assign = compile(
        r"^\s*(?:(?:local|readonly|export|declare|typeset)(?:\s+-[a-zA-Z]+)?\s+)?([A-Za-z_][A-Za-z0-9_]*)=",
    );
    let use_ = compile(r"\$\{?([A-Za-z_][A-Za-z0-9_]*)\}?");
    let for_loop = compile(r"\bfor\s+([A-Za-z_][A-Za-z0-9_]*)\s+in\b");
    let c_style_for = compile(r"\bfor\s*\(\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*=");
    let case_expr = compile(r"\bcase\s+\$\{?([A-Za-z_][A-Za-z0-9_]*)\}?\s+in\b");

    Patterns {
        assign,
        use_,
        for_loop,
        c_style_for,
        case_expr,
    }
}
/// Scans `source` and returns the variables it defines and the variables it
/// references.
///
/// The first element is the set of names considered assigned: matches of the
/// `assign`, `for_loop`, `c_style_for` and `case_expr` patterns plus the
/// targets reported by `extract_read_variables`. The second element lists
/// each reference found by the `use_` pattern as `(name, line, column)`,
/// with 1-based line numbers and a 1-based column derived from the match's
/// byte offset.
///
/// Lines accepted by `is_comment_line` are skipped entirely. A reference is
/// not recorded when `is_parameter_expansion_with_operator` accepts it, nor
/// when the script contains source commands and the name is all uppercase.
pub fn collect_variable_info(
    source: &str,
    patterns: &Patterns,
) -> (HashSet<String>, Vec<(String, usize, usize)>) {
    let skip_uppercase = has_source_commands(source);
    let defining_patterns = [
        &patterns.assign,
        &patterns.for_loop,
        &patterns.c_style_for,
        &patterns.case_expr,
    ];

    let mut assigned: HashSet<String> = HashSet::new();
    let mut used_vars: Vec<(String, usize, usize)> = Vec::new();

    for (line_num, line) in (1..).zip(source.lines()) {
        if is_comment_line(line) {
            continue;
        }

        // Definitions: every pattern contributes its first capture group.
        for pattern in defining_patterns.iter() {
            assigned.extend(
                pattern
                    .captures_iter(line)
                    .filter_map(|cap| cap.get(1).map(|name| name.as_str().to_string())),
            );
        }
        assigned.extend(extract_read_variables(line));

        // Uses.
        for cap in patterns.use_.captures_iter(line) {
            // Both groups always participate in a successful match.
            let (Some(whole), Some(name)) = (cap.get(0), cap.get(1)) else {
                continue;
            };
            let var_name = name.as_str();
            if is_parameter_expansion_with_operator(line, whole.end()) {
                continue;
            }
            if skip_uppercase && is_uppercase_var(var_name) {
                continue;
            }
            used_vars.push((var_name.to_string(), line_num, whole.start() + 1));
        }
    }

    (assigned, used_vars)
}
/// Filters `used_vars` down to the references that have no definition.
///
/// A `(name, line, column)` entry is kept when the name is absent from
/// `assigned` and `is_special_or_builtin` rejects it. Entries are returned
/// in their original order, and a name used several times appears once per
/// use.
pub fn find_undefined_variables(
    assigned: &HashSet<String>,
    used_vars: &[(String, usize, usize)],
    builtins: &HashSet<&str>,
) -> Vec<(String, usize, usize)> {
    used_vars
        .iter()
        .filter(|(name, _, _)| !assigned.contains(name) && !is_special_or_builtin(name, builtins))
        .cloned()
        .collect()
}
// Unit tests for this module live in a separate source file (named by the
// `#[path]` attribute) and are compiled only for test builds.
#[cfg(test)]
#[path = "sc2154_logic_tests_get_builtins.rs"]
mod tests_extracted;