use crate::detectors::base::{Detector, DetectorConfig};
use crate::graph::GraphStore;
use crate::models::{Finding, Severity};
use anyhow::Result;
use regex::Regex;
use std::collections::HashSet;
use std::path::PathBuf;
use std::sync::OnceLock;
use tracing::info;
static GLOBAL_PATTERN: OnceLock<Regex> = OnceLock::new();
static VAR_NAME: OnceLock<Regex> = OnceLock::new();
fn global_pattern() -> &'static Regex {
GLOBAL_PATTERN.get_or_init(|| {
Regex::new(r"^(var\s+\w+\s*=|let\s+\w+\s*=|global\s+\w+|\w+\s*=\s*[^=])").expect("valid regex")
})
}
fn var_name_pattern() -> &'static Regex {
VAR_NAME.get_or_init(|| Regex::new(r"^(?:var|let|global)\s+(\w+)").expect("valid regex"))
}
pub struct GlobalVariablesDetector {
repository_path: PathBuf,
max_findings: usize,
}
impl GlobalVariablesDetector {
pub fn new(repository_path: impl Into<PathBuf>) -> Self {
Self {
repository_path: repository_path.into(),
max_findings: 50,
}
}
fn extract_var_name(line: &str) -> Option<String> {
if let Some(caps) = var_name_pattern().captures(line.trim()) {
return caps.get(1).map(|m| m.as_str().to_string());
}
if line.trim().starts_with("global ") {
return line
.trim()
.strip_prefix("global ")
.and_then(|s| s.split_whitespace().next())
.map(|s| s.to_string());
}
None
}
fn count_usages(&self, content: &str, var_name: &str, declaration_line: usize) -> usize {
let mut count = 0;
let var_pattern = format!(r"\b{}\b", regex::escape(var_name));
if let Ok(re) = Regex::new(&var_pattern) {
for (i, line) in content.lines().enumerate() {
if i == declaration_line - 1 {
continue;
} if re.is_match(line) {
count += 1;
}
}
}
count
}
fn check_cross_module_usage(
&self,
graph: &dyn crate::graph::GraphQuery,
file_path: &str,
_var_name: &str,
) -> bool {
let file_name = file_path.rsplit('/').next().unwrap_or(file_path);
let module_name = file_name.split('.').next().unwrap_or("");
for (_, import_target) in graph.get_imports() {
if import_target.contains(module_name) {
return true;
}
}
false
}
fn create_finding(
&self,
path: &std::path::Path,
line: usize,
var_name: &str,
usage_count: usize,
is_cross_module: bool,
) -> Finding {
let severity = if is_cross_module && usage_count > 5 {
Severity::High } else if is_cross_module || usage_count > 10 {
Severity::Medium
} else {
Severity::Low
};
let mut notes = Vec::new();
if usage_count > 0 {
notes.push(format!("📊 Used {} times in this file", usage_count));
}
if is_cross_module {
notes.push("⚠️ Module is imported by others - global may leak".to_string());
}
let context_notes = if notes.is_empty() {
String::new()
} else {
format!("\n\n**Impact Analysis:**\n{}", notes.join("\n"))
};
let suggestion = if is_cross_module {
let capitalized = format!(
"{}{}",
var_name
.chars()
.next()
.map(|c| c.to_uppercase().to_string())
.unwrap_or_default(),
var_name.chars().skip(1).collect::<String>()
);
format!(
"Cross-module globals are especially dangerous. Consider:\n\
1. Export a getter/setter function instead: `get{0}()`, `set{0}()`\n\
2. Encapsulate in a class with controlled access\n\
3. Use dependency injection",
capitalized
)
} else if usage_count > 5 {
"Many usages - consider:\n\
1. Encapsulate in a module with getter/setter\n\
2. Convert to a class instance\n\
3. Pass as parameter instead"
.to_string()
} else {
"Use const if immutable, or encapsulate in a module/class.".to_string()
};
Finding {
id: String::new(),
detector: "GlobalVariablesDetector".to_string(),
severity,
title: format!("Global mutable variable: {}", var_name),
description: format!(
"Global mutable state '{}' makes code hard to reason about.{}",
var_name, context_notes
),
affected_files: vec![path.to_path_buf()],
line_start: Some(line as u32),
line_end: Some(line as u32),
suggested_fix: Some(suggestion),
estimated_effort: Some(if is_cross_module {
"30 minutes".to_string()
} else {
"15 minutes".to_string()
}),
category: Some("code-quality".to_string()),
cwe_id: None,
why_it_matters: Some(
"Global state causes hidden dependencies between functions. \
Changes to globals can have unexpected effects throughout the codebase."
.to_string(),
),
..Default::default()
}
}
}
impl Detector for GlobalVariablesDetector {
fn name(&self) -> &'static str {
"global-variables"
}
fn description(&self) -> &'static str {
"Detects mutable global variables"
}
fn detect(&self, graph: &dyn crate::graph::GraphQuery) -> Result<Vec<Finding>> {
let mut findings = vec![];
let walker = ignore::WalkBuilder::new(&self.repository_path)
.hidden(false)
.git_ignore(true)
.build();
for entry in walker.filter_map(|e| e.ok()) {
if findings.len() >= self.max_findings {
break;
}
let path = entry.path();
if !path.is_file() {
continue;
}
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
if !matches!(ext, "py" | "js" | "ts") {
continue;
}
let path_str = path.to_string_lossy().to_string();
if crate::detectors::content_classifier::is_likely_bundled_path(&path_str) {
continue;
}
if let Some(content) = crate::cache::global_cache().get_content(path) {
if crate::detectors::content_classifier::is_bundled_code(&content)
|| crate::detectors::content_classifier::is_minified_code(&content)
|| crate::detectors::content_classifier::is_fixture_code(&path_str, &content)
{
continue;
}
let mut py_indent_stack: Vec<usize> = Vec::new(); let mut py_in_function = false;
for (i, line) in content.lines().enumerate() {
let trimmed = line.trim();
if ext == "py" {
let indent = line.len() - line.trim_start().len();
if !trimmed.is_empty() {
py_indent_stack.retain(|&lvl| lvl < indent);
py_in_function = !py_indent_stack.is_empty();
}
if trimmed.starts_with("def ") || trimmed.starts_with("async def ") {
py_indent_stack.push(indent);
py_in_function = true;
}
}
if trimmed.starts_with("const ")
|| trimmed.starts_with("import ")
|| trimmed.starts_with("from ")
|| trimmed.starts_with("class ")
|| trimmed.starts_with("#")
|| trimmed.starts_with("//")
|| trimmed.is_empty()
|| trimmed.starts_with("export ")
{
continue;
}
let is_global = if ext == "py" {
py_in_function && trimmed.starts_with("global ")
} else {
let at_module_scope = !line.starts_with(' ') && !line.starts_with('\t');
at_module_scope
&& (trimmed.starts_with("var ") || trimmed.starts_with("let "))
};
if is_global {
if let Some(var_name) = Self::extract_var_name(trimmed) {
let usage_count = self.count_usages(&content, &var_name, i + 1);
let is_cross_module =
self.check_cross_module_usage(graph, &path_str, &var_name);
findings.push(self.create_finding(
path,
i + 1,
&var_name,
usage_count,
is_cross_module,
));
}
}
}
}
}
info!(
"GlobalVariablesDetector found {} findings (graph-aware)",
findings.len()
);
Ok(findings)
}
}