//! Global Variables Detector
//!
//! Graph-enhanced detection of mutable global variables.
//! Uses graph to:
//! - Count how many functions read/write the global (impact analysis)
//! - Detect cross-module usage (higher severity)
//! - Find potential encapsulation points
use crate::detectors::base::{Detector, DetectorConfig};
use crate::models::{Finding, Severity};
use anyhow::Result;
use regex::Regex;
use std::collections::HashSet;
use std::path::PathBuf;
use std::sync::LazyLock;
use tracing::info;
#[allow(dead_code)] // Used by GLOBAL_PATTERN
static GLOBAL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"^(var\s+\w+\s*=|let\s+\w+\s*=|global\s+\w+|\w+\s*=\s*[^=])").expect("valid regex")
});
static VAR_NAME: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^(?:var|let|global)\s+(\w+)").expect("valid regex"));
pub struct GlobalVariablesDetector {
#[allow(dead_code)] // Part of detector pattern, used for file scanning
repository_path: PathBuf,
max_findings: usize,
}
impl GlobalVariablesDetector {
crate::detectors::detector_new!(50);
/// Extract variable name from declaration
fn extract_var_name(line: &str) -> Option<String> {
let trimmed = line.trim();
// Handle global object property assignments
for prefix in &["window.", "globalThis.", "global.", "self."] {
if let Some(rest) = trimmed.strip_prefix(prefix) {
let name: String = rest
.chars()
.take_while(|c| c.is_alphanumeric() || *c == '_')
.collect();
if !name.is_empty() {
return Some(name);
}
}
}
if let Some(caps) = VAR_NAME.captures(trimmed) {
return caps.get(1).map(|m| m.as_str().to_string());
}
// Handle Python global statement
if line.trim().starts_with("global ") {
return line
.trim()
.strip_prefix("global ")
.and_then(|s| s.split_whitespace().next())
.map(|s| s.to_string());
}
None
}
/// Count how many functions in the file reference this variable
fn count_usages(&self, content: &str, var_name: &str, declaration_line: usize) -> usize {
let mut count = 0;
let var_pattern = format!(r"\b{}\b", regex::escape(var_name));
if let Ok(re) = Regex::new(&var_pattern) {
for (i, line) in content.lines().enumerate() {
if i == declaration_line - 1 {
continue;
} // Skip declaration
if re.is_match(line) {
count += 1;
}
}
}
count
}
/// Check if variable is used by functions in other files
fn check_cross_module_usage(
&self,
graph: &dyn crate::graph::GraphQuery,
file_path: &str,
_var_name: &str,
) -> bool {
// Check if any function in other files might reference this
// This is heuristic - we check if the file is imported by others
let file_name = file_path.rsplit('/').next().unwrap_or(file_path);
let module_name = file_name.split('.').next().unwrap_or("");
// Look for imports of this module
let gi = graph.interner();
for &(_, dst_idx) in graph.all_import_edges() {
if let Some(dst_node) = graph.node_idx(dst_idx) {
if dst_node.qn(gi).contains(module_name) {
return true;
}
}
}
false
}
/// Count how many times a variable is reassigned after its declaration line.
/// Returns 0 if never reassigned (effectively const).
fn count_reassignments(content: &str, var_name: &str, decl_line_idx: usize) -> usize {
let escaped = regex::escape(var_name);
// Match: varName++ / varName-- (increment/decrement), or
// varName = / varName += / varName -= etc (assignment, but not == or ===)
// Note: Rust's regex crate does not support lookahead, so we use [^=] instead.
let pattern = format!(r"\b{}\s*(?:\+\+|--|[+\-*/&|^%]?=[^=])", escaped);
let re = match Regex::new(&pattern) {
Ok(r) => r,
Err(_) => return 0,
};
content
.lines()
.enumerate()
.filter(|(idx, line)| *idx != decl_line_idx && re.is_match(line))
.count()
}
fn create_finding(
&self,
path: &std::path::Path,
line: usize,
var_name: &str,
usage_count: usize,
is_cross_module: bool,
) -> Finding {
// Calculate severity based on impact
let severity = if is_cross_module && usage_count > 5 {
Severity::High // Cross-module globals with many usages are dangerous
} else if is_cross_module || usage_count > 10 {
Severity::Medium
} else {
Severity::Low
};
let mut notes = Vec::new();
if usage_count > 0 {
notes.push(format!("📊 Used {} times in this file", usage_count));
}
if is_cross_module {
notes.push("⚠️ Module is imported by others - global may leak".to_string());
}
let context_notes = if notes.is_empty() {
String::new()
} else {
format!("\n\n**Impact Analysis:**\n{}", notes.join("\n"))
};
let suggestion = if is_cross_module {
let capitalized = format!(
"{}{}",
var_name
.chars()
.next()
.map(|c| c.to_uppercase().to_string())
.unwrap_or_default(),
var_name.chars().skip(1).collect::<String>()
);
format!(
"Cross-module globals are especially dangerous. Consider:\n\
1. Export a getter/setter function instead: `get{0}()`, `set{0}()`\n\
2. Encapsulate in a class with controlled access\n\
3. Use dependency injection",
capitalized
)
} else if usage_count > 5 {
"Many usages - consider:\n\
1. Encapsulate in a module with getter/setter\n\
2. Convert to a class instance\n\
3. Pass as parameter instead"
.to_string()
} else {
"Use const if immutable, or encapsulate in a module/class.".to_string()
};
Finding {
id: String::new(),
detector: "GlobalVariablesDetector".to_string(),
severity,
title: format!("Global mutable variable: {}", var_name),
description: format!(
"Global mutable state '{}' makes code hard to reason about.{}",
var_name, context_notes
),
affected_files: vec![path.to_path_buf()],
line_start: Some(line as u32),
line_end: Some(line as u32),
suggested_fix: Some(suggestion),
estimated_effort: Some(if is_cross_module {
"30 minutes".to_string()
} else {
"15 minutes".to_string()
}),
category: Some("code-quality".to_string()),
cwe_id: None,
why_it_matters: Some(
"Global state causes hidden dependencies between functions. \
Changes to globals can have unexpected effects throughout the codebase."
.to_string(),
),
..Default::default()
}
}
}
impl Detector for GlobalVariablesDetector {
fn name(&self) -> &'static str {
"global-variables"
}
fn description(&self) -> &'static str {
"Detects mutable global variables"
}
fn file_extensions(&self) -> &'static [&'static str] {
&["py", "js", "ts", "jsx", "tsx", "mjs", "mts"]
}
fn detect(
&self,
ctx: &crate::detectors::analysis_context::AnalysisContext,
) -> Result<Vec<Finding>> {
let graph = ctx.graph;
let files = &ctx.as_file_provider();
let mut findings = vec![];
let mut seen_globals: HashSet<(PathBuf, String)> = HashSet::new();
for path in files.files_with_extensions(&["py", "js", "ts", "jsx", "tsx", "mjs", "mts"]) {
if findings.len() >= self.max_findings {
break;
}
let path_str = path.to_string_lossy().to_string();
// Skip bundled/generated code (path-based fallback first since we load content anyway)
if crate::detectors::content_classifier::is_likely_bundled_path(&path_str) {
continue;
}
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
if let Some(content) = files.content(path) {
// Skip bundled/generated code (content-based detection)
if crate::detectors::content_classifier::is_bundled_code(&content)
|| crate::detectors::content_classifier::is_minified_code(&content)
|| crate::detectors::content_classifier::is_fixture_code(&path_str, &content)
{
continue;
}
let is_module = matches!(ext, "ts" | "tsx" | "mts" | "mjs")
|| content.lines().any(|l| {
let t = l.trim();
((t.starts_with("import ") || t.starts_with("import{"))
&& !t.starts_with("import("))
|| t.starts_with("export ")
|| t.starts_with("export{")
});
// Track Python function scope with indentation depth
let mut py_indent_stack: Vec<usize> = Vec::new(); // indent levels of open def blocks
let mut py_in_function = false;
let mut in_docstring = false;
let all_lines: Vec<&str> = content.lines().collect();
for (i, line) in all_lines.iter().enumerate() {
let trimmed = line.trim();
// Track Python triple-quoted strings (docstrings)
if ext == "py" {
let triple_double = trimmed.matches("\"\"\"").count();
let triple_single = trimmed.matches("'''").count();
let triple_count = triple_double + triple_single;
if triple_count % 2 != 0 {
in_docstring = !in_docstring;
}
if in_docstring {
continue;
}
}
// --- Python scope tracking via indentation ---
if ext == "py" {
let indent = line.len() - line.trim_start().len();
if !trimmed.is_empty() {
// Pop any indent levels that are >= current indent (we've left those blocks)
py_indent_stack.retain(|&lvl| lvl < indent);
py_in_function = !py_indent_stack.is_empty();
}
if trimmed.starts_with("def ") || trimmed.starts_with("async def ") {
py_indent_stack.push(indent);
py_in_function = true;
}
}
// Skip constants, imports, classes
if trimmed.starts_with("const ")
|| trimmed.starts_with("import ")
|| trimmed.starts_with("from ")
|| trimmed.starts_with("class ")
|| trimmed.starts_with("#")
|| trimmed.starts_with("//")
|| trimmed.is_empty()
|| trimmed.starts_with("export ")
{
continue;
}
// Check for global assignment
let is_global = if ext == "py" {
// Only flag explicit `global varname` statements that are INSIDE functions
// (that's their purpose: declaring a global from within a function)
py_in_function && trimmed.starts_with("global ")
} else if is_module {
// ES modules: only flag true globals that escape module scope
let is_true_global = trimmed.starts_with("window.")
|| trimmed.starts_with("globalThis.")
|| trimmed.starts_with("global.")
|| trimmed.starts_with("self.");
is_true_global && trimmed.contains('=') && !trimmed.contains("==")
} else {
// Script mode: current behavior
let at_module_scope = !line.starts_with(' ') && !line.starts_with('\t');
let is_require = trimmed.contains("require(");
at_module_scope
&& (trimmed.starts_with("var ") || trimmed.starts_with("let "))
&& !is_require
};
if is_global {
let prev_line = if i > 0 { Some(all_lines[i - 1]) } else { None };
if crate::detectors::is_line_suppressed(line, prev_line) {
continue;
}
if let Some(var_name) = Self::extract_var_name(trimmed) {
let key = (path.to_path_buf(), var_name.clone());
if seen_globals.contains(&key) {
continue;
}
seen_globals.insert(key);
// Skip effectively-constant variables (assigned once, never reassigned)
// Only for JS/TS script mode — Python's `global` keyword already implies intentional mutation
// For module-mode true globals (window.x etc), always flag — they're mutable by nature
if ext != "py" && !is_module {
let reassignments =
Self::count_reassignments(&content, &var_name, i);
if reassignments == 0 {
continue;
}
}
let usage_count = self.count_usages(&content, &var_name, i + 1);
let is_cross_module =
self.check_cross_module_usage(graph, &path_str, &var_name);
findings.push(self.create_finding(
path,
i + 1,
&var_name,
usage_count,
is_cross_module,
));
}
}
}
}
}
info!(
"GlobalVariablesDetector found {} findings (graph-aware)",
findings.len()
);
Ok(findings)
}
}
impl crate::detectors::RegisteredDetector for GlobalVariablesDetector {
fn create(init: &crate::detectors::DetectorInit) -> std::sync::Arc<dyn Detector> {
std::sync::Arc::new(Self::new(init.repo_path))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::builder::GraphBuilder;
#[test]
fn test_detects_global_statement_in_python() {
// Python: `global counter` inside a function body triggers detection
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![(
"state.py",
"counter = 0\n\ndef increment():\n global counter\n counter += 1\n",
)],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
!findings.is_empty(),
"Should detect 'global counter' inside function"
);
assert!(
findings[0].title.contains("counter"),
"Title should mention variable name, got: {}",
findings[0].title
);
}
#[test]
fn test_no_finding_for_global_in_docstring() {
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("widgets.py", "class Widget:\n def merge(self):\n \"\"\"\n global or in CSS you might want to override a style.\n \"\"\"\n pass\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag 'global' in docstring. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_dedup_same_variable_across_functions() {
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("trans.py", "def func_a():\n global _default\n _default = get_default()\n\ndef func_b():\n global _default\n return _default\n\ndef func_c():\n global _default\n _default = None\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert_eq!(
findings.len(),
1,
"Should deduplicate same variable across functions. Found {} findings",
findings.len()
);
}
#[test]
fn test_no_finding_for_local_variables() {
// No `global` statement, no module-level mutable var
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![(
"clean.py",
"def compute(x):\n result = x * 2\n return result\n",
)],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag local variables in functions, but got: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_single_assignment_not_flagged() {
// var app = express() — assigned once, never reassigned → should NOT be flagged
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![(
"test.js",
"var app = express();\napp.get('/', handler);\napp.listen(3000);",
)],
);
let findings = detector.detect(&ctx).unwrap();
assert!(
findings.is_empty(),
"Single-assignment var should not be flagged, got: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_reassigned_variable_flagged() {
// var count = 0; count++ — genuinely mutable → should be flagged
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![("test.js", "var count = 0;\ncount++;\nconsole.log(count);")],
);
let findings = detector.detect(&ctx).unwrap();
assert!(!findings.is_empty(), "Reassigned var should be flagged");
}
#[test]
fn test_no_finding_for_module_scoped_let() {
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![(
"state.js",
"import { something } from './other';\nlet currentAuth = null;\ncurrentAuth = getAuth();\nexport function getState() { return currentAuth; }\n",
)],
);
let findings = detector.detect(&ctx).unwrap();
assert!(
findings.is_empty(),
"Module-scoped let should not be flagged, got: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_no_finding_for_ts_module_scoped_let() {
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![(
"auth.ts",
"let currentAuth: Auth | null = null;\ncurrentAuth = login();\nexport function getAuth() { return currentAuth; }\n",
)],
);
let findings = detector.detect(&ctx).unwrap();
assert!(
findings.is_empty(),
"TS module-scoped let should not be flagged"
);
}
#[test]
fn test_flags_window_global_in_module() {
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![(
"globals.js",
"import { x } from './y';\nwindow.myGlobal = 123;\n",
)],
);
let findings = detector.detect(&ctx).unwrap();
assert!(
!findings.is_empty(),
"window.x assignment should be flagged in module"
);
assert!(findings[0].title.contains("myGlobal"));
}
#[test]
fn test_still_flags_script_mode_var() {
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![("script.js", "var count = 0;\ncount++;\nconsole.log(count);")],
);
let findings = detector.detect(&ctx).unwrap();
assert!(
!findings.is_empty(),
"Script-mode var should still be flagged"
);
}
#[test]
fn test_flags_globalthis_in_module() {
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![(
"config.js",
"import { defaults } from './defaults';\nglobalThis.appConfig = defaults;\n",
)],
);
let findings = detector.detect(&ctx).unwrap();
assert!(
!findings.is_empty(),
"globalThis.x assignment should be flagged"
);
assert!(findings[0].title.contains("appConfig"));
}
#[test]
fn test_no_finding_for_mjs_module_scoped_let() {
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![("state.mjs", "let x = 1;\nx = 2;\nconsole.log(x);\n")],
);
let findings = detector.detect(&ctx).unwrap();
assert!(
findings.is_empty(),
".mjs module-scoped let should not be flagged"
);
}
#[test]
fn repro_fp6_auth_ts_module_scoped_let() {
// Real FP: let currentAuth in auth.ts — module-scoped let in TS file
// with imports/exports. Should not be flagged.
let store = GraphBuilder::new().freeze();
let detector = GlobalVariablesDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![(
"lib/auth.ts",
"import { deriveDeviceKey } from \"./crypto\";\nimport { putKey, getKey } from \"./db\";\n\nexport interface AuthState {\n deviceKey: Uint8Array;\n userId: string;\n}\n\nlet currentAuth: AuthState | null = null;\n\nexport function getAuth(): AuthState | null {\n return currentAuth;\n}\n\nexport function clearAuth(): void {\n if (currentAuth) {\n currentAuth = null;\n }\n}\n\nexport async function login(password: string): Promise<AuthState> {\n currentAuth = { deviceKey: new Uint8Array(32), userId: \"abc\" };\n return currentAuth;\n}\n",
)],
);
let findings = detector.detect(&ctx).unwrap();
assert!(
findings.is_empty(),
"FP #6: module-scoped let in .ts with imports should NOT be flagged, got: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
}