use crate::detectors::base::{Detector, DetectorConfig};
use crate::detectors::taint::{TaintAnalysisResult, TaintAnalyzer, TaintCategory};
use crate::graph::GraphStore;
use crate::models::{Finding, Severity};
use anyhow::Result;
use regex::Regex;
use std::path::PathBuf;
use std::sync::OnceLock;
static FILE_OP: OnceLock<Regex> = OnceLock::new();
static PATH_JOIN: OnceLock<Regex> = OnceLock::new();
static SEND_FILE: OnceLock<Regex> = OnceLock::new();
static PATH_RESOLVE: OnceLock<Regex> = OnceLock::new();
fn file_op() -> &'static Regex {
FILE_OP.get_or_init(|| Regex::new(r"(?i)(open|read|write|readFile|writeFile|readFileSync|writeFileSync|appendFile|createReadStream|createWriteStream|unlink|unlinkSync|remove|rmdir|mkdir|stat|statSync|access|accessSync|copyFile|rename)\s*\(").expect("valid regex"))
}
fn path_join() -> &'static Regex {
PATH_JOIN.get_or_init(|| Regex::new(r"(?i)(os\.path\.join|path\.join|path\.resolve|filepath\.Join|filepath\.Clean|Path\s*\()").expect("valid regex"))
}
fn send_file() -> &'static Regex {
SEND_FILE.get_or_init(|| {
Regex::new(r"(?i)(sendFile|download|serveStatic|send_file|serve_file)\s*\(").expect("valid regex")
})
}
fn path_resolve() -> &'static Regex {
PATH_RESOLVE
.get_or_init(|| Regex::new(r"(?i)(realpath|abspath|normpath|resolve|Clean)\s*\(").expect("valid regex"))
}
pub struct PathTraversalDetector {
repository_path: PathBuf,
max_findings: usize,
taint_analyzer: TaintAnalyzer,
}
impl PathTraversalDetector {
pub fn new(repository_path: impl Into<PathBuf>) -> Self {
Self {
repository_path: repository_path.into(),
max_findings: 50,
taint_analyzer: TaintAnalyzer::new(),
}
}
}
impl Detector for PathTraversalDetector {
fn name(&self) -> &'static str {
"path-traversal"
}
fn description(&self) -> &'static str {
"Detects path traversal vulnerabilities"
}
fn detect(&self, graph: &dyn crate::graph::GraphQuery) -> Result<Vec<Finding>> {
let mut findings = vec![];
let walker = ignore::WalkBuilder::new(&self.repository_path)
.hidden(false)
.git_ignore(true)
.build();
let mut taint_paths = self
.taint_analyzer
.trace_taint(graph, TaintCategory::PathTraversal);
let intra_paths = crate::detectors::data_flow::run_intra_function_taint(
&self.taint_analyzer,
graph,
TaintCategory::PathTraversal,
&self.repository_path,
);
taint_paths.extend(intra_paths);
let taint_result = TaintAnalysisResult::from_paths(taint_paths);
for entry in walker.filter_map(|e| e.ok()) {
if findings.len() >= self.max_findings {
break;
}
let path = entry.path();
if !path.is_file() {
continue;
}
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
if !matches!(ext, "py" | "js" | "ts" | "rb" | "php" | "java" | "go") {
continue;
}
let rel_path = path
.strip_prefix(&self.repository_path)
.unwrap_or(path)
.to_path_buf();
let file_str = path.to_string_lossy();
let is_test_file = file_str.contains("/test")
|| file_str.contains("/tests/")
|| file_str.contains("_test.")
|| file_str.contains(".test.")
|| file_str.contains("/spec/")
|| file_str.contains("_spec.");
if let Some(content) = crate::cache::global_cache().get_content(path) {
for (i, line) in content.lines().enumerate() {
let has_user_input = line.contains("req.") || line.contains("request.") ||
line.contains("params[") || line.contains("params.") ||
line.contains("input(") || line.contains("sys.argv") || line.contains("process.argv") ||
line.contains("r.URL") || line.contains("c.Param") || line.contains("c.Query") ||
line.contains("FormValue") || line.contains("r.Form") ||
line.contains("query[") || line.contains("query.get") ||
line.contains("body[") || line.contains("body.get");
let line_num = (i + 1) as u32;
let check_taint = |base_severity: Severity,
base_desc: &str|
-> (Severity, String) {
let adjusted_base = if is_test_file {
match base_severity {
Severity::Critical => Severity::Medium,
Severity::High => Severity::Low,
_ => Severity::Low,
}
} else {
base_severity
};
let matching_taint = taint_result.paths.iter().find(|p| {
(p.sink_file == file_str || p.source_file == file_str)
&& (p.sink_line == line_num || p.source_line == line_num)
});
match matching_taint {
Some(taint_path) if taint_path.is_sanitized => {
(Severity::Low, format!(
"{}\n\n**Taint Analysis Note**: A sanitizer function (`{}`) was found \
in the data flow path, which may mitigate this vulnerability.",
base_desc,
taint_path.sanitizer.as_deref().unwrap_or("unknown")
))
}
Some(taint_path) => {
let sev = if is_test_file { Severity::Medium } else { Severity::Critical };
(sev, format!(
"{}\n\n**Taint Analysis Confirmed**: Data flow analysis traced a path \
from user input to this file sink without sanitization:\n\n`{}`",
base_desc,
taint_path.path_string()
))
}
None => (adjusted_base, base_desc.to_string())
}
};
if file_op().is_match(line) && has_user_input {
let (severity, description) = check_taint(
Severity::High,
"File operation with user-controlled input detected. An attacker could use '../' sequences to access files outside the intended directory."
);
findings.push(Finding {
id: String::new(),
detector: "PathTraversalDetector".to_string(),
severity,
title: "Potential path traversal in file operation".to_string(),
description,
affected_files: vec![rel_path.clone()],
line_start: Some(line_num),
line_end: Some(line_num),
suggested_fix: Some("1. Use path.basename() to extract filename only\n2. Validate resolved path is within allowed directory\n3. Use a whitelist of allowed filenames if possible".to_string()),
estimated_effort: Some("30 minutes".to_string()),
category: Some("security".to_string()),
cwe_id: Some("CWE-22".to_string()),
why_it_matters: Some("Attackers could read sensitive files like /etc/passwd or overwrite critical system files.".to_string()),
..Default::default()
});
}
if path_join().is_match(line) && has_user_input {
let (severity, description) = check_taint(
Severity::High,
"path.join() with user input does NOT prevent path traversal. Joining '/base' with '../etc/passwd' results in '/etc/passwd'."
);
findings.push(Finding {
id: String::new(),
detector: "PathTraversalDetector".to_string(),
severity,
title: "Path traversal via path.join with user input".to_string(),
description,
affected_files: vec![rel_path.clone()],
line_start: Some(line_num),
line_end: Some(line_num),
suggested_fix: Some("After joining, verify the resolved path starts with your base directory:\n```\nconst resolved = path.resolve(baseDir, userInput);\nif (!resolved.startsWith(path.resolve(baseDir))) { throw new Error('Invalid path'); }\n```".to_string()),
estimated_effort: Some("30 minutes".to_string()),
category: Some("security".to_string()),
cwe_id: Some("CWE-22".to_string()),
why_it_matters: Some("path.join() is commonly misunderstood as safe, but it preserves '../' sequences allowing directory escape.".to_string()),
..Default::default()
});
}
if send_file().is_match(line) && has_user_input {
let (severity, description) = check_taint(
Severity::High,
"File download/send function with user-controlled path. Attackers could download arbitrary files from the server."
);
findings.push(Finding {
id: String::new(),
detector: "PathTraversalDetector".to_string(),
severity,
title: "Path traversal in file download".to_string(),
description,
affected_files: vec![rel_path.clone()],
line_start: Some(line_num),
line_end: Some(line_num),
suggested_fix: Some("Use res.download() with { root: '/safe/base/dir' } option, or validate resolved path is within allowed directory.".to_string()),
estimated_effort: Some("30 minutes".to_string()),
category: Some("security".to_string()),
cwe_id: Some("CWE-22".to_string()),
why_it_matters: Some("Attackers could download sensitive configuration files, source code, or credentials from the server.".to_string()),
..Default::default()
});
}
let has_path_concat = (line.contains("+ ")
|| line.contains("f\"")
|| line.contains("f'")
|| line.contains("${")
|| line.contains("fmt.Sprintf"))
&& (line.contains("/") || line.contains("\\\\"))
&& (line.contains("open(")
|| line.contains("read(")
|| line.contains("write("));
if has_path_concat && has_user_input {
let (severity, description) = check_taint(
Severity::High,
"File path constructed via string concatenation with user input. This is vulnerable to directory traversal attacks."
);
findings.push(Finding {
id: String::new(),
detector: "PathTraversalDetector".to_string(),
severity,
title: "Path traversal via string concatenation".to_string(),
description,
affected_files: vec![rel_path.clone()],
line_start: Some(line_num),
line_end: Some(line_num),
suggested_fix: Some("Use secure path functions and validate the final resolved path is within the allowed directory.".to_string()),
estimated_effort: Some("30 minutes".to_string()),
category: Some("security".to_string()),
cwe_id: Some("CWE-22".to_string()),
why_it_matters: Some("String concatenation provides no protection against '../' sequences in user input.".to_string()),
..Default::default()
});
}
}
}
}
findings.retain(|f| f.severity != Severity::Low);
Ok(findings)
}
}