use std::sync::OnceLock;
use rustc_hash::FxHashMap;
use fallow_types::extract::{
ModuleInfo, SanitizedSinkArg, SanitizerScope, SinkSite, TaintedBinding,
};
use fallow_types::output::{IssueAction, SuppressFileAction, SuppressFileKind};
use fallow_types::results::{
SecurityCandidate, SecurityCandidateBoundary, SecurityCandidateSink, SecurityFinding,
SecurityFindingKind, SecurityNetworkContext, SecuritySeverity, TraceHop, TraceHopRole,
};
use fallow_types::suppress::IssueKind;
use super::catalogue::{Matcher, catalogue};
use super::{LineOffsetsMap, byte_offset_to_line_col};
use crate::discover::FileId;
use crate::graph::ModuleGraph;
use crate::suppress::SuppressionContext;
/// Suppression-kind token for the findings produced in this module.
///
/// `build_actions` interpolates it into the `// fallow-ignore-file …` comment
/// that is suggested to users.
pub(super) const SUPPRESS_KIND: &str = "security-sink";
/// Include/exclude filter over matcher category ids.
///
/// Both lists are optional. A missing include list places no restriction on
/// [`CategoryFilter::admits`]; a missing exclude list excludes nothing.
#[derive(Debug, Default, Clone)]
pub struct CategoryFilter {
    /// When set, only ids present in this list can be admitted.
    include: Option<Vec<String>>,
    /// When set, ids present in this list are always rejected.
    exclude: Option<Vec<String>>,
}

impl CategoryFilter {
    /// Creates a filter from optional include and exclude lists.
    #[must_use]
    pub fn new(include: Option<Vec<String>>, exclude: Option<Vec<String>>) -> Self {
        Self { include, exclude }
    }

    /// Returns `true` when `id` passes the filter.
    ///
    /// With an include list, `id` must appear in it; with an exclude list,
    /// `id` must not appear in it. An absent include list admits every id
    /// that is not excluded.
    #[must_use]
    pub fn admits(&self, id: &str) -> bool {
        let included = match self.include.as_deref() {
            Some(ids) => Self::lists(ids, id),
            None => true,
        };
        included && !self.is_excluded(id)
    }

    /// Returns `true` only when an include list exists and names `id`, and
    /// the exclude list (if any) does not name it.
    ///
    /// Unlike [`CategoryFilter::admits`], an absent include list rejects
    /// every id.
    #[must_use]
    pub fn explicitly_admits(&self, id: &str) -> bool {
        let named = match self.include.as_deref() {
            Some(ids) => Self::lists(ids, id),
            None => false,
        };
        named && !self.is_excluded(id)
    }

    /// Whether the exclude list is present and contains `id`.
    fn is_excluded(&self, id: &str) -> bool {
        match self.exclude.as_deref() {
            Some(ids) => Self::lists(ids, id),
            None => false,
        }
    }

    /// Exact-match membership test of `id` in `ids`.
    fn lists(ids: &[String], id: &str) -> bool {
        ids.iter().any(|candidate| candidate == id)
    }
}
/// Counters returned by `find_tainted_sinks` alongside the findings.
#[derive(Debug, Default, Clone, Copy)]
pub struct TaintedSinkStats {
/// Sum of each visited module's `security_sinks_skipped` counter.
/// NOTE(review): the name suggests these are sinks the extractor skipped
/// because the callee was dynamic; confirm against the extractor.
pub sinks_skipped_dynamic_callee: usize,
}
/// Builds the action list attached to a finding: one file-level suppression
/// hint that is not auto-fixable and whose comment embeds [`SUPPRESS_KIND`].
pub(super) fn build_actions() -> Vec<IssueAction> {
    let suppress_file = SuppressFileAction {
        kind: SuppressFileKind::SuppressFile,
        auto_fixable: false,
        description: String::from("Suppress with a file-level comment at the top of the file"),
        comment: format!("// fallow-ignore-file {SUPPRESS_KIND}"),
    };
    vec![IssueAction::SuppressFile(suppress_file)]
}
/// Checks the matcher's import-provenance requirement against `module`.
///
/// A matcher without `import_provenance` is always satisfied. Otherwise the
/// module must import from a source matching the spec (see
/// [`import_source_matches`]). For a fixed set of categories, and for
/// literal-aware `weak-crypto` matchers, that import's local name must also
/// equal the first dot-separated segment of `callee_path`.
fn provenance_satisfied(matcher: &Matcher, module: &ModuleInfo, callee_path: &str) -> bool {
    let Some(spec) = &matcher.import_provenance else {
        return true;
    };

    // Text before the first `.`; the whole path when it has no `.`.
    let leading_ident = callee_path
        .split_once('.')
        .map_or(callee_path, |(head, _)| head);

    let want_binding_trace = match matcher.id.as_str() {
        "command-injection"
        | "permissive-cors"
        | "electron-unsafe-webpreferences"
        | "insecure-temp-file"
        | "jwt-alg-none"
        | "jwt-verify-missing-algorithms"
        | "tls-validation-disabled"
        | "mysql-multiple-statements"
        | "world-writable-permission" => true,
        "weak-crypto" => matcher.is_literal_aware(),
        _ => false,
    };

    module.imports.iter().any(|imp| {
        import_source_matches(&imp.source, spec)
            && (!want_binding_trace || imp.local_name == leading_ident)
    })
}
/// Returns `true` when import specifier `source` refers to package `spec` or
/// to a `/`-separated subpath of it.
///
/// A leading `node:` is stripped from both sides before comparing, so
/// `node:child_process` and `child_process` are interchangeable.
fn import_source_matches(source: &str, spec: &str) -> bool {
    let bare_source = source.strip_prefix("node:").unwrap_or(source);
    let bare_spec = spec.strip_prefix("node:").unwrap_or(spec);

    match bare_source.strip_prefix(bare_spec) {
        // An empty remainder is an exact match; a remainder starting with `/`
        // is a subpath. Anything else (e.g. `mysql2-promise`) only shares a
        // textual prefix.
        Some(rest) => rest.is_empty() || rest.starts_with('/'),
        None => false,
    }
}
/// Lazily builds, once per process, the glob set compiled from
/// `crate::discover::PRODUCTION_EXCLUDE_PATTERNS`.
///
/// Patterns that fail to compile are skipped, and a failure to build the set
/// falls back to an empty set, so this never panics.
fn production_exclude_globset() -> &'static globset::GlobSet {
    static COMPILED: OnceLock<globset::GlobSet> = OnceLock::new();
    COMPILED.get_or_init(|| {
        let mut builder = globset::GlobSetBuilder::new();
        for pattern in crate::discover::PRODUCTION_EXCLUDE_PATTERNS {
            let compiled = globset::GlobBuilder::new(pattern)
                .literal_separator(true)
                .build();
            let Ok(glob) = compiled else {
                continue;
            };
            builder.add(glob);
        }
        match builder.build() {
            Ok(set) => set,
            Err(_) => globset::GlobSet::empty(),
        }
    })
}
/// Returns `true` when `path` should not anchor a finding: it matches one of
/// the production-exclude globs, or `predicates::is_config_file` recognises
/// it as a config file.
///
/// Backslashes are rewritten to `/` before glob matching.
pub(super) fn is_low_value_anchor(path: &std::path::Path) -> bool {
    let forward_slashed = path.to_string_lossy().replace('\\', "/");
    if production_exclude_globset().is_match(&forward_slashed) {
        return true;
    }
    crate::analyze::predicates::is_config_file(path)
}
/// Indexes the tainted bindings whose `source_path` matches a catalogue
/// source.
///
/// The key is the binding's local name; the value is
/// `(source id, source title, source_span_start)`. When several bindings share
/// a local name, the first matching one in `bindings` wins.
fn source_tainted_locals<'b>(
    bindings: &'b [TaintedBinding],
    declared_deps: &rustc_hash::FxHashSet<String>,
) -> FxHashMap<&'b str, (&'static str, &'static str, u32)> {
    let cat = catalogue();
    let mut by_local: FxHashMap<&'b str, (&'static str, &'static str, u32)> =
        FxHashMap::default();
    for binding in bindings {
        let Some((id, title)) = cat.matching_source_for_deps(&binding.source_path, declared_deps)
        else {
            continue;
        };
        by_local
            .entry(binding.local.as_str())
            .or_insert((id, title, binding.source_span_start));
    }
    by_local
}
/// Whether `id` is one of the categories for which an HTML-scoped sanitizer
/// on the sink argument suppresses the finding.
fn is_html_sanitizable_category(id: &str) -> bool {
    const HTML_CATEGORIES: [&str; 3] = ["dangerous-html", "dom-document-write", "jquery-html"];
    HTML_CATEGORIES.contains(&id)
}
/// Whether `id` is one of the categories for which a URL-scoped sanitizer on
/// the sink argument suppresses the finding.
fn is_url_sanitizable_category(id: &str) -> bool {
    const URL_CATEGORIES: [&str; 3] = ["open-redirect", "nextjs-open-redirect", "ssrf"];
    URL_CATEGORIES.contains(&id)
}
/// Whether `id` is one of the categories for which a path-scoped sanitizer on
/// the sink argument suppresses the finding.
fn is_path_sanitizable_category(id: &str) -> bool {
    const PATH_CATEGORIES: [&str; 3] = ["path-traversal", "route-send-file", "zip-slip"];
    PATH_CATEGORIES.contains(&id)
}
/// Returns `true` when `args` contains an entry recorded for this exact sink
/// argument (same `span_start` and `arg_index`) with the requested `scope`.
fn has_direct_sanitizer(sink: &SinkSite, args: &[SanitizedSinkArg], scope: SanitizerScope) -> bool {
    for recorded in args {
        let same_site =
            recorded.span_start == sink.span_start && recorded.arg_index == sink.arg_index;
        if same_site && recorded.scope == scope {
            return true;
        }
    }
    false
}
fn sink_has_sanitizer(module: &ModuleInfo, sink: &SinkSite, scope: SanitizerScope) -> bool {
has_direct_sanitizer(sink, &module.sanitized_sink_args, scope)
}
fn has_direct_html_sanitizer(sink: &SinkSite, args: &[SanitizedSinkArg]) -> bool {
args.iter().any(|arg| {
arg.span_start == sink.span_start
&& arg.arg_index == sink.arg_index
&& arg.scope == SanitizerScope::Html
})
}
fn sink_has_html_sanitizer(module: &ModuleInfo, sink: &SinkSite) -> bool {
has_direct_html_sanitizer(sink, &module.sanitized_sink_args)
}
/// Resolves the untrusted source feeding `sink`, if any.
///
/// Source paths recorded directly on the sink argument are tried first; the
/// first one the catalogue recognises wins and carries no span (`None`).
/// Otherwise the argument identifiers are looked up in `tainted`, and the
/// first hit is returned together with the span stored for that binding.
fn sink_source<'t>(
    sink: &SinkSite,
    tainted: &FxHashMap<&str, (&'t str, &'t str, u32)>,
    declared_deps: &rustc_hash::FxHashSet<String>,
) -> Option<(&'t str, &'t str, Option<u32>)> {
    let cat = catalogue();

    let direct = sink
        .arg_source_paths
        .iter()
        .find_map(|path| cat.matching_source_for_deps(path, declared_deps));
    if let Some((id, title)) = direct {
        return Some((id, title, None));
    }

    // Without tainted locals there is nothing for the identifiers to hit.
    if tainted.is_empty() {
        return None;
    }

    sink.arg_idents
        .iter()
        .find_map(|ident| tainted.get(ident.as_str()))
        .map(|&(id, title, span)| (id, title, Some(span)))
}
/// Decides whether `matcher` applies to `sink`.
///
/// Every condition below must hold; they are checked in order and the first
/// failure rejects the sink. `source` is the `(id, title)` of the resolved
/// untrusted source, when one was found.
fn matcher_admits_sink(matcher: &Matcher, sink: &SinkSite, source: Option<(&str, &str)>) -> bool {
    // Same sink shape and same argument position.
    if matcher.sink_shape != sink.sink_shape || matcher.arg_index != sink.arg_index {
        return false;
    }
    // Literal arguments are only considered by literal-aware matchers.
    if !sink.arg_is_non_literal && !matcher.is_literal_aware() {
        return false;
    }
    if !matcher.admits_arg_kind(sink.arg_kind) {
        return false;
    }
    if !matcher.literal_value_satisfied(sink.arg_literal.as_ref()) {
        return false;
    }
    if !matcher.object_properties_satisfied(&sink.object_properties) {
        return false;
    }
    if !matcher.object_missing_satisfied(
        &sink.object_property_keys,
        sink.object_property_keys_complete,
    ) {
        return false;
    }
    if !matcher.context_satisfied(&sink.arg_idents) {
        return false;
    }
    // Source requirements: presence first, then kind (when restricted).
    if matcher.requires_source && source.is_none() {
        return false;
    }
    let source_kind_ok = matcher.requires_source_kinds.is_empty()
        || source.is_some_and(|(id, _)| matcher.requires_source_kinds.iter().any(|k| k == id));
    if !source_kind_ok {
        return false;
    }
    matcher.first_matching_pattern(&sink.callee_path).is_some()
}
/// Category id for which `find_tainted_sinks` attaches a
/// `SecurityNetworkContext` to the finding's candidate.
pub(super) const NETWORK_EXFIL_CATEGORY: &str = "secret-to-network";
/// Categories that `find_tainted_sinks` activates only when the category
/// filter explicitly includes them (`CategoryFilter::explicitly_admits`).
pub(super) const INCLUDE_REQUIRED_CATEGORIES: &[&str] = &[NETWORK_EXFIL_CATEGORY];
/// Whether `id` is listed in [`INCLUDE_REQUIRED_CATEGORIES`].
fn is_include_required_category(id: &str) -> bool {
    INCLUDE_REQUIRED_CATEGORIES
        .iter()
        .any(|required| *required == id)
}
/// Scans every module in `graph` for security sinks that match an active
/// catalogue matcher and returns the resulting findings plus scan statistics.
///
/// A matcher is active when the category filter admits it (categories in
/// `INCLUDE_REQUIRED_CATEGORIES` must be explicitly included) and its enabler
/// is satisfied by `declared_deps`. For each sink, the first active matcher
/// that admits the sink and whose import provenance is satisfied is used.
///
/// Findings are returned sorted: source-backed findings first, then by path,
/// line, column and category.
#[must_use]
pub fn find_tainted_sinks(
graph: &ModuleGraph,
modules: &[ModuleInfo],
suppressions: &SuppressionContext<'_>,
line_offsets_by_file: &LineOffsetsMap<'_>,
category_filter: &CategoryFilter,
declared_deps: &rustc_hash::FxHashSet<String>,
root: &std::path::Path,
) -> (Vec<SecurityFinding>, TaintedSinkStats) {
let mut stats = TaintedSinkStats::default();
// Select the matchers that participate in this run.
let active: Vec<&Matcher> = catalogue()
.matchers()
.iter()
.filter(|m| {
let admitted = if is_include_required_category(&m.id) {
category_filter.explicitly_admits(&m.id)
} else {
category_filter.admits(&m.id)
};
admitted && m.enabler_satisfied(declared_deps)
})
.collect();
// Nothing to match against: return early with zeroed stats.
if active.is_empty() {
return (Vec::new(), stats);
}
let modules_by_id: FxHashMap<FileId, &ModuleInfo> =
modules.iter().map(|m| (m.file_id, m)).collect();
let mut findings = Vec::new();
for node in &graph.modules {
// Graph nodes without extracted module info are ignored.
let Some(module) = modules_by_id.get(&node.file_id) else {
continue;
};
// Counted before any of the file-level filters below, so skipped sinks in
// filtered-out files still contribute to the total.
stats.sinks_skipped_dynamic_callee += module.security_sinks_skipped as usize;
if module.security_sinks.is_empty() {
continue;
}
// Anchor checks use the root-relative path when the prefix can be stripped,
// otherwise the node path as-is.
let rel_path = node.path.strip_prefix(root).unwrap_or(&node.path);
if is_low_value_anchor(rel_path) {
continue;
}
let file_id = node.file_id;
if suppressions.is_file_suppressed(file_id, IssueKind::SecuritySink) {
continue;
}
let tainted_locals = source_tainted_locals(&module.tainted_bindings, declared_deps);
for sink in &module.security_sinks {
let source = sink_source(sink, &tainted_locals, declared_deps);
let source_id = source.map(|(id, title, _)| (id, title));
// First active matcher that both admits the sink and has its import
// provenance satisfied; sinks with no such matcher are skipped.
let Some(matcher) = active.iter().copied().find(|m| {
matcher_admits_sink(m, sink, source_id)
&& provenance_satisfied(m, module, &sink.callee_path)
}) else {
continue;
};
// A sanitizer recorded for this sink argument in the scope that fits the
// category drops the sink entirely (no other matcher is tried).
if is_html_sanitizable_category(&matcher.id) && sink_has_html_sanitizer(module, sink) {
continue;
}
if is_url_sanitizable_category(&matcher.id)
&& sink_has_sanitizer(module, sink, SanitizerScope::Url)
{
continue;
}
if is_path_sanitizable_category(&matcher.id)
&& sink_has_sanitizer(module, sink, SanitizerScope::Path)
{
continue;
}
let (line, col) =
byte_offset_to_line_col(line_offsets_by_file, file_id, sink.span_start);
// Line-level suppression is checked at the sink's own line.
if suppressions.is_suppressed(file_id, line, IssueKind::SecuritySink) {
continue;
}
// Raw text of the callee pattern that matched; empty when none matched.
let pattern = matcher
.first_matching_pattern(&sink.callee_path)
.map_or("", super::catalogue::CalleePattern::raw);
#[expect(
clippy::literal_string_with_formatting_args,
reason = "catalogue evidence placeholders, not format args"
)]
let base_evidence = matcher
.evidence_template
.replace("{callee}", &sink.callee_path)
.replace("{pattern}", pattern)
.replace(
"{regex}",
sink.regex_pattern.as_deref().unwrap_or("unknown"),
);
let source_backed = source.is_some();
// Source-backed findings get a sentence naming the source prepended.
let evidence = match source {
Some((_, title, _)) => format!(
"Untrusted source reaches this sink (an argument traces to {}). {base_evidence}",
title.to_ascii_lowercase()
),
None => base_evidence,
};
// Where the source was read: the binding's own position when a non-zero
// span is known, otherwise the sink position.
let source_read = source.map(|(_, _, span)| match span {
Some(offset) if offset != 0 => {
byte_offset_to_line_col(line_offsets_by_file, file_id, offset)
}
_ => (line, col),
});
// Network context is attached only for the network-exfiltration category.
let network = (matcher.id == NETWORK_EXFIL_CATEGORY).then(|| SecurityNetworkContext {
destination: sink.url_arg_literal.clone(),
});
let candidate = SecurityCandidate {
source_kind: source.map(|(id, _, _)| id.to_string()),
sink: SecurityCandidateSink {
path: node.path.clone(),
line,
col,
category: Some(matcher.id.clone()),
cwe: Some(matcher.cwe),
callee: Some(sink.callee_path.clone()),
},
boundary: SecurityCandidateBoundary::default(),
network,
};
let path = node.path.clone();
// `finding_id` is left empty here, severity is always `Low`, and the trace
// holds a single hop for the sink itself.
findings.push(SecurityFinding {
finding_id: String::new(),
kind: SecurityFindingKind::TaintedSink,
category: Some(matcher.id.clone()),
cwe: Some(matcher.cwe),
path: path.clone(),
line,
col,
evidence,
source_backed,
source_read,
severity: SecuritySeverity::Low,
trace: vec![TraceHop {
path,
line,
col,
role: TraceHopRole::Sink,
}],
actions: build_actions(),
dead_code: None,
reachability: None,
candidate,
taint_flow: None,
runtime: None,
attack_surface: None,
});
}
}
// Source-backed findings first (note the reversed operands), then a stable
// positional order.
findings.sort_by(|a, b| {
b.source_backed
.cmp(&a.source_backed)
.then(a.path.cmp(&b.path))
.then(a.line.cmp(&b.line))
.then(a.col.cmp(&b.col))
.then(a.category.cmp(&b.category))
});
(findings, stats)
}
/// Unit tests for the category filter, import-provenance matching, source
/// resolution and low-value-anchor detection.
#[cfg(test)]
mod tests {
    use super::*;
    use rustc_hash::FxHashSet;

    #[test]
    fn category_filter_default_admits_all() {
        let f = CategoryFilter::default();
        assert!(f.admits("dangerous-html"));
        assert!(f.admits("anything"));
    }

    #[test]
    fn category_filter_include_scopes() {
        let f = CategoryFilter::new(Some(vec!["dangerous-html".to_string()]), None);
        assert!(f.admits("dangerous-html"));
        assert!(!f.admits("sql-injection"));
    }

    #[test]
    fn category_filter_exclude_removes() {
        let f = CategoryFilter::new(None, Some(vec!["sql-injection".to_string()]));
        assert!(f.admits("dangerous-html"));
        assert!(!f.admits("sql-injection"));
    }

    /// `explicitly_admits` needs an include list that names the id; a default
    /// or exclude-only filter never satisfies it.
    #[test]
    fn category_filter_explicitly_admits_requires_include_entry() {
        assert!(!CategoryFilter::default().explicitly_admits(NETWORK_EXFIL_CATEGORY));

        let only_exclude = CategoryFilter::new(None, Some(vec!["sql-injection".to_string()]));
        assert!(!only_exclude.explicitly_admits(NETWORK_EXFIL_CATEGORY));

        let included = CategoryFilter::new(Some(vec![NETWORK_EXFIL_CATEGORY.to_string()]), None);
        assert!(included.explicitly_admits(NETWORK_EXFIL_CATEGORY));
        assert!(!included.explicitly_admits("sql-injection"));
    }

    /// An id named by both lists is rejected by both predicates.
    #[test]
    fn category_filter_exclude_overrides_include() {
        let f = CategoryFilter::new(
            Some(vec![NETWORK_EXFIL_CATEGORY.to_string()]),
            Some(vec![NETWORK_EXFIL_CATEGORY.to_string()]),
        );
        assert!(!f.admits(NETWORK_EXFIL_CATEGORY));
        assert!(!f.explicitly_admits(NETWORK_EXFIL_CATEGORY));
    }

    #[test]
    fn include_required_categories_cover_network_exfil() {
        assert!(is_include_required_category(NETWORK_EXFIL_CATEGORY));
        assert!(!is_include_required_category("dangerous-html"));
    }

    #[test]
    fn import_source_matches_node_prefix() {
        assert!(import_source_matches("node:child_process", "child_process"));
        assert!(import_source_matches("child_process", "node:child_process"));
        assert!(!import_source_matches("child_process", "node:vm"));
    }

    #[test]
    fn import_source_matches_package_subpath() {
        assert!(import_source_matches("mysql2/promise", "mysql2"));
        assert!(import_source_matches("@scope/pkg/subpath", "@scope/pkg"));
        assert!(!import_source_matches("mysql2-promise", "mysql2"));
    }

    /// The `node:` prefix and subpath rules compose; a spec longer than the
    /// source never matches.
    #[test]
    fn import_source_matches_node_prefixed_subpath() {
        assert!(import_source_matches("node:fs/promises", "fs"));
        assert!(!import_source_matches("fs", "fs/promises"));
    }

    /// Builds a tainted binding with a zero source span.
    fn binding(local: &str, source_path: &str) -> TaintedBinding {
        TaintedBinding {
            local: local.to_string(),
            source_path: source_path.to_string(),
            source_span_start: 0,
        }
    }

    /// Builds an `eval(...)` call sink whose first argument references the
    /// given identifiers and source paths.
    fn sink_with_idents_and_sources(idents: &[&str], source_paths: &[&str]) -> SinkSite {
        SinkSite {
            sink_shape: fallow_types::extract::SinkShape::Call,
            callee_path: "eval".to_string(),
            arg_index: 0,
            arg_is_non_literal: true,
            arg_kind: fallow_types::extract::SinkArgKind::Other,
            arg_literal: None,
            regex_pattern: None,
            object_properties: Vec::new(),
            object_property_keys: Vec::new(),
            object_property_keys_complete: false,
            arg_idents: idents.iter().map(|s| (*s).to_string()).collect(),
            arg_source_paths: source_paths.iter().map(|s| (*s).to_string()).collect(),
            span_start: 0,
            span_end: 1,
            url_arg_literal: None,
        }
    }

    /// Same as [`sink_with_idents_and_sources`] with no direct source paths.
    fn sink_with_idents(idents: &[&str]) -> SinkSite {
        sink_with_idents_and_sources(idents, &[])
    }

    #[test]
    fn source_tainted_locals_match_catalogue_sources() {
        let bindings = vec![binding("id", "req.query"), binding("cfg", "config.value")];
        let tainted = source_tainted_locals(&bindings, &FxHashSet::default());
        assert!(tainted.contains_key("id"));
        assert!(!tainted.contains_key("cfg"));
        assert_eq!(
            tainted.get("id").copied(),
            Some(("http-request-input", "HTTP request input", 0))
        );
    }

    #[test]
    fn sink_is_source_backed_when_arg_traces_to_source() {
        let bindings = vec![binding("id", "req.query")];
        let tainted = source_tainted_locals(&bindings, &FxHashSet::default());
        assert_eq!(
            sink_source(&sink_with_idents(&["id"]), &tainted, &FxHashSet::default()),
            Some(("http-request-input", "HTTP request input", Some(0)))
        );
        assert_eq!(
            sink_source(
                &sink_with_idents(&["other"]),
                &tainted,
                &FxHashSet::default(),
            ),
            None
        );
    }

    #[test]
    fn sink_not_source_backed_with_no_tainted_locals() {
        let tainted = source_tainted_locals(&[], &FxHashSet::default());
        assert_eq!(
            sink_source(&sink_with_idents(&["id"]), &tainted, &FxHashSet::default()),
            None
        );
    }

    #[test]
    fn sink_is_source_backed_when_arg_source_path_matches_catalogue() {
        let tainted = source_tainted_locals(&[], &FxHashSet::default());
        assert_eq!(
            sink_source(
                &sink_with_idents_and_sources(&["process"], &["process.env.SECRET", "process.env"]),
                &tainted,
                &FxHashSet::default(),
            )
            .map(|(_, title, _)| title),
            Some("Environment secret")
        );
    }

    #[test]
    fn direct_source_path_precedes_broader_tainted_local_source() {
        let bindings = vec![binding("req", "framework.request")];
        let mut deps = FxHashSet::default();
        deps.insert("express".to_string());
        let tainted = source_tainted_locals(&bindings, &deps);
        assert_eq!(
            sink_source(
                &sink_with_idents_and_sources(&["req"], &["req.body"]),
                &tainted,
                &deps,
            )
            .map(|(_, title, _)| title),
            Some("HTTP request input")
        );
    }

    #[test]
    fn low_value_anchor_excludes_tests_and_configs() {
        use std::path::Path;
        assert!(is_low_value_anchor(Path::new("src/foo.test.ts")));
        assert!(is_low_value_anchor(Path::new("src/foo.spec.ts")));
        assert!(is_low_value_anchor(Path::new("src/Button.stories.tsx")));
        assert!(is_low_value_anchor(Path::new("test/helper.ts")));
        assert!(is_low_value_anchor(Path::new(
            "packages/app/__tests__/x.ts"
        )));
        assert!(is_low_value_anchor(Path::new("vite.config.ts")));
        assert!(is_low_value_anchor(Path::new(
            "packages/app/vite.config.ts"
        )));
        assert!(is_low_value_anchor(Path::new("jest.config.js")));
        assert!(!is_low_value_anchor(Path::new("src/sink.ts")));
        assert!(!is_low_value_anchor(Path::new("src/db/query.ts")));
        assert!(!is_low_value_anchor(Path::new("src/app/app.config.ts")));
    }
}