#![allow(clippy::only_used_in_recursion, clippy::type_complexity)]
use crate::auth_analysis;
use crate::cfg::{Cfg, FileCfg, FuncSummaries, build_cfg, export_summaries};
use crate::cfg_analysis;
use crate::commands::scan::Diag;
use crate::errors::{NyxError, NyxResult};
use crate::evidence::{Evidence, FlowStep, SpanEvidence, StateEvidence};
use crate::labels::{
Cap, DataLabel, LangAnalysisRules, build_lang_rules, severity_for_source_kind,
};
use crate::patterns::{FindingCategory, Severity};
use crate::state;
use crate::summary::ssa_summary::SsaFuncSummary;
use crate::summary::{FuncSummary, GlobalSummaries};
use crate::symbol::{Lang, normalize_namespace};
use crate::utils::config::AnalysisMode;
use crate::utils::ext::lowercase_ext;
use crate::utils::{Config, query_cache};
use petgraph::graph::NodeIndex;
use std::borrow::Cow;
use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use std::ops::ControlFlow;
use std::path::Path;
use std::time::Instant;
use tree_sitter::{Language, QueryCursor, StreamingIterator};
// Per-thread tree-sitter state. Parser instances are kept thread-local so
// each worker thread reuses its own parser instead of constructing one per
// file or sharing one behind a lock.
thread_local! {
    static PARSER: RefCell<tree_sitter::Parser> = RefCell::new(tree_sitter::Parser::new());
    // Timeout budget (ms) of the most recent parse on this thread that was
    // aborted by the budget; `None` when the last parse completed normally.
    static LAST_PARSE_TIMEOUT_MS: std::cell::Cell<Option<u64>> = const {
        std::cell::Cell::new(None)
    };
}
/// Returns and clears the timeout budget (in milliseconds) recorded by the
/// most recent parse on this thread, if that parse was aborted by the budget.
pub fn take_last_parse_timeout_ms() -> Option<u64> {
    LAST_PARSE_TIMEOUT_MS.with(|cell| cell.take())
}
/// Builds a synthetic low-severity diagnostic reporting that `path` was
/// skipped because tree-sitter exceeded its parse budget of `timeout_ms`.
fn parse_timeout_diag(path: &Path, timeout_ms: u64) -> Diag {
    // Single source of truth for the human-readable text: the same string is
    // surfaced both as an evidence note and as the diagnostic message
    // (previously the format string was duplicated in both places).
    let message = format!(
        "tree-sitter parse exceeded timeout budget ({timeout_ms} ms); file skipped"
    );
    let mut evidence = Evidence::default();
    evidence.notes.push(message.clone());
    evidence
        .engine_notes
        .push(crate::engine_notes::EngineNote::ParseTimeout {
            // The engine note stores a u32; clamp rather than silently
            // truncate pathological budgets.
            timeout_ms: timeout_ms.min(u32::MAX as u64) as u32,
        });
    Diag {
        path: path.to_string_lossy().into_owned(),
        line: 0,
        col: 0,
        severity: Severity::Low,
        id: "engine.parse_timeout".into(),
        category: FindingCategory::Quality,
        path_validated: false,
        guard_kind: None,
        message: Some(message),
        labels: vec![],
        confidence: None,
        evidence: Some(evidence),
        rank_score: None,
        rank_reason: None,
        suppressed: false,
        suppression: None,
        rollup: None,
        finding_id: String::new(),
        alternative_finding_ids: Vec::new(),
    }
}
/// Current per-file tree-sitter parse budget in milliseconds, read from the
/// process-wide analysis options (0 disables the timeout — see `try_new`).
fn parse_timeout_ms() -> u64 {
    crate::utils::analysis_options::current().parse_timeout_ms
}
/// Test-only fault injection: panics when the `NYX_TEST_FORCE_PANIC_PATH`
/// environment variable is set to a non-empty substring of `path`, letting
/// integration tests exercise the scanner's panic-recovery path.
fn maybe_inject_test_panic(path: &Path) {
    let Ok(marker) = std::env::var("NYX_TEST_FORCE_PANIC_PATH") else {
        return;
    };
    if marker.is_empty() || !path.to_string_lossy().contains(marker.as_str()) {
        return;
    }
    panic!(
        "NYX_TEST_FORCE_PANIC_PATH injection: {} matches {:?}",
        path.display(),
        marker
    );
}
/// Maps a byte offset in the parsed file to its tree-sitter row/column,
/// falling back to the origin point when no node covers the offset.
fn byte_offset_to_point(tree: &tree_sitter::Tree, byte: usize) -> tree_sitter::Point {
    match tree.root_node().descendant_for_byte_range(byte, byte) {
        Some(node) => node.start_position(),
        None => tree_sitter::Point { row: 0, column: 0 },
    }
}
use crate::utils::snippet::line_snippet as extract_line_snippet;
/// Resolves a summary-relative file path for display: an empty `file_rel`
/// falls back to `fallback`, otherwise the path is anchored at `scan_root`
/// when one is known, or returned as-is when it is not.
fn resolve_file_rel(file_rel: &str, scan_root: Option<&Path>, fallback: &Path) -> String {
    if file_rel.is_empty() {
        return fallback.to_string_lossy().into_owned();
    }
    scan_root.map_or_else(
        || file_rel.to_string(),
        |root| root.join(file_rel).to_string_lossy().into_owned(),
    )
}
/// Converts a raw taint `Finding` into a reportable `Diag`.
///
/// Resolves byte offsets to line/column points, derives display names for the
/// source and sink, assembles flow-step evidence (including a synthetic sink
/// step when the finding carries a primary location in another file), and
/// chooses rule id, severity and message based on the sink's effective
/// capabilities.
///
/// Returns `None` when the finding's only claim was DATA_EXFIL but the
/// source's sensitivity does not support that claim.
fn build_taint_diag(
    finding: &crate::taint::Finding,
    cfg_graph: &crate::cfg::Cfg,
    tree: &tree_sitter::Tree,
    path: &Path,
    src: &[u8],
    scan_root: Option<&Path>,
) -> Option<Diag> {
    let call_site_byte = cfg_graph[finding.sink].classification_span().0;
    let call_site_point = byte_offset_to_point(tree, call_site_byte);
    let source_info = cfg_graph.node_weight(finding.source);
    // Best-effort source position: the first flow step, then the explicit
    // source span, then the source node itself; the sink call site is the
    // last resort.
    let source_byte = finding
        .flow_steps
        .first()
        .and_then(|s| {
            cfg_graph
                .node_weight(s.cfg_node)
                .map(|i| i.classification_span().0)
        })
        .or(finding.source_span)
        .or_else(|| source_info.map(|i| i.classification_span().0))
        .unwrap_or(call_site_byte);
    let source_point = byte_offset_to_point(tree, source_byte);
    // Human-readable source name, in preference order: the source node's
    // callee, the first step's variable, the defined taint variable, the
    // first used taint variable, then a placeholder.
    let source_callee = source_info
        .and_then(|i| i.call.callee.as_deref())
        .map(sanitize_desc)
        .or_else(|| {
            finding
                .flow_steps
                .first()
                .and_then(|s| s.var_name.as_deref())
                .map(sanitize_desc)
        })
        .or_else(|| {
            source_info
                .and_then(|i| i.taint.defines.as_deref())
                .map(sanitize_desc)
        })
        .or_else(|| {
            source_info
                .and_then(|i| i.taint.uses.first().map(String::as_str))
                .map(sanitize_desc)
        })
        .unwrap_or_else(|| "(unknown)".into());
    let call_site_callee = cfg_graph[finding.sink]
        .call
        .callee
        .as_deref()
        .map(sanitize_desc)
        .unwrap_or_else(|| "(unknown)".into());
    let kind_label = source_kind_label(finding.source_kind);
    let file_path_owned = path.to_string_lossy().into_owned();
    // Primary location: when the finding carries a (possibly cross-file)
    // primary sink location it takes precedence over the local call site.
    let (primary_path, primary_line, primary_col, primary_snippet_hint) =
        if let Some(loc) = finding.primary_location.as_ref() {
            let abs = resolve_file_rel(&loc.file_rel, scan_root, path);
            if abs != file_path_owned {
                tracing::debug!(
                    caller_file = %file_path_owned,
                    primary_file = %abs,
                    primary_line = loc.line,
                    "taint finding attributed to a cross-file primary sink location",
                );
            }
            let snippet = if loc.snippet.is_empty() {
                None
            } else {
                Some(loc.snippet.clone())
            };
            (abs, loc.line as usize, loc.col as usize, snippet)
        } else {
            (
                file_path_owned.clone(),
                call_site_point.row + 1,
                call_site_point.column + 1,
                None,
            )
        };
    let short_source = crate::fmt::shorten_callee(&source_callee);
    let short_call_site = crate::fmt::shorten_callee(&call_site_callee);
    let sink_display = primary_snippet_hint
        .as_deref()
        .map(crate::fmt::shorten_callee)
        .unwrap_or_else(|| short_call_site.clone());
    // With a primary location, render "local call → actual sink".
    let sink_label_display = if finding.primary_location.is_some() {
        format!("{call_site_callee} \u{2192} {sink_display}")
    } else {
        call_site_callee.clone()
    };
    let mut labels = vec![
        (
            "Source".into(),
            format!(
                "{source_callee} ({}:{})",
                source_point.row + 1,
                source_point.column + 1
            ),
        ),
        ("Sink".into(), sink_label_display),
    ];
    if let Some(guard) = finding.guard_kind {
        labels.push(("Path guard".into(), format!("{guard:?}")));
    }
    // Machine-readable notes carried on the evidence for ranking/debugging.
    let mut evidence_notes = Vec::new();
    if finding.path_validated {
        evidence_notes.push("path_validated".into());
    }
    evidence_notes.push(format!("source_kind:{:?}", finding.source_kind));
    evidence_notes.push(format!("hop_count:{}", finding.hop_count));
    evidence_notes.push(format!("cap_specificity:{}", finding.cap_specificity));
    if finding.uses_summary {
        evidence_notes.push("uses_summary".into());
    }
    // Materialize each in-file flow step with a resolved position and a
    // snippet of its line.
    let mut flow_steps: Vec<FlowStep> = finding
        .flow_steps
        .iter()
        .enumerate()
        .map(|(i, raw)| {
            let step_byte = cfg_graph[raw.cfg_node].classification_span().0;
            let point = byte_offset_to_point(tree, step_byte);
            let snippet = extract_line_snippet(src, step_byte);
            let callee = cfg_graph[raw.cfg_node].call.callee.clone();
            let function = cfg_graph[raw.cfg_node].ast.enclosing_func.clone();
            FlowStep {
                step: (i + 1) as u32,
                kind: raw.op_kind.clone(),
                file: file_path_owned.clone(),
                line: (point.row + 1) as u32,
                col: (point.column + 1) as u32,
                snippet,
                variable: raw.var_name.clone(),
                callee,
                function,
                is_cross_file: false,
            }
        })
        .collect();
    // A primary location means the true sink is a separate site: demote the
    // last local Sink step to a Call and append a synthetic Sink step at the
    // primary location.
    if let Some(loc) = finding.primary_location.as_ref() {
        if let Some(last) = flow_steps.last_mut()
            && matches!(last.kind, crate::evidence::FlowStepKind::Sink)
        {
            last.kind = crate::evidence::FlowStepKind::Call;
        }
        let is_cross_file = primary_path != file_path_owned;
        let synthetic_snippet = if loc.snippet.is_empty() {
            None
        } else {
            Some(loc.snippet.clone())
        };
        let next_step = (flow_steps.len() + 1) as u32;
        flow_steps.push(FlowStep {
            step: next_step,
            kind: crate::evidence::FlowStepKind::Sink,
            file: primary_path.clone(),
            line: loc.line,
            col: loc.col,
            snippet: synthetic_snippet,
            variable: None,
            callee: None,
            function: None,
            is_cross_file,
        });
    }
    let sink_evidence_snippet = primary_snippet_hint
        .clone()
        .or_else(|| Some(short_call_site.clone()));
    // Union of the Sink capability bits attached to the sink node's labels.
    let sink_caps_bits: u16 = cfg_graph[finding.sink]
        .taint
        .labels
        .iter()
        .filter_map(|l| match l {
            crate::labels::DataLabel::Sink(c) => Some(c.bits()),
            _ => None,
        })
        .fold(0u16, |acc, b| acc | b);
    let mut effective_caps = if finding.effective_sink_caps.is_empty() {
        crate::labels::Cap::from_bits_truncate(sink_caps_bits)
    } else {
        finding.effective_sink_caps
    };
    // DATA_EXFIL requires a sensitive source: drop the cap otherwise, and
    // drop the whole finding when DATA_EXFIL was its only claim.
    if effective_caps.contains(crate::labels::Cap::DATA_EXFIL)
        && finding.source_kind.sensitivity() < crate::labels::Sensitivity::Sensitive
    {
        effective_caps.remove(crate::labels::Cap::DATA_EXFIL);
        if finding.effective_sink_caps == crate::labels::Cap::DATA_EXFIL {
            return None;
        }
    }
    // Does any call on the path look like it binds data into an outgoing
    // request body? Checked case-insensitively on the sink chain and on
    // every flow step's callee.
    let flow_has_body_bind = {
        let body_bind_substrings = [
            ".body(",
            ".json(",
            ".form(",
            ".multipart(",
            ".bodyvalue(",
            ".setentity(",
            "bodypublishers",
            "body_string",
            "body_json",
            "body_bytes",
            "send_string",
            "send_json",
            "send_form",
            "postforobject",
            "postforentity",
            "patchforobject",
        ];
        let chain_lower = call_site_callee.to_ascii_lowercase();
        let in_sink = body_bind_substrings.iter().any(|m| chain_lower.contains(m));
        let in_steps = finding.flow_steps.iter().any(|step| {
            cfg_graph[step.cfg_node]
                .call
                .callee
                .as_deref()
                .map(|c| {
                    let lc = c.to_ascii_lowercase();
                    body_bind_substrings.iter().any(|m| lc.contains(m))
                })
                .unwrap_or(false)
        });
        in_sink || in_steps
    };
    let source_is_credential_bearing = matches!(
        finding.source_kind,
        crate::labels::SourceKind::Cookie | crate::labels::SourceKind::CaughtException
    );
    // Classify as data exfiltration only when DATA_EXFIL stands on its own:
    // not an auth finding, and any SSRF overlap must be backed by a sensitive
    // source that is actually bound into the request body (or is
    // credential-bearing).
    let is_data_exfil_rule = effective_caps.contains(crate::labels::Cap::DATA_EXFIL)
        && !effective_caps.contains(crate::labels::Cap::UNAUTHORIZED_ID)
        && (!effective_caps.contains(crate::labels::Cap::SSRF)
            || (finding.source_kind.sensitivity() >= crate::labels::Sensitivity::Sensitive
                && (flow_has_body_bind || source_is_credential_bearing)));
    let diag_id = if effective_caps.contains(crate::labels::Cap::UNAUTHORIZED_ID) {
        "rs.auth.missing_ownership_check.taint".to_string()
    } else if is_data_exfil_rule {
        format!(
            "taint-data-exfiltration (source {}:{})",
            source_point.row + 1,
            source_point.column + 1
        )
    } else {
        format!(
            "taint-unsanitised-flow (source {}:{})",
            source_point.row + 1,
            source_point.column + 1
        )
    };
    // For exfiltration findings, try to name the destination field receiving
    // the tainted value via the sink's gate filters; prefer the field whose
    // destination-use matches the last flow variable, else the first field.
    let data_exfil_field: Option<String> = if is_data_exfil_rule {
        let last_var = finding
            .flow_steps
            .last()
            .and_then(|s| s.var_name.as_deref());
        let filters = &cfg_graph[finding.sink].call.gate_filters;
        filters
            .iter()
            .find(|f| f.label_caps.contains(crate::labels::Cap::DATA_EXFIL))
            .and_then(|f| {
                if let (Some(uses), Some(var)) = (f.destination_uses.as_ref(), last_var)
                    && let Some(idx) = uses.iter().position(|u| u == var)
                {
                    return f.destination_fields.get(idx).cloned();
                }
                f.destination_fields.first().cloned()
            })
    } else {
        None
    };
    let severity = if is_data_exfil_rule {
        match finding.source_kind {
            crate::labels::SourceKind::Cookie | crate::labels::SourceKind::EnvironmentConfig => {
                crate::patterns::Severity::High
            }
            _ => crate::patterns::Severity::Medium,
        }
    } else {
        severity_for_source_kind(finding.source_kind)
    };
    let message = if is_data_exfil_rule {
        let suffix = data_exfil_field
            .as_deref()
            .map(|f| format!(" ({f} field)"))
            .unwrap_or_default();
        format!("sensitive data flows from {short_source} \u{2192} {sink_display}{suffix}")
    } else {
        format!("unsanitised {kind_label} flows from {short_source} \u{2192} {sink_display}")
    };
    let mut diag = Diag {
        path: primary_path.clone(),
        line: primary_line,
        col: primary_col,
        severity,
        id: diag_id,
        category: FindingCategory::Security,
        path_validated: finding.path_validated,
        guard_kind: finding.guard_kind.map(|k| format!("{k:?}")),
        message: Some(message),
        labels,
        confidence: None,
        evidence: Some(Evidence {
            source: Some(SpanEvidence {
                path: file_path_owned.clone(),
                line: (source_point.row + 1) as u32,
                col: (source_point.column + 1) as u32,
                kind: "source".into(),
                snippet: Some(short_source.clone()),
            }),
            sink: Some(SpanEvidence {
                path: primary_path.clone(),
                line: primary_line as u32,
                col: primary_col as u32,
                kind: "sink".into(),
                snippet: sink_evidence_snippet,
            }),
            guards: finding
                .guard_kind
                .map(|g| {
                    vec![SpanEvidence {
                        path: primary_path.clone(),
                        line: primary_line as u32,
                        col: 0,
                        kind: "guard".into(),
                        snippet: Some(format!("{g:?}")),
                    }]
                })
                .unwrap_or_default(),
            sanitizers: vec![],
            state: None,
            notes: evidence_notes,
            source_kind: Some(finding.source_kind),
            hop_count: Some(finding.hop_count),
            uses_summary: finding.uses_summary,
            cap_specificity: Some(finding.cap_specificity),
            flow_steps,
            symbolic: finding.symbolic.clone(),
            sink_caps: sink_caps_bits,
            engine_notes: finding.engine_notes.clone(),
            data_exfil_field,
            ..Default::default()
        }),
        rank_score: None,
        rank_reason: None,
        suppressed: false,
        suppression: None,
        rollup: None,
        finding_id: finding.finding_id.clone(),
        alternative_finding_ids: finding.alternative_finding_ids.to_vec(),
    };
    // Explanation and confidence limiters are derived from the fully-built
    // diag, then folded back into its evidence.
    let explanation = crate::evidence::generate_explanation(&diag);
    let limiters = crate::evidence::compute_confidence_limiters(&diag);
    if let Some(ref mut ev) = diag.evidence {
        ev.explanation = explanation;
        ev.confidence_limiters = limiters;
    }
    Some(diag)
}
/// Returns the language slug (e.g. "rust", "php") associated with `path`'s
/// extension, or `None` for unsupported extensions.
pub fn lang_slug_for_path(path: &Path) -> Option<&'static str> {
    let (_lang, slug) = lang_for_path(path)?;
    Some(slug)
}
/// Maps a file extension to its tree-sitter grammar and nyx language slug.
/// `ts` and `tsx` share the "typescript" slug but use different grammars,
/// while `js` and `jsx` share both grammar and slug.
fn lang_for_path(path: &Path) -> Option<(Language, &'static str)> {
    let pair = match lowercase_ext(path)? {
        "rs" => (Language::from(tree_sitter_rust::LANGUAGE), "rust"),
        "c" => (Language::from(tree_sitter_c::LANGUAGE), "c"),
        "cpp" | "cc" | "cxx" | "c++" | "hpp" | "hxx" | "hh" | "h++" => {
            (Language::from(tree_sitter_cpp::LANGUAGE), "cpp")
        }
        "java" => (Language::from(tree_sitter_java::LANGUAGE), "java"),
        "go" => (Language::from(tree_sitter_go::LANGUAGE), "go"),
        "php" => (Language::from(tree_sitter_php::LANGUAGE_PHP), "php"),
        "py" => (Language::from(tree_sitter_python::LANGUAGE), "python"),
        "ts" => (
            Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
            "typescript",
        ),
        "tsx" => (
            Language::from(tree_sitter_typescript::LANGUAGE_TSX),
            "typescript",
        ),
        "js" | "jsx" => (
            Language::from(tree_sitter_javascript::LANGUAGE),
            "javascript",
        ),
        "rb" => (Language::from(tree_sitter_ruby::LANGUAGE), "ruby"),
        _ => return None,
    };
    Some(pair)
}
/// Heuristic binary-content check: true when more than 1% of the bytes are
/// NUL. The `max(1)` guards against division by zero on empty input.
fn is_binary(bytes: &[u8]) -> bool {
    let nul_count = bytes.iter().filter(|&&b| b == 0).count();
    nul_count * 100 / bytes.len().max(1) > 1
}
/// Heuristic JS/TS test-file detection: matches `*.test.*` / `*.spec.*`
/// file names and any path containing a `__tests__` directory component.
fn is_test_file(path: &Path) -> bool {
    static TEST_SUFFIXES: &[&str] = &[
        ".test.js",
        ".test.ts",
        ".test.jsx",
        ".test.tsx",
        ".spec.js",
        ".spec.ts",
        ".spec.jsx",
        ".spec.tsx",
    ];
    let suffix_match = path
        .file_name()
        .and_then(|n| n.to_str())
        .is_some_and(|name| TEST_SUFFIXES.iter().any(|suffix| name.ends_with(suffix)));
    suffix_match
        || path
            .components()
            .any(|c| matches!(c, std::path::Component::Normal(n) if n == "__tests__"))
}
/// Rule ids that are noisy inside test files and therefore skipped there.
fn is_test_suppressible_pattern(id: &str) -> bool {
    [
        ".secrets.hardcoded_secret",
        ".crypto.math_random",
        ".transport.fetch_http",
    ]
    .iter()
    .any(|suffix| id.ends_with(suffix))
}
/// Heuristic non-production path detection: known support/test directory
/// components, `build.rs`, and minified JS bundles. Findings in these paths
/// get their severity downgraded (see `finalize_diags`).
fn is_nonprod_path(path: &Path) -> bool {
    static NONPROD_DIRS: &[&str] = &[
        "tests",
        "test",
        "__tests__",
        "benches",
        "benchmarks",
        "examples",
        "build",
        "scripts",
        "docs",
        "js_tests",
        "fixtures",
        "vendor",
    ];
    static NONPROD_FILES: &[&str] = &["build.rs"];
    let name_hit = path
        .file_name()
        .and_then(|n| n.to_str())
        .is_some_and(|name| NONPROD_FILES.contains(&name) || name.ends_with(".min.js"));
    name_hit
        || path.components().any(|component| {
            matches!(
                component,
                std::path::Component::Normal(c)
                    if c.to_str().is_some_and(|s| NONPROD_DIRS.contains(&s))
            )
        })
}
/// Sanitizes a source/sink description for display by delegating to the
/// shared snippet normalizer.
fn sanitize_desc(s: &str) -> String {
    crate::fmt::normalize_snippet(s)
}
/// Human-readable phrase for a taint source kind, used in diagnostic
/// messages ("unsanitised {label} flows from ...").
fn source_kind_label(sk: crate::labels::SourceKind) -> &'static str {
    use crate::labels::SourceKind;
    match sk {
        SourceKind::UserInput => "user input",
        SourceKind::Cookie => "cookie value",
        SourceKind::Header => "request header",
        SourceKind::EnvironmentConfig => "environment config",
        SourceKind::FileSystem => "file system data",
        SourceKind::Database => "database result",
        SourceKind::CaughtException => "caught exception",
        SourceKind::Unknown => "tainted data",
    }
}
/// Drops a finding one severity step (High → Medium; Medium and Low both
/// map to Low); applied to findings in non-production paths.
fn downgrade_severity(s: Severity) -> Severity {
    match s {
        Severity::High => Severity::Medium,
        Severity::Medium | Severity::Low => Severity::Low,
    }
}
/// A successfully parsed source file together with the metadata downstream
/// analyses need.
struct ParsedSource<'a> {
    tree: tree_sitter::Tree,
    ts_lang: Language,
    /// nyx language slug, e.g. "rust" or "typescript".
    lang_slug: &'static str,
    bytes: &'a [u8],
    path: &'a Path,
    /// Cached lossy string rendering of `path`, reused by several analyses.
    file_path_str: Cow<'a, str>,
}
impl<'a> ParsedSource<'a> {
    /// Parses `bytes` with the grammar selected by `path`'s extension.
    ///
    /// Returns `Ok(None)` for binary content, unsupported extensions, or when
    /// parsing exceeds the configured timeout budget (in which case the
    /// budget is recorded in `LAST_PARSE_TIMEOUT_MS` for the caller to
    /// report). Returns `Err` only when tree-sitter fails outright.
    fn try_new(bytes: &'a [u8], path: &'a Path) -> NyxResult<Option<Self>> {
        // Clear any stale timeout marker left by a previous file on this
        // thread.
        LAST_PARSE_TIMEOUT_MS.with(|c| c.set(None));
        if is_binary(bytes) {
            return Ok(None);
        }
        let Some((ts_lang, lang_slug)) = lang_for_path(path) else {
            return Ok(None);
        };
        let timeout_ms = parse_timeout_ms();
        let start = Instant::now();
        let mut timed_out = false;
        let parsed = PARSER.with(|cell| -> NyxResult<Option<tree_sitter::Tree>> {
            let mut parser = cell.borrow_mut();
            parser.set_language(&ts_lang)?;
            // A zero budget disables the timeout: plain parse, no callbacks.
            if timeout_ms == 0 {
                return Ok(parser.parse(bytes, None));
            }
            let len = bytes.len();
            // Chunked-input callback: hands tree-sitter the remainder of the
            // buffer starting at offset `i`.
            let mut input = |i: usize, _pt: tree_sitter::Point| -> &[u8] {
                if i < len { &bytes[i..] } else { &[] }
            };
            // Progress callback aborts the parse once the budget is spent
            // and remembers that the abort was timeout-driven.
            let mut progress = |_state: &tree_sitter::ParseState| -> ControlFlow<()> {
                if start.elapsed().as_millis() as u64 >= timeout_ms {
                    timed_out = true;
                    ControlFlow::Break(())
                } else {
                    ControlFlow::Continue(())
                }
            };
            let options = tree_sitter::ParseOptions::new().progress_callback(&mut progress);
            Ok(parser.parse_with_options(&mut input, None, Some(options)))
        })?;
        let Some(tree) = parsed else {
            if timed_out {
                tracing::warn!(
                    file = %path.display(),
                    timeout_ms,
                    "tree-sitter parse timed out; skipping file",
                );
                LAST_PARSE_TIMEOUT_MS.with(|c| c.set(Some(timeout_ms)));
                return Ok(None);
            }
            // No tree and no timeout: genuine parser failure.
            return Err(NyxError::Other("tree-sitter failed".into()));
        };
        let file_path_str = path.to_string_lossy();
        Ok(Some(Self {
            tree,
            ts_lang,
            lang_slug,
            bytes,
            path,
            file_path_str,
        }))
    }
    /// Runs the compiled AST pattern queries for this file's language and
    /// converts matches into diagnostics, applying severity gating and a set
    /// of per-language false-positive suppressions.
    fn run_ast_queries(&self, cfg: &Config) -> Vec<Diag> {
        let root = self.tree.root_node();
        let compiled = query_cache::for_lang(self.lang_slug, self.ts_lang.clone());
        let mut cursor = QueryCursor::new();
        let mut out = Vec::new();
        let in_test_file = is_test_file(self.path);
        for cq in compiled.iter() {
            if cq.meta.severity > cfg.scanner.min_severity {
                continue;
            }
            // Noisy rules are skipped entirely inside test files.
            if in_test_file && is_test_suppressible_pattern(cq.meta.id) {
                continue;
            }
            let mut matches = cursor.matches(&cq.query, root, self.bytes);
            while let Some(m) = matches.next() {
                // Only the capture with index 0 is used as the report node.
                if let Some(cap) = m.captures.iter().find(|c| c.index == 0) {
                    // Skip security findings on calls whose arguments are all
                    // literals.
                    if cq.meta.category.finding_category() == FindingCategory::Security
                        && is_call_all_args_literal(cap.node, self.bytes)
                    {
                        continue;
                    }
                    if cq.meta.id == "php.path.include_variable"
                        && self.lang_slug == "php"
                        && is_php_include_param_passthrough(cap.node, self.bytes)
                    {
                        continue;
                    }
                    if cq.meta.id == "php.deser.unserialize"
                        && self.lang_slug == "php"
                        && is_php_unserialize_allowed_classes_restricted(cap.node, self.bytes)
                    {
                        continue;
                    }
                    if (self.lang_slug == "c" || self.lang_slug == "cpp")
                        && is_c_buffer_call_literal_safe(cq.meta.id, cap.node, self.bytes)
                    {
                        continue;
                    }
                    if self.lang_slug == "cpp"
                        && is_cpp_cast_target_type_safe(cq.meta.id, cap.node, self.bytes)
                    {
                        continue;
                    }
                    if (cq.meta.id == "php.crypto.md5" || cq.meta.id == "php.crypto.sha1")
                        && self.lang_slug == "php"
                        && is_php_weak_hash_non_crypto_use(cap.node, self.bytes)
                    {
                        continue;
                    }
                    let point = cap.node.start_position();
                    out.push(Diag {
                        path: self.path.to_string_lossy().into_owned(),
                        line: point.row + 1,
                        col: point.column + 1,
                        severity: cq.meta.severity,
                        id: cq.meta.id.to_owned(),
                        category: cq.meta.category.finding_category(),
                        path_validated: false,
                        guard_kind: None,
                        message: Some(cq.meta.description.to_owned()),
                        labels: vec![],
                        confidence: Some(cq.meta.confidence),
                        evidence: Some(Evidence {
                            source: None,
                            sink: Some(SpanEvidence {
                                path: self.path.to_string_lossy().into_owned(),
                                line: (point.row + 1) as u32,
                                col: (point.column + 1) as u32,
                                kind: "sink".into(),
                                snippet: None,
                            }),
                            guards: vec![],
                            sanitizers: vec![],
                            state: None,
                            notes: vec![],
                            ..Default::default()
                        }),
                        rank_score: None,
                        rank_reason: None,
                        suppressed: false,
                        suppression: None,
                        rollup: None,
                        finding_id: String::new(),
                        alternative_finding_ids: Vec::new(),
                    });
                }
            }
        }
        out
    }
    /// Sorts diagnostics into a stable order, drops exact (line, col, id)
    /// duplicates, and downgrades severities for non-production paths unless
    /// the scanner is configured to include them at full strength.
    fn finalize_diags(&self, out: &mut Vec<Diag>, cfg: &Config) {
        out.sort_by(|a, b| {
            (a.line, a.col, &a.id, a.severity).cmp(&(b.line, b.col, &b.id, b.severity))
        });
        out.dedup_by(|a, b| a.line == b.line && a.col == b.col && a.id == b.id);
        if !cfg.scanner.include_nonprod && is_nonprod_path(self.path) {
            for d in out.iter_mut() {
                d.severity = downgrade_severity(d.severity);
            }
        }
    }
}
/// A parsed file together with its control-flow graph and the language
/// analysis rules used to build it.
struct ParsedFile<'a> {
    source: ParsedSource<'a>,
    file_cfg: FileCfg,
    lang_rules: LangAnalysisRules,
    // True when `lang_rules` carries any labels/terminators/event handlers;
    // cached so `rules_ref` can cheaply return `None` for empty rule sets.
    has_lang_rules: bool,
}
impl<'a> ParsedFile<'a> {
    /// Builds the CFG bundle for a parsed file.
    ///
    /// Language rules are augmented with any frameworks detected inside the
    /// file itself that the project-level rules missed, so framework-specific
    /// labels still apply when a framework is only referenced locally.
    fn from_source(source: ParsedSource<'a>, cfg: &Config) -> Self {
        let mut lang_rules = build_lang_rules(cfg, source.lang_slug);
        let in_file_fws =
            crate::utils::project::detect_in_file_frameworks(source.bytes, source.lang_slug);
        let missing: Vec<_> = in_file_fws
            .into_iter()
            .filter(|fw| !lang_rules.frameworks.contains(fw))
            .collect();
        if !missing.is_empty() {
            let aug_ctx = crate::utils::project::FrameworkContext {
                frameworks: missing.clone(),
                inspected_langs: std::collections::HashSet::new(),
            };
            lang_rules
                .extra_labels
                .extend(crate::labels::framework_rules_for_lang_pub(
                    source.lang_slug,
                    &aug_ctx,
                ));
            lang_rules.frameworks.extend(missing);
        }
        let has_lang_rules = !lang_rules.extra_labels.is_empty()
            || !lang_rules.terminators.is_empty()
            || !lang_rules.event_handlers.is_empty();
        let rules_ref = if has_lang_rules {
            Some(&lang_rules)
        } else {
            None
        };
        let file_cfg = build_cfg(
            &source.tree,
            source.bytes,
            source.lang_slug,
            &source.file_path_str,
            rules_ref,
        );
        Self {
            source,
            file_cfg,
            lang_rules,
            has_lang_rules,
        }
    }
    /// Graph of the file's top-level body.
    fn cfg_graph(&self) -> &Cfg {
        &self.file_cfg.toplevel().graph
    }
    /// Entry node of the top-level body.
    #[allow(dead_code)]
    fn entry(&self) -> NodeIndex {
        self.file_cfg.toplevel().entry
    }
    /// Function summaries computed from this file alone.
    fn local_summaries(&self) -> &FuncSummaries {
        &self.file_cfg.summaries
    }
    /// Language rules, or `None` when the rule set is effectively empty.
    fn rules_ref(&self) -> Option<&LangAnalysisRules> {
        if self.has_lang_rules {
            Some(&self.lang_rules)
        } else {
            None
        }
    }
    /// Exports this file's function summaries with no scan-root context.
    fn export_summaries(&self) -> Vec<FuncSummary> {
        self.export_summaries_with_root(None)
    }
    /// Exports this file's function summaries. `scan_root` lets Rust files
    /// derive a module path relative to the scan root; hierarchy edges and
    /// Rust use/wildcard maps are replicated onto every summary.
    fn export_summaries_with_root(&self, scan_root: Option<&Path>) -> Vec<FuncSummary> {
        let mut out = export_summaries(
            self.local_summaries(),
            &self.source.file_path_str,
            self.source.lang_slug,
        );
        if !self.file_cfg.hierarchy_edges.is_empty() {
            let edges = self.file_cfg.hierarchy_edges.clone();
            for s in &mut out {
                s.hierarchy_edges = edges.clone();
            }
        }
        if self.source.lang_slug == "rust" && !out.is_empty() {
            let module_path = crate::rust_resolve::derive_module_path(self.source.path, scan_root);
            let use_map =
                crate::rust_resolve::parse_rust_use_map(self.source.bytes, &self.source.tree);
            // Empty maps are stored as `None` to keep summaries compact.
            let aliases = if use_map.aliases.is_empty() {
                None
            } else {
                Some(use_map.aliases)
            };
            let wildcards = if use_map.wildcards.is_empty() {
                None
            } else {
                Some(use_map.wildcards)
            };
            for s in &mut out {
                s.module_path = module_path.clone();
                s.rust_use_map = aliases.clone();
                s.rust_wildcards = wildcards.clone();
            }
        }
        out
    }
    /// Lowers this file's functions to SSA summaries and callee bodies for
    /// cross-file taint transfer, keyed by `FuncKey`.
    fn extract_ssa_artifacts(
        &self,
        global_summaries: Option<&GlobalSummaries>,
        scan_root: Option<&Path>,
    ) -> (
        Vec<(crate::symbol::FuncKey, SsaFuncSummary)>,
        Vec<(
            crate::symbol::FuncKey,
            crate::taint::ssa_transfer::CalleeSsaBody,
        )>,
    ) {
        let caller_lang = Lang::from_slug(self.source.lang_slug).unwrap_or(Lang::Rust);
        let scan_root_str = scan_root.map(|p| p.to_string_lossy());
        let namespace = normalize_namespace(&self.source.file_path_str, scan_root_str.as_deref());
        // The locator lets the extraction resolve sink sites back to tree
        // positions inside this file.
        let locator = crate::summary::SinkSiteLocator {
            tree: &self.source.tree,
            bytes: self.source.bytes,
            file_rel: &namespace,
        };
        let (summaries, bodies) = crate::taint::extract_ssa_artifacts_from_file_cfg(
            &self.file_cfg,
            caller_lang,
            &namespace,
            self.local_summaries(),
            global_summaries,
            Some(&locator),
        );
        (summaries.into_iter().collect(), bodies)
    }
    /// Lowers all of this file's function bodies to SSA for the fused
    /// analysis path (no sink-site locator is supplied here).
    fn lower_ssa_for_fused(
        &self,
        global_summaries: Option<&GlobalSummaries>,
        scan_root: Option<&Path>,
    ) -> (
        std::collections::HashMap<
            crate::symbol::FuncKey,
            crate::summary::ssa_summary::SsaFuncSummary,
        >,
        std::collections::HashMap<
            crate::symbol::FuncKey,
            crate::taint::ssa_transfer::CalleeSsaBody,
        >,
    ) {
        let caller_lang = Lang::from_slug(self.source.lang_slug).unwrap_or(Lang::Rust);
        let scan_root_str = scan_root.map(|p| p.to_string_lossy());
        let namespace = normalize_namespace(&self.source.file_path_str, scan_root_str.as_deref());
        crate::taint::lower_all_functions_from_bodies(
            &self.file_cfg,
            caller_lang,
            &namespace,
            self.local_summaries(),
            global_summaries,
            None,
        )
    }
    /// Runs taint + CFG + state analyses for this file, lowering SSA first.
    /// Resets the per-thread suppression/validation span registries so state
    /// from a previously analysed file cannot leak into this one.
    fn run_cfg_analyses(
        &self,
        cfg: &Config,
        global_summaries: Option<&GlobalSummaries>,
        scan_root: Option<&Path>,
    ) -> Vec<Diag> {
        crate::taint::ssa_transfer::reset_path_safe_suppressed_spans();
        crate::taint::ssa_transfer::reset_all_validated_spans();
        let (ssa_summaries, callee_bodies) = self.lower_ssa_for_fused(global_summaries, scan_root);
        self.run_cfg_analyses_with_lowered(
            cfg,
            global_summaries,
            scan_root,
            &ssa_summaries,
            &callee_bodies,
        )
    }
    /// Core per-file analysis pass over already-lowered SSA artifacts:
    /// 1. interprocedural taint analysis (findings converted to diags, with
    ///    a redirect-guard suppression),
    /// 2. per-body CFG analyses,
    /// 3. optional state-machine analysis, whose findings replace overlapping
    ///    `cfg-resource-leak` / `cfg-auth-gap` diags.
    #[allow(clippy::too_many_arguments)]
    fn run_cfg_analyses_with_lowered(
        &self,
        cfg: &Config,
        global_summaries: Option<&GlobalSummaries>,
        scan_root: Option<&Path>,
        ssa_summaries: &std::collections::HashMap<
            crate::symbol::FuncKey,
            crate::summary::ssa_summary::SsaFuncSummary,
        >,
        callee_bodies: &std::collections::HashMap<
            crate::symbol::FuncKey,
            crate::taint::ssa_transfer::CalleeSsaBody,
        >,
    ) -> Vec<Diag> {
        let mut out = Vec::new();
        let caller_lang = Lang::from_slug(self.source.lang_slug).unwrap_or(Lang::Rust);
        tracing::debug!("Running taint analysis on: {}", self.source.path.display());
        tracing::debug!("Func summaries: {:?}", self.local_summaries());
        let scan_root_str = scan_root.map(|p| p.to_string_lossy());
        let namespace = normalize_namespace(&self.source.file_path_str, scan_root_str.as_deref());
        let extra = if self.lang_rules.extra_labels.is_empty() {
            None
        } else {
            Some(self.lang_rules.extra_labels.as_slice())
        };
        let taint_results = crate::taint::analyse_file_with_lowered(
            &self.file_cfg,
            self.local_summaries(),
            global_summaries,
            caller_lang,
            &namespace,
            &[],
            extra,
            ssa_summaries,
            callee_bodies,
        );
        // Spans the taint pass flagged as path-safe; handed to the state
        // analysis below so it can skip them as well.
        let path_safe_suppressed_spans =
            crate::taint::ssa_transfer::take_path_safe_suppressed_spans();
        for finding in &taint_results {
            let body_cfg = &self.file_cfg.body(finding.body_id).graph;
            let sink_info = &body_cfg[finding.sink];
            let sink_has_ssrf = sink_info
                .taint
                .labels
                .iter()
                .any(|l| matches!(l, DataLabel::Sink(c) if c.contains(Cap::SSRF)));
            // Suppress SSRF findings on `redirect` calls that the guard
            // helper recognizes as having a fixed path prefix.
            if sink_has_ssrf
                && let Some(ref callee) = sink_info.call.callee
                && (callee.ends_with("redirect") || callee.ends_with("Redirect"))
                && crate::cfg_analysis::guards::has_redirect_path_prefix(
                    self.source.bytes,
                    sink_info.ast.span,
                )
            {
                continue;
            }
            if let Some(diag) = build_taint_diag(
                finding,
                body_cfg,
                &self.source.tree,
                self.source.path,
                self.source.bytes,
                scan_root,
            ) {
                out.push(diag);
            }
        }
        let taint_active = global_summaries.is_some() || !taint_results.is_empty();
        let closure_released_per_body =
            state::collect_closure_released_var_names(&self.file_cfg.bodies, caller_lang);
        let empty_set: std::collections::HashSet<String> = std::collections::HashSet::new();
        for body in &self.file_cfg.bodies {
            // CFG analyses only see the taint findings belonging to this body.
            let body_taint: Vec<_> = taint_results
                .iter()
                .filter(|f| f.body_id == body.meta.id)
                .cloned()
                .collect();
            let body_const_facts = cfg_analysis::build_body_const_facts(body, caller_lang);
            let cfg_ctx = cfg_analysis::AnalysisContext {
                cfg: &body.graph,
                entry: body.entry,
                lang: caller_lang,
                file_path: &self.source.file_path_str,
                source_bytes: self.source.bytes,
                func_summaries: self.local_summaries(),
                global_summaries,
                ssa_summaries: Some(ssa_summaries),
                taint_findings: &body_taint,
                analysis_rules: self.rules_ref(),
                taint_active,
                body_const_facts: body_const_facts.as_ref(),
                type_facts: body_const_facts.as_ref().map(|f| &f.type_facts),
                auth_decorators: &body.meta.auth_decorators,
                closure_released_var_names: Some(
                    closure_released_per_body
                        .get(&body.meta.id)
                        .unwrap_or(&empty_set),
                ),
            };
            for cf in cfg_analysis::run_all(&cfg_ctx) {
                let point = byte_offset_to_point(&self.source.tree, cf.span.0);
                let cfg_confidence = Some(match cf.confidence {
                    cfg_analysis::Confidence::High => crate::evidence::Confidence::High,
                    cfg_analysis::Confidence::Medium => crate::evidence::Confidence::Medium,
                    cfg_analysis::Confidence::Low => crate::evidence::Confidence::Low,
                });
                out.push(Diag {
                    path: self.source.path.to_string_lossy().into_owned(),
                    line: point.row + 1,
                    col: point.column + 1,
                    severity: cf.severity,
                    id: cf.rule_id,
                    category: FindingCategory::Security,
                    path_validated: false,
                    guard_kind: None,
                    message: Some(cf.message),
                    labels: vec![],
                    confidence: cfg_confidence,
                    evidence: Some(Evidence {
                        source: None,
                        sink: Some(SpanEvidence {
                            path: self.source.path.to_string_lossy().into_owned(),
                            line: (point.row + 1) as u32,
                            col: (point.column + 1) as u32,
                            kind: "sink".into(),
                            snippet: None,
                        }),
                        guards: vec![],
                        sanitizers: vec![],
                        state: None,
                        notes: vec![],
                        ..Default::default()
                    }),
                    rank_score: None,
                    rank_reason: None,
                    suppressed: false,
                    suppression: None,
                    rollup: None,
                    finding_id: String::new(),
                    alternative_finding_ids: Vec::new(),
                });
            }
        }
        if cfg.scanner.enable_state_analysis {
            let resource_method_summaries =
                state::build_resource_method_summaries(&self.file_cfg.bodies, caller_lang);
            let mut all_state_findings = Vec::new();
            for body in &self.file_cfg.bodies {
                let body_pointer_hints = cfg_analysis::build_body_const_facts(body, caller_lang)
                    .as_ref()
                    .and_then(|f| {
                        f.pointer_facts
                            .as_ref()
                            .map(|pf| pf.name_proxy_hints(&f.ssa))
                    });
                let state_findings = state::run_state_analysis(
                    &body.graph,
                    body.entry,
                    caller_lang,
                    self.source.bytes,
                    self.local_summaries(),
                    global_summaries,
                    cfg.scanner.enable_auth_analysis,
                    &resource_method_summaries,
                    &body.meta.auth_decorators,
                    &path_safe_suppressed_spans,
                    body_pointer_hints.as_ref(),
                    Some(
                        closure_released_per_body
                            .get(&body.meta.id)
                            .unwrap_or(&empty_set),
                    ),
                );
                for sf in &state_findings {
                    let point = byte_offset_to_point(&self.source.tree, sf.span.0);
                    out.push(Diag {
                        path: self.source.path.to_string_lossy().into_owned(),
                        line: point.row + 1,
                        col: point.column + 1,
                        severity: sf.severity,
                        id: sf.rule_id.clone(),
                        category: FindingCategory::Security,
                        path_validated: false,
                        guard_kind: None,
                        message: Some(sf.message.clone()),
                        labels: vec![],
                        confidence: None,
                        evidence: Some(Evidence {
                            source: None,
                            sink: Some(SpanEvidence {
                                path: self.source.path.to_string_lossy().into_owned(),
                                line: (point.row + 1) as u32,
                                col: (point.column + 1) as u32,
                                kind: "sink".into(),
                                snippet: None,
                            }),
                            guards: vec![],
                            sanitizers: vec![],
                            state: Some(StateEvidence {
                                machine: sf.machine.into(),
                                subject: sf.subject.clone(),
                                from_state: sf.from_state.into(),
                                to_state: sf.to_state.into(),
                            }),
                            notes: vec![],
                            ..Default::default()
                        }),
                        rank_score: None,
                        rank_reason: None,
                        suppressed: false,
                        suppression: None,
                        rollup: None,
                        finding_id: String::new(),
                        alternative_finding_ids: Vec::new(),
                    });
                }
                all_state_findings.extend(state_findings);
            }
            // Drop generic cfg-resource-leak / cfg-auth-gap diags on lines
            // where a state finding landed; the state finding takes over.
            let state_lines: std::collections::HashSet<usize> = all_state_findings
                .iter()
                .map(|sf| byte_offset_to_point(&self.source.tree, sf.span.0).row + 1)
                .collect();
            if !all_state_findings.is_empty() {
                out.retain(|d| {
                    !((d.id == "cfg-resource-leak" || d.id == "cfg-auth-gap")
                        && state_lines.contains(&d.line))
                });
            }
        }
        out
    }
    /// Runs the authorization analysis for this file, seeding it with the
    /// file-wide variable-type map collected from SSA facts.
    fn run_auth_analyses(
        &self,
        cfg: &Config,
        global_summaries: Option<&GlobalSummaries>,
        scan_root: Option<&Path>,
    ) -> Vec<Diag> {
        let var_types = self.collect_file_var_types();
        auth_analysis::run_auth_analysis(
            &self.source.tree,
            self.source.bytes,
            self.source.lang_slug,
            self.source.path,
            cfg,
            var_types.as_ref(),
            global_summaries,
            scan_root,
        )
    }
    /// Collects a file-wide map of variable name → inferred type from the
    /// per-body SSA type facts. A name seen with two different concrete
    /// types anywhere in the file is dropped entirely to avoid attributing
    /// the wrong type. Returns `None` when nothing usable was inferred.
    fn collect_file_var_types(&self) -> Option<auth_analysis::VarTypes> {
        let caller_lang = Lang::from_slug(self.source.lang_slug).unwrap_or(Lang::Rust);
        let mut merged: std::collections::HashMap<String, crate::ssa::type_facts::TypeKind> =
            std::collections::HashMap::new();
        // Names already found to be type-ambiguous; never re-added.
        let mut dropped: std::collections::HashSet<String> = std::collections::HashSet::new();
        for body in &self.file_cfg.bodies {
            let Some(facts) = cfg_analysis::build_body_const_facts(body, caller_lang) else {
                continue;
            };
            for (idx, def) in facts.ssa.value_defs.iter().enumerate() {
                let Some(name) = def.var_name.as_ref() else {
                    continue;
                };
                let Some(ty) = facts.type_facts.get_type(crate::ssa::SsaValue(idx as u32)) else {
                    continue;
                };
                if matches!(ty, crate::ssa::type_facts::TypeKind::Unknown) {
                    continue;
                }
                if dropped.contains(name) {
                    continue;
                }
                match merged.get(name) {
                    Some(existing) if existing == ty => {}
                    Some(_) => {
                        // Conflicting types for the same name: discard it.
                        merged.remove(name);
                        dropped.insert(name.clone());
                    }
                    None => {
                        merged.insert(name.clone(), ty.clone());
                    }
                }
            }
        }
        if merged.is_empty() {
            None
        } else {
            Some(merged)
        }
    }
}
/// Parses `bytes` (attributed to `path`) and exports per-function summaries.
///
/// Returns `Ok(vec![])` when the file's language is unsupported or no usable
/// parse tree was produced; real parse/setup errors are propagated.
pub fn extract_summaries_from_bytes(
bytes: &[u8],
path: &Path,
cfg: &Config,
) -> NyxResult<Vec<FuncSummary>> {
let _span = tracing::debug_span!("extract_summaries", file = %path.display()).entered();
let Some(source) = ParsedSource::try_new(bytes, path)? else {
return Ok(vec![]);
};
let parsed = ParsedFile::from_source(source, cfg);
Ok(parsed.export_summaries())
}
/// Like [`extract_summaries_from_bytes`], but forwards an optional scan root
/// to summary export.
///
/// NOTE(review): `scan_root` is presumably used to relativise/namespace
/// summary keys against the scan root — confirm in `export_summaries_with_root`.
pub fn extract_summaries_from_bytes_with_root(
bytes: &[u8],
path: &Path,
cfg: &Config,
scan_root: Option<&Path>,
) -> NyxResult<Vec<FuncSummary>> {
let _span = tracing::debug_span!("extract_summaries", file = %path.display()).entered();
let Some(source) = ParsedSource::try_new(bytes, path)? else {
return Ok(vec![]);
};
let parsed = ParsedFile::from_source(source, cfg);
Ok(parsed.export_summaries_with_root(scan_root))
}
/// Convenience wrapper: reads `path` from disk and extracts summaries.
///
/// # Errors
/// Fails when the file cannot be read, or when parsing fails with a real
/// error (unsupported languages yield an empty vector instead).
#[allow(dead_code)] pub fn extract_summaries_from_file(path: &Path, cfg: &Config) -> NyxResult<Vec<FuncSummary>> {
let bytes = std::fs::read(path)?;
extract_summaries_from_bytes(&bytes, path, cfg)
}
/// Reads `path` and builds its per-file control-flow graph, returning the
/// CFG together with the detected language. `Ok(None)` when the language is
/// unsupported.
pub fn build_cfg_for_file(path: &Path, cfg: &Config) -> NyxResult<Option<(FileCfg, Lang)>> {
let bytes = std::fs::read(path)?;
let Some(source) = ParsedSource::try_new(&bytes, path)? else {
return Ok(None);
};
// NOTE(review): unknown slugs fall back to Lang::C here, while
// collect_file_var_types falls back to Lang::Rust — confirm intentional.
let lang = Lang::from_slug(source.lang_slug).unwrap_or(Lang::C);
let parsed = ParsedFile::from_source(source, cfg);
Ok(Some((parsed.file_cfg, lang)))
}
/// Debug helper: extracts the authorization model for a single file.
///
/// Returns `Ok(None)` for unsupported languages, and a default (empty) model
/// when auth analysis is disabled for the file's language.
pub fn extract_auth_model_for_debug(
path: &Path,
cfg: &Config,
) -> NyxResult<Option<auth_analysis::model::AuthorizationModel>> {
let bytes = std::fs::read(path)?;
let Some(source) = ParsedSource::try_new(&bytes, path)? else {
return Ok(None);
};
let rules = auth_analysis::config::build_auth_rules(cfg, source.lang_slug);
if !rules.enabled {
return Ok(Some(auth_analysis::model::AuthorizationModel::default()));
}
let model = auth_analysis::extract::extract_authorization_model(
source.lang_slug,
cfg.framework_ctx.as_ref(),
&source.tree,
source.bytes,
source.path,
&rules,
);
Ok(Some(model))
}
#[doc(hidden)]
pub fn perf_stage_breakdown_fused(
bytes: &[u8],
path: &Path,
cfg: &Config,
global_summaries: Option<&crate::summary::GlobalSummaries>,
scan_root: Option<&Path>,
) -> Option<([u128; 8], [u128; 7])> {
use std::time::Instant;
let s_parse = Instant::now();
let source = ParsedSource::try_new(bytes, path).ok()??;
let parsed = ParsedFile::from_source(source, cfg);
let t_parse_cfg = s_parse.elapsed().as_micros();
crate::taint::ssa_transfer::reset_path_safe_suppressed_spans();
crate::taint::ssa_transfer::reset_all_validated_spans();
crate::taint::perf_lower_timings_start();
let s_lower = Instant::now();
let (lowered_summaries, lowered_bodies) =
parsed.lower_ssa_for_fused(global_summaries, scan_root);
let t_lower = s_lower.elapsed().as_micros();
let lower_breakdown = crate::taint::perf_lower_timings_take().unwrap_or([0; 7]);
let s_taint = Instant::now();
let taint_diags = parsed.run_cfg_analyses_with_lowered(
cfg,
global_summaries,
scan_root,
&lowered_summaries,
&lowered_bodies,
);
let t_taint_flow = s_taint.elapsed().as_micros();
let s_eligible = Instant::now();
let _ = crate::taint::build_eligible_bodies(&parsed.file_cfg, lowered_bodies);
let t_eligible = s_eligible.elapsed().as_micros();
let s_ast = Instant::now();
let ast_findings = parsed.source.run_ast_queries(cfg);
let t_ast = s_ast.elapsed().as_micros();
let s_suppr = Instant::now();
let suppression =
TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &taint_diags);
let _filtered: Vec<_> = ast_findings
.into_iter()
.filter(|d| !suppression.should_suppress(&d.id, d.line))
.collect();
let t_suppr = s_suppr.elapsed().as_micros();
let s_auth = Instant::now();
let _ = parsed.run_auth_analyses(cfg, global_summaries, scan_root);
let t_auth = s_auth.elapsed().as_micros();
let t_state = 0u128;
Some((
[
t_parse_cfg,
t_lower,
t_taint_flow,
t_eligible,
t_ast,
t_suppr,
t_auth,
t_state,
],
lower_breakdown,
))
}
#[doc(hidden)]
pub fn perf_stage_breakdown(
bytes: &[u8],
path: &Path,
cfg: &Config,
global_summaries: Option<&crate::summary::GlobalSummaries>,
scan_root: Option<&Path>,
) -> Option<[u128; 6]> {
use std::time::Instant;
let s_parse = Instant::now();
let source = ParsedSource::try_new(bytes, path).ok()??;
let parsed = ParsedFile::from_source(source, cfg);
let t_parse_cfg = s_parse.elapsed().as_micros();
let s_taint = Instant::now();
let taint = parsed.run_cfg_analyses(cfg, global_summaries, scan_root);
let t_taint = s_taint.elapsed().as_micros();
let s_suppr = Instant::now();
let _ = TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &taint);
let t_suppr = s_suppr.elapsed().as_micros();
let s_ast = Instant::now();
let _ast_findings = parsed.source.run_ast_queries(cfg);
let t_ast = s_ast.elapsed().as_micros();
let s_auth = Instant::now();
let _ = parsed.run_auth_analyses(cfg, global_summaries, scan_root);
let t_auth = s_auth.elapsed().as_micros();
let s_ssa = Instant::now();
let _ = parsed.extract_ssa_artifacts(global_summaries, scan_root);
let t_ssa = s_ssa.elapsed().as_micros();
Some([t_parse_cfg, t_taint, t_suppr, t_ast, t_auth, t_ssa])
}
/// Single-parse extraction of everything the cross-file phase needs from one
/// file: function summaries, SSA summaries, SSA callee bodies, and auth-check
/// summaries (the last three keyed by `FuncKey`).
///
/// Unsupported languages yield four empty vectors.
pub fn extract_all_summaries_from_bytes(
bytes: &[u8],
path: &Path,
cfg: &Config,
scan_root: Option<&Path>,
) -> NyxResult<(
Vec<FuncSummary>,
Vec<(crate::symbol::FuncKey, SsaFuncSummary)>,
Vec<(
crate::symbol::FuncKey,
crate::taint::ssa_transfer::CalleeSsaBody,
)>,
Vec<(
crate::symbol::FuncKey,
auth_analysis::model::AuthCheckSummary,
)>,
)> {
let _span = tracing::debug_span!("extract_all_summaries", file = %path.display()).entered();
let Some(source) = ParsedSource::try_new(bytes, path)? else {
return Ok((vec![], vec![], vec![], vec![]));
};
// Capture the slug before `source` is moved into the ParsedFile.
let lang_slug = source.lang_slug;
let parsed = ParsedFile::from_source(source, cfg);
let func_summaries = parsed.export_summaries_with_root(scan_root);
let (ssa_summaries, ssa_bodies) = parsed.extract_ssa_artifacts(None, scan_root);
let auth_summaries = auth_analysis::extract_auth_summaries_by_key(
&parsed.source.tree,
parsed.source.bytes,
lang_slug,
parsed.source.path,
cfg,
scan_root,
);
Ok((func_summaries, ssa_summaries, ssa_bodies, auth_summaries))
}
/// True when `node` sits inside a call whose argument list is non-empty and
/// made up exclusively of literal arguments — such a call cannot carry
/// attacker-controlled data.
fn is_call_all_args_literal(node: tree_sitter::Node, bytes: &[u8]) -> bool {
    let Some(call_node) = find_enclosing_call(node) else {
        return false;
    };
    let Some(arg_list) = find_arg_list(call_node) else {
        return false;
    };
    // An empty argument list does not count as "all literal".
    let mut saw_any_arg = false;
    for i in 0..arg_list.named_child_count() as u32 {
        let Some(arg) = arg_list.named_child(i) else {
            continue;
        };
        saw_any_arg = true;
        if !is_literal_node(arg, bytes) {
            return false;
        }
    }
    saw_any_arg
}
/// Walks up at most four ancestors from `node` looking for an enclosing call
/// expression. Returns `None` when a scope boundary (block, body,
/// program/module root, or expression statement) is reached first.
fn find_enclosing_call(mut node: tree_sitter::Node) -> Option<tree_sitter::Node> {
    for _ in 0..4 {
        let kind = node.kind();
        // Accept any "call"-flavoured node kind while excluding "callee"
        // helper nodes. This already covers PHP's
        // `function_call_expression`, so the previous dedicated check for
        // that kind was unreachable and has been removed.
        if kind.contains("call") && !kind.contains("callee") {
            return Some(node);
        }
        // Stop at scope boundaries: the node is not inside a call.
        if kind.contains("block")
            || kind.contains("body")
            || kind == "program"
            || kind == "module"
            || kind == "expression_statement"
        {
            return None;
        }
        node = node.parent()?;
    }
    None
}
fn find_arg_list(call: tree_sitter::Node) -> Option<tree_sitter::Node> {
for i in 0..call.child_count() as u32 {
if let Some(child) = call.child(i) {
let kind = child.kind();
if kind == "argument_list" || kind == "arguments" || kind == "actual_parameters" {
return Some(child);
}
}
}
None
}
/// Conservatively decides whether an AST node is a compile-time literal.
///
/// Covers plain string/number/bool/null literals, single-child wrappers
/// (`argument`, unary expressions), and string concatenations / binary
/// expressions whose operands are all literal. Strings only qualify when
/// they contain no interpolation.
///
/// Fix: three match arms were duplicated verbatim (`encapsed_string`
/// duplicated the string arm; `argument` duplicated the unary arm) — merged
/// per clippy `match_same_arms`; behavior is unchanged.
fn is_literal_node(node: tree_sitter::Node, bytes: &[u8]) -> bool {
    let kind = node.kind();
    match kind {
        // String-like nodes (incl. PHP's encapsed_string): literal only when
        // they contain no embedded variable/interpolation.
        "string"
        | "string_literal"
        | "interpreted_string_literal"
        | "raw_string_literal"
        | "string_content"
        | "string_fragment"
        | "encapsed_string" => !has_interpolation(node),
        // Numeric literals.
        "integer" | "integer_literal" | "int_literal" | "float" | "float_literal" | "number" => {
            true
        }
        // Boolean / null literals.
        "true" | "false" | "null" | "nil" | "none" | "null_literal" | "boolean"
        | "boolean_literal" => true,
        // Single-child wrappers: literal iff the wrapped expression is.
        "argument" | "unary_expression" | "unary_op" => {
            node.named_child_count() == 1
                && node
                    .named_child(0)
                    .is_some_and(|c| is_literal_node(c, bytes))
        }
        // Binary/concatenation expressions: literal when every operand is.
        "binary_expression" | "concatenated_string" => {
            node.named_child_count() >= 2
                && (0..node.named_child_count() as u32).all(|i| {
                    node.named_child(i)
                        .is_some_and(|c| is_literal_node(c, bytes))
                })
        }
        _ => false,
    }
}
/// Detects the PHP pattern `include $param;` where `$param` is a formal
/// parameter of the enclosing function/method/closure and is not reassigned
/// between the function entry and the include site — i.e. the include target
/// is passed straight through by the caller (a deliberate dynamic-include
/// helper), which this rule treats as not directly tainted here.
fn is_php_include_param_passthrough(include_node: tree_sitter::Node, bytes: &[u8]) -> bool {
// The include argument must be a bare `$variable` (no surrounding expression).
let var_node = include_node.named_child(0);
let Some(var_node) = var_node else {
return false;
};
if var_node.kind() != "variable_name" {
return false;
}
let name_node = var_node.named_child(0);
let Some(name_node) = name_node else {
return false;
};
let var_name = match std::str::from_utf8(&bytes[name_node.byte_range()]) {
Ok(s) => s,
Err(_) => return false,
};
// Walk up to the nearest enclosing function-like scope.
let mut cur = include_node;
while let Some(parent) = cur.parent() {
match parent.kind() {
"method_declaration"
| "function_definition"
| "anonymous_function"
| "anonymous_function_creation_expression"
| "arrow_function" => {
// The variable must be declared in this scope's parameter list.
let params = parent
.child_by_field_name("parameters")
.or_else(|| find_named_child_of_kind(parent, "formal_parameters"));
let Some(params) = params else {
return false;
};
if !param_list_contains_name(params, var_name, bytes) {
return false;
}
// ... and must not be reassigned before the include executes.
let body = parent
.child_by_field_name("body")
.or_else(|| find_named_child_of_kind(parent, "compound_statement"));
let body_start = body.map(|b| b.start_byte()).unwrap_or(parent.start_byte());
if is_var_reassigned_before(
body.unwrap_or(parent),
var_name,
include_node.start_byte(),
body_start,
bytes,
) {
return false;
}
return true;
}
// Reached file/class level without finding a function: not a
// parameter passthrough.
"program" | "class_declaration" | "trait_declaration" | "interface_declaration" => {
return false;
}
_ => {}
}
cur = parent;
}
false
}
fn find_named_child_of_kind<'a>(
parent: tree_sitter::Node<'a>,
kind: &str,
) -> Option<tree_sitter::Node<'a>> {
for i in 0..parent.named_child_count() as u32 {
if let Some(child) = parent.named_child(i)
&& child.kind() == kind
{
return Some(child);
}
}
None
}
/// True when the PHP formal-parameter list `params` declares a parameter
/// named `target_name` (plain, variadic, or constructor-promoted forms).
fn param_list_contains_name(params: tree_sitter::Node, target_name: &str, bytes: &[u8]) -> bool {
for i in 0..params.named_child_count() as u32 {
let Some(param) = params.named_child(i) else {
continue;
};
// Only actual parameter node kinds; skip commas/attributes/etc.
if !matches!(
param.kind(),
"simple_parameter"
| "variadic_parameter"
| "property_promotion_parameter"
| "promoted_constructor_parameter"
) {
continue;
}
// The parameter's variable node, via field name or kind lookup.
let var_node = param
.child_by_field_name("name")
.or_else(|| find_named_child_of_kind(param, "variable_name"));
let Some(var_node) = var_node else {
continue;
};
// `variable_name` wraps the bare name; otherwise the node is the name.
let name_node = if var_node.kind() == "variable_name" {
var_node.named_child(0)
} else {
Some(var_node)
};
let Some(name_node) = name_node else {
continue;
};
if let Ok(name) = std::str::from_utf8(&bytes[name_node.byte_range()])
&& name == target_name
{
return true;
}
}
false
}
/// Scans the subtree `root` (a function body) for an assignment to
/// `$target_name` occurring after `body_start` and strictly before
/// `before_byte` (the include site). Only direct `$x = ...` assignments are
/// recognised; list/compound targets are not.
fn is_var_reassigned_before(
root: tree_sitter::Node,
target_name: &str,
before_byte: usize,
body_start: usize,
bytes: &[u8],
) -> bool {
// Iterative DFS over the body; nodes outside (body_start, before_byte)
// are pruned.
let mut stack = vec![root];
while let Some(node) = stack.pop() {
if node.start_byte() >= before_byte {
continue;
}
if node.end_byte() <= body_start {
continue;
}
if node.kind() == "assignment_expression" {
let lhs = node
.child_by_field_name("left")
.or_else(|| node.named_child(0));
if let Some(lhs) = lhs
&& lhs.kind() == "variable_name"
&& let Some(n) = lhs.named_child(0)
&& let Ok(s) = std::str::from_utf8(&bytes[n.byte_range()])
&& s == target_name
{
return true;
}
}
for i in 0..node.named_child_count() as u32 {
if let Some(c) = node.named_child(i) {
stack.push(c);
}
}
}
false
}
/// True when a PHP `unserialize($data, $options)` call restricts object
/// instantiation: the second argument is an array literal containing an
/// `'allowed_classes'` entry whose value is `false` or an explicit class
/// list. Such calls defuse PHP-object-injection findings.
fn is_php_unserialize_allowed_classes_restricted(
cap_node: tree_sitter::Node,
bytes: &[u8],
) -> bool {
// Locate the enclosing function_call_expression (at most 4 hops up).
let call_node = if cap_node.kind() == "function_call_expression" {
cap_node
} else {
let mut cur = cap_node;
let mut found = None;
for _ in 0..4 {
if cur.kind() == "function_call_expression" {
found = Some(cur);
break;
}
match cur.parent() {
Some(p) => cur = p,
None => break,
}
}
match found {
Some(c) => c,
None => return false,
}
};
let arg_list = find_named_child_of_kind(call_node, "arguments");
let Some(arg_list) = arg_list else {
return false;
};
let mut args = Vec::new();
for i in 0..arg_list.named_child_count() as u32 {
if let Some(c) = arg_list.named_child(i)
&& c.kind() == "argument"
{
args.push(c);
}
}
// A bare unserialize($data) has no options and is unrestricted.
if args.len() < 2 {
return false;
}
// Second argument must be an inline array literal to be analysable.
let opts = args[1].named_child(0);
let Some(opts) = opts else { return false };
if opts.kind() != "array_creation_expression" {
return false;
}
for i in 0..opts.named_child_count() as u32 {
let Some(elem) = opts.named_child(i) else {
continue;
};
if elem.kind() != "array_element_initializer" {
continue;
}
// Need a key => value pair.
if elem.named_child_count() < 2 {
continue;
}
let key = elem.named_child(0);
let value = elem.named_child(1);
let (Some(key), Some(value)) = (key, value) else {
continue;
};
if !is_string_literal_with_text(key, "allowed_classes", bytes) {
continue;
}
match value.kind() {
// `'allowed_classes' => false` forbids all object creation.
"boolean" => {
if let Ok(s) = std::str::from_utf8(&bytes[value.byte_range()])
&& s.eq_ignore_ascii_case("false")
{
return true;
}
}
// An explicit class list (array or ::class constant) is a
// restriction as well.
"array_creation_expression"
| "class_constant_access_expression"
| "scoped_property_access_expression" => return true,
_ => {}
}
}
false
}
/// Suppression check for C/C++ buffer rules: a strcpy/strcat/sprintf call is
/// considered safe when its source operand (second argument) is a
/// compile-time literal — a string literal (or literal/macro ternary) for
/// strcpy/strcat, or a format string with bounded expansion for sprintf.
fn is_c_buffer_call_literal_safe(rule_id: &str, cap_node: tree_sitter::Node, bytes: &[u8]) -> bool {
// Map the rule id to the vetting strategy; unknown rules are never safe.
let kind = match rule_id {
"c.memory.strcpy" | "cpp.memory.strcpy" => CBufferRule::StrcpyOrCat,
"c.memory.strcat" | "cpp.memory.strcat" => CBufferRule::StrcpyOrCat,
"c.memory.sprintf" | "cpp.memory.sprintf" => CBufferRule::Sprintf,
_ => return false,
};
let call = find_enclosing_call(cap_node);
let Some(call) = call else { return false };
let arg_list = find_arg_list(call);
let Some(arg_list) = arg_list else {
return false;
};
let mut args = Vec::new();
for i in 0..arg_list.named_child_count() as u32 {
if let Some(c) = arg_list.named_child(i) {
args.push(c);
}
}
// All three functions take (dst, src/fmt, ...): need at least two args.
if args.len() < 2 {
return false;
}
let src = args[1];
match kind {
CBufferRule::StrcpyOrCat => is_c_string_literal_or_lit_ternary(src, bytes),
CBufferRule::Sprintf => {
if !matches!(
src.kind(),
"string_literal" | "raw_string_literal" | "string"
) {
return false;
}
let Some(text) = c_string_literal_payload(src, bytes) else {
return false;
};
sprintf_format_is_safe(&text)
}
}
}
/// Which C buffer-rule family a finding belongs to; selects how the source
/// operand is vetted in `is_c_buffer_call_literal_safe`.
#[derive(Copy, Clone)]
enum CBufferRule {
// strcpy/strcat-style: safe when the source operand is a string literal.
StrcpyOrCat,
// sprintf-style: safe when the literal format string has bounded expansion.
Sprintf,
}
/// True when a strcpy/strcat source operand is a string literal, or a
/// ternary both of whose value branches are string literals / ALL_CAPS macro
/// names (parentheses are unwrapped first).
fn is_c_string_literal_or_lit_ternary(node: tree_sitter::Node, bytes: &[u8]) -> bool {
let n = unwrap_c_paren(node);
match n.kind() {
"string_literal" | "raw_string_literal" | "string" => true,
"conditional_expression" => {
let mut branches: Vec<tree_sitter::Node> = Vec::new();
for i in 0..n.named_child_count() as u32 {
if let Some(c) = n.named_child(i) {
branches.push(c);
}
}
// Need condition + two value branches; only the value branches
// (indices 1 and 2) must be literal-like.
if branches.len() < 3 {
return false;
}
let conseq = unwrap_c_paren(branches[1]);
let alt = unwrap_c_paren(branches[2]);
is_c_lit_or_macro_branch(conseq, bytes) && is_c_lit_or_macro_branch(alt, bytes)
}
_ => false,
}
}
fn is_c_lit_or_macro_branch(node: tree_sitter::Node, bytes: &[u8]) -> bool {
match node.kind() {
"string_literal" | "raw_string_literal" | "string" => true,
"identifier" => {
let Ok(name) = std::str::from_utf8(&bytes[node.byte_range()]) else {
return false;
};
is_all_caps_macro_name(name)
}
_ => false,
}
}
/// Returns true when `s` looks like a C/C++ macro name: composed solely of
/// ASCII uppercase letters, digits and underscores, with at least one letter
/// (so `"_123"` and the empty string do not qualify).
fn is_all_caps_macro_name(s: &str) -> bool {
    let charset_ok = s
        .chars()
        .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_');
    charset_ok && s.chars().any(|c| c.is_ascii_uppercase())
}
fn unwrap_c_paren(mut node: tree_sitter::Node) -> tree_sitter::Node {
for _ in 0..4 {
if node.kind() == "parenthesized_expression"
&& let Some(inner) = node.named_child(0)
{
node = inner;
continue;
}
break;
}
node
}
/// Extracts the text between the quotes of a C/C++ string-literal node.
///
/// Prefers the grammar's `string_content` child; otherwise falls back to
/// trimming encoding prefixes (`L`, `u8`, `u`, `U`) and the surrounding
/// double quotes from the raw source slice. Returns `None` when neither
/// works.
fn c_string_literal_payload(node: tree_sitter::Node, bytes: &[u8]) -> Option<String> {
for i in 0..node.named_child_count() as u32 {
if let Some(c) = node.named_child(i)
&& c.kind() == "string_content"
&& let Ok(s) = std::str::from_utf8(&bytes[c.byte_range()])
{
return Some(s.to_string());
}
}
// Fallback: strip a possible encoding prefix, then the quotes.
// NOTE(review): trim_start_matches strips *repeated* occurrences (e.g.
// "LL..."), which is harmless for well-formed literals — confirm no raw
// R"(...)" forms reach this path.
let raw = std::str::from_utf8(&bytes[node.byte_range()]).ok()?;
let trimmed = raw.trim();
let after_prefix = trimmed
.trim_start_matches('L')
.trim_start_matches("u8")
.trim_start_matches('u')
.trim_start_matches('U');
let s = after_prefix
.strip_prefix('"')
.and_then(|s| s.strip_suffix('"'));
s.map(|s| s.to_string())
}
/// Returns true when a *literal* printf-style format string cannot expand to
/// unbounded output: every conversion must produce bounded output (numeric,
/// char, pointer, ...) or, for `%s`, carry an explicit precision
/// (e.g. `%.32s`) that caps the copied length. A dangling `%` or an unknown
/// conversion character is treated as unsafe.
pub(crate) fn sprintf_format_is_safe(fmt: &str) -> bool {
    let b = fmt.as_bytes();
    let len = b.len();
    let mut pos = 0;
    while pos < len {
        if b[pos] != b'%' {
            pos += 1;
            continue;
        }
        pos += 1;
        if pos >= len {
            // Lone '%' at end of string: malformed, treat as unsafe.
            return false;
        }
        if b[pos] == b'%' {
            // Escaped "%%" produces a single '%'.
            pos += 1;
            continue;
        }
        // Flags.
        while pos < len && matches!(b[pos], b'-' | b'+' | b'#' | b' ' | b'0' | b'\'') {
            pos += 1;
        }
        // Field width: '*' or a digit run.
        if pos < len && b[pos] == b'*' {
            pos += 1;
        } else {
            while pos < len && b[pos].is_ascii_digit() {
                pos += 1;
            }
        }
        // Optional precision: '.' followed by '*' or digits.
        let mut has_precision = false;
        if pos < len && b[pos] == b'.' {
            has_precision = true;
            pos += 1;
            if pos < len && b[pos] == b'*' {
                pos += 1;
            } else {
                while pos < len && b[pos].is_ascii_digit() {
                    pos += 1;
                }
            }
        }
        // Length modifiers (h, hh, l, ll, L, q, z, j, t).
        while pos < len && matches!(b[pos], b'h' | b'l' | b'L' | b'q' | b'z' | b'j' | b't') {
            pos += 1;
        }
        let Some(&conv) = b.get(pos) else {
            // Format string ended before the conversion character.
            return false;
        };
        pos += 1;
        match conv {
            // Bounded-output conversions.
            b'd' | b'i' | b'u' | b'o' | b'x' | b'X' | b'c' | b'e' | b'E' | b'f' | b'F'
            | b'g' | b'G' | b'a' | b'A' | b'p' | b'n' => {}
            // %s is only bounded when an explicit precision caps it.
            b's' if has_precision => {}
            _ => return false,
        }
    }
    true
}
/// True when a PHP string node's literal content equals `text` exactly.
///
/// Prefers the grammar's payload child (`string_content`/`string_value`);
/// otherwise compares the raw slice with surrounding quote characters
/// stripped.
fn is_string_literal_with_text(node: tree_sitter::Node, text: &str, bytes: &[u8]) -> bool {
if node.kind() != "string" && node.kind() != "encapsed_string" {
return false;
}
let mut payload = None;
for i in 0..node.named_child_count() as u32 {
if let Some(c) = node.named_child(i)
&& (c.kind() == "string_content" || c.kind() == "string_value")
{
payload = Some(c);
break;
}
}
let Some(payload) = payload else {
// Fallback: trim quote characters off the raw source text.
// NOTE(review): trim_matches strips *all* leading/trailing quotes, not
// just one pair — fine for well-formed literals.
if let Ok(s) = std::str::from_utf8(&bytes[node.byte_range()]) {
let trimmed = s.trim_matches(|c| c == '\'' || c == '"');
return trimmed == text;
}
return false;
};
if let Ok(s) = std::str::from_utf8(&bytes[payload.byte_range()]) {
return s == text;
}
false
}
/// Suppression check for `cpp.memory.reinterpret_cast`: the finding is safe
/// when the cast's template argument spells a benign target type (see
/// `cpp_cast_target_type_is_safe`). Expects the capture to sit inside a
/// `reinterpret_cast<T>(...)` call parsed as a template_function.
fn is_cpp_cast_target_type_safe(rule_id: &str, cap_node: tree_sitter::Node, bytes: &[u8]) -> bool {
if rule_id != "cpp.memory.reinterpret_cast" {
return false;
}
let call = find_enclosing_call(cap_node);
let Some(call) = call else { return false };
let func = call.child_by_field_name("function");
let Some(func) = func else { return false };
if func.kind() != "template_function" {
return false;
}
let targs = func.child_by_field_name("arguments");
let Some(targs) = targs else { return false };
if targs.kind() != "template_argument_list" {
return false;
}
let Ok(text) = std::str::from_utf8(&bytes[targs.byte_range()]) else {
return false;
};
// Strip the angle brackets to get the bare type spelling "<T>" -> "T".
let inner = text
.trim()
.trim_start_matches('<')
.trim_end_matches('>')
.trim();
cpp_cast_target_type_is_safe(inner)
}
/// Decides whether a `reinterpret_cast` target type is conventionally benign:
/// either a pointer-sized integer (`uintptr_t`/`intptr_t`, depth 0) or a
/// single-level pointer to a byte-like or sockaddr type. Deeper pointers and
/// anything else are unsafe.
pub(crate) fn cpp_cast_target_type_is_safe(s: &str) -> bool {
    // Collapse every whitespace run to a single space and trim the ends.
    let normalised = s.split_whitespace().collect::<Vec<_>>().join(" ");
    let Some(base) = strip_pointer_and_cv(&normalised) else {
        return false;
    };
    // Pointer depth = number of '*' in the normalised spelling.
    let depth = normalised.matches('*').count();
    match depth {
        // Non-pointer targets: only pointer-sized integer types qualify.
        0 => matches!(
            base.as_str(),
            "uintptr_t" | "intptr_t" | "std::uintptr_t" | "std::intptr_t"
        ),
        // Single-level pointers to byte-like / sockaddr types.
        1 => matches!(
            base.as_str(),
            "char"
                | "signed char"
                | "unsigned char"
                | "wchar_t"
                | "uint8_t"
                | "int8_t"
                | "std::byte"
                | "byte"
                | "void"
                | "sockaddr"
                | "struct sockaddr"
                | "sockaddr_in"
                | "sockaddr_in6"
                | "sockaddr_un"
                | "sockaddr_storage"
                | "struct sockaddr_in"
                | "struct sockaddr_in6"
                | "struct sockaddr_un"
                | "struct sockaddr_storage"
        ),
        _ => false,
    }
}
/// Strips leading/trailing `const`/`volatile` qualifiers and any number of
/// trailing `*` from a whitespace-normalised C++ type spelling, returning
/// the base type name. Returns `None` when nothing remains or when the base
/// contains characters outside `[A-Za-z0-9_: ]` (templates, references, ...).
fn strip_pointer_and_cv(s: &str) -> Option<String> {
    let mut rest = s.trim();
    // Leading cv-qualifiers (repeated qualifiers are tolerated).
    loop {
        match rest
            .strip_prefix("const ")
            .or_else(|| rest.strip_prefix("volatile "))
        {
            Some(r) => rest = r.trim_start(),
            None => break,
        }
    }
    // Alternate between trailing cv-qualifiers and pointer stars until a
    // fixed point is reached (handles "char * const *" style spellings).
    loop {
        let mut changed = false;
        loop {
            let t = rest.trim_end();
            match t
                .strip_suffix(" const")
                .or_else(|| t.strip_suffix(" volatile"))
            {
                Some(r) => {
                    rest = r;
                    changed = true;
                }
                None => break,
            }
        }
        if let Some(r) = rest.trim_end().strip_suffix('*') {
            rest = r;
            changed = true;
        }
        if !changed {
            break;
        }
    }
    let base = rest.trim();
    let charset_ok = base
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == ':' || c == ' ');
    if base.is_empty() || !charset_ok {
        return None;
    }
    Some(base.to_string())
}
/// Heuristic suppression for PHP weak-hash findings (md5/sha1-style): walks
/// up from the hash call and returns true when the result is clearly used in
/// a non-cryptographic role — assigned to a cache-key/etag-style name, used
/// as an array key, passed to a lookup-verb method, or returned from a
/// function whose name reads as non-crypto. Everything ambiguous returns
/// false (finding stands).
fn is_php_weak_hash_non_crypto_use(cap_node: tree_sitter::Node, bytes: &[u8]) -> bool {
// Locate the enclosing function_call_expression (at most 4 hops up).
let call = if cap_node.kind() == "function_call_expression" {
cap_node
} else {
let mut cur = cap_node;
let mut found = None;
for _ in 0..4 {
if cur.kind() == "function_call_expression" {
found = Some(cur);
break;
}
match cur.parent() {
Some(p) => cur = p,
None => break,
}
}
match found {
Some(c) => c,
None => return false,
}
};
// Walk upwards classifying how the hash value is consumed; bounded to 16
// steps to avoid pathological trees.
let mut cur = call;
let mut steps = 0u32;
while let Some(parent) = cur.parent() {
if steps > 16 {
return false;
}
steps += 1;
match parent.kind() {
// Transparent wrappers: keep climbing.
"binary_expression"
| "parenthesized_expression"
| "conditional_expression"
| "argument"
| "arguments"
| "function_call_expression"
| "encapsed_string" => {}
// Assigned somewhere: decide by the assignment target's name.
"assignment_expression" | "augmented_assignment_expression" => {
let lhs = parent
.child_by_field_name("left")
.or_else(|| parent.named_child(0));
let Some(lhs) = lhs else {
return false;
};
return resolve_php_lvalue_name(lhs, bytes)
.map(|n| name_is_non_crypto(&n))
.unwrap_or(false);
}
// Used as an array literal key/value: decide by the literal key.
"array_element_initializer" => {
if parent.named_child_count() < 2 {
return false;
}
let key = parent.named_child(0);
let Some(key) = key else {
return false;
};
let Some(key_text) = string_literal_text(key, bytes) else {
return false;
};
return name_is_non_crypto(&key_text);
}
// `$arr[hash(...)]`: hash used as an index (non-crypto). The byte
// comparison checks the call sits in the subscript, not the base.
"subscript_expression" => {
let r0 = parent.named_child(0);
let Some(r0) = r0 else {
cur = parent;
continue;
};
if call.start_byte() >= r0.end_byte() {
return true;
}
}
// Passed to $obj->get(...)/set(...)-style lookup methods.
"member_call_expression" | "nullsafe_member_call_expression" => {
let name_node = parent.child_by_field_name("name").or_else(|| {
let count = parent.named_child_count();
if count == 0 {
None
} else {
parent.named_child(count as u32 - 1)
}
});
if let Some(nn) = name_node
&& nn.kind() == "name"
&& let Ok(method) = std::str::from_utf8(&bytes[nn.byte_range()])
&& method_is_lookup_verb(method)
{
return true;
}
}
// Returned: decide by the enclosing named function's name. Bail on
// anonymous/arrow functions (no meaningful name to classify).
"return_statement" => {
let mut p = parent;
for _ in 0..10 {
let Some(pp) = p.parent() else {
return false;
};
p = pp;
let kind = p.kind();
if kind == "method_declaration" || kind == "function_definition" {
let Some(nn) = p
.child_by_field_name("name")
.or_else(|| find_named_child_of_kind(p, "name"))
else {
return false;
};
let Ok(name) = std::str::from_utf8(&bytes[nn.byte_range()]) else {
return false;
};
return method_name_is_non_crypto(name);
}
if kind == "anonymous_function"
|| kind == "arrow_function"
|| kind == "anonymous_function_creation_expression"
{
return false;
}
}
return false;
}
// Statement/scope boundary reached without a clear non-crypto use.
"expression_statement"
| "compound_statement"
| "method_declaration"
| "function_definition"
| "anonymous_function"
| "anonymous_function_creation_expression"
| "arrow_function"
| "program" => return false,
_ => return false,
}
cur = parent;
}
false
}
/// Resolves the descriptive "name" of a PHP assignment target for
/// non-crypto classification: `$var` -> `var`, `$obj->prop` -> `prop`,
/// `$arr['key']` -> the literal key (falling back to the array's own name),
/// `Cls::$prop` -> `prop`. Returns `None` for anything else.
fn resolve_php_lvalue_name(lhs: tree_sitter::Node, bytes: &[u8]) -> Option<String> {
let lhs = unwrap_php_paren(lhs);
match lhs.kind() {
"variable_name" => {
let name_node = lhs.named_child(0)?;
std::str::from_utf8(&bytes[name_node.byte_range()])
.ok()
.map(String::from)
}
"member_access_expression" => {
// Property name is the "name" field, or the last named child.
let n = lhs.child_by_field_name("name").or_else(|| {
let count = lhs.named_child_count();
if count == 0 {
None
} else {
lhs.named_child(count as u32 - 1)
}
})?;
if n.kind() == "name" {
std::str::from_utf8(&bytes[n.byte_range()])
.ok()
.map(String::from)
} else {
None
}
}
"subscript_expression" => {
// Prefer a literal string index; otherwise recurse on the base.
if lhs.named_child_count() >= 2 {
let idx = lhs.named_child(1)?;
if let Some(txt) = string_literal_text(idx, bytes) {
return Some(txt);
}
}
let r = lhs.named_child(0)?;
resolve_php_lvalue_name(r, bytes)
}
"scoped_property_access_expression" => {
// `Cls::$prop`: classify by the property, the last named child.
let count = lhs.named_child_count();
if count == 0 {
return None;
}
let prop = lhs.named_child(count as u32 - 1)?;
resolve_php_lvalue_name(prop, bytes)
}
_ => None,
}
}
/// Returns the literal text of a PHP string node (single- or double-quoted),
/// or `None` when the node is not a string or contains interpolation.
/// Prefers the grammar's payload child; falls back to trimming quote
/// characters from the raw source slice.
fn string_literal_text(node: tree_sitter::Node, bytes: &[u8]) -> Option<String> {
if node.kind() != "string" && node.kind() != "encapsed_string" {
return None;
}
// Interpolated strings have no single static text.
if has_interpolation(node) {
return None;
}
for i in 0..node.named_child_count() as u32 {
if let Some(c) = node.named_child(i)
&& (c.kind() == "string_content" || c.kind() == "string_value")
{
return std::str::from_utf8(&bytes[c.byte_range()])
.ok()
.map(String::from);
}
}
if let Ok(s) = std::str::from_utf8(&bytes[node.byte_range()]) {
let trimmed = s.trim_matches(|c| c == '\'' || c == '"');
return Some(trimmed.to_string());
}
None
}
fn unwrap_php_paren(mut node: tree_sitter::Node) -> tree_sitter::Node {
for _ in 0..4 {
if node.kind() == "parenthesized_expression"
&& let Some(inner) = node.named_child(0)
{
node = inner;
continue;
}
break;
}
node
}
/// Heuristic: does `name` read like a *non*-cryptographic use of a hash
/// value (cache key, etag, checksum, ...) rather than a security-sensitive
/// one (password, token, signature, ...)?
///
/// Exclusion terms anywhere in the lowercased name win over safe suffixes.
/// A safe suffix only counts on a word boundary: preceded by a non-letter,
/// starting an UpperCamelCase word in the original spelling, or at least 4
/// characters long (unambiguous on its own).
pub(crate) fn name_is_non_crypto(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }
    let lower = name.to_ascii_lowercase();
    static CRYPTO_EXCLUDES: &[&str] = &[
        "password",
        "passwd",
        "pw_hash",
        "pwhash",
        "pwdhash",
        "pwd_hash",
        "passhash",
        "pass_hash",
        "secret",
        "token",
        "signature",
        "signed",
        "hmac",
        "digest",
        "verifier",
        "challenge",
        "csrf",
        "salt",
        "nonce_secret",
        "auth_code",
        "authcode",
        "auth_key",
        "authkey",
        "private",
        "credential",
        "creds",
        "encryption",
        "decryption",
        "encryptkey",
        "decryptkey",
        "encrypt_key",
        "decrypt_key",
        "apikey",
        "api_key",
    ];
    if CRYPTO_EXCLUDES.iter().any(|ex| lower.contains(ex)) {
        return false;
    }
    // Signature / MAC shorthands are crypto-sensitive too.
    if lower == "sig" || lower.ends_with("_sig") || lower.ends_with("sig_") {
        return false;
    }
    if lower == "mac" || lower.ends_with("_mac") {
        return false;
    }
    static SAFE_SUFFIXES: &[&str] = &[
        "hash",
        "hashes",
        "etag",
        "etags",
        "md5",
        "sha1",
        "fingerprint",
        "fingerprints",
        "cachekey",
        "cache_key",
        "cacheid",
        "cache_id",
        "id",
        "uid",
        "uuid",
        "guid",
        "name_hash",
        "checksum",
        "slot",
        "bucket",
        "seed",
        "marker",
        "tag",
        "gravatar",
        "hashid",
        "opaque",
        "shortid",
        "short_id",
        "fnv",
        "fingerprintkey",
        "anchor",
        "version",
        "buster",
        "cachebuster",
        "cache_buster",
        "revision",
        "rev",
    ];
    // Word-boundary checks are made against the *original* casing. ASCII
    // lowercasing preserves byte positions, so indices line up.
    let orig = name.as_bytes();
    for suffix in SAFE_SUFFIXES {
        if lower == *suffix {
            return true;
        }
        if !lower.ends_with(suffix) {
            continue;
        }
        let start = lower.len() - suffix.len();
        if start == 0 {
            return true;
        }
        let before = orig[start - 1] as char;
        let first = orig[start] as char;
        // Boundary: non-letter before the suffix, UpperCamelCase start, or a
        // long (>= 4 chars) suffix that is unlikely to be accidental.
        if !before.is_ascii_alphabetic() || first.is_ascii_uppercase() || suffix.len() >= 4 {
            return true;
        }
    }
    false
}
/// Like [`name_is_non_crypto`], but also tries the name with a leading
/// "get"/"Get" accessor prefix removed (e.g. `getEtag` -> `Etag`).
fn method_name_is_non_crypto(name: &str) -> bool {
    let without_accessor = name
        .strip_prefix("get")
        .or_else(|| name.strip_prefix("Get"))
        .unwrap_or(name);
    name_is_non_crypto(without_accessor) || name_is_non_crypto(name)
}
/// Heuristic: does a method name read like a cache/collection lookup —
/// either an exact lookup verb (get/set/has/fetch/...) or a
/// get*/set*/has*/create*/build* accessor ending in a key-like word? Hash
/// values flowing into such calls are treated as non-cryptographic.
fn method_is_lookup_verb(method: &str) -> bool {
    let lower = method.to_ascii_lowercase();
    static VERBS: &[&str] = &[
        "get",
        "set",
        "has",
        "delete",
        "remove",
        "fetch",
        "store",
        "put",
        "save",
        "exists",
        "find",
        "lookup",
        "getitem",
        "setitem",
        "hasitem",
        "deleteitem",
        "addtag",
        "addtotag",
        "key",
        "keyfor",
        "containskey",
        "haskey",
        "loadbykey",
        "fetchbykey",
        "getbykey",
        "setbykey",
        "deletebykey",
        "incr",
        "incrby",
        "decr",
        "decrby",
        "expire",
        "ttl",
        "namespacekey",
        "cachekey",
    ];
    if VERBS.contains(&lower.as_str()) {
        return true;
    }
    static SUFFIX_HINTS: &[&str] = &[
        "cachekey",
        "key",
        "id",
        "hash",
        "etag",
        "uid",
        "tag",
        "fingerprint",
    ];
    // None of these prefixes is a prefix of another, so at most one can
    // match a given name — trying them all is equivalent to first-match.
    static ACCESSOR_PREFIXES: &[&str] = &["get", "set", "has", "create", "build"];
    ACCESSOR_PREFIXES.iter().any(|p| {
        lower
            .strip_prefix(p)
            .is_some_and(|rest| SUFFIX_HINTS.iter().any(|h| rest.ends_with(h)))
    })
}
/// True when a string-like node has a direct child that embeds a variable or
/// interpolation (e.g. PHP `"$var"`), meaning it is not a pure literal.
fn has_interpolation(node: tree_sitter::Node) -> bool {
    (0..node.child_count() as u32).any(|i| {
        node.child(i).is_some_and(|child| {
            let kind = child.kind();
            kind == "variable_name"
                || kind == "simple_variable"
                || kind.contains("interpolation")
        })
    })
}
/// Maps the category segment of a pattern id ("lang.category.rule") to the
/// sanitizer capability that neutralises findings of that category, or
/// `None` for categories without a capability mapping.
fn pattern_category_cap(pattern_id: &str) -> Option<Cap> {
    match pattern_id.split('.').nth(1)? {
        "cmdi" => Some(Cap::SHELL_ESCAPE),
        "xss" => Some(Cap::HTML_ESCAPE),
        "sqli" => Some(Cap::SQL_QUERY),
        "code_exec" => Some(Cap::CODE_EXEC),
        "ssrf" => Some(Cap::SSRF),
        "path" => Some(Cap::FILE_IO),
        _ => None,
    }
}
/// Per-file context used to decide whether an AST pattern finding should be
/// suppressed because the CFG/taint engine already analysed the same
/// source-to-sink relationship.
struct TaintSuppressionCtx {
// Lines holding taint sources, per enclosing function (None = top level).
source_lines_by_func: HashMap<Option<String>, HashSet<usize>>,
// Lines holding sanitizer calls, per enclosing function.
sanitizer_lines_by_func: HashMap<Option<String>, HashSet<usize>>,
// For each line containing a sink, its enclosing function.
sink_func_at_line: HashMap<usize, Option<String>>,
// Lines where the taint engine reported an unsanitised flow.
taint_finding_lines: HashSet<usize>,
// The same finding lines, grouped by enclosing function.
taint_finding_lines_by_func: HashMap<Option<String>, HashSet<usize>>,
// Functions whose sink spans the engine marked as validated.
engine_validated_funcs: HashSet<Option<String>>,
// Functions where a source-defined variable is later overwritten by a constant.
source_killed_funcs: HashSet<Option<String>>,
// Functions that call (by bare name) a sanitizer-containing function.
interproc_sanitizer_callers: HashSet<Option<String>>,
}
impl TaintSuppressionCtx {
/// Builds the suppression context in one pass over every CFG body.
///
/// The main loop collects, per enclosing function: source lines, sanitizer
/// lines, sink lines, source-variable definitions, constant re-definitions,
/// and bare callee names. Post-passes derive: functions whose sources were
/// killed by later constant assignments, functions that call a
/// sanitizer-containing function, functions with engine-validated sinks, and
/// the taint engine's own finding lines.
fn build(file_cfg: &FileCfg, tree: &tree_sitter::Tree, taint_diags: &[Diag]) -> Self {
let mut source_lines_by_func: HashMap<Option<String>, HashSet<usize>> = HashMap::new();
let mut sanitizer_lines_by_func: HashMap<Option<String>, HashSet<usize>> = HashMap::new();
let mut sink_func_at_line: HashMap<usize, Option<String>> = HashMap::new();
// (var, line) pairs where a source defines a variable, per function.
let mut source_var_defs_by_func: HashMap<Option<String>, Vec<(String, usize)>> =
HashMap::new();
// (var, line) pairs where a variable is redefined to a constant, per function.
let mut const_def_var_by_func: HashMap<Option<String>, Vec<(String, usize)>> =
HashMap::new();
// Names of functions that contain a sanitizer node.
let mut sanitizer_funcs: HashSet<String> = HashSet::new();
// Bare callee names invoked from each function.
let mut callees_by_func: HashMap<Option<String>, HashSet<String>> = HashMap::new();
for body in &file_cfg.bodies {
for idx in body.graph.node_indices() {
let info = &body.graph[idx];
// Classify this CFG node by its taint labels.
let mut has_source = false;
let mut has_sink = false;
let mut has_sanitizer = false;
for label in &info.taint.labels {
match label {
DataLabel::Source(_) => has_source = true,
DataLabel::Sink(_) => has_sink = true,
DataLabel::Sanitizer(_) => has_sanitizer = true,
}
}
// Synthetic sources injected by the engine do not correspond to real
// source lines and must not drive suppression.
let is_synth_source = info.taint.defines.as_deref().is_some_and(|d| {
d.starts_with("__nyx_src_") || d.starts_with("__nyx_chainsrc_")
});
let byte = info.classification_span().0;
let point = byte_offset_to_point(tree, byte);
// 1-based line number matching Diag lines.
let line = point.row + 1;
if has_source && !is_synth_source {
source_lines_by_func
.entry(info.ast.enclosing_func.clone())
.or_default()
.insert(line);
if let Some(var) = info.taint.defines.as_deref() {
source_var_defs_by_func
.entry(info.ast.enclosing_func.clone())
.or_default()
.push((var.to_string(), line));
}
}
if has_sanitizer {
sanitizer_lines_by_func
.entry(info.ast.enclosing_func.clone())
.or_default()
.insert(line);
if let Some(func_name) = info.ast.enclosing_func.as_deref() {
sanitizer_funcs.insert(func_name.to_string());
}
}
if has_sink {
sink_func_at_line.insert(line, info.ast.enclosing_func.clone());
}
// A non-source node assigning a constant to a variable can "kill" an
// earlier tainted definition of the same variable.
if !has_source
&& let (Some(var), Some(_)) = (
info.taint.defines.as_deref(),
info.taint.const_text.as_ref(),
)
{
const_def_var_by_func
.entry(info.ast.enclosing_func.clone())
.or_default()
.push((var.to_string(), line));
}
// Collect every callee name (direct call plus argument-position calls).
let bare_inserts: Vec<&str> = info
.call
.callee
.as_deref()
.into_iter()
.chain(info.arg_callees.iter().filter_map(|c| c.as_deref()))
.collect();
if !bare_inserts.is_empty() {
let entry = callees_by_func
.entry(info.ast.enclosing_func.clone())
.or_default();
for callee in bare_inserts {
let bare = crate::labels::bare_method_name(callee);
if !bare.is_empty() {
entry.insert(bare.to_string());
}
}
}
}
}
// A function's sources are "killed" when some source-defined variable is
// later (higher line) overwritten by a constant definition.
let mut source_killed_funcs: HashSet<Option<String>> = HashSet::new();
for (func, src_defs) in &source_var_defs_by_func {
let Some(kills) = const_def_var_by_func.get(func) else {
continue;
};
for (src_var, src_line) in src_defs {
if kills
.iter()
.any(|(kill_var, kill_line)| kill_var == src_var && kill_line > src_line)
{
source_killed_funcs.insert(func.clone());
break;
}
}
}
// Functions that call (by bare name) any sanitizer-containing function.
let mut interproc_sanitizer_callers: HashSet<Option<String>> = HashSet::new();
if !sanitizer_funcs.is_empty() {
for (func, callees) in &callees_by_func {
if callees.iter().any(|c| sanitizer_funcs.contains(c)) {
interproc_sanitizer_callers.insert(func.clone());
}
}
}
// Functions whose sink spans the engine validated during taint analysis.
let mut engine_validated_funcs: HashSet<Option<String>> = HashSet::new();
for (start, _end) in crate::taint::ssa_transfer::take_all_validated_spans() {
let line = byte_offset_to_point(tree, start).row + 1;
if let Some(func) = sink_func_at_line.get(&line) {
engine_validated_funcs.insert(func.clone());
}
}
// Lines where the taint engine itself reported an unsanitised flow.
let taint_finding_lines: HashSet<usize> = taint_diags
.iter()
.filter(|d| d.id.starts_with("taint-unsanitised-flow"))
.map(|d| d.line)
.collect();
let mut taint_finding_lines_by_func: HashMap<Option<String>, HashSet<usize>> =
HashMap::new();
for line in &taint_finding_lines {
let func = sink_func_at_line.get(line).cloned().unwrap_or(None);
taint_finding_lines_by_func
.entry(func)
.or_default()
.insert(*line);
}
Self {
source_lines_by_func,
sanitizer_lines_by_func,
sink_func_at_line,
taint_finding_lines,
taint_finding_lines_by_func,
engine_validated_funcs,
source_killed_funcs,
interproc_sanitizer_callers,
}
}
/// Decide whether an AST finding (`pattern_id` at `line`) is redundant with
/// the taint engine's own coverage and can therefore be dropped.
fn should_suppress(&self, pattern_id: &str, line: usize) -> bool {
    // Only patterns that carry a category cap participate in suppression.
    if pattern_category_cap(pattern_id).is_none() {
        return false;
    }
    // The finding must sit on a line that holds a known sink.
    let Some(func) = self.sink_func_at_line.get(&line) else {
        return false;
    };
    // Require at least one taint source strictly above the finding line
    // in the same function.
    let has_earlier_source = self
        .source_lines_by_func
        .get(func)
        .is_some_and(|lines| lines.iter().any(|&src_line| src_line < line));
    if !has_earlier_source {
        return false;
    }
    // Never suppress a line the taint engine itself flagged.
    if self.taint_finding_lines.contains(&line) {
        return false;
    }
    // Suppress only when the engine demonstrably analysed this function:
    // it reported a finding there, saw a sanitizer (directly or via a
    // callee), validated a sink span, or saw the source value overwritten.
    let engine_covered = self
        .taint_finding_lines_by_func
        .get(func)
        .is_some_and(|s| !s.is_empty())
        || self
            .sanitizer_lines_by_func
            .get(func)
            .is_some_and(|s| !s.is_empty())
        || self.engine_validated_funcs.contains(func)
        || self.source_killed_funcs.contains(func)
        || self.interproc_sanitizer_callers.contains(func);
    engine_covered
}
}
/// Run all configured rule layers over `bytes`, treated as the contents of
/// `path`.
///
/// When the file cannot be parsed (unknown language, binary content, or a
/// tree-sitter timeout — the latter adds an `engine.parse_timeout` diag),
/// only the text-based pattern scan runs.
///
/// # Errors
/// Propagates `NyxError`s from `ParsedSource::try_new`.
pub fn run_rules_on_bytes(
    bytes: &[u8],
    path: &Path,
    cfg: &Config,
    global_summaries: Option<&GlobalSummaries>,
    scan_root: Option<&Path>,
) -> NyxResult<Vec<Diag>> {
    let _span = tracing::debug_span!("run_rules", file = %path.display()).entered();
    maybe_inject_test_panic(path);
    let Some(source) = ParsedSource::try_new(bytes, path)? else {
        // Unparseable input: run the cheap text-based patterns only.
        let mut out = scan_text_based_patterns(bytes, path, cfg);
        if let Some(timeout_ms) = take_last_parse_timeout_ms() {
            out.push(parse_timeout_diag(path, timeout_ms));
        }
        return Ok(out);
    };
    let mut out = Vec::new();
    let needs_cfg = matches!(
        cfg.scanner.mode,
        AnalysisMode::Full | AnalysisMode::Cfg | AnalysisMode::Taint
    );
    if needs_cfg {
        let parsed = ParsedFile::from_source(source, cfg);
        out.extend(parsed.run_cfg_analyses(cfg, global_summaries, scan_root));
        // Merged the two previously-duplicated `mode == Full` checks into one
        // block; ordering (suppression-filtered AST queries, then auth
        // analyses) is unchanged.
        if cfg.scanner.mode == AnalysisMode::Full {
            // Suppression must be built after the CFG analyses so `out`
            // already contains the taint diagnostics it inspects.
            let suppression =
                TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &out);
            let ast_findings = parsed.source.run_ast_queries(cfg);
            out.extend(
                ast_findings
                    .into_iter()
                    .filter(|d| !suppression.should_suppress(&d.id, d.line)),
            );
            out.extend(parsed.run_auth_analyses(cfg, global_summaries, scan_root));
        }
        parsed.source.finalize_diags(&mut out, cfg);
    } else {
        // AST-only modes: queries run unfiltered; auth analyses still apply.
        out.extend(source.run_ast_queries(cfg));
        let parsed = ParsedFile::from_source(source, cfg);
        out.extend(parsed.run_auth_analyses(cfg, global_summaries, scan_root));
        parsed.source.finalize_diags(&mut out, cfg);
    }
    Ok(out)
}
/// Convenience wrapper: read `path` from disk and feed it to
/// [`run_rules_on_bytes`].
pub fn run_rules_on_file(
    path: &Path,
    cfg: &Config,
    global_summaries: Option<&GlobalSummaries>,
    scan_root: Option<&Path>,
) -> NyxResult<Vec<Diag>> {
    run_rules_on_bytes(&std::fs::read(path)?, path, cfg, global_summaries, scan_root)
}
/// Aggregated output of a single fused per-file analysis pass
/// (see `analyse_file_fused`).
pub struct FusedResult {
    /// Per-function summaries exported from the file's CFG.
    pub summaries: Vec<FuncSummary>,
    /// All diagnostics produced for the file.
    pub diags: Vec<Diag>,
    /// Lowered SSA summaries, keyed by function.
    pub ssa_summaries: Vec<(crate::symbol::FuncKey, SsaFuncSummary)>,
    /// Total number of CFG nodes built for the file (0 when unparseable).
    pub cfg_nodes: usize,
    /// SSA bodies deemed eligible (via `build_eligible_bodies`) for
    /// cross-function analysis.
    pub ssa_bodies: Vec<(
        crate::symbol::FuncKey,
        crate::taint::ssa_transfer::CalleeSsaBody,
    )>,
    /// Auth-check summaries; populated only in `AnalysisMode::Full`.
    pub auth_summaries: Vec<(
        crate::symbol::FuncKey,
        auth_analysis::model::AuthCheckSummary,
    )>,
}
/// Single fused pass over one file: parses once, then produces diagnostics,
/// exported function summaries, SSA summaries/bodies, and (in Full mode)
/// auth-check summaries in one `FusedResult`.
///
/// Unparseable files fall back to the text-based pattern scan and return an
/// otherwise-empty result (plus a parse-timeout diagnostic when tree-sitter
/// exceeded its time budget).
///
/// # Errors
/// Propagates errors from `ParsedSource::try_new`.
pub fn analyse_file_fused(
    bytes: &[u8],
    path: &Path,
    cfg: &Config,
    global_summaries: Option<&GlobalSummaries>,
    scan_root: Option<&Path>,
) -> NyxResult<FusedResult> {
    let _span = tracing::debug_span!("analyse_fused", file = %path.display()).entered();
    maybe_inject_test_panic(path);
    let Some(source) = ParsedSource::try_new(bytes, path)? else {
        // Unparseable file: text-based scan only; all other outputs empty.
        let mut diags = scan_text_based_patterns(bytes, path, cfg);
        if let Some(timeout_ms) = take_last_parse_timeout_ms() {
            diags.push(parse_timeout_diag(path, timeout_ms));
        }
        return Ok(FusedResult {
            summaries: vec![],
            diags,
            ssa_summaries: vec![],
            cfg_nodes: 0,
            ssa_bodies: vec![],
            auth_summaries: vec![],
        });
    };
    let parsed = ParsedFile::from_source(source, cfg);
    let cfg_nodes = parsed.cfg_graph().node_count();
    let summaries = parsed.export_summaries_with_root(scan_root);
    let mut out = Vec::new();
    let needs_cfg = matches!(
        cfg.scanner.mode,
        AnalysisMode::Full | AnalysisMode::Cfg | AnalysisMode::Taint
    );
    let (ssa_summaries, ssa_bodies) = if needs_cfg {
        // Reset accumulated span state from any previously analysed file
        // before the CFG/taint analyses repopulate it.
        crate::taint::ssa_transfer::reset_path_safe_suppressed_spans();
        crate::taint::ssa_transfer::reset_all_validated_spans();
        let (lowered_summaries, lowered_bodies) =
            parsed.lower_ssa_for_fused(global_summaries, scan_root);
        out.extend(parsed.run_cfg_analyses_with_lowered(
            cfg,
            global_summaries,
            scan_root,
            &lowered_summaries,
            &lowered_bodies,
        ));
        // Keep only the bodies eligible for interprocedural analysis.
        let eligible_bodies = crate::taint::build_eligible_bodies(&parsed.file_cfg, lowered_bodies);
        let summaries_vec: Vec<_> = lowered_summaries.into_iter().collect();
        (summaries_vec, eligible_bodies)
    } else {
        (vec![], vec![])
    };
    if cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Ast {
        let ast_findings = parsed.source.run_ast_queries(cfg);
        if needs_cfg && cfg.scanner.mode == AnalysisMode::Full {
            // Full mode: drop AST findings the taint engine already covers.
            // Built after the CFG analyses so `out` holds the taint diags.
            let suppression =
                TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &out);
            out.extend(
                ast_findings
                    .into_iter()
                    .filter(|d| !suppression.should_suppress(&d.id, d.line)),
            );
        } else {
            out.extend(ast_findings);
        }
        out.extend(parsed.run_auth_analyses(cfg, global_summaries, scan_root));
    }
    parsed.source.finalize_diags(&mut out, cfg);
    // Auth summaries are only extracted in the full analysis mode.
    let auth_summaries = if cfg.scanner.mode == AnalysisMode::Full {
        auth_analysis::extract_auth_summaries_by_key(
            &parsed.source.tree,
            parsed.source.bytes,
            parsed.source.lang_slug,
            parsed.source.path,
            cfg,
            scan_root,
        )
    } else {
        Vec::new()
    };
    Ok(FusedResult {
        summaries,
        diags: out,
        ssa_summaries,
        cfg_nodes,
        ssa_bodies,
        auth_summaries,
    })
}
/// Text-based (non-AST) pattern scan for file types handled without parsing.
/// Currently only `.ejs` templates are scanned; all other extensions yield
/// no diagnostics.
fn scan_text_based_patterns(bytes: &[u8], path: &Path, cfg: &Config) -> Vec<Diag> {
    if lowercase_ext(path) != Some("ejs") {
        return Vec::new();
    }
    let mut diags = crate::patterns::ejs::scan_ejs_file(path, bytes);
    // Apply the configured minimum-severity filter.
    diags.retain(|d| d.severity <= cfg.scanner.min_severity);
    diags
}
#[test]
fn unknown_extension_returns_empty() {
    // A plain-text file with an unknown extension must yield no findings.
    let dir = tempfile::tempdir().unwrap();
    let file = dir.path().join("notes.txt");
    std::fs::write(&file, "just some text").unwrap();
    let diags = run_rules_on_file(&file, &Config::default(), None, None)
        .expect("function should never error on plain text");
    assert!(diags.is_empty());
}
#[test]
fn binary_file_guard_triggers() {
    let dir = tempfile::tempdir().unwrap();
    let bin = dir.path().join("junk.bin");
    // Build a fixture mixing printable bytes with NULs. The previous version
    // zero-filled the buffer and then wrote 0 into every third byte — a
    // no-op that left an all-NUL file; starting from b'A' makes the NUL
    // injection meaningful while still tripping the binary-content guard.
    let mut data = vec![b'A'; 2048];
    for i in (0..data.len()).step_by(3) {
        data[i] = 0;
    }
    std::fs::write(&bin, &data).unwrap();
    let diags = run_rules_on_file(&bin, &Config::default(), None, None).unwrap();
    assert!(diags.is_empty(), "binary files are skipped");
}
#[test]
fn nonprod_path_detection() {
    // Paths that should be classified as non-production.
    let nonprod = [
        "project/tests/test_main.py",
        "src/__tests__/foo.js",
        "benches/bench.rs",
        "vendor/lib/foo.py",
        "src/build.rs",
        "dist/app.min.js",
        "examples/demo.py",
        "fixtures/data.json",
    ];
    for p in nonprod {
        assert!(is_nonprod_path(Path::new(p)), "{p} should be non-prod");
    }
    // Ordinary source paths must not be flagged.
    let prod = ["src/main.rs", "lib/handler.py", "app/views.py"];
    for p in prod {
        assert!(!is_nonprod_path(Path::new(p)), "{p} should be prod");
    }
}
#[test]
fn severity_downgrade_works() {
    // High steps down to Medium, Medium to Low, and Low saturates at Low.
    let cases = [
        (Severity::High, Severity::Medium),
        (Severity::Medium, Severity::Low),
        (Severity::Low, Severity::Low),
    ];
    for (input, expected) in cases {
        assert_eq!(downgrade_severity(input), expected);
    }
}
#[test]
fn nonprod_path_downgrades_findings() {
    // Write a Python fixture with a taint source feeding os.system under a
    // tests/ directory, which the scanner treats as non-production.
    let tmp = tempfile::tempdir().unwrap();
    let tests_dir = tmp.path().join("tests");
    std::fs::create_dir_all(&tests_dir).unwrap();
    let py_file = tests_dir.join("test_cmd.py");
    std::fs::write(
        &py_file,
        b"import os\ndef test():\n cmd = os.environ['X']\n os.system(cmd)\n",
    )
    .unwrap();
    // With the default config, nothing under tests/ may stay at HIGH.
    let base_cfg = Config::default();
    let diags = run_rules_on_file(&py_file, &base_cfg, None, None).unwrap();
    let high_findings: Vec<_> = diags
        .iter()
        .filter(|d| d.severity == Severity::High)
        .collect();
    assert!(
        high_findings.is_empty(),
        "Findings in tests/ should be downgraded from HIGH; got {:?}",
        high_findings
    );
    // Exercise the include_nonprod code path as a no-panic smoke check.
    let mut nonprod_cfg = Config::default();
    nonprod_cfg.scanner.include_nonprod = true;
    let _diags_nonprod = run_rules_on_file(&py_file, &nonprod_cfg, None, None).unwrap();
}
#[test]
fn constant_arg_suppression_works() {
    use tree_sitter::StreamingIterator;
    // The original repeated the parse/query/capture boilerplate three times;
    // it is now factored into one helper. Parse `code` with `lang`, run
    // `query_str`, and return the node bound to capture index 0 of the
    // first match.
    fn first_vuln_capture<'t>(
        lang: &tree_sitter::Language,
        tree: &'t tree_sitter::Tree,
        code: &[u8],
        query_str: &str,
    ) -> tree_sitter::Node<'t> {
        let query = tree_sitter::Query::new(lang, query_str).expect("query compiles");
        let mut cursor = tree_sitter::QueryCursor::new();
        let mut matches = cursor.matches(&query, tree.root_node(), code);
        let m = matches.next().expect("query should match");
        m.captures
            .iter()
            .find(|c| c.index == 0)
            .expect("capture index 0")
            .node
    }
    // PHP: system() with a string-literal argument is all-literal.
    {
        let mut parser = tree_sitter::Parser::new();
        let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
        parser.set_language(&lang).unwrap();
        let code = b"<?php\nsystem(\"echo health-ok\");\n";
        let tree = parser.parse(code, None).unwrap();
        let query_str = r#"(function_call_expression
function: (name) @n (#match? @n "^(system)$"))
@vuln"#;
        let cap = first_vuln_capture(&lang, &tree, code, query_str);
        assert!(
            is_call_all_args_literal(cap, code),
            "PHP system(\"echo health-ok\") should have all-literal args"
        );
    }
    // Python: os.system with a literal vs. a variable argument.
    {
        let mut parser = tree_sitter::Parser::new();
        let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
        parser.set_language(&lang).unwrap();
        let query_str = r#"(call
function: (attribute
object: (identifier) @pkg (#eq? @pkg "os")
attribute: (identifier) @fn (#eq? @fn "system")))
@vuln"#;
        let code = b"import os\nos.system(\"echo health-ok\")\n";
        let tree = parser.parse(code, None).unwrap();
        let cap = first_vuln_capture(&lang, &tree, code, query_str);
        assert!(
            is_call_all_args_literal(cap, code),
            "Python os.system(\"echo health-ok\") should have all-literal args"
        );
        let code = b"import os\nos.system(cmd)\n";
        let tree = parser.parse(code, None).unwrap();
        let cap = first_vuln_capture(&lang, &tree, code, query_str);
        assert!(
            !is_call_all_args_literal(cap, code),
            "Python os.system(cmd) should NOT have all-literal args"
        );
    }
}
#[cfg(test)]
fn first_php_capture<'tree>(
    tree: &'tree tree_sitter::Tree,
    code: &[u8],
    query_str: &str,
) -> tree_sitter::Node<'tree> {
    use tree_sitter::StreamingIterator;
    // Compile `query_str` against the PHP grammar and run it over `tree`.
    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let query = tree_sitter::Query::new(&lang, query_str).expect("query compiles");
    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&query, tree.root_node(), code);
    let first = matches.next().expect("query should match");
    // Return the node bound to capture index 0 of the first match.
    first
        .captures
        .iter()
        .find(|c| c.index == 0)
        .expect("capture index 0")
        .node
}
#[test]
fn php_include_param_passthrough_recognises_canonical_shapes() {
    let mut parser = tree_sitter::Parser::new();
    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    parser.set_language(&lang).unwrap();
    let q = r#"(include_expression (variable_name)) @vuln"#;
    // (source, expected classification, description)
    let cases: [(&[u8], bool, &str); 5] = [
        (
            b"<?php\nstatic $cb = function ($file) { include $file; };\n",
            true,
            "closure param pass-through should be recognised",
        ),
        (
            b"<?php\nclass C { function f(string $file): void { include $file; } }\n",
            true,
            "method param pass-through should be recognised",
        ),
        (
            b"<?php\nclass C { function f(string $base): void { $f = $base . '/x.php'; include $f; } }\n",
            false,
            "concat-built local should NOT be treated as pass-through",
        ),
        (
            b"<?php\nfunction f($file) { $file = $_GET['x']; include $file; }\n",
            false,
            "reassigned param should NOT be treated as pass-through",
        ),
        (
            b"<?php\n$file = $_GET['x'];\ninclude $file;\n",
            false,
            "top-level include should NOT be treated as pass-through",
        ),
    ];
    for (code, expected, why) in cases {
        let tree = parser.parse(code, None).unwrap();
        let cap = first_php_capture(&tree, code, q);
        assert_eq!(is_php_include_param_passthrough(cap, code), expected, "{why}");
    }
}
#[test]
fn php_unserialize_allowed_classes_recognises_safe_forms() {
    let mut parser = tree_sitter::Parser::new();
    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    parser.set_language(&lang).unwrap();
    let q = r#"(function_call_expression function: (name) @n (#eq? @n "unserialize")) @vuln"#;
    // (source, expected classification, description)
    let cases: [(&[u8], bool, &str); 6] = [
        (
            b"<?php\n$x = unserialize($d, ['allowed_classes' => false]);\n",
            true,
            "allowed_classes => false should be recognised as safe",
        ),
        (
            b"<?php\n$x = unserialize($d, ['allowed_classes' => [Foo::class]]);\n",
            true,
            "allowed_classes => [array] should be recognised as safe",
        ),
        (
            b"<?php\nclass C { const A = []; function f($d) { return unserialize($d, ['allowed_classes' => self::A]); } }\n",
            true,
            "allowed_classes => self::CONST should be recognised as safe",
        ),
        (
            b"<?php\n$x = unserialize($d, ['allowed_classes' => true]);\n",
            false,
            "allowed_classes => true is the unsafe default, should NOT be suppressed",
        ),
        (
            b"<?php\n$x = unserialize($d);\n",
            false,
            "single-arg unserialize should NOT be suppressed",
        ),
        (
            b"<?php\n$x = unserialize($d, $opts);\n",
            false,
            "dynamic options variable should NOT be suppressed",
        ),
    ];
    for (code, expected, why) in cases {
        let tree = parser.parse(code, None).unwrap();
        let cap = first_php_capture(&tree, code, q);
        assert_eq!(
            is_php_unserialize_allowed_classes_restricted(cap, code),
            expected,
            "{why}"
        );
    }
}
#[test]
fn php_weak_hash_non_crypto_use_recognises_canonical_shapes() {
    let mut parser = tree_sitter::Parser::new();
    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    parser.set_language(&lang).unwrap();
    let q = r#"(function_call_expression function: (name) @n (#match? @n "^(md5|sha1)$")) @vuln"#;
    // (source, expected classification, description)
    let cases: [(&[u8], bool, &str); 13] = [
        (
            b"<?php\nclass C { public function getETag(): string { return '\"' . md5($this->data) . '\"'; } }\n",
            true,
            "getETag concat should be suppressed",
        ),
        (
            b"<?php\nfunction f($x) { return ['table_name_hash' => md5($x)]; }\n",
            true,
            "array element with `*_hash` key should be suppressed",
        ),
        (
            b"<?php\nfunction f($x, &$row) { $row['etag'] = md5($x); }\n",
            true,
            "subscript LHS with 'etag' key should be suppressed",
        ),
        (
            b"<?php\nclass C { function f() { $this->storageId = md5($this->id); } }\n",
            true,
            "member-access LHS `storageId` should be suppressed",
        ),
        (
            b"<?php\nfunction f($t, &$tables) { $tables[$t]['hash'] ??= md5($t); }\n",
            true,
            "??= subscript LHS with 'hash' key should be suppressed",
        ),
        (
            b"<?php\nfunction f($a, $x) { return $a[md5($x)]; }\n",
            true,
            "md5 used as subscript index should be suppressed",
        ),
        (
            b"<?php\nclass C { public $cache; function f($u) { return $this->cache->get(sha1($u)); } }\n",
            true,
            "method call to lookup-verb `get(sha1(..))` should be suppressed",
        ),
        (
            b"<?php\nclass C { public $q; function f($d) { $this->q->insert('t')->values(['etag' => $this->q->createNamedParameter(md5($d))]); } }\n",
            true,
            "wrapper-call inside array element with `etag` key should be suppressed",
        ),
        (
            b"<?php\nfunction f($cols) { $columnNamesHashes = []; foreach ($cols as $c) { $columnNamesHashes[$c] = md5($c); } }\n",
            true,
            "subscript LHS with dynamic index — receiver name `*Hashes` should drive suppression",
        ),
        (
            b"<?php\nclass C { public $password; function f($p) { $this->password = md5($p); } }\n",
            false,
            "$this->password = md5(...) is crypto storage and must NOT be suppressed",
        ),
        (
            b"<?php\nfunction f($x) { $tokenHash = md5($x); }\n",
            false,
            "$tokenHash compound name must NOT be suppressed (contains 'token')",
        ),
        (
            b"<?php\nfunction f($p) { $pw_hash = md5($p); }\n",
            false,
            "$pw_hash compound name must NOT be suppressed",
        ),
        (
            b"<?php\nfunction f($x) { var_dump(md5($x)); }\n",
            false,
            "var_dump(md5(...)) has no recognisable consumer name and must NOT be suppressed",
        ),
    ];
    for (code, expected, why) in cases {
        let tree = parser.parse(code, None).unwrap();
        let cap = first_php_capture(&tree, code, q);
        assert_eq!(is_php_weak_hash_non_crypto_use(cap, code), expected, "{why}");
    }
}
#[test]
fn name_is_non_crypto_recognises_word_boundary_suffixes() {
    // Names whose suffix marks a non-cryptographic use (caching, ids,
    // etags, fingerprints). The duplicated `cache_key` assertion from the
    // original test has been removed; each case appears exactly once.
    let non_crypto = [
        "hash",
        "etag",
        "table_name_hash",
        "table_id",
        "cache_key",
        "cachekey",
        "storageId",
        "tableHash",
        "sqlMd5",
        "cacheBuster",
        "columnnameshashes",
        "tablefingerprint",
        "v1id",
    ];
    for name in non_crypto {
        assert!(name_is_non_crypto(name), "{name:?} should be non-crypto");
    }
    // Credential/signature names, bare words that merely contain a suffix
    // substring, and generic names must not be classified as non-crypto.
    let crypto_or_unknown = [
        "password_hash",
        "hashedPassword",
        "tokenHash",
        "signatureHash",
        "pw_hash",
        "digest",
        "hmac",
        "salt",
        "private_key",
        "key",
        "keys",
        "apiKey",
        "api_key",
        "apiKeyHash",
        "api_key_hash",
        "said",
        "void",
        "rapid",
        "x",
        "result",
        "output",
        "",
    ];
    for name in crypto_or_unknown {
        assert!(!name_is_non_crypto(name), "{name:?} must NOT be non-crypto");
    }
}
#[test]
fn method_is_lookup_verb_recognises_cache_verbs() {
    // Cache/lookup-style method names that mark a non-crypto consumer.
    let lookup_verbs = [
        "get",
        "set",
        "has",
        "delete",
        "fetch",
        "getItem",
        "setItem",
        "getCacheKey",
        "setCacheKey",
        "buildKey",
        "createId",
        "hasFingerprint",
    ];
    for verb in lookup_verbs {
        assert!(method_is_lookup_verb(verb), "{verb:?} should be a lookup verb");
    }
    // Crypto/verification verbs and unrelated names must not qualify.
    let other_verbs = [
        "hash_equals",
        "verify",
        "password_verify",
        "decrypt",
        "encrypt",
        "sign",
        "invoke",
        "doSomething",
    ];
    for verb in other_verbs {
        assert!(!method_is_lookup_verb(verb), "{verb:?} must NOT be a lookup verb");
    }
}
#[test]
fn sprintf_format_safety_classifier() {
    // Formats with no unbounded string conversions are safe.
    let safe = [
        "",
        "hello world",
        "%d",
        "%lld%c",
        "fixed=%d/%c",
        "%5d %x %llo",
        "%%literal-percent",
        "%p",
        " %.*s",
        "%.5s",
        "[%-.10s]",
    ];
    for fmt in safe {
        assert!(sprintf_format_is_safe(fmt), "{fmt:?} should be safe");
    }
    // Unbounded `%s` variants and malformed specifiers are unsafe.
    let unsafe_fmts = ["%s", "hello %s world", "%5s", "[%-20s]", "%S", "%", "%lZ"];
    for fmt in unsafe_fmts {
        assert!(!sprintf_format_is_safe(fmt), "{fmt:?} must NOT be safe");
    }
}
#[cfg(test)]
fn first_c_capture<'tree>(
    tree: &'tree tree_sitter::Tree,
    code: &[u8],
    query_str: &str,
) -> tree_sitter::Node<'tree> {
    use tree_sitter::StreamingIterator;
    // Run `query_str` against the C grammar and return the node bound to
    // capture index 0 of the first match.
    let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let query = tree_sitter::Query::new(&lang, query_str).expect("query compiles");
    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&query, tree.root_node(), code);
    let first = matches.next().expect("query should match");
    let cap = first
        .captures
        .iter()
        .find(|c| c.index == 0)
        .expect("capture index 0");
    cap.node
}
#[cfg(test)]
fn first_cpp_capture<'tree>(
    tree: &'tree tree_sitter::Tree,
    code: &[u8],
    query_str: &str,
) -> tree_sitter::Node<'tree> {
    use tree_sitter::StreamingIterator;
    // Run `query_str` against the C++ grammar and return the node bound to
    // capture index 0 of the first match.
    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let query = tree_sitter::Query::new(&lang, query_str).expect("query compiles");
    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&query, tree.root_node(), code);
    let first = matches.next().expect("query should match");
    let cap = first
        .captures
        .iter()
        .find(|c| c.index == 0)
        .expect("capture index 0");
    cap.node
}
#[test]
fn cpp_cast_target_type_is_safe_recognises_canonical_shapes() {
    use crate::ast::cpp_cast_target_type_is_safe as f;
    // Reworked from 50+ bare asserts (including several statements jammed on
    // one line) into labelled data tables, so a failure reports which type
    // string misclassified.
    //
    // Byte/char pointers, void pointers, pointer-sized integer round-trips,
    // and the BSD sockaddr family are safe cast targets.
    let safe = [
        "char*",
        "char *",
        "const char*",
        "const char *",
        "unsigned char*",
        "const unsigned char*",
        "const unsigned char *",
        "signed char*",
        "uint8_t*",
        "const uint8_t*",
        "const uint8_t *",
        "uint8_t * const",
        "int8_t*",
        "std::byte*",
        "const std::byte*",
        "byte*",
        "wchar_t*",
        "void*",
        "const void*",
        "uintptr_t",
        "std::uintptr_t",
        "intptr_t",
        "std::intptr_t",
        "sockaddr*",
        "struct sockaddr*",
        "sockaddr_in*",
        "sockaddr_in6*",
        "sockaddr_un*",
        "sockaddr_storage*",
    ];
    for ty in safe {
        assert!(f(ty), "{ty:?} should be a safe cast target");
    }
    // Double pointers, pointers-to-uintptr, arbitrary struct/template types,
    // function pointers, arrays, and non-pointer scalars are not safe.
    let unsafe_targets = [
        "char**",
        "uint8_t**",
        "void**",
        "void **",
        "uintptr_t*",
        "intptr_t*",
        "std::uintptr_t*",
        "MyStruct*",
        "InstanceType*",
        "DBImpl*",
        "C*",
        "CPP*",
        "T*",
        "secp256k1_keypair*",
        "PIP_ADAPTER_ADDRESSES",
        "std::vector<int>*",
        "void(*)(int)",
        "char[10]",
        "int",
        "size_t",
        "uint64_t",
        "char",
        "uint8_t",
    ];
    for ty in unsafe_targets {
        assert!(!f(ty), "{ty:?} must NOT be a safe cast target");
    }
}
#[test]
fn cpp_reinterpret_cast_layer_e_recognises_byte_pointer_targets() {
    let mut parser = tree_sitter::Parser::new();
    let lang = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    parser.set_language(&lang).unwrap();
    let q = r#"(call_expression
function: (template_function
name: (identifier) @n (#eq? @n "reinterpret_cast")))
@vuln"#;
    // (source, rule id, expected classification, description)
    let cases: [(&[u8], &str, bool, &str); 7] = [
        (
            b"void f(int* p) { auto q = reinterpret_cast<uint8_t*>(p); (void)q; }\n",
            "cpp.memory.reinterpret_cast",
            true,
            "reinterpret_cast<uint8_t*> must be suppressed (byte-pointer target)",
        ),
        (
            b"#include <cstddef>\nvoid f(int* p) { auto q = reinterpret_cast<const std::byte*>(p); (void)q; }\n",
            "cpp.memory.reinterpret_cast",
            true,
            "reinterpret_cast<const std::byte*> must be suppressed",
        ),
        (
            b"void* f() { return reinterpret_cast<void*>(0x08000000); }\n",
            "cpp.memory.reinterpret_cast",
            true,
            "reinterpret_cast<void*> must be suppressed (synthetic address)",
        ),
        (
            b"#include <cstdint>\nuintptr_t f(int* p) { return reinterpret_cast<uintptr_t>(p); }\n",
            "cpp.memory.reinterpret_cast",
            true,
            "reinterpret_cast<uintptr_t> must be suppressed (integer round-trip)",
        ),
        (
            b"struct sockaddr_in { int x; };\nstruct sockaddr;\nvoid f(struct sockaddr_in* a) { auto* s = reinterpret_cast<sockaddr*>(a); (void)s; }\n",
            "cpp.memory.reinterpret_cast",
            true,
            "reinterpret_cast<sockaddr*> must be suppressed (BSD socket pun)",
        ),
        (
            b"struct MyStruct { int a; };\nMyStruct* f(char* buf) { return reinterpret_cast<MyStruct*>(buf); }\n",
            "cpp.memory.reinterpret_cast",
            false,
            "reinterpret_cast<MyStruct*> must NOT be suppressed (genuine strict-aliasing risk)",
        ),
        // Same snippet, different rule id: the layer is rule-gated.
        (
            b"struct MyStruct { int a; };\nMyStruct* f(char* buf) { return reinterpret_cast<MyStruct*>(buf); }\n",
            "cpp.memory.const_cast",
            false,
            "Layer E must only fire for cpp.memory.reinterpret_cast",
        ),
    ];
    for (code, rule_id, expected, why) in cases {
        let tree = parser.parse(code, None).unwrap();
        let cap = first_cpp_capture(&tree, code, q);
        assert_eq!(
            is_cpp_cast_target_type_safe(rule_id, cap, code),
            expected,
            "{why}"
        );
    }
}
#[test]
fn c_buffer_call_literal_safe_recognises_canonical_shapes() {
    let mut parser = tree_sitter::Parser::new();
    let lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    parser.set_language(&lang).unwrap();
    let q_strcpy = r#"(call_expression function: (identifier) @id (#eq? @id "strcpy")) @vuln"#;
    let q_strcat = r#"(call_expression function: (identifier) @id (#eq? @id "strcat")) @vuln"#;
    let q_sprintf = r#"(call_expression function: (identifier) @id (#eq? @id "sprintf")) @vuln"#;
    // (source, query, rule id, expected classification, description)
    let cases: [(&[u8], &str, &str, bool, &str); 11] = [
        (
            b"void f(char *d) { strcpy(d, \"pg_prewarm\"); }\n",
            q_strcpy,
            "c.memory.strcpy",
            true,
            "strcpy with string-literal source must be suppressed",
        ),
        (
            b"void f(char *s, int h) { strcpy(s, (h >= 12) ? \"p.m.\" : \"a.m.\"); }\n",
            q_strcpy,
            "c.memory.strcpy",
            true,
            "strcpy with ternary-of-literals source must be suppressed",
        ),
        (
            b"#define P_M_STR \"p.m.\"\n#define A_M_STR \"a.m.\"\nvoid f(char *s, int h) { strcpy(s, (h >= 12) ? P_M_STR : A_M_STR); }\n",
            q_strcpy,
            "c.memory.strcpy",
            true,
            "strcpy with ternary-of-ALL_CAPS-macros must be suppressed",
        ),
        (
            b"void f(char *s, int h, char *a, char *b) { strcpy(s, (h >= 12) ? a : b); }\n",
            q_strcpy,
            "c.memory.strcpy",
            false,
            "strcpy with ternary-of-lowercase-vars must NOT be suppressed",
        ),
        (
            b"void f(char *d) { strcat(d, \" (done)\"); }\n",
            q_strcat,
            "c.memory.strcat",
            true,
            "strcat with string-literal source must be suppressed",
        ),
        (
            b"void f(char *cp, long long v, char u) { sprintf(cp, \"%lld%c\", v, u); }\n",
            q_sprintf,
            "c.memory.sprintf",
            true,
            "sprintf with numeric-only format must be suppressed",
        ),
        (
            b"void f(char *str, int n, const char *x) { sprintf(str, \" %.*s\", n, x); }\n",
            q_sprintf,
            "c.memory.sprintf",
            true,
            "sprintf with precision-bounded `%.*s` must be suppressed",
        ),
        (
            b"void f(char *d, char **a) { strcpy(d, a[1]); }\n",
            q_strcpy,
            "c.memory.strcpy",
            false,
            "strcpy with non-literal source must NOT be suppressed",
        ),
        (
            b"void f(char *b, const char *u) { sprintf(b, \"%s\", u); }\n",
            q_sprintf,
            "c.memory.sprintf",
            false,
            "sprintf with bare `%%s` must NOT be suppressed",
        ),
        (
            b"void f(char *b, long long v) { sprintf(b, \"%\" PRId64, v); }\n",
            q_sprintf,
            "c.memory.sprintf",
            false,
            "sprintf with concatenated_string format must NOT be suppressed",
        ),
        // Same safe strcpy snippet, but a non-buffer-overflow rule id: the
        // layer is rule-gated.
        (
            b"void f(char *d) { strcpy(d, \"x\"); }\n",
            q_strcpy,
            "c.memory.gets",
            false,
            "Layer D should only fire for buffer-overflow rule ids",
        ),
    ];
    for (code, query, rule_id, expected, why) in cases {
        let tree = parser.parse(code, None).unwrap();
        let cap = first_c_capture(&tree, code, query);
        assert_eq!(
            is_c_buffer_call_literal_safe(rule_id, cap, code),
            expected,
            "{why}"
        );
    }
}
#[test]
fn is_literal_node_rejects_python_fstring_with_interpolation() {
    // Verify the literal classifier on Python string RHS values:
    // an f-string with interpolation is NOT a literal, a plain string is.
    let mut parser = tree_sitter::Parser::new();
    let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    parser.set_language(&lang).unwrap();
    // (source, expected is_literal_node verdict, failure message)
    let cases: [(&[u8], bool, &str); 2] = [
        (
            b"x = f\"SELECT * WHERE y = '{u}'\"\n",
            false,
            "f-string with interpolation must not be classified as literal",
        ),
        (
            b"x = \"plain literal\"\n",
            true,
            "plain string literal must be classified as literal",
        ),
    ];
    for (code, want, msg) in cases {
        let tree = parser.parse(code, None).unwrap();
        // Navigate: module -> expression_statement -> assignment.
        let assignment = tree
            .root_node()
            .child(0)
            .and_then(|s| s.child(0))
            .expect("assignment node");
        let rhs = assignment
            .child_by_field_name("right")
            .expect("RHS of assignment");
        // Both inputs parse to a `string` node in the Python grammar.
        assert_eq!(rhs.kind(), "string");
        assert!(is_literal_node(rhs, code) == want, "{msg}");
    }
}