#![allow(clippy::collapsible_if, clippy::too_many_arguments)]
pub mod backwards;
pub mod domain;
pub mod path_state;
pub mod ssa_transfer;
use crate::cfg::{BodyCfg, BodyId, Cfg, FileCfg, FuncSummaries};
use crate::engine_notes::EngineNote;
use crate::interop::InteropEdge;
use crate::labels::SourceKind;
use crate::state::engine::MAX_TRACKED_VARS;
use crate::state::symbol::SymbolInterner;
use crate::summary::GlobalSummaries;
use crate::symbol::{FuncKey, FuncKind, Lang};
use path_state::PredicateKind;
use petgraph::graph::NodeIndex;
use petgraph::visit::IntoNodeReferences;
use smallvec::SmallVec;
use std::collections::{HashMap, HashSet, VecDeque};
use std::sync::atomic::{AtomicUsize, Ordering};
/// Built-in iteration cap for the JS/TS pass-2 fixpoint; used whenever the
/// override below is `0`.
const JS_TS_PASS2_SAFETY_CAP: usize = 64;
/// Process-wide override for the pass-2 cap. `0` means "no override".
static JS_TS_PASS2_CAP_OVERRIDE: AtomicUsize = AtomicUsize::new(0);
/// Iteration count stored by the most recent multi-body analysis; reset to `0`
/// at the start of `analyse_file_with_lowered`.
static LAST_JS_TS_PASS2_ITERATIONS: AtomicUsize = AtomicUsize::new(0);
/// Sets the process-wide override for the JS/TS pass-2 iteration cap.
///
/// Passing `0` clears the override so the built-in default cap applies again.
#[doc(hidden)]
pub fn set_js_ts_pass2_cap_override(cap: usize) {
JS_TS_PASS2_CAP_OVERRIDE.store(cap, Ordering::Relaxed);
}
/// Returns the iteration count stored by the most recent analysis run.
///
/// The value lives in a single process-wide atomic, so concurrent analyses
/// overwrite each other's counts.
pub fn last_js_ts_pass2_iterations() -> usize {
LAST_JS_TS_PASS2_ITERATIONS.load(Ordering::Relaxed)
}
/// Resolves the effective pass-2 iteration cap: the override when one is set
/// (non-zero), otherwise the built-in safety cap.
fn js_ts_pass2_cap() -> usize {
    match JS_TS_PASS2_CAP_OVERRIDE.load(Ordering::Relaxed) {
        0 => JS_TS_PASS2_SAFETY_CAP,
        overridden => overridden,
    }
}
// Per-thread accumulator of lowering timings, in microseconds.
// `None` means collection is off; `Some` holds seven slots indexed by the
// `slot` argument of `perf_lower_record`.
thread_local! {
static PERF_LOWER_TIMINGS: std::cell::Cell<Option<[u128; 7]>> =
const { std::cell::Cell::new(None) };
}
/// Starts (or restarts) timing collection on the current thread by installing
/// a zeroed set of slots.
#[doc(hidden)]
pub fn perf_lower_timings_start() {
PERF_LOWER_TIMINGS.with(|c| c.set(Some([0; 7])));
}
/// Stops timing collection on the current thread and hands back whatever was
/// accumulated, or `None` when collection was never started.
#[doc(hidden)]
pub fn perf_lower_timings_take() -> Option<[u128; 7]> {
    // `Cell::take` leaves `Option::default()` (i.e. `None`) behind.
    PERF_LOWER_TIMINGS.with(|timings| timings.take())
}
/// Adds `micros` to timing slot `slot` on the current thread.
///
/// Does nothing when collection is off. The addition saturates instead of
/// overflowing. `slot` must be a valid index into the seven-element array.
#[inline]
fn perf_lower_record(slot: usize, micros: u128) {
    PERF_LOWER_TIMINGS.with(|cell| {
        let Some(mut slots) = cell.get() else {
            return;
        };
        slots[slot] = slots[slot].saturating_add(micros);
        cell.set(Some(slots));
    });
}
/// Process-wide override for the Gauss-Seidel switch: `1` forces it off, `2`
/// forces it on, any other value defers to the `NYX_JS_GAUSS_SEIDEL`
/// environment variable.
static JS_TS_GAUSS_SEIDEL_OVERRIDE: AtomicUsize = AtomicUsize::new(0);
/// Sets the Gauss-Seidel override mode (`1` = force off, `2` = force on,
/// anything else = defer to the environment variable).
#[doc(hidden)]
pub fn set_js_ts_gauss_seidel_override(mode: usize) {
JS_TS_GAUSS_SEIDEL_OVERRIDE.store(mode, Ordering::Relaxed);
}
/// Reports whether the Gauss-Seidel variant of the JS/TS pass-2 loop is on.
///
/// An explicit override wins (`1` = off, `2` = on). Otherwise the answer comes
/// from `NYX_JS_GAUSS_SEIDEL`, read once and cached for the process: any value
/// other than the empty string, `0` or `false` enables it; an unset or
/// unreadable variable disables it.
pub fn js_ts_gauss_seidel_enabled() -> bool {
    use std::sync::OnceLock;
    static ENABLED: OnceLock<bool> = OnceLock::new();

    match JS_TS_GAUSS_SEIDEL_OVERRIDE.load(Ordering::Relaxed) {
        1 => false,
        2 => true,
        _ => *ENABLED.get_or_init(|| {
            std::env::var("NYX_JS_GAUSS_SEIDEL")
                .map(|raw| !matches!(raw.as_str(), "" | "0" | "false"))
                .unwrap_or(false)
        }),
    }
}
/// One raw step of a reported flow: a CFG node, an optional variable name and
/// a step kind. Construction happens outside this part of the file.
#[derive(Debug, Clone)]
pub struct FlowStepRaw {
// CFG node this step refers to.
pub cfg_node: NodeIndex,
// Variable associated with the step, when there is one.
pub var_name: Option<String>,
// Classification of the step (see `crate::evidence::FlowStepKind`).
pub op_kind: crate::evidence::FlowStepKind,
}
/// Source location attached to a finding as its `primary_location`.
#[derive(Debug, Clone, PartialEq)]
pub struct SinkLocation {
// presumably a path relative to the scan root — TODO confirm against the producer
pub file_rel: String,
// NOTE(review): whether line/col are 0- or 1-based is not visible here.
pub line: u32,
pub col: u32,
// Text excerpt for the location.
pub snippet: String,
}
/// A single taint finding produced by the per-body analysis.
///
/// Findings are sorted and de-duplicated on
/// `(body_id, sink, source, path_validated, path_hash)` in
/// `analyse_file_with_lowered`, which also fills `finding_id` and
/// `alternative_finding_ids`.
#[derive(Debug, Clone)]
pub struct Finding {
// Id of the body the finding was produced in (set by `analyse_body_with_seed`).
pub body_id: BodyId,
// CFG node of the sink.
pub sink: NodeIndex,
// CFG node of the source.
pub source: NodeIndex,
#[allow(dead_code)] pub path: Vec<NodeIndex>,
pub source_kind: SourceKind,
// Encoded as the trailing 'v' / 'u' of `finding_id`.
pub path_validated: bool,
pub guard_kind: Option<PredicateKind>,
pub hop_count: u16,
pub cap_specificity: u8,
pub uses_summary: bool,
pub flow_steps: Vec<FlowStepRaw>,
// Optional verdict filled by the symbolic-execution pass when it is enabled.
pub symbolic: Option<crate::evidence::SymbolicVerdict>,
pub source_span: Option<usize>,
pub primary_location: Option<SinkLocation>,
// Notes added through `with_note` / `merge_note` (both go through `push_unique`).
pub engine_notes: SmallVec<[EngineNote; 2]>,
// Part of the dedup key and of `finding_id`.
pub path_hash: u64,
// Assigned by `make_finding_id` after de-duplication.
pub finding_id: String,
// Ids of the other findings sharing this finding's (body, sink, source).
pub alternative_finding_ids: SmallVec<[String; 2]>,
pub effective_sink_caps: crate::labels::Cap,
}
impl Finding {
    /// Builder-style form of [`Finding::merge_note`]: attaches `note` and
    /// returns the finding by value.
    pub fn with_note(mut self, note: EngineNote) -> Self {
        self.merge_note(note);
        self
    }

    /// Attaches `note` to this finding in place, going through
    /// `engine_notes::push_unique`.
    pub fn merge_note(&mut self, note: EngineNote) {
        let notes = &mut self.engine_notes;
        crate::engine_notes::push_unique(notes, note);
    }
}
/// Collects module aliases for a function's SSA body.
///
/// Only JavaScript and TypeScript are handled: constant propagation runs over
/// `ssa` and its values feed `collect_module_aliases`. Every other language
/// gets an empty map without running constant propagation.
fn compute_module_aliases_for_summary(
    ssa: &crate::ssa::SsaBody,
    lang: Lang,
) -> std::collections::HashMap<crate::ssa::SsaValue, smallvec::SmallVec<[String; 2]>> {
    match lang {
        Lang::JavaScript | Lang::TypeScript => {
            let propagated = crate::ssa::const_prop::const_propagate(ssa);
            crate::ssa::const_prop::collect_module_aliases(ssa, &propagated.values)
        }
        _ => std::collections::HashMap::new(),
    }
}
/// Entry point for analysing one file.
///
/// Resets the transfer module's span bookkeeping, lowers every function body
/// to SSA (producing summaries and cached callee bodies), and then runs the
/// taint analysis proper via `analyse_file_with_lowered`.
pub fn analyse_file(
    file_cfg: &FileCfg,
    local_summaries: &FuncSummaries,
    global_summaries: Option<&GlobalSummaries>,
    caller_lang: Lang,
    caller_namespace: &str,
    interop_edges: &[InteropEdge],
    extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
) -> Vec<Finding> {
    // Clear span state before any lowering or analysis happens.
    ssa_transfer::reset_path_safe_suppressed_spans();
    ssa_transfer::reset_all_validated_spans();

    // No sink-site locator is supplied on this path.
    let lowered = lower_all_functions_from_bodies(
        file_cfg,
        caller_lang,
        caller_namespace,
        local_summaries,
        global_summaries,
        None,
    );
    let (lowered_summaries, lowered_bodies) = &lowered;

    analyse_file_with_lowered(
        file_cfg,
        local_summaries,
        global_summaries,
        caller_lang,
        caller_namespace,
        interop_edges,
        extra_labels,
        lowered_summaries,
        lowered_bodies,
    )
}
/// Runs the taint analysis for a file whose functions are already lowered.
///
/// Empty inputs are turned into `None` so downstream code can skip them.
/// Callee bodies and the inline cache are only passed on when the
/// context-sensitive option is on. JS/TS files get the pass-2 iteration cap;
/// every other language runs a single pass. The resulting findings are
/// sorted, de-duplicated, given stable ids, and linked to their alternative
/// paths.
#[allow(clippy::too_many_arguments)]
pub(crate) fn analyse_file_with_lowered(
    file_cfg: &FileCfg,
    local_summaries: &FuncSummaries,
    global_summaries: Option<&GlobalSummaries>,
    caller_lang: Lang,
    caller_namespace: &str,
    interop_edges: &[InteropEdge],
    extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
    ssa_summaries: &std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
    callee_bodies: &std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
) -> Vec<Finding> {
    let _span = tracing::debug_span!("taint_analyse_file").entered();

    let ssa_sums_ref = (!ssa_summaries.is_empty()).then_some(ssa_summaries);

    let context_sensitive = crate::utils::analysis_options::current().context_sensitive;
    let inline_cache = std::cell::RefCell::new(std::collections::HashMap::new());
    let callee_bodies_ref =
        (context_sensitive && !callee_bodies.is_empty()).then_some(callee_bodies);
    let inline_cache_ref = context_sensitive.then_some(&inline_cache);

    let is_js_ts = matches!(caller_lang, Lang::JavaScript | Lang::TypeScript);
    let max_iterations = if is_js_ts { js_ts_pass2_cap() } else { 1 };
    LAST_JS_TS_PASS2_ITERATIONS.store(0, Ordering::Relaxed);

    let import_bindings_ref =
        (!file_cfg.import_bindings.is_empty()).then_some(&file_cfg.import_bindings);

    let cross_file_bodies_ref = global_summaries.and_then(|gs| gs.bodies_by_key());
    if let Some(map) = cross_file_bodies_ref {
        tracing::debug!(
            cross_file_bodies = map.len(),
            file = %caller_namespace,
            "taint: cross-file bodies available for pass 2"
        );
    }

    let mut results = analyse_multi_body(
        file_cfg,
        caller_lang,
        caller_namespace,
        local_summaries,
        global_summaries,
        interop_edges,
        extra_labels,
        ssa_sums_ref,
        callee_bodies_ref,
        inline_cache_ref,
        max_iterations,
        import_bindings_ref,
        cross_file_bodies_ref,
    );

    // Within one (body, sink, source) group, validated findings sort first.
    let ordering_key = |f: &Finding| {
        (
            f.body_id.0,
            f.sink.index(),
            f.source.index(),
            !f.path_validated,
            f.path_hash,
        )
    };
    results.sort_by_key(ordering_key);
    results.dedup_by_key(|f| (f.body_id, f.sink, f.source, f.path_validated, f.path_hash));

    for finding in results.iter_mut() {
        let id = make_finding_id(finding);
        finding.finding_id = id;
    }
    link_alternative_paths(&mut results);
    results
}
/// Builds the id string for a finding from its body id, source node index,
/// sink node index, path hash (16 hex digits, zero padded) and a trailing
/// `v`/`u` marking a validated/unvalidated path.
fn make_finding_id(f: &Finding) -> String {
    let validation_tag = if f.path_validated { 'v' } else { 'u' };
    let body = f.body_id.0;
    let source = f.source.index();
    let sink = f.sink.index();
    format!(
        "taint-{}-{}-{}-{:016x}-{}",
        body, source, sink, f.path_hash, validation_tag,
    )
}
/// Cross-links findings that share the same `(body, sink, source)` triple.
///
/// For every group with at least two members, each member's
/// `alternative_finding_ids` is replaced by the ids of the other members.
/// Findings that are alone in their group are left untouched.
fn link_alternative_paths(findings: &mut [Finding]) {
    let mut by_triple: HashMap<(BodyId, NodeIndex, NodeIndex), Vec<usize>> = HashMap::new();
    for (position, finding) in findings.iter().enumerate() {
        let triple = (finding.body_id, finding.sink, finding.source);
        by_triple.entry(triple).or_default().push(position);
    }

    for group in by_triple.into_values().filter(|g| g.len() >= 2) {
        // Snapshot the ids first so the mutable pass below needs no shared borrows.
        let group_ids: Vec<String> = group
            .iter()
            .map(|&i| findings[i].finding_id.clone())
            .collect();

        for (slot, &finding_idx) in group.iter().enumerate() {
            let own_id = &group_ids[slot];
            let alternatives = &mut findings[finding_idx].alternative_finding_ids;
            alternatives.clear();
            alternatives.extend(group_ids.iter().filter(|id| *id != own_id).cloned());
        }
    }
}
/// Orders body indices so that every body comes after its parent.
///
/// This is a breadth-first walk over the parent/child relation: bodies
/// without a parent come first in their original order, followed level by
/// level by their children. Bodies whose parent id matches no reachable body
/// never appear in the result.
fn containment_order(bodies: &[BodyCfg]) -> Vec<usize> {
    let mut children_of: HashMap<BodyId, Vec<usize>> = HashMap::new();
    let mut pending: VecDeque<usize> = VecDeque::new();
    for (position, body) in bodies.iter().enumerate() {
        if let Some(parent) = body.meta.parent_body_id {
            children_of.entry(parent).or_default().push(position);
        } else {
            pending.push_back(position);
        }
    }

    let mut visit_order = Vec::with_capacity(bodies.len());
    while let Some(current) = pending.pop_front() {
        visit_order.push(current);
        for &child in children_of.get(&bodies[current].meta.id).into_iter().flatten() {
            pending.push_back(child);
        }
    }
    visit_order
}
/// Runs the SSA-based taint analysis over a single body.
///
/// `seed` optionally supplies taint bindings handed to the transfer as
/// `global_seed`. Returns the findings for this body plus the exit state
/// extracted from the analysis. When SSA lowering fails the result is
/// `(Vec::new(), None)`.
fn analyse_body_with_seed(
body: &BodyCfg,
lang: Lang,
namespace: &str,
local_summaries: &FuncSummaries,
global_summaries: Option<&GlobalSummaries>,
interop_edges: &[InteropEdge],
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
ssa_summaries: Option<
&std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
>,
callee_bodies: Option<&std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>>,
inline_cache: Option<&std::cell::RefCell<ssa_transfer::InlineCache>>,
seed: Option<&HashMap<ssa_transfer::BindingKey, crate::taint::domain::VarTaint>>,
import_bindings: Option<&crate::cfg::ImportBindings>,
cross_file_bodies: Option<&std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>>,
) -> (
Vec<Finding>,
Option<HashMap<ssa_transfer::BindingKey, crate::taint::domain::VarTaint>>,
) {
let cfg = &body.graph;
let entry = body.entry;
let body_id = body.meta.id;
let interner = SymbolInterner::from_cfg(cfg);
// Exceeding the tracked-variable limit only logs a warning; analysis continues.
if interner.len() > MAX_TRACKED_VARS {
tracing::warn!(
symbols = interner.len(),
max = MAX_TRACKED_VARS,
"taint analysis: too many variables, some will be ignored"
);
}
let is_toplevel = body.meta.parent_body_id.is_none();
let has_nonempty_seed = seed.is_some_and(|s| !s.is_empty());
let is_java_lambda =
lang == Lang::Java && body.meta.kind == crate::cfg::BodyKind::AnonymousFunction;
// Parameter-aware lowering is used for non-toplevel bodies that are JS/TS,
// carry a non-empty seed, or are Java anonymous functions.
let use_scoped_lowering = !is_toplevel
&& (matches!(lang, Lang::JavaScript | Lang::TypeScript)
|| has_nonempty_seed
|| is_java_lambda);
let ssa_result = if use_scoped_lowering {
// Name fallback chain: declared name, then `<anon#disambig>`, then
// `<anon@span-start>`.
let func_name = body.meta.name.clone().unwrap_or_else(|| {
body.meta
.func_key
.as_ref()
.and_then(|k| k.disambig.map(|d| format!("<anon#{d}>")))
.unwrap_or_else(|| format!("<anon@{}>", body.meta.span.0))
});
crate::ssa::lower_to_ssa_with_params(cfg, entry, Some(&func_name), false, &body.meta.params)
} else {
crate::ssa::lower_to_ssa(cfg, entry, None, true)
};
// Start from a clean note buffer; notes recorded while analysing this body
// are drained below via `take_body_engine_notes`.
ssa_transfer::reset_body_engine_notes();
match ssa_result {
Ok(mut ssa_body) => {
let opt = crate::ssa::optimize_ssa_with_param_types(
&mut ssa_body,
cfg,
Some(lang),
&body.meta.param_types,
);
// Trace-level dump of the lowered body and any known type facts.
if tracing::enabled!(tracing::Level::TRACE) {
tracing::trace!(
func = body.meta.name.as_deref().unwrap_or("<anon>"),
ssa = %ssa_body,
"SSA body lowered",
);
for block in &ssa_body.blocks {
for inst in block.phis.iter().chain(block.body.iter()) {
if let Some(t) = opt.type_facts.get_type(inst.value) {
tracing::trace!(value = inst.value.0, ty = ?t, "type fact");
}
}
}
}
let dynamic_pts = std::cell::RefCell::new(std::collections::HashMap::new());
let static_map =
crate::ssa::static_map::analyze(&ssa_body, cfg, Some(lang), &opt.const_values);
// An empty static map is passed to the transfer as `None`.
let static_map_opt = if static_map.is_empty() {
None
} else {
Some(static_map)
};
// Pointer facts are computed only when the pointer analysis is enabled.
let pointer_facts = if crate::pointer::is_enabled() {
Some(crate::pointer::analyse_body(&ssa_body, body.meta.id))
} else {
None
};
let transfer = ssa_transfer::SsaTaintTransfer {
lang,
namespace,
interner: &interner,
local_summaries,
global_summaries,
interop_edges,
owner_body_id: body.meta.id,
parent_body_id: body.meta.parent_body_id,
global_seed: seed,
param_seed: None,
receiver_seed: None,
const_values: Some(&opt.const_values),
type_facts: Some(&opt.type_facts),
ssa_summaries,
extra_labels,
base_aliases: Some(&opt.alias_result),
callee_bodies,
inline_cache,
context_depth: 0,
callback_bindings: None,
points_to: Some(&opt.points_to),
dynamic_pts: Some(&dynamic_pts),
import_bindings,
promisify_aliases: None,
module_aliases: if opt.module_aliases.is_empty() {
None
} else {
Some(&opt.module_aliases)
},
static_map: static_map_opt.as_ref(),
// Enabled for JS/TS bodies and for Java anonymous functions.
auto_seed_handler_params: matches!(lang, Lang::JavaScript | Lang::TypeScript)
|| (lang == Lang::Java
&& body.meta.kind == crate::cfg::BodyKind::AnonymousFunction),
cross_file_bodies,
pointer_facts: pointer_facts.as_ref(),
};
let (events, block_states) =
ssa_transfer::run_ssa_taint_full(&ssa_body, cfg, &transfer);
let mut findings = ssa_transfer::ssa_events_to_findings(&events, &ssa_body, cfg);
// Stamp every finding with this body's id and attach all notes recorded
// while the body was analysed.
let body_notes = ssa_transfer::take_body_engine_notes();
for f in &mut findings {
f.body_id = body_id;
for note in &body_notes {
f.merge_note(note.clone());
}
}
// Optional symbolic-execution pass that annotates the findings in place.
if crate::symex::is_enabled() {
let symex_ctx = crate::symex::SymexContext {
ssa: &ssa_body,
cfg,
const_values: &opt.const_values,
type_facts: &opt.type_facts,
global_summaries,
lang,
namespace,
points_to: Some(&opt.points_to),
callee_bodies,
scc_membership: None,
cross_file_bodies: global_summaries,
};
crate::symex::annotate_findings(&mut findings, &symex_ctx);
}
// Optional backwards pass: one sink-to-source analysis per finding.
if crate::utils::analysis_options::current().backwards_analysis {
let bctx = backwards::BackwardsCtx {
ssa: &ssa_body,
cfg,
lang,
global_summaries,
intra_file_bodies: callee_bodies,
depth_budget: backwards::DEFAULT_BACKWARDS_DEPTH,
};
for finding in &mut findings {
// Findings whose sink node has no SSA value are left unannotated.
let Some(sink_val) = ssa_body.cfg_node_map.get(&finding.sink).copied() else {
continue;
};
// Union of the capabilities of every `Sink` label on the sink node.
let sink_caps = cfg[finding.sink].taint.labels.iter().fold(
crate::labels::Cap::empty(),
|acc, l| match l {
crate::labels::DataLabel::Sink(c) => acc | *c,
_ => acc,
},
);
// With no sink label present, fall back to all capabilities.
let caps = if sink_caps.is_empty() {
crate::labels::Cap::all()
} else {
sink_caps
};
let flows =
backwards::analyse_sink_backwards(&bctx, sink_val, finding.sink, caps);
let verdict = backwards::aggregate_verdict(&flows);
backwards::annotate_finding(finding, verdict);
}
}
let exit_state = ssa_transfer::extract_ssa_exit_state(
&block_states,
&ssa_body,
cfg,
&transfer,
body_id,
);
(findings, Some(exit_state))
}
Err(e) => {
tracing::trace!(
body_id = body_id.0,
body_name = ?body.meta.name,
error = %e,
"SSA lowering bailed; emitting engine note",
);
ssa_transfer::record_engine_note(crate::engine_notes::EngineNote::SsaLoweringBailed {
reason: format!("{e}"),
});
// The note buffer is drained and its contents discarded here.
// NOTE(review): no findings are returned on this path, so the note just
// recorded is not attached to anything visible in this function —
// confirm whether `record_engine_note` has another observable effect.
let _ = ssa_transfer::take_body_engine_notes();
(Vec::new(), None)
}
}
}
/// Analyses every body of a file and, for callers that allow more than one
/// iteration, runs the in-file pass-2 fixpoint.
///
/// Pass 1 walks the bodies in containment order and seeds each one with the
/// exit state of its parent (when the parent produced one).
///
/// When `max_iterations > 1`, up to `max_iterations - 1` further rounds run:
///
/// 1. The exit states of all non-toplevel bodies are filtered down to bindings
///    the top-level body defines or uses, and joined into the current seed.
/// 2. If the join changed nothing, the loop has converged and stops.
/// 3. Otherwise the new seed replaces the exit state stored under `BodyId(0)`,
///    and every non-toplevel body that reads at least one changed name is
///    re-analysed. Its previous findings are replaced by the new ones.
///
/// With the Gauss-Seidel switch on, each re-analysed body's exit state is
/// folded into the seed immediately, so later bodies in the same round see it.
///
/// Afterwards the iteration count is published, a convergence telemetry record
/// is emitted, and, when the cap was hit without converging, a warning is
/// logged and an `InFileFixpointCapped` note is attached to every finding.
fn analyse_multi_body(
    file_cfg: &FileCfg,
    lang: Lang,
    namespace: &str,
    local_summaries: &FuncSummaries,
    global_summaries: Option<&GlobalSummaries>,
    interop_edges: &[InteropEdge],
    extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
    ssa_summaries: Option<
        &std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
    >,
    callee_bodies: Option<&std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>>,
    inline_cache: Option<&std::cell::RefCell<ssa_transfer::InlineCache>>,
    max_iterations: usize,
    import_bindings: Option<&crate::cfg::ImportBindings>,
    cross_file_bodies: Option<&std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>>,
) -> Vec<Finding> {
    let order = containment_order(&file_cfg.bodies);
    let mut all_findings: Vec<Finding> = Vec::new();
    let mut body_exit_states: HashMap<
        BodyId,
        HashMap<ssa_transfer::BindingKey, crate::taint::domain::VarTaint>,
    > = HashMap::new();

    // Pass 1: parents before children, each child seeded with its parent's exit state.
    for &idx in &order {
        let body = &file_cfg.bodies[idx];
        let parent_seed = body
            .meta
            .parent_body_id
            .and_then(|pid| body_exit_states.get(&pid));
        let (findings, exit_state) = analyse_body_with_seed(
            body,
            lang,
            namespace,
            local_summaries,
            global_summaries,
            interop_edges,
            extra_labels,
            ssa_summaries,
            callee_bodies,
            inline_cache,
            parent_seed,
            import_bindings,
            cross_file_bodies,
        );
        tracing::debug!(
            body_id = body.meta.id.0,
            body_name = ?body.meta.name,
            findings = findings.len(),
            graph_nodes = body.graph.node_count(),
            has_seed = parent_seed.is_some(),
            "analyse_multi_body: body analysed"
        );
        all_findings.extend(findings);
        if let Some(es) = exit_state {
            body_exit_states.insert(body.meta.id, es);
        }
    }

    let mut converged_early = true;
    let mut iters_used: usize = 0;
    // Sliding window holding the delta sizes of the last (at most) four rounds.
    let mut convergence_trajectory: smallvec::SmallVec<[u32; 4]> = smallvec::SmallVec::new();

    if max_iterations > 1 {
        let top = file_cfg.toplevel();
        let top_cfg = &top.graph;

        // Every name the top-level body defines or uses, keyed under BodyId(0).
        let toplevel_keys: HashSet<ssa_transfer::BindingKey> = {
            let mut keys = HashSet::new();
            for (_idx, info) in top_cfg.node_references() {
                if let Some(ref d) = info.taint.defines {
                    keys.insert(ssa_transfer::BindingKey::new(d.as_str(), BodyId(0)));
                }
                for u in &info.taint.uses {
                    keys.insert(ssa_transfer::BindingKey::new(u.as_str(), BodyId(0)));
                }
            }
            keys
        };

        // Names read by each non-toplevel body; used to skip bodies that a
        // seed change cannot affect.
        let body_reads: HashMap<BodyId, HashSet<String>> = {
            let mut m: HashMap<BodyId, HashSet<String>> = HashMap::new();
            for body in &file_cfg.bodies {
                if body.meta.parent_body_id.is_none() {
                    continue;
                }
                let mut names: HashSet<String> = HashSet::new();
                for (_idx, info) in body.graph.node_references() {
                    for u in &info.taint.uses {
                        names.insert(u.to_string());
                    }
                }
                m.insert(body.meta.id, names);
            }
            m
        };

        let mut current_seed = body_exit_states
            .get(&BodyId(0))
            .cloned()
            .unwrap_or_default();

        // Split pass-1 findings so that re-analysed bodies can replace theirs wholesale.
        let mut findings_by_body: HashMap<BodyId, Vec<Finding>> = HashMap::new();
        let mut toplevel_findings: Vec<Finding> = Vec::new();
        for f in std::mem::take(&mut all_findings) {
            let body = file_cfg.bodies.get(f.body_id.0 as usize);
            if body.is_some_and(|b| b.meta.parent_body_id.is_none()) {
                toplevel_findings.push(f);
            } else {
                findings_by_body
                    .entry(BodyId(f.body_id.0))
                    .or_default()
                    .push(f);
            }
        }

        let rounds = max_iterations.saturating_sub(1);
        converged_early = rounds == 0;
        let use_gauss_seidel = js_ts_gauss_seidel_enabled();

        for round in 0..rounds {
            iters_used = round + 1;

            // Join every non-toplevel exit state (restricted to top-level keys)
            // into a candidate seed.
            let mut combined_exit = current_seed.clone();
            for &idx in &order {
                let body = &file_cfg.bodies[idx];
                if body.meta.parent_body_id.is_none() {
                    continue;
                }
                if let Some(es) = body_exit_states.get(&body.meta.id) {
                    let filtered = ssa_transfer::filter_seed_to_toplevel(es, &toplevel_keys);
                    combined_exit = ssa_transfer::join_seed_maps(&combined_exit, &filtered);
                }
            }

            let iter_delta = seed_delta_size(&current_seed, &combined_exit);
            if convergence_trajectory.len() == 4 {
                convergence_trajectory.remove(0);
            }
            // Saturate rather than truncate if the delta ever exceeds u32::MAX.
            convergence_trajectory.push(u32::try_from(iter_delta).unwrap_or(u32::MAX));

            if combined_exit == current_seed {
                converged_early = true;
                break;
            }

            let changed_names = changed_binding_names(&current_seed, &combined_exit);
            current_seed = combined_exit;
            body_exit_states.insert(BodyId(0), current_seed.clone());

            for &idx in &order {
                let body = &file_cfg.bodies[idx];
                if body.meta.parent_body_id.is_none() {
                    continue;
                }
                // Skip bodies that read none of the names whose taint changed.
                if let Some(reads) = body_reads.get(&body.meta.id) {
                    if reads.is_disjoint(&changed_names) {
                        continue;
                    }
                }
                let parent_seed = body
                    .meta
                    .parent_body_id
                    .and_then(|pid| body_exit_states.get(&pid));
                let (findings, exit_state) = analyse_body_with_seed(
                    body,
                    lang,
                    namespace,
                    local_summaries,
                    global_summaries,
                    interop_edges,
                    extra_labels,
                    ssa_summaries,
                    callee_bodies,
                    inline_cache,
                    parent_seed,
                    import_bindings,
                    cross_file_bodies,
                );
                // The latest run's findings replace whatever this body had before.
                findings_by_body.insert(body.meta.id, findings);
                if let Some(es) = exit_state {
                    if use_gauss_seidel {
                        // Make this body's result visible to later bodies in the same round.
                        let filtered = ssa_transfer::filter_seed_to_toplevel(&es, &toplevel_keys);
                        current_seed = ssa_transfer::join_seed_maps(&current_seed, &filtered);
                        body_exit_states.insert(BodyId(0), current_seed.clone());
                    }
                    body_exit_states.insert(body.meta.id, es);
                }
            }
        }

        // Reassemble: top-level findings first, then per-body findings in file order.
        all_findings = toplevel_findings;
        for body in &file_cfg.bodies {
            if body.meta.parent_body_id.is_none() {
                continue;
            }
            if let Some(fs) = findings_by_body.remove(&body.meta.id) {
                all_findings.extend(fs);
            }
        }
    }

    // A run without any pass-2 round is reported as one iteration.
    let reported_iters = if iters_used == 0 { 1 } else { iters_used };
    LAST_JS_TS_PASS2_ITERATIONS.store(reported_iters, Ordering::Relaxed);

    if max_iterations > 1 {
        let non_toplevel_bodies = file_cfg
            .bodies
            .iter()
            .filter(|b| b.meta.parent_body_id.is_some())
            .count();
        crate::convergence_telemetry::record(
            crate::convergence_telemetry::ConvergenceEvent::InFilePass2(
                crate::convergence_telemetry::InFilePass2Record {
                    schema: crate::convergence_telemetry::SCHEMA_VERSION,
                    namespace: namespace.to_string(),
                    body_count: non_toplevel_bodies,
                    iterations: iters_used,
                    cap: max_iterations,
                    converged: converged_early,
                    trajectory: convergence_trajectory.clone(),
                },
            ),
        );
    }

    if max_iterations > 1 && !converged_early {
        let reason = crate::engine_notes::CapHitReason::classify(&convergence_trajectory);
        tracing::warn!(
            file = %namespace,
            iterations = iters_used,
            cap = max_iterations,
            reason = reason.tag(),
            "JS/TS pass-2 in-file fixpoint did not converge within safety cap — \
             results may be imprecise. This usually indicates a very deep chain \
             of top-level bindings threaded through helper functions; please \
             file a bug with a reproducer."
        );
        let note = EngineNote::InFileFixpointCapped {
            iterations: u32::try_from(iters_used).unwrap_or(u32::MAX),
            reason,
        };
        for f in &mut all_findings {
            f.merge_note(note.clone());
        }
    }
    all_findings
}
/// Names of all bindings that differ between two seed maps.
///
/// A binding counts as changed when it is new in `after`, has a different
/// value in `after`, or exists only in `before`. Names are collected into a
/// set, so the same name under several keys appears once.
fn changed_binding_names(
    before: &HashMap<ssa_transfer::BindingKey, crate::taint::domain::VarTaint>,
    after: &HashMap<ssa_transfer::BindingKey, crate::taint::domain::VarTaint>,
) -> HashSet<String> {
    let added_or_modified = after
        .iter()
        .filter(|(key, taint)| before.get(*key) != Some(*taint))
        .map(|(key, _)| key);
    let removed = before.keys().filter(|key| !after.contains_key(*key));

    added_or_modified
        .chain(removed)
        .map(|key| key.name.to_string())
        .collect()
}
/// Number of binding keys that differ between two seed maps: keys that are
/// new or carry a different value in `after`, plus keys present only in
/// `before`.
fn seed_delta_size(
    before: &HashMap<ssa_transfer::BindingKey, crate::taint::domain::VarTaint>,
    after: &HashMap<ssa_transfer::BindingKey, crate::taint::domain::VarTaint>,
) -> usize {
    let added_or_modified = after
        .iter()
        .filter(|(key, taint)| before.get(*key) != Some(*taint))
        .count();
    let removed = before
        .keys()
        .filter(|key| !after.contains_key(*key))
        .count();
    added_or_modified + removed
}
fn find_function_entries(cfg: &Cfg) -> Vec<(String, NodeIndex)> {
let mut seen = HashSet::new();
let mut entries = Vec::new();
for (idx, info) in cfg.node_references() {
if let Some(ref func_name) = info.ast.enclosing_func
&& seen.insert(func_name.clone())
{
entries.push((func_name.clone(), idx));
}
}
entries
}
/// Returns the parameter names of the first local summary whose key name
/// equals `func_name`, or an empty list when no summary matches.
fn lookup_formal_params(local_summaries: &FuncSummaries, func_name: &str) -> Vec<String> {
    for (key, summary) in local_summaries.iter() {
        if key.name == func_name {
            return summary.param_names.clone();
        }
    }
    Vec::new()
}
/// Picks the key under which a function's summary should be stored.
///
/// Resolution order:
/// 1. the single local key matching language, name and arity;
/// 2. otherwise the first local key matching language and name;
/// 3. otherwise a freshly built plain-function key in `namespace`.
fn lookup_canonical_func_key(
    local_summaries: &FuncSummaries,
    lang: Lang,
    namespace: &str,
    func_name: &str,
    param_count: usize,
) -> FuncKey {
    let wanted_arity = Some(param_count);
    let mut exact = local_summaries
        .keys()
        .filter(|k| k.lang == lang && k.name == func_name && k.arity == wanted_arity);
    // An exact match is only trusted when it is unambiguous.
    if let Some(only) = exact.next() {
        if exact.next().is_none() {
            return only.clone();
        }
    }

    let by_name = local_summaries
        .keys()
        .find(|k| k.lang == lang && k.name == func_name);
    match by_name {
        Some(found) => found.clone(),
        None => FuncKey {
            lang,
            namespace: namespace.to_string(),
            container: String::new(),
            name: func_name.to_string(),
            arity: wanted_arity,
            disambig: None,
            kind: FuncKind::Function,
        },
    }
}
/// Extracts SSA summaries for every function found in a single shared CFG.
///
/// Each function is lowered from its first node. Functions whose lowering
/// fails are skipped. A summary is kept only when it carries some
/// information. The result is keyed by `lookup_canonical_func_key`.
#[allow(dead_code)] pub(crate) fn extract_intra_file_ssa_summaries(
    cfg: &Cfg,
    interner: &SymbolInterner,
    lang: Lang,
    namespace: &str,
    local_summaries: &FuncSummaries,
    global_summaries: Option<&GlobalSummaries>,
) -> std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary> {
    let mut summaries = std::collections::HashMap::new();

    for (func_name, func_entry) in find_function_entries(cfg) {
        let formal_params = lookup_formal_params(local_summaries, &func_name);
        let Ok(func_ssa) = crate::ssa::lower_to_ssa_with_params(
            cfg,
            func_entry,
            Some(&func_name),
            false,
            &formal_params,
        ) else {
            continue;
        };

        // Without declared formals, fall back to counting `Param` instructions.
        let param_count = match formal_params.len() {
            0 => func_ssa
                .blocks
                .iter()
                .flat_map(|b| b.phis.iter().chain(b.body.iter()))
                .filter(|i| matches!(i.op, crate::ssa::ir::SsaOp::Param { .. }))
                .count(),
            declared => declared,
        };

        let mod_aliases = compute_module_aliases_for_summary(&func_ssa, lang);
        let mod_aliases_ref = (!mod_aliases.is_empty()).then_some(&mod_aliases);

        let summary = ssa_transfer::extract_ssa_func_summary(
            &func_ssa,
            cfg,
            local_summaries,
            global_summaries,
            lang,
            namespace,
            interner,
            param_count,
            mod_aliases_ref,
            None,
            Some(&formal_params),
            None,
            None,
        );

        // Drop summaries that carry no information at all.
        let carries_nothing = summary.param_to_return.is_empty()
            && summary.param_to_sink.is_empty()
            && summary.source_caps.is_empty()
            && summary.param_container_to_return.is_empty()
            && summary.param_to_container_store.is_empty()
            && summary.return_abstract.is_none()
            && summary.points_to.is_empty();
        if carries_nothing {
            continue;
        }

        let key =
            lookup_canonical_func_key(local_summaries, lang, namespace, &func_name, param_count);
        summaries.insert(key, summary);
    }

    if !summaries.is_empty() {
        tracing::debug!(
            count = summaries.len(),
            "SSA summary extraction: produced intra-file summaries"
        );
    }
    summaries
}
/// Lowers every function body of a file to SSA and returns
/// `(summaries, cached callee bodies)`, both keyed by `FuncKey`.
///
/// Bodies whose lowering fails are skipped entirely. After the per-body loop,
/// the summaries are post-processed by `augment_summaries_with_child_sinks`
/// and `rerun_extraction_with_augmented_summaries`.
///
/// Timing slots fed to `perf_lower_record`: 0 = SSA lowering, 1 = summary
/// extraction, 2 = SSA optimisation, 3 = typed receivers + field points-to,
/// 4 = child-sink augmentation, 5 = extraction re-run, 6 = miscellaneous
/// bookkeeping.
pub(crate) fn lower_all_functions_from_bodies(
file_cfg: &FileCfg,
lang: Lang,
namespace: &str,
local_summaries: &FuncSummaries,
global_summaries: Option<&GlobalSummaries>,
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
) -> (
std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
) {
let mut summaries = std::collections::HashMap::new();
let mut bodies = std::collections::HashMap::new();
for body in file_cfg.function_bodies() {
let _t_misc = std::time::Instant::now();
// Name fallback chain: declared name, then `<anon#disambig>`, then
// `<anon@span-start>`.
let func_name = body.meta.name.clone().unwrap_or_else(|| {
body.meta
.func_key
.as_ref()
.and_then(|k| k.disambig.map(|d| format!("<anon#{d}>")))
.unwrap_or_else(|| format!("<anon@{}>", body.meta.span.0))
});
let interner = SymbolInterner::from_cfg(&body.graph);
let formal_params = &body.meta.params;
perf_lower_record(6, _t_misc.elapsed().as_micros());
let _t_lower = std::time::Instant::now();
let mut func_ssa = match crate::ssa::lower_to_ssa_with_params(
&body.graph,
body.entry,
Some(&func_name),
false,
formal_params,
) {
Ok(ssa) => ssa,
// A body that cannot be lowered contributes neither a summary nor a cached body.
Err(_) => continue,
};
perf_lower_record(0, _t_lower.elapsed().as_micros());
// Without declared formals, fall back to counting `Param` instructions.
let param_count = if !formal_params.is_empty() {
formal_params.len()
} else {
func_ssa
.blocks
.iter()
.flat_map(|b| b.phis.iter().chain(b.body.iter()))
.filter(|i| matches!(i.op, crate::ssa::ir::SsaOp::Param { .. }))
.count()
};
// Prefer the key recorded on the body; the namespace is always replaced
// by the caller-supplied one.
let mut key = body.meta.func_key.clone().unwrap_or_else(|| {
lookup_canonical_func_key(local_summaries, lang, namespace, &func_name, param_count)
});
key.namespace = namespace.to_string();
{
// Summary extraction runs on the SSA before `optimize_ssa_with_param_types`.
let _t_extract = std::time::Instant::now();
let mod_aliases = compute_module_aliases_for_summary(&func_ssa, lang);
let mod_aliases_ref = if mod_aliases.is_empty() {
None
} else {
Some(&mod_aliases)
};
let formal_destructured = if !body.meta.param_destructured_fields.is_empty() {
Some(body.meta.param_destructured_fields.as_slice())
} else {
None
};
let param_types_ref = if !body.meta.param_types.is_empty() {
Some(body.meta.param_types.as_slice())
} else {
None
};
let summary = ssa_transfer::extract_ssa_func_summary(
&func_ssa,
&body.graph,
local_summaries,
global_summaries,
lang,
namespace,
&interner,
param_count,
mod_aliases_ref,
locator,
Some(formal_params),
formal_destructured,
param_types_ref,
);
// Summaries are kept for functions with parameters, or whose points-to
// summary reports a fresh allocation being returned.
if param_count > 0 || summary.points_to.returns_fresh_alloc {
summaries.insert(key.clone(), summary);
}
perf_lower_record(1, _t_extract.elapsed().as_micros());
}
let _t_opt = std::time::Instant::now();
let opt = crate::ssa::optimize_ssa_with_param_types(
&mut func_ssa,
&body.graph,
Some(lang),
&body.meta.param_types,
);
perf_lower_record(2, _t_opt.elapsed().as_micros());
let _t_typed = std::time::Instant::now();
// Typed call receivers are written into the summary, creating a default
// entry when none was stored above.
let typed_receivers = collect_typed_call_receivers(&func_ssa, &body.graph, &opt.type_facts);
if !typed_receivers.is_empty() {
let entry = summaries.entry(key.clone()).or_default();
entry.typed_call_receivers = typed_receivers;
}
// Field points-to facts are added only when the pointer analysis is enabled.
if crate::pointer::is_enabled() {
let facts = crate::pointer::analyse_body(&func_ssa, body.meta.id);
let fpt = crate::pointer::extract_field_points_to(&func_ssa, &facts);
if !fpt.is_empty() {
let entry = summaries.entry(key.clone()).or_default();
entry.field_points_to = fpt;
}
}
perf_lower_record(3, _t_typed.elapsed().as_micros());
let _t_misc2 = std::time::Instant::now();
// Cache the optimised SSA together with a clone of the body's graph.
bodies.insert(
key,
ssa_transfer::CalleeSsaBody {
ssa: func_ssa,
opt,
param_count,
node_meta: std::collections::HashMap::new(),
body_graph: Some(body.graph.clone()),
},
);
perf_lower_record(6, _t_misc2.elapsed().as_micros());
}
let _t_aug = std::time::Instant::now();
augment_summaries_with_child_sinks(
file_cfg,
lang,
namespace,
local_summaries,
global_summaries,
&bodies,
&mut summaries,
);
perf_lower_record(4, _t_aug.elapsed().as_micros());
let _t_rerun = std::time::Instant::now();
rerun_extraction_with_augmented_summaries(
file_cfg,
lang,
namespace,
local_summaries,
global_summaries,
locator,
&bodies,
&mut summaries,
);
perf_lower_record(5, _t_rerun.elapsed().as_micros());
if !summaries.is_empty() {
tracing::debug!(
count = summaries.len(),
bodies = bodies.len(),
"lower_all_functions_from_bodies: produced summaries + cached bodies"
);
}
(summaries, bodies)
}
/// Re-extracts summaries for functions that call other functions of the same
/// file, this time with a snapshot of the current summaries available to the
/// extractor, and merges the sink-related fields of each fresh summary into
/// the stored one.
///
/// A body is skipped when it has no function key, has no cached callee body,
/// takes no parameters, has no cached graph, or contains no call whose callee
/// name matches a summarised in-file function.
#[allow(clippy::too_many_arguments)]
fn rerun_extraction_with_augmented_summaries(
    file_cfg: &FileCfg,
    lang: Lang,
    namespace: &str,
    local_summaries: &FuncSummaries,
    global_summaries: Option<&GlobalSummaries>,
    locator: Option<&crate::summary::SinkSiteLocator<'_>>,
    bodies: &std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
    summaries: &mut std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
) {
    use crate::ssa::ir::SsaOp;
    use crate::state::symbol::SymbolInterner;

    if summaries.is_empty() {
        return;
    }

    // Freeze the current summaries so every re-extraction sees the same view.
    let augmented_snapshot: std::collections::HashMap<
        FuncKey,
        crate::summary::ssa_summary::SsaFuncSummary,
    > = summaries.clone();
    let in_file_names: std::collections::HashSet<&str> =
        augmented_snapshot.keys().map(|k| k.name.as_str()).collect();

    for body in file_cfg.function_bodies() {
        let mut key = match body.meta.func_key.clone() {
            Some(k) => k,
            None => continue,
        };
        key.namespace = namespace.to_string();

        let callee = match bodies.get(&key) {
            Some(cached) if cached.param_count != 0 => cached,
            _ => continue,
        };
        let parent_cfg = match callee.body_graph.as_ref() {
            Some(graph) => graph,
            None => continue,
        };

        let calls_in_file_function = callee
            .ssa
            .blocks
            .iter()
            .flat_map(|block| block.body.iter())
            .any(|inst| {
                matches!(
                    &inst.op,
                    SsaOp::Call { callee: name, .. } if in_file_names.contains(name.as_str())
                )
            });
        if !calls_in_file_function {
            continue;
        }

        let interner = SymbolInterner::from_cfg(parent_cfg);
        let mod_aliases = compute_module_aliases_for_summary(&callee.ssa, lang);
        let mod_aliases_ref = (!mod_aliases.is_empty()).then_some(&mod_aliases);
        let formal_destructured = (!body.meta.param_destructured_fields.is_empty())
            .then_some(body.meta.param_destructured_fields.as_slice());
        let param_types_ref =
            (!body.meta.param_types.is_empty()).then_some(body.meta.param_types.as_slice());

        let refreshed = ssa_transfer::extract_ssa_func_summary_full(
            &callee.ssa,
            parent_cfg,
            local_summaries,
            global_summaries,
            lang,
            namespace,
            &interner,
            callee.param_count,
            mod_aliases_ref,
            locator,
            Some(&body.meta.params),
            Some(&augmented_snapshot),
            formal_destructured,
            param_types_ref,
        );
        merge_sink_fields(summaries.entry(key).or_default(), &refreshed);
    }
}
/// Merges the sink-related fields of `src` into `dst` without creating
/// duplicates.
///
/// * `param_to_sink`: sites are appended to the entry with the same parameter
///   index unless a site with the same `dedup_key` is already present; an
///   unknown index gets a new entry.
/// * `param_to_sink_param`: triples are appended unless an identical triple
///   already exists.
/// * `validated_params_to_return`: indices are appended unless already listed.
fn merge_sink_fields(
    dst: &mut crate::summary::ssa_summary::SsaFuncSummary,
    src: &crate::summary::ssa_summary::SsaFuncSummary,
) {
    for (idx, sites) in &src.param_to_sink {
        match dst.param_to_sink.iter().position(|(i, _)| i == idx) {
            Some(at) => {
                let existing = &mut dst.param_to_sink[at].1;
                for site in sites {
                    let wanted = site.dedup_key();
                    if existing.iter().all(|s| s.dedup_key() != wanted) {
                        existing.push(site.clone());
                    }
                }
            }
            None => dst.param_to_sink.push((*idx, sites.clone())),
        }
    }

    for triple in &src.param_to_sink_param {
        if !dst.param_to_sink_param.contains(triple) {
            dst.param_to_sink_param.push(*triple);
        }
    }

    for idx in src.validated_params_to_return.iter().copied() {
        if !dst.validated_params_to_return.contains(&idx) {
            dst.validated_params_to_return.push(idx);
        }
    }
}
/// Adds sink entries to a function's summary for sinks that are reached
/// inside bodies nested within it.
///
/// For every body in `file_cfg` that has a `func_key`, an entry in `bodies`,
/// at least one parameter, at least one descendant body and a body graph,
/// each named parameter is tainted in turn. The taint analysis is run over
/// the parent body, its exit state is extracted, and that exit state is used
/// as the `global_seed` for an analysis of every descendant body. When a
/// descendant run yields events with non-empty sink caps, the union of those
/// caps is recorded in `summaries` under the parent's key, attributed to the
/// parameter that was tainted.
///
/// Keys are looked up with their `namespace` field overwritten by the
/// `namespace` argument.
fn augment_summaries_with_child_sinks(
file_cfg: &FileCfg,
lang: Lang,
namespace: &str,
local_summaries: &FuncSummaries,
global_summaries: Option<&GlobalSummaries>,
bodies: &std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
summaries: &mut std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
) {
use crate::cfg::BodyId;
use crate::labels::{Cap, SourceKind};
use crate::summary::SinkSite;
use crate::taint::domain::{TaintOrigin, VarTaint};
use ssa_transfer::BindingKey;
// Map each body's id to its position in `file_cfg.bodies`.
let body_id_to_idx: std::collections::HashMap<BodyId, usize> = file_cfg
.bodies
.iter()
.enumerate()
.map(|(i, b)| (b.meta.id, i))
.collect();
// For every body id, the indices of all bodies nested below it (children,
// grandchildren, ...). Built by walking each body's `parent_body_id` chain
// upwards and registering the body under every ancestor encountered.
let mut descendants: std::collections::HashMap<BodyId, Vec<usize>> =
std::collections::HashMap::new();
for (idx, body) in file_cfg.bodies.iter().enumerate() {
let mut cur = body.meta.parent_body_id;
while let Some(pid) = cur {
descendants.entry(pid).or_default().push(idx);
// Step to the next ancestor; the walk stops when the id is unknown or
// the ancestor has no parent.
cur = body_id_to_idx
.get(&pid)
.and_then(|i| file_cfg.bodies[*i].meta.parent_body_id);
}
}
for parent_body in &file_cfg.bodies {
// Bodies without a function key are skipped.
let Some(parent_key) = parent_body.meta.func_key.clone() else {
continue;
};
// Look the body up under the caller-supplied namespace.
let mut parent_key = parent_key;
parent_key.namespace = namespace.to_string();
let Some(parent_callee) = bodies.get(&parent_key) else {
continue;
};
// With no parameters there is nothing to seed.
if parent_callee.param_count == 0 {
continue;
}
let Some(child_indices) = descendants.get(&parent_body.meta.id) else {
continue;
};
if child_indices.is_empty() {
continue;
}
let parent_ssa = &parent_callee.ssa;
let parent_cfg = match parent_callee.body_graph.as_ref() {
Some(g) => g,
None => continue,
};
let parent_interner = crate::state::symbol::SymbolInterner::from_cfg(parent_cfg);
// Collect `(index, name)` for every `Param` instruction whose index is
// below `param_count` and that carries a variable name.
let mut parent_param_info: Vec<(usize, String)> = Vec::new();
for block in &parent_ssa.blocks {
for inst in block.phis.iter().chain(block.body.iter()) {
if let crate::ssa::ir::SsaOp::Param { index } = &inst.op {
if *index < parent_callee.param_count {
if let Some(name) = inst.var_name.as_ref() {
parent_param_info.push((*index, name.clone()));
}
}
}
}
}
// Taint one parameter at a time so that any sink found below can be
// attributed to that specific parameter index.
for (param_idx, param_name) in &parent_param_info {
// Seed: the parameter's binding carries every capability, with a single
// `UserInput` origin placed at node index 0 and no source span.
let mut seed: std::collections::HashMap<BindingKey, VarTaint> =
std::collections::HashMap::new();
seed.insert(
BindingKey::new(param_name.as_str(), BodyId(0)),
VarTaint {
caps: Cap::all(),
origins: smallvec::SmallVec::from_elem(
TaintOrigin {
node: petgraph::graph::NodeIndex::new(0),
source_kind: SourceKind::UserInput,
source_span: None,
},
1,
),
uses_summary: false,
},
);
// Transfer configuration for the parent run: only the seed and the
// summary tables are supplied; every other optional input is left unset.
let parent_transfer = ssa_transfer::SsaTaintTransfer {
lang,
namespace,
interner: &parent_interner,
local_summaries,
global_summaries,
interop_edges: &[],
owner_body_id: BodyId(0),
parent_body_id: None,
global_seed: Some(&seed),
param_seed: None,
receiver_seed: None,
const_values: None,
type_facts: None,
ssa_summaries: Some(summaries),
extra_labels: None,
base_aliases: None,
callee_bodies: None,
inline_cache: None,
context_depth: 0,
callback_bindings: None,
points_to: None,
dynamic_pts: None,
import_bindings: None,
promisify_aliases: None,
module_aliases: None,
static_map: None,
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
};
// The parent's own events are discarded; only the per-block states are
// needed to derive the exit state.
let (_parent_events, parent_block_states) =
ssa_transfer::run_ssa_taint_full(parent_ssa, parent_cfg, &parent_transfer);
let parent_exit = ssa_transfer::extract_ssa_exit_state(
&parent_block_states,
parent_ssa,
parent_cfg,
&parent_transfer,
BodyId(0),
);
// An empty exit state gives the descendants nothing to start from.
if parent_exit.is_empty() {
continue;
}
for &child_idx in child_indices {
let child_body = &file_cfg.bodies[child_idx];
let Some(child_key) = child_body.meta.func_key.clone() else {
continue;
};
let mut child_key = child_key;
child_key.namespace = namespace.to_string();
let Some(child_callee) = bodies.get(&child_key) else {
continue;
};
let child_ssa = &child_callee.ssa;
let Some(child_cfg) = child_callee.body_graph.as_ref() else {
continue;
};
let child_interner = crate::state::symbol::SymbolInterner::from_cfg(child_cfg);
// Same configuration as the parent run, except that the seed is the
// parent's exit state and the interner is built from the child's graph.
let child_transfer = ssa_transfer::SsaTaintTransfer {
lang,
namespace,
interner: &child_interner,
local_summaries,
global_summaries,
interop_edges: &[],
owner_body_id: BodyId(0),
parent_body_id: None,
global_seed: Some(&parent_exit),
param_seed: None,
receiver_seed: None,
const_values: None,
type_facts: None,
ssa_summaries: Some(summaries),
extra_labels: None,
base_aliases: None,
callee_bodies: None,
inline_cache: None,
context_depth: 0,
callback_bindings: None,
points_to: None,
dynamic_pts: None,
import_bindings: None,
promisify_aliases: None,
module_aliases: None,
static_map: None,
auto_seed_handler_params: false,
cross_file_bodies: None,
pointer_facts: None,
};
let (child_events, _child_block_states) =
ssa_transfer::run_ssa_taint_full(child_ssa, child_cfg, &child_transfer);
if child_events.is_empty() {
continue;
}
// Union the sink caps of every event produced by the child run.
let mut union_caps = Cap::empty();
for ev in &child_events {
union_caps |= ev.sink_caps;
}
if union_caps.is_empty() {
continue;
}
// Record the result under the parent's key, creating a default summary
// if the parent has none yet.
let entry = summaries.entry(parent_key.clone()).or_default();
let new_site = SinkSite::cap_only(union_caps);
let new_key = new_site.dedup_key();
// Append to the parameter's existing site list (deduplicated by
// `dedup_key`), or start a new list for this parameter.
if let Some((_, sites)) = entry
.param_to_sink
.iter_mut()
.find(|(i, _)| *i == *param_idx)
{
if !sites.iter().any(|s| s.dedup_key() == new_key) {
sites.push(new_site);
}
} else {
entry
.param_to_sink
.push((*param_idx, smallvec::smallvec![new_site]));
}
// The duplicate check matches on parameter index and caps only; the
// middle component is ignored when checking and written as 0 on insert.
if !entry
.param_to_sink_param
.iter()
.any(|(i, _, c)| *i == *param_idx && *c == union_caps)
{
entry.param_to_sink_param.push((*param_idx, 0, union_caps));
}
}
}
}
}
/// Lists the call sites in `ssa` whose receiver has a type fact that maps
/// to a container name.
///
/// Only instructions in each block's `body` are inspected. A call is
/// reported when it has a receiver, `type_facts` knows a type for that
/// receiver, the type yields a `container_name()`, and the instruction's CFG
/// node exists in `cfg`. Each call ordinal is reported at most once (the
/// first occurrence in block order wins).
///
/// Returns `(call_ordinal, container_name)` pairs sorted by ordinal.
fn collect_typed_call_receivers(
    ssa: &crate::ssa::ir::SsaBody,
    cfg: &crate::cfg::Cfg,
    type_facts: &crate::ssa::type_facts::TypeFactResult,
) -> Vec<(u32, String)> {
    use crate::ssa::ir::SsaOp;

    let mut recorded_ordinals: std::collections::HashSet<u32> = std::collections::HashSet::new();
    let mut receivers: Vec<(u32, String)> = Vec::new();

    for inst in ssa.blocks.iter().flat_map(|block| block.body.iter()) {
        // Only calls that actually have a receiver are of interest.
        let SsaOp::Call {
            receiver: Some(receiver_val),
            ..
        } = &inst.op
        else {
            continue;
        };

        // The receiver needs a known type that names a container.
        let Some(container) = type_facts
            .get_type(*receiver_val)
            .and_then(|kind| kind.container_name())
        else {
            continue;
        };

        let Some(node_info) = cfg.node_weight(inst.cfg_node) else {
            continue;
        };

        // `insert` returns false when the ordinal was already recorded.
        let ordinal = node_info.call.call_ordinal;
        if recorded_ordinals.insert(ordinal) {
            receivers.push((ordinal, container));
        }
    }

    receivers.sort_by_key(|entry| entry.0);
    receivers
}
/// Largest SSA block count a body may have and still be returned by
/// `build_eligible_bodies`; bodies with more blocks are skipped there.
const MAX_CROSS_FILE_BODY_BLOCKS: usize = 100;
/// SSA function summaries keyed by `FuncKey`.
type SsaArtifactSummaries =
std::collections::HashMap<FuncKey, crate::summary::ssa_summary::SsaFuncSummary>;
/// `(key, body)` pairs in the form produced by `build_eligible_bodies`.
type EligibleCalleeBodies = Vec<(FuncKey, ssa_transfer::CalleeSsaBody)>;
/// Lowers every function body of `file_cfg` and returns the resulting SSA
/// summaries together with the subset of lowered bodies accepted by
/// [`build_eligible_bodies`].
///
/// All arguments are forwarded unchanged to
/// `lower_all_functions_from_bodies`; the summaries it returns are passed
/// through as-is.
pub(crate) fn extract_ssa_artifacts_from_file_cfg(
    file_cfg: &FileCfg,
    lang: Lang,
    namespace: &str,
    local_summaries: &FuncSummaries,
    global_summaries: Option<&GlobalSummaries>,
    locator: Option<&crate::summary::SinkSiteLocator<'_>>,
) -> (SsaArtifactSummaries, EligibleCalleeBodies) {
    let (summaries, lowered_bodies) = lower_all_functions_from_bodies(
        file_cfg,
        lang,
        namespace,
        local_summaries,
        global_summaries,
        locator,
    );
    (summaries, build_eligible_bodies(file_cfg, lowered_bodies))
}
/// Filters lowered callee bodies down to those that may be kept as
/// `(key, body)` pairs.
///
/// Returns an empty list when `crate::symex::cross_file_symex_enabled()` is
/// false. Otherwise a body is kept when all of the following hold:
/// * it has at most [`MAX_CROSS_FILE_BODY_BLOCKS`] SSA blocks;
/// * `file_cfg` contains a body whose `func_key` matches on `lang`,
///   `container`, `name`, `arity`, `disambig` and `kind` (the namespace is
///   not compared);
/// * `ssa_transfer::populate_node_meta` succeeds against that body's graph.
///
/// The output order follows the iteration order of the `bodies` map.
pub(crate) fn build_eligible_bodies(
    file_cfg: &FileCfg,
    bodies: std::collections::HashMap<FuncKey, ssa_transfer::CalleeSsaBody>,
) -> EligibleCalleeBodies {
    if !crate::symex::cross_file_symex_enabled() {
        return Vec::new();
    }

    bodies
        .into_iter()
        .filter_map(|(key, mut body)| {
            // Oversized bodies are dropped before any lookup is attempted.
            if body.ssa.blocks.len() > MAX_CROSS_FILE_BODY_BLOCKS {
                return None;
            }

            // Find the CFG body this key was lowered from.
            let body_cfg = file_cfg.bodies.iter().find(|candidate| {
                candidate.meta.func_key.as_ref().is_some_and(|other| {
                    other.lang == key.lang
                        && other.container == key.container
                        && other.name == key.name
                        && other.arity == key.arity
                        && other.disambig == key.disambig
                        && other.kind == key.kind
                })
            })?;

            if ssa_transfer::populate_node_meta(&mut body, &body_cfg.graph) {
                Some((key, body))
            } else {
                None
            }
        })
        .collect()
}
#[cfg(test)]
mod tests;