use std::collections::BTreeMap;
use crate::expr::Expr;
use crate::flow::{StringShape, TaintCleanser, TaintKind, ValueFlow};
use crate::stmt::Statement;
/// Per-name flow facts accumulated by the analysis.
///
/// Names are stored ASCII-upper-cased (both `merge_into` and `get` upper-case
/// the name before touching the map), so lookups are ASCII case-insensitive.
/// The derived `PartialEq` is what the fixpoint loop in `analyze_flow_bounded`
/// uses to detect that a pass changed nothing.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct FlowEnv {
// Upper-cased assignment target -> merged flow facts for that target.
map: BTreeMap<String, ValueFlow>,
}
impl FlowEnv {
    /// Returns the flow recorded for `name`, matching ASCII case-insensitively.
    #[must_use]
    pub fn get(&self, name: &str) -> Option<&ValueFlow> {
        let key = name.to_ascii_uppercase();
        self.map.get(&key)
    }

    /// Yields an owned copy of every recorded name (in its stored, upper-cased form).
    pub fn iter_names(&self) -> impl Iterator<Item = String> + '_ {
        self.map.keys().map(|stored| stored.clone())
    }

    /// Yields `(stored name, flow)` pairs in the map's key order.
    pub fn iter(&self) -> impl Iterator<Item = (&str, &ValueFlow)> + '_ {
        self.map.iter().map(|pair| (pair.0.as_str(), pair.1))
    }

    /// Number of names with a recorded flow.
    #[must_use]
    pub fn len(&self) -> usize {
        self.map.len()
    }

    /// `true` when no name has a recorded flow.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.map.is_empty()
    }

    /// Joins `flow` into the entry for `name`, creating a default entry first if needed.
    ///
    /// * taint kinds and cleansers are unioned (existing order kept, new items appended);
    /// * `value_set` is replaced by `old.join(new)`;
    /// * `constant` is cleared whenever the stored and incoming constants differ;
    /// * `string_shape` takes the incoming shape when the two differ and the
    ///   incoming one is `Some`; otherwise the stored shape is kept.
    ///
    /// NOTE(review): a brand-new entry starts from `ValueFlow::default()`, so an
    /// incoming `Some` constant differs from the default and is cleared on first
    /// insertion — confirm that is intended before relying on `constant`.
    fn merge_into(&mut self, name: &str, flow: ValueFlow) {
        let slot = self.map.entry(name.to_ascii_uppercase()).or_default();

        for kind in flow.taint.kinds {
            if slot.taint.kinds.contains(&kind) {
                continue;
            }
            slot.taint.kinds.push(kind);
        }

        for cleanser in flow.taint.cleansed_by {
            if slot.taint.cleansed_by.contains(&cleanser) {
                continue;
            }
            slot.taint.cleansed_by.push(cleanser);
        }

        // Move the old set out so it can be consumed by `join`.
        slot.value_set = std::mem::take(&mut slot.value_set).join(flow.value_set);

        if slot.constant != flow.constant {
            slot.constant = None;
        }

        // A differing incoming shape wins only when it actually carries a shape.
        let shape_differs = slot.string_shape != flow.string_shape;
        if shape_differs && flow.string_shape.is_some() {
            slot.string_shape = flow.string_shape;
        }
    }
}
/// Names the analysis treats as taint sources.
///
/// Both lists are compared ASCII case-insensitively against the leading
/// segment of each name expression (see `collect_expr_flow`).
#[derive(Clone, Debug, Default)]
pub struct TaintSources {
// A name whose head matches one of these contributes `TaintKind::UserInput`.
pub user_input_names: Vec<String>,
// A name whose head matches one of these contributes `TaintKind::BindVariable`.
pub bind_names: Vec<String>,
}
/// Runs the flow analysis over `stmts` and returns only the resulting environment.
///
/// Thin wrapper over [`analyze_flow_bounded`] that discards the recursion outcome.
#[must_use]
pub fn analyze_flow(stmts: &[Statement], sources: &TaintSources) -> FlowEnv {
    let (env, _outcome) = analyze_flow_bounded(stmts, sources);
    env
}
/// Runs the flow analysis over `stmts`, also reporting what recursion limits were hit.
///
/// The statement list is walked repeatedly against the same, persistent
/// environment until a pass leaves it unchanged or `MAX_PASSES` passes have
/// run. Because the environment survives between passes, facts recorded by a
/// later assignment are visible to earlier reads on the next pass.
///
/// Returns the final environment together with the outcome absorbed from
/// every pass.
///
/// NOTE(review): each pass's outcome is absorbed into the total; if `absorb`
/// sums counters, truncation counts scale with the number of passes — confirm.
#[must_use]
pub fn analyze_flow_bounded(
    stmts: &[Statement],
    sources: &TaintSources,
) -> (FlowEnv, crate::RecursionOutcome) {
    const MAX_PASSES: usize = 64;

    let mut env = FlowEnv::default();
    let mut outcome = crate::RecursionOutcome::default();

    let mut passes_left = MAX_PASSES;
    while passes_left > 0 {
        passes_left -= 1;

        let snapshot = env.clone();
        let mut this_pass = crate::RecursionOutcome::default();
        walk(stmts, sources, &mut env, 0, &mut this_pass);
        outcome.absorb(this_pass);

        // Fixpoint reached: another pass could not add anything.
        if env == snapshot {
            break;
        }
    }

    (env, outcome)
}
/// Visits `stmts` in order, merging every assignment's flow into `env` and
/// descending into the bodies of `IF` arms, loops and nested blocks.
///
/// `depth` is the current re-lowering depth; a body that would be lowered at
/// `depth + 1 >= crate::MAX_RELOWER_DEPTH` is not entered and is reported via
/// `outcome.note_truncated()` instead. Statements of any other kind are ignored.
fn walk(
    stmts: &[Statement],
    sources: &TaintSources,
    env: &mut FlowEnv,
    depth: usize,
    outcome: &mut crate::RecursionOutcome,
) {
    /// Lowers `text` and walks it one level deeper, or records a truncation
    /// when the depth cap would be reached.
    fn descend(
        text: &str,
        sources: &TaintSources,
        env: &mut FlowEnv,
        depth: usize,
        outcome: &mut crate::RecursionOutcome,
    ) {
        if depth + 1 >= crate::MAX_RELOWER_DEPTH {
            outcome.note_truncated();
            return;
        }
        let lowered = crate::lower_statement_body(text);
        walk(&lowered, sources, env, depth + 1, outcome);
    }

    for stmt in stmts {
        match stmt {
            Statement::Assignment { target, rhs_text } => {
                let rhs = crate::expr::lower_expression(rhs_text);
                let incoming = expr_flow(&rhs, sources, env);
                env.merge_into(target, incoming);
            }
            Statement::If {
                arms,
                else_body_text,
            } => {
                for arm in arms {
                    descend(&arm.body_text, sources, env, depth, outcome);
                }
                if let Some(else_text) = else_body_text {
                    descend(else_text, sources, env, depth, outcome);
                }
            }
            Statement::ForLoop { body_text, .. }
            | Statement::WhileLoop { body_text, .. }
            | Statement::BareLoop { body_text } => {
                descend(body_text, sources, env, depth, outcome);
            }
            Statement::NestedBlock { body_text } => {
                // Only descend when stripping the wrapper actually changed the
                // text; an unchanged body is skipped entirely.
                let inner = crate::calls::strip_block_wrapper(body_text);
                if inner != body_text.as_str() {
                    descend(inner, sources, env, depth, outcome);
                }
            }
            _ => {}
        }
    }
}
/// Computes the flow of a single expression, starting from an empty
/// `ValueFlow` and letting `collect_expr_flow` fill it in.
fn expr_flow(expr: &Expr, sources: &TaintSources, env: &FlowEnv) -> ValueFlow {
    let mut collected = ValueFlow::default();
    collect_expr_flow(expr, sources, env, &mut collected);
    collected
}
/// Reports whether `path` names a validating `DBMS_ASSERT` function.
///
/// `path` is the dot-joined callee path and is expected to be upper-cased
/// already (the caller upper-cases it before calling).
///
/// Only the unqualified form `DBMS_ASSERT.<func>` and the `SYS`-qualified form
/// `SYS.DBMS_ASSERT.<func>` are accepted. Any other qualifier (for example
/// `EVIL.DBMS_ASSERT.SIMPLE_SQL_NAME`) names a different package that merely
/// shares the name, so it is rejected: treating it as a sanitizer would drop
/// the argument's taint without any validation having happened.
///
/// `NOOP` is deliberately absent from the list because it performs no
/// validation.
fn is_dbms_assert_sanitizer(path: &str) -> bool {
    const VALIDATORS: &[&str] = &[
        "SIMPLE_SQL_NAME",
        "QUALIFIED_SQL_NAME",
        "SCHEMA_NAME",
        "ENQUOTE_NAME",
        "SQL_OBJECT_NAME",
        "ENQUOTE_LITERAL",
    ];
    let segs: Vec<&str> = path.split('.').collect();
    match segs.as_slice() {
        // Fail closed: an arbitrary schema prefix is not trusted.
        ["DBMS_ASSERT", func] | ["SYS", "DBMS_ASSERT", func] => VALIDATORS.contains(func),
        _ => false,
    }
}
/// Folds the taint kinds, cleansers and string shape contributed by `expr`
/// into `flow`.
///
/// * A name picks up `UserInput` / `BindVariable` when its leading segment is
///   listed in `sources`, plus whatever `env` already records for that segment.
/// * A bind reference contributes `BindVariable`.
/// * A string literal supplies a `Literal` shape if no shape is set yet.
/// * A call to a recognised `DBMS_ASSERT` validator evaluates its arguments
///   into a scratch flow: their taint kinds are not propagated, and
///   `DbmsAssert` is recorded as a cleanser only when the arguments carried
///   some taint. Any other call simply folds its arguments into `flow`.
/// * Binary and unary expressions fold their operands.
/// * A raw (un-lowered) expression is marked `Unanalyzable` and, if no shape is
///   set yet, `FullyOpaque`.
///
/// Every list insertion is de-duplicated, and the first shape found wins.
/// Expression kinds not listed above contribute nothing.
fn collect_expr_flow(expr: &Expr, sources: &TaintSources, env: &FlowEnv, flow: &mut ValueFlow) {
    /// ASCII case-insensitive membership test against a configured source list.
    fn listed(names: &[String], head: &str) -> bool {
        names
            .iter()
            .any(|candidate| candidate.eq_ignore_ascii_case(head))
    }

    match expr {
        Expr::Name(name) => {
            // Only the leading segment of the name is consulted.
            let head = name.parts.first().map_or("", String::as_str);

            // Same order as the configured lists: user input first, then binds.
            for (names, kind) in [
                (&sources.user_input_names, TaintKind::UserInput),
                (&sources.bind_names, TaintKind::BindVariable),
            ] {
                if listed(names, head) && !flow.taint.kinds.contains(&kind) {
                    flow.taint.kinds.push(kind);
                }
            }

            // Inherit whatever an earlier assignment recorded for this name.
            if let Some(prior) = env.get(head) {
                for kind in &prior.taint.kinds {
                    if flow.taint.kinds.contains(kind) {
                        continue;
                    }
                    flow.taint.kinds.push(*kind);
                }
                for cleanser in &prior.taint.cleansed_by {
                    if flow.taint.cleansed_by.contains(cleanser) {
                        continue;
                    }
                    flow.taint.cleansed_by.push(*cleanser);
                }
                if flow.string_shape.is_none() {
                    flow.string_shape = prior.string_shape.clone();
                }
            }
        }
        Expr::BindRef(_) => {
            if !flow.taint.kinds.contains(&TaintKind::BindVariable) {
                flow.taint.kinds.push(TaintKind::BindVariable);
            }
        }
        Expr::StringLit(text) => {
            if flow.string_shape.is_none() {
                flow.string_shape = Some(StringShape::Literal {
                    value: text.clone(),
                });
            }
        }
        Expr::Call { callee, args } => {
            let path = callee.parts.join(".").to_ascii_uppercase();
            if !is_dbms_assert_sanitizer(&path) {
                // Ordinary call: arguments flow straight through.
                for arg in args {
                    collect_expr_flow(arg, sources, env, flow);
                }
            } else {
                // Evaluate the arguments in isolation so their taint kinds
                // are not copied onto the enclosing expression.
                let mut inner = ValueFlow::default();
                for arg in args {
                    collect_expr_flow(arg, sources, env, &mut inner);
                }
                let consumed_taint = !inner.taint.kinds.is_empty();
                if consumed_taint && !flow.taint.cleansed_by.contains(&TaintCleanser::DbmsAssert) {
                    flow.taint.cleansed_by.push(TaintCleanser::DbmsAssert);
                }
                if flow.string_shape.is_none() {
                    flow.string_shape = inner.string_shape;
                }
            }
        }
        Expr::Binary { lhs, rhs, .. } => {
            collect_expr_flow(lhs, sources, env, flow);
            collect_expr_flow(rhs, sources, env, flow);
        }
        Expr::Unary { operand, .. } => {
            collect_expr_flow(operand, sources, env, flow);
        }
        Expr::Raw { .. } => {
            // Could not be lowered: fail closed.
            if !flow.taint.kinds.contains(&TaintKind::Unanalyzable) {
                flow.taint.kinds.push(TaintKind::Unanalyzable);
            }
            if flow.string_shape.is_none() {
                flow.string_shape = Some(StringShape::FullyOpaque);
            }
        }
        _ => {}
    }
}
// Unit tests for the flow analysis. Each test lowers a small statement body,
// runs `analyze_flow` / `analyze_flow_bounded`, and inspects the `ValueFlow`
// recorded for an assignment target.
#[cfg(test)]
mod tests {
use super::*;
use crate::lower_statement_body;
// Builds a `TaintSources` with the given user-input names and no bind names.
fn src(user: &[&str]) -> TaintSources {
TaintSources {
user_input_names: user.iter().map(|s| s.to_string()).collect(),
bind_names: vec![],
}
}
// --- Basic assignment flows ---
#[test]
fn assignment_from_constant_has_no_taint() {
let s = lower_statement_body("v_x := 42;");
let env = analyze_flow(&s, &src(&[]));
assert!(!env.get("v_x").unwrap().taint.flags_alarm());
}
#[test]
fn assignment_from_user_input_is_tainted() {
let s = lower_statement_body("v_sql := p_user_table;");
let env = analyze_flow(&s, &src(&["p_user_table"]));
let f = env.get("v_sql").unwrap();
assert!(f.taint.kinds.contains(&TaintKind::UserInput));
assert!(f.taint.flags_alarm());
}
// An RHS the expression lowerer cannot handle must be marked Unanalyzable
// and FullyOpaque rather than silently treated as clean.
#[test]
fn unlowerable_case_expression_rhs_fails_closed_as_unanalyzable() {
let s = lower_statement_body("v_sql := CASE WHEN cond THEN p_user ELSE 'x' END;");
let env = analyze_flow(&s, &src(&["p_user"]));
let f = env.get("v_sql").expect("v_sql flow recorded");
assert!(
f.taint.kinds.contains(&TaintKind::Unanalyzable),
"un-lowerable CASE RHS must be marked Unanalyzable: {:?}",
f.taint
);
assert!(f.taint.flags_alarm(), "fail closed: must raise the alarm");
assert!(
matches!(f.string_shape, Some(StringShape::FullyOpaque)),
"un-lowerable value must not be mistaken for a constant literal: {:?}",
f.string_shape
);
}
// --- DBMS_ASSERT sanitizer handling ---
#[test]
fn dbms_assert_call_cleanses_its_argument() {
let s = lower_statement_body("v_safe := DBMS_ASSERT.SIMPLE_SQL_NAME(p_user_table);");
let env = analyze_flow(&s, &src(&["p_user_table"]));
let f = env.get("v_safe").unwrap();
assert!(!f.taint.flags_alarm(), "sanitized value must not alarm");
assert!(
!f.taint.kinds.contains(&TaintKind::UserInput),
"the sanitizer consumes the argument's taint"
);
}
// The sanitizer's argument here is an untainted literal, so no cleanser is
// recorded and the sibling operand's taint is untouched.
#[test]
fn dbms_assert_does_not_cleanse_a_concatenated_sibling() {
let s = lower_statement_body("v_sql := DBMS_ASSERT.ENQUOTE_LITERAL('x') || p_user;");
let env = analyze_flow(&s, &src(&["p_user"]));
let f = env.get("v_sql").unwrap();
assert!(
f.taint.kinds.contains(&TaintKind::UserInput),
"the uncleansed sibling p_user must remain tainted"
);
assert!(
f.taint.cleansed_by.is_empty(),
"the sibling assert's cleanser must not leak onto the whole expression"
);
assert!(
f.taint.flags_alarm(),
"raw user input concatenated with a sanitized literal must still alarm"
);
}
#[test]
fn taint_flows_through_concatenation() {
let s = lower_statement_body("v_sql := 'SELECT * FROM ' || p_tab;");
let env = analyze_flow(&s, &src(&["p_tab"]));
assert!(
env.get("v_sql")
.unwrap()
.taint
.kinds
.contains(&TaintKind::UserInput)
);
}
#[test]
fn bind_ref_is_bind_taint() {
let s = lower_statement_body("v_x := :1;");
let env = analyze_flow(&s, &src(&[]));
assert!(
env.get("v_x")
.unwrap()
.taint
.kinds
.contains(&TaintKind::BindVariable)
);
}
#[test]
fn string_literal_assignment_records_shape() {
let s = lower_statement_body("v_msg := 'hello';");
let env = analyze_flow(&s, &src(&[]));
let literal = env.get("v_msg").and_then(|flow| match &flow.string_shape {
Some(StringShape::Literal { value }) => Some(value.as_str()),
_ => None,
});
assert_eq!(literal, Some("hello"));
}
// --- Control-flow bodies are walked ---
#[test]
fn if_branch_assignments_both_recorded() {
let s = lower_statement_body("IF flag THEN v_x := p_a; ELSE v_x := 0; END IF;");
let env = analyze_flow(&s, &src(&["p_a"]));
assert!(
env.get("v_x")
.unwrap()
.taint
.kinds
.contains(&TaintKind::UserInput)
);
}
#[test]
fn loop_body_assignment_recorded() {
let s = lower_statement_body("FOR i IN 1..10 LOOP v_acc := v_acc + p_in; END LOOP;");
let env = analyze_flow(&s, &src(&["p_in"]));
assert!(
env.get("v_acc")
.unwrap()
.taint
.kinds
.contains(&TaintKind::UserInput)
);
}
#[test]
fn untainted_name_not_flagged() {
let s = lower_statement_body("v_x := v_y + 1;");
let env = analyze_flow(&s, &src(&["p_user"]));
assert!(!env.get("v_x").unwrap().taint.flags_alarm());
}
// Both branches merge into the same entry: the THEN branch contributes only
// a cleanser, the ELSE branch contributes the live UserInput kind.
#[test]
fn branch_merge_sibling_cleanse_does_not_mask_live_kind() {
let s = lower_statement_body(
"IF c THEN v := DBMS_ASSERT.SIMPLE_SQL_NAME(p_user); ELSE v := p_user; END IF;",
);
let env = analyze_flow(&s, &src(&["p_user"]));
let f = env.get("v").unwrap();
assert!(
f.taint.kinds.contains(&TaintKind::UserInput),
"the uncleansed ELSE-path UserInput kind must survive the branch join"
);
assert!(
f.taint.cleansed_by.contains(&TaintCleanser::DbmsAssert),
"the THEN-path cleanser is still recorded for reporting"
);
assert!(
f.taint.flags_alarm(),
"a sibling cleanse on one branch must NOT mask the live kind on the other"
);
}
// Source names and environment lookups both ignore ASCII case.
#[test]
fn case_insensitive_source_match() {
let s = lower_statement_body("v_x := P_USER;");
let env = analyze_flow(&s, &src(&["p_user"]));
assert!(
env.get("V_X")
.unwrap()
.taint
.kinds
.contains(&TaintKind::UserInput)
);
}
#[test]
fn empty_body_yields_empty_env() {
let env = analyze_flow(&[], &src(&[]));
assert!(env.is_empty());
}
// --- Taint carried through intermediate locals ---
#[test]
fn two_hop_local_laundering_propagates_taint() {
let s = lower_statement_body("v_tmp := p_user; v_sql := v_tmp;");
let env = analyze_flow(&s, &src(&["p_user"]));
assert!(
env.get("v_tmp")
.unwrap()
.taint
.kinds
.contains(&TaintKind::UserInput),
"the first hop is tainted from the source"
);
let sql = env.get("v_sql").unwrap();
assert!(
sql.taint.kinds.contains(&TaintKind::UserInput),
"taint laundered through v_tmp must reach v_sql"
);
assert!(sql.taint.flags_alarm(), "the laundered value still alarms");
}
#[test]
fn n_hop_local_laundering_propagates_taint() {
let s = lower_statement_body("v_a := p_user; v_b := v_a; v_c := v_b;");
let env = analyze_flow(&s, &src(&["p_user"]));
for name in ["v_a", "v_b", "v_c"] {
assert!(
env.get(name)
.unwrap()
.taint
.kinds
.contains(&TaintKind::UserInput),
"{name} must be tainted along the laundering chain"
);
}
}
#[test]
fn cleansed_local_then_reused_stays_clean() {
let s =
lower_statement_body("v_tmp := DBMS_ASSERT.SIMPLE_SQL_NAME(p_user); v_sql := v_tmp;");
let env = analyze_flow(&s, &src(&["p_user"]));
let sql = env.get("v_sql").unwrap();
assert!(
!sql.taint.kinds.contains(&TaintKind::UserInput),
"a reused sanitised local carries no live taint"
);
assert!(
!sql.taint.flags_alarm(),
"reusing a sanitised value must not alarm"
);
assert!(
sql.taint.cleansed_by.contains(&TaintCleanser::DbmsAssert),
"the cleanser is carried forward for reporting"
);
}
#[test]
fn taint_laundered_through_local_into_concatenation_alarms() {
let s = lower_statement_body("v_t := p_user; v_sql := 'SELECT * FROM ' || v_t;");
let env = analyze_flow(&s, &src(&["p_user"]));
let sql = env.get("v_sql").unwrap();
assert!(
sql.taint.kinds.contains(&TaintKind::UserInput),
"laundered taint concatenated into SQL must remain tainted"
);
assert!(sql.taint.flags_alarm());
}
// A target whose name begins with a statement keyword (`return_val`) must
// still be recorded as an assignment.
#[test]
fn verb_prefixed_local_laundering_propagates_taint() {
let s = lower_statement_body("return_val := p_user; v_sql := return_val;");
let env = analyze_flow(&s, &src(&["p_user"]));
let rv = env
.get("return_val")
.expect("the verb-prefixed local must be recorded as an assignment");
assert!(
rv.taint.kinds.contains(&TaintKind::UserInput),
"return_val must inherit p_user's taint"
);
let sql = env.get("v_sql").unwrap();
assert!(
sql.taint.kinds.contains(&TaintKind::UserInput),
"taint laundered through the verb-prefixed local must reach v_sql"
);
assert!(sql.taint.flags_alarm());
}
// --- Which DBMS_ASSERT paths count as sanitizers ---
// NOOP is not in the validator list, so it is treated as an ordinary call.
#[test]
fn dbms_assert_noop_is_not_a_sanitizer() {
let s = lower_statement_body("v_sql := 'SELECT * FROM ' || DBMS_ASSERT.NOOP(p_user);");
let env = analyze_flow(&s, &src(&["p_user"]));
let f = env.get("v_sql").unwrap();
assert!(
f.taint.kinds.contains(&TaintKind::UserInput),
"NOOP performs no validation; its argument's taint must survive"
);
assert!(
f.taint.flags_alarm(),
"user input wrapped in DBMS_ASSERT.NOOP must still alarm"
);
}
#[test]
fn dbms_assert_noop_direct_assignment_stays_tainted() {
let s = lower_statement_body("v_sql := DBMS_ASSERT.NOOP(p_user);");
let env = analyze_flow(&s, &src(&["p_user"]));
let f = env.get("v_sql").unwrap();
assert!(
f.taint.kinds.contains(&TaintKind::UserInput),
"NOOP does not consume taint"
);
assert!(f.taint.flags_alarm());
}
#[test]
fn sys_prefixed_dbms_assert_sanitizer_cleanses() {
let s = lower_statement_body("v_safe := SYS.DBMS_ASSERT.SIMPLE_SQL_NAME(p_tab);");
let env = analyze_flow(&s, &src(&["p_tab"]));
let f = env.get("v_safe").unwrap();
assert!(
!f.taint.flags_alarm(),
"a schema-prefixed real sanitizer must still cleanse"
);
assert!(
!f.taint.kinds.contains(&TaintKind::UserInput),
"the sanitizer consumes the argument's taint"
);
}
// --- Recursion depth cap ---
// A hand-built BareLoop whose body is the text "FOR UPDATE" must hit the
// depth cap and report it instead of recursing without bound.
#[test]
fn non_shrinking_for_update_does_not_stack_overflow_and_reports_limit() {
let stmts = vec![Statement::BareLoop {
body_text: "FOR UPDATE".to_string(),
}];
let (env, outcome) = analyze_flow_bounded(&stmts, &src(&[]));
assert!(
outcome.limit_hit,
"the non-shrinking `FOR UPDATE` BareLoop must trip the \
bounded depth cap, outcome={outcome:?}"
);
assert!(outcome.truncated_bodies >= 1);
assert!(env.is_empty());
let _ = analyze_flow(&stmts, &src(&[]));
}
// Termination check only: the result is deliberately not asserted on.
#[test]
fn analyze_flow_over_lowered_for_update_terminates() {
let stmts = lower_statement_body("FOR UPDATE");
let env = analyze_flow(&stmts, &TaintSources::default());
let _ = env.is_empty();
}
#[test]
fn deep_nested_loop_chain_degrades_to_limit_not_overflow() {
const DEPTH: usize = 1_000;
// Compile-time guard: the test is only meaningful if DEPTH exceeds the cap.
const _: () = assert!(DEPTH > crate::MAX_RELOWER_DEPTH);
let mut body = String::with_capacity(DEPTH * 16 + 32);
for _ in 0..DEPTH {
body.push_str("LOOP ");
}
body.push_str("v_x := p_user; ");
for _ in 0..DEPTH {
body.push_str("END LOOP; ");
}
let stmts = lower_statement_body(&body);
let (_, outcome) = analyze_flow_bounded(&stmts, &src(&["p_user"]));
assert!(
outcome.limit_hit,
"a {DEPTH}-deep nested LOOP chain must trip the depth cap, \
outcome={outcome:?}"
);
}
// --- Nested BEGIN/DECLARE blocks ---
#[test]
fn nested_begin_block_launders_taint_into_assignment() {
let s = lower_statement_body("BEGIN v_sql := p_user; END;");
let env = analyze_flow(&s, &src(&["p_user"]));
let f = env
.get("v_sql")
.expect("the nested-block assignment to v_sql must be recorded");
assert!(
f.taint.kinds.contains(&TaintKind::UserInput),
"taint laundered through a BEGIN…END sub-block must reach v_sql"
);
assert!(f.taint.flags_alarm(), "the laundered value still alarms");
}
#[test]
fn nested_declare_block_launders_taint_into_assignment() {
let s = lower_statement_body("DECLARE v_x NUMBER; BEGIN v_sql := p_user; END;");
let env = analyze_flow(&s, &src(&["p_user"]));
let f = env
.get("v_sql")
.expect("the DECLARE-wrapped assignment to v_sql must be recorded");
assert!(
f.taint.kinds.contains(&TaintKind::UserInput),
"taint laundered through a DECLARE…END sub-block must reach v_sql"
);
assert!(f.taint.flags_alarm());
}
#[test]
fn deep_nested_block_chain_degrades_to_limit_not_overflow() {
const DEPTH: usize = 1_000;
// Compile-time guard: the test is only meaningful if DEPTH exceeds the cap.
const _: () = assert!(DEPTH > crate::MAX_RELOWER_DEPTH);
let mut body = String::with_capacity(DEPTH * 12 + 32);
for _ in 0..DEPTH {
body.push_str("BEGIN ");
}
body.push_str("v_x := p_user; ");
for _ in 0..DEPTH {
body.push_str("END; ");
}
let stmts = lower_statement_body(&body);
let (_, outcome) = analyze_flow_bounded(&stmts, &src(&["p_user"]));
assert!(
outcome.limit_hit,
"a {DEPTH}-deep nested BEGIN chain must trip the depth cap, \
outcome={outcome:?}"
);
}
// --- Parenthesised expressions must not hide taint ---
#[test]
fn parenthesised_concat_operand_keeps_taint() {
let s = lower_statement_body("v_sql := 'SELECT * FROM ' || (p_user);");
let env = analyze_flow(&s, &src(&["p_user"]));
let f = env.get("v_sql").unwrap();
assert!(
f.taint.kinds.contains(&TaintKind::UserInput),
"a parenthesised tainted operand must remain tainted"
);
assert!(f.taint.flags_alarm());
}
#[test]
fn whole_rhs_paren_group_keeps_taint() {
let s = lower_statement_body("v_sql := ('SELECT * FROM ' || p_user);");
let env = analyze_flow(&s, &src(&["p_user"]));
let f = env.get("v_sql").unwrap();
assert!(
f.taint.kinds.contains(&TaintKind::UserInput),
"a whole-RHS parenthesised group must preserve inner taint"
);
assert!(f.taint.flags_alarm());
}
#[test]
fn bare_paren_group_is_tainted_name() {
let s = lower_statement_body("v_sql := (p_user);");
let env = analyze_flow(&s, &src(&["p_user"]));
let f = env.get("v_sql").unwrap();
assert!(f.taint.kinds.contains(&TaintKind::UserInput));
assert!(f.taint.flags_alarm());
}
}