plsql_ir/flow_intra.rs
1//! Intra-procedural assignment + expression flow.
2//!
3//! Walks a lowered statement body and propagates [`ValueFlow`]
4//! facts (FLOW-001) through assignments and expressions inside a
5//! single routine. The pass is deliberately a *may*-analysis
6//! over a flat statement list: it does not model branch joins
7//! precisely (that needs a CFG, scheduled for a later pass) —
8//! it conservatively merges every assignment's RHS flow into the
9//! LHS via `ValueSet::join`, which is sound for taint /
10//! string-shape over-approximation.
11//!
12//! Taint is *use-def transitive*: an RHS that references a local
13//! already tainted earlier in the body inherits that taint, so
14//! laundering through intermediates (`v_tmp := p_user;
15//! v_sql := v_tmp;`) cannot escape the analysis. The walk is
16//! iterated to a fixpoint over the finite taint lattice so a name
17//! tainted only on a later pass (e.g. across a loop back-edge) is
18//! still captured.
19//!
20//! Outputs a `FlowEnv` mapping each assigned name to its
21//! accumulated `ValueFlow`. SAST consumes this to answer "does
22//! tainted input reach a dynamic-SQL sink without a cleanser?".
23//!
24//! ## /oracle evidence
25//!
26//! * `DATABASE-REFERENCE.md` PL/SQL Language Reference — the
27//! assignment + parameter-mode chapters define how a value
28//! enters / moves through a routine.
//! * `LOW-LEVEL-CATALOGS.md` Supplied Package Buckets — the validating
//!   `DBMS_ASSERT` entry points (not `NOOP`) are the cleansers that
//!   consume the taint of their argument.
31
32use std::collections::BTreeMap;
33
34use crate::expr::Expr;
35use crate::flow::{StringShape, TaintCleanser, TaintKind, ValueFlow};
36use crate::stmt::Statement;
37
/// Per-routine flow environment: name (upper-cased) → flow.
///
/// Keys are stored ASCII-upper-cased, so lookups through [`FlowEnv::get`]
/// ignore ASCII case. The derived `PartialEq` is what the fixpoint loop in
/// [`analyze_flow_bounded`] uses to detect that a pass changed nothing.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct FlowEnv {
    // Ordered map from the upper-cased name to its accumulated flow.
    map: BTreeMap<String, ValueFlow>,
}
43
44impl FlowEnv {
45 #[must_use]
46 pub fn get(&self, name: &str) -> Option<&ValueFlow> {
47 self.map.get(&name.to_ascii_uppercase())
48 }
49
50 /// Iterate every name (upper-cased) the environment tracks.
51 /// Used by the FLOW-005 query facade to enumerate tainted
52 /// names without exposing the inner map.
53 pub fn iter_names(&self) -> impl Iterator<Item = String> + '_ {
54 self.map.keys().cloned()
55 }
56
57 /// Iterate every tracked name with its aggregate flow state.
58 /// Fact projection uses this to materialize the flow lattice into
59 /// normalized [`FactStore`](crate::FactStore) rows without exposing
60 /// mutation of the environment.
61 pub fn iter(&self) -> impl Iterator<Item = (&str, &ValueFlow)> + '_ {
62 self.map.iter().map(|(name, flow)| (name.as_str(), flow))
63 }
64
65 #[must_use]
66 pub fn len(&self) -> usize {
67 self.map.len()
68 }
69
70 #[must_use]
71 pub fn is_empty(&self) -> bool {
72 self.map.is_empty()
73 }
74
75 fn merge_into(&mut self, name: &str, flow: ValueFlow) {
76 let key = name.to_ascii_uppercase();
77 let entry = self.map.entry(key).or_default();
78 // Taint kinds accumulate (union) across the branch arms a may-analysis
79 // folds into one env. `cleansed_by` also accumulates, but ONLY for
80 // reporting: the alarm reads `kinds` (live, uncleansed taint), so a
81 // cleanser recorded on one arm cannot mask a live kind contributed by a
82 // sibling arm. (Under the former "tainted-but-cleansed" model this union
83 // was a fail-open at branch joins — oracle-qm3q.26; the live-kinds model
84 // from oracle-qm3q.1 makes the join sound without needing CFG-precise
85 // path-intersection of cleansers.)
86 for k in flow.taint.kinds {
87 if !entry.taint.kinds.contains(&k) {
88 entry.taint.kinds.push(k);
89 }
90 }
91 for c in flow.taint.cleansed_by {
92 if !entry.taint.cleansed_by.contains(&c) {
93 entry.taint.cleansed_by.push(c);
94 }
95 }
96 // Value set joins (lattice over-approx).
97 let prev = std::mem::take(&mut entry.value_set);
98 entry.value_set = prev.join(flow.value_set);
99 // Constant: if both sides agree keep it, else drop to None.
100 if entry.constant != flow.constant {
101 entry.constant = None;
102 }
103 // String shape: keep the more-specific one only if equal.
104 if entry.string_shape != flow.string_shape {
105 entry.string_shape = flow.string_shape.or(entry.string_shape.take());
106 }
107 }
108}
109
/// Bare names the caller has flagged as tainted on entry to the routine
/// (e.g. public IN parameters, binds).
///
/// The flow collector compares the *first* segment of every referenced
/// name against these lists, ignoring ASCII case; a match taints the
/// enclosing expression's flow.
#[derive(Clone, Debug, Default)]
pub struct TaintSources {
    /// Names whose reference taints an expression with `UserInput`.
    pub user_input_names: Vec<String>,
    /// Names whose reference taints an expression with `BindVariable`.
    pub bind_names: Vec<String>,
}
120
121/// Run intra-procedural flow over `stmts`. `sources` declares
122/// which bare names are tainted on entry (public params, binds).
123///
124/// Taint propagates transitively through assignments: an RHS that
125/// references a previously-tainted *local* (`v_sql := v_tmp` after
126/// `v_tmp := p_user`) inherits that local's live taint, so
127/// multi-hop laundering through intermediate variables cannot
128/// escape the analysis. Because branches and loops can re-read a
129/// name that is only tainted on a later pass, `walk` is iterated to
130/// a fixpoint over the (finite) taint lattice before the env is
131/// returned.
132///
133/// Back-compat wrapper over [`analyze_flow_bounded`]: the per-pass
134/// re-lowering recursion is depth-guarded so a non-shrinking
135/// malformed body (e.g. the bare token `FOR UPDATE` that a
136/// `SELECT … FOR UPDATE;` fragment leaves behind, which classifies
137/// as a `BareLoop` whose `body_text` re-lowers to the *identical*
138/// `BareLoop`) terminates instead of overflowing the stack /
139/// aborting the process (R13). Callers that need to surface the
140/// typed degradation (`outcome.limit_hit`) should call
141/// [`analyze_flow_bounded`] directly.
142#[must_use]
143pub fn analyze_flow(stmts: &[Statement], sources: &TaintSources) -> FlowEnv {
144 analyze_flow_bounded(stmts, sources).0
145}
146
147/// Depth-bounded variant of [`analyze_flow`]. Returns the flow
148/// environment plus a [`RecursionOutcome`] recording whether (and
149/// how often) a nested re-lowered body was abandoned at the
150/// recursion-depth cap rather than walked unbounded. The caller is
151/// responsible for emitting an honest typed diagnostic when
152/// `outcome.limit_hit` (R13 — never silently truncate, never
153/// stack-overflow on a non-shrinking malformed slice).
154#[must_use]
155pub fn analyze_flow_bounded(
156 stmts: &[Statement],
157 sources: &TaintSources,
158) -> (FlowEnv, crate::RecursionOutcome) {
159 let mut env = FlowEnv::default();
160 let mut outcome = crate::RecursionOutcome::default();
161 // Iterate to a fixpoint: `merge_into` is monotone (it only ever
162 // unions kinds/cleansers and joins value-sets upward), so the
163 // finite lattice guarantees the env stops growing. The cap is a
164 // belt-and-suspenders bound (never expected to bind) so a
165 // pathological body can never spin forever.
166 const MAX_PASSES: usize = 64;
167 for _ in 0..MAX_PASSES {
168 let before = env.clone();
169 // Re-accumulate the truncation outcome each pass over a
170 // *fresh* outcome so the count reflects one pass, not the
171 // sum across passes; the env still folds monotonically.
172 let mut pass_outcome = crate::RecursionOutcome::default();
173 walk(stmts, sources, &mut env, 0, &mut pass_outcome);
174 outcome.absorb(pass_outcome);
175 if env == before {
176 break;
177 }
178 }
179 (env, outcome)
180}
181
182fn walk(
183 stmts: &[Statement],
184 sources: &TaintSources,
185 env: &mut FlowEnv,
186 depth: usize,
187 outcome: &mut crate::RecursionOutcome,
188) {
189 // Recurse into a re-lowered control-flow body only while we
190 // have depth budget left. At the cap we record the truncation
191 // and stop descending — never silently drop, never recurse
192 // unbounded (which stack-overflows on a non-shrinking malformed
193 // slice such as the bare `FOR UPDATE` token). Mirrors
194 // `calls.rs::walk_call_sites` / `dml_edges.rs::walk_table_accesses`.
195 macro_rules! recurse_body {
196 ($text:expr) => {{
197 if depth + 1 >= crate::MAX_RELOWER_DEPTH {
198 outcome.note_truncated();
199 } else {
200 let lowered = crate::lower_statement_body($text);
201 walk(&lowered, sources, env, depth + 1, outcome);
202 }
203 }};
204 }
205 for s in stmts {
206 match s {
207 Statement::Assignment { target, rhs_text } => {
208 let rhs_expr = crate::expr::lower_expression(rhs_text);
209 // Read the live env (use-def aware) so taint already
210 // accumulated on a referenced local flows into the RHS.
211 let flow = expr_flow(&rhs_expr, sources, env);
212 env.merge_into(target, flow);
213 }
214 Statement::If {
215 arms,
216 else_body_text,
217 } => {
218 for arm in arms {
219 recurse_body!(&arm.body_text);
220 }
221 if let Some(eb) = else_body_text {
222 recurse_body!(eb);
223 }
224 }
225 Statement::ForLoop { body_text, .. }
226 | Statement::WhileLoop { body_text, .. }
227 | Statement::BareLoop { body_text } => {
228 recurse_body!(body_text);
229 }
230 Statement::NestedBlock { body_text } => {
231 // Anonymous `BEGIN … END` / `DECLARE … END` sub-block: a
232 // value laundered through it (`BEGIN v_sql := p_user; END;`)
233 // must still taint `v_sql`, or the FLOW-001 pass fails open
234 // for that name and SEC001 misses the injection. Strip the
235 // wrapper and re-lower the inner statements, mirroring
236 // `calls.rs::walk_call_sites` / `dml_edges.rs`. Only recurse
237 // when the stripped slice differs from the original so the
238 // depth-guarded `recurse_body!` cannot spin on a non-stripping
239 // slice (the cap already bounds a non-shrinking one). A block
240 // with no strippable wrapper carries no recoverable
241 // assignment, so it is left untouched.
242 let inner = crate::calls::strip_block_wrapper(body_text);
243 if inner != body_text.as_str() {
244 recurse_body!(inner);
245 }
246 }
247 _ => {}
248 }
249 }
250}
251
252/// Compute the `ValueFlow` of an expression. Taint flows from any
253/// referenced source name OR any previously-tainted local recorded
254/// in `env` (use-def transitivity); a `DBMS_ASSERT.*` call cleanses.
255fn expr_flow(expr: &Expr, sources: &TaintSources, env: &FlowEnv) -> ValueFlow {
256 let mut flow = ValueFlow::default();
257 collect_expr_flow(expr, sources, env, &mut flow);
258 flow
259}
260
/// Is `path` (an already-upper-cased dotted call path) a *validating*
/// `DBMS_ASSERT` entry point — i.e. one that actually rejects unsafe input
/// and so cleanses the taint of its argument?
///
/// Accepted shapes are exactly `DBMS_ASSERT.<fn>` and
/// `<schema>.DBMS_ASSERT.<fn>` with a non-empty schema, where `<fn>` is on
/// the validating allowlist. Everything else returns `false` and is
/// treated as a transparent call by the caller.
///
/// Rationale (oracle-rwjl.4):
///
/// 1. **`DBMS_ASSERT.NOOP` is NOT a sanitizer.** Oracle documents NOOP as an
///    identity pass-through that performs no validation. A uniform
///    `starts_with("DBMS_ASSERT.")` guard would report
///    `EXECUTE IMMEDIATE DBMS_ASSERT.NOOP(p_user)` clean — a SQL-injection
///    fail-open in SEC001. NOOP (and any unrecognized entry point) must
///    stay transparent so its argument's taint still reaches the sink.
/// 2. **A schema prefix must not hide a real sanitizer.**
///    `SYS.DBMS_ASSERT.SIMPLE_SQL_NAME(...)` is recognised, so a genuinely
///    cleansed value is not over-reported.
///
/// Only ONE optional schema segment is tolerated: a path with extra or
/// empty leading segments (`A.B.DBMS_ASSERT.X`, `.DBMS_ASSERT.X`) is not a
/// `[schema.]package.function` reference, and treating it as a cleanser
/// would fail open, so it is rejected.
///
/// The allowlist mirrors the validating set enumerated in
/// `plsql-symbols/src/dynamic_sql.rs` (which lists NOOP separately, only for
/// textual detection — never as a validator).
fn is_dbms_assert_sanitizer(path: &str) -> bool {
    const VALIDATORS: &[&str] = &[
        "SIMPLE_SQL_NAME",
        "QUALIFIED_SQL_NAME",
        "SCHEMA_NAME",
        "ENQUOTE_NAME",
        "SQL_OBJECT_NAME",
        "ENQUOTE_LITERAL",
    ];
    // Look at no more than the first four segments: that is enough to tell
    // the two accepted shapes apart from anything longer, without
    // allocating a Vec for the split.
    let mut segments = path.split('.');
    let head = [
        segments.next(),
        segments.next(),
        segments.next(),
        segments.next(),
    ];
    match head {
        // `DBMS_ASSERT.<fn>`
        [Some("DBMS_ASSERT"), Some(func), None, None] => VALIDATORS.contains(&func),
        // `<schema>.DBMS_ASSERT.<fn>` — the schema must be a real segment.
        [Some(schema), Some("DBMS_ASSERT"), Some(func), None] => {
            !schema.is_empty() && VALIDATORS.contains(&func)
        }
        // NOOP / unknown entry points never reach here as `true`; every
        // other shape (too short, too long, wrong package) is rejected.
        _ => false,
    }
}
301
302fn collect_expr_flow(expr: &Expr, sources: &TaintSources, env: &FlowEnv, flow: &mut ValueFlow) {
303 match expr {
304 Expr::Name(n) => {
305 let head = n.parts.first().map(String::as_str).unwrap_or_default();
306 if sources
307 .user_input_names
308 .iter()
309 .any(|s| s.eq_ignore_ascii_case(head))
310 && !flow.taint.kinds.contains(&TaintKind::UserInput)
311 {
312 flow.taint.kinds.push(TaintKind::UserInput);
313 }
314 if sources
315 .bind_names
316 .iter()
317 .any(|s| s.eq_ignore_ascii_case(head))
318 && !flow.taint.kinds.contains(&TaintKind::BindVariable)
319 {
320 flow.taint.kinds.push(TaintKind::BindVariable);
321 }
322 // Use-def transitivity: a reference to a previously-assigned
323 // local inherits that local's accumulated flow, so taint
324 // laundered through an intermediate variable
325 // (`v_tmp := p_user; v_sql := v_tmp;`) still reaches the sink.
326 // Only LIVE kinds carry the alarm; `cleansed_by` is unioned for
327 // reporting (a recorded cleanser never masks a live kind — see
328 // `flags_alarm`). String shape is preserved only when the parent
329 // has none yet.
330 if let Some(prev) = env.get(head) {
331 for k in &prev.taint.kinds {
332 if !flow.taint.kinds.contains(k) {
333 flow.taint.kinds.push(*k);
334 }
335 }
336 for c in &prev.taint.cleansed_by {
337 if !flow.taint.cleansed_by.contains(c) {
338 flow.taint.cleansed_by.push(*c);
339 }
340 }
341 if flow.string_shape.is_none() {
342 flow.string_shape = prev.string_shape.clone();
343 }
344 }
345 }
346 Expr::BindRef(_) if !flow.taint.kinds.contains(&TaintKind::BindVariable) => {
347 flow.taint.kinds.push(TaintKind::BindVariable);
348 }
349 Expr::StringLit(s) if flow.string_shape.is_none() => {
350 flow.string_shape = Some(StringShape::Literal { value: s.clone() });
351 }
352 Expr::Call { callee, args } => {
353 let path = callee.parts.join(".").to_ascii_uppercase();
354 if is_dbms_assert_sanitizer(&path) {
355 // A `DBMS_ASSERT.*` call SANITIZES its argument: the value it
356 // returns is safe to interpolate. The cleansing therefore binds to
357 // the call's *argument subtree*, NOT to the enclosing expression.
358 // Compute the args in an ISOLATED sub-flow and drop their taint
359 // (kinds + cleansers) — it is consumed by the sanitizer — so the
360 // call contributes nothing injectable to the parent. Only taint
361 // that flows AROUND the call (e.g. a concatenated sibling) reaches
362 // the parent and can still alarm.
363 //
364 // The old code pushed `DbmsAssert` onto the *shared* parent flow
365 // and recursed the args into it, so a cleanse on one operand zeroed
366 // the alarm for an unrelated sibling — e.g.
367 // `DBMS_ASSERT.ENQUOTE_LITERAL('x') || p_user` came out
368 // {UserInput, cleansed:DbmsAssert} → flags_alarm=false (fail-open).
369 let mut sanitized = ValueFlow::default();
370 for a in args {
371 collect_expr_flow(a, sources, env, &mut sanitized);
372 }
373 // The sanitizer CONSUMES its argument's live taint: record the
374 // cleanser (for reporting) and DROP the kinds — they are no longer
375 // injectable. `kinds` holds only *live* (uncleansed) taint, so the
376 // dropped kinds simply never enter the enclosing `flow`. Only taint
377 // that flows AROUND the call (a concatenated sibling) reaches it.
378 if !sanitized.taint.kinds.is_empty()
379 && !flow.taint.cleansed_by.contains(&TaintCleanser::DbmsAssert)
380 {
381 flow.taint.cleansed_by.push(TaintCleanser::DbmsAssert);
382 }
383 // Carry forward only non-taint shape info; the result is clean.
384 if flow.string_shape.is_none() {
385 flow.string_shape = sanitized.string_shape;
386 }
387 } else {
388 // A non-sanitizing call is transparent to taint: its arguments'
389 // taint flows through to the enclosing expression.
390 for a in args {
391 collect_expr_flow(a, sources, env, flow);
392 }
393 }
394 }
395 Expr::Binary { lhs, rhs, .. } => {
396 collect_expr_flow(lhs, sources, env, flow);
397 collect_expr_flow(rhs, sources, env, flow);
398 }
399 Expr::Unary { operand, .. } => collect_expr_flow(operand, sources, env, flow),
400 Expr::Raw { .. } => {
401 // The recognizer could not lower this sub-expression (an
402 // unrecognized shape like a SQL `CASE` expression, an
403 // unbalanced/unterminated fragment, or a depth-limit-collapsed
404 // concat tail). Any user-tainted operand inside it is invisible to
405 // this collector, so treating the value as clean would be a silent
406 // taint fail-open (R13). Fail CLOSED: mark the value Unanalyzable so
407 // a downstream dynamic-SQL sink flags it, and force the string shape
408 // opaque so it can never be mistaken for a provably-constant literal.
409 if !flow.taint.kinds.contains(&TaintKind::Unanalyzable) {
410 flow.taint.kinds.push(TaintKind::Unanalyzable);
411 }
412 if flow.string_shape.is_none() {
413 flow.string_shape = Some(StringShape::FullyOpaque);
414 }
415 }
416 _ => {}
417 }
418}
419
420#[cfg(test)]
421mod tests {
422 use super::*;
423 use crate::lower_statement_body;
424
425 fn src(user: &[&str]) -> TaintSources {
426 TaintSources {
427 user_input_names: user.iter().map(|s| s.to_string()).collect(),
428 bind_names: vec![],
429 }
430 }
431
432 #[test]
433 fn assignment_from_constant_has_no_taint() {
434 let s = lower_statement_body("v_x := 42;");
435 let env = analyze_flow(&s, &src(&[]));
436 assert!(!env.get("v_x").unwrap().taint.flags_alarm());
437 }
438
439 #[test]
440 fn assignment_from_user_input_is_tainted() {
441 let s = lower_statement_body("v_sql := p_user_table;");
442 let env = analyze_flow(&s, &src(&["p_user_table"]));
443 let f = env.get("v_sql").unwrap();
444 assert!(f.taint.kinds.contains(&TaintKind::UserInput));
445 assert!(f.taint.flags_alarm());
446 }
447
448 #[test]
449 fn unlowerable_case_expression_rhs_fails_closed_as_unanalyzable() {
450 // oracle-qo1v.2: a SQL CASE expression on an assignment RHS is not a
451 // recognized Expr shape, so it lowers to Expr::Raw and the user-tainted
452 // operand (p_user) inside it is invisible to the taint collector. The
453 // old catch-all dropped it silently (taint fail-open). The collector now
454 // fails CLOSED: the value is marked Unanalyzable (raises the alarm so a
455 // downstream EXECUTE IMMEDIATE is flagged) and forced to an opaque string
456 // shape so it can never be read as a provably-constant literal.
457 let s = lower_statement_body("v_sql := CASE WHEN cond THEN p_user ELSE 'x' END;");
458 let env = analyze_flow(&s, &src(&["p_user"]));
459 let f = env.get("v_sql").expect("v_sql flow recorded");
460 assert!(
461 f.taint.kinds.contains(&TaintKind::Unanalyzable),
462 "un-lowerable CASE RHS must be marked Unanalyzable: {:?}",
463 f.taint
464 );
465 assert!(f.taint.flags_alarm(), "fail closed: must raise the alarm");
466 assert!(
467 matches!(f.string_shape, Some(StringShape::FullyOpaque)),
468 "un-lowerable value must not be mistaken for a constant literal: {:?}",
469 f.string_shape
470 );
471 }
472
473 #[test]
474 fn dbms_assert_call_cleanses_its_argument() {
475 // DBMS_ASSERT.* sanitizes its argument: the result is a clean value with no
476 // alarm. The arg's taint is consumed by the sanitizer, so the result no
477 // longer carries the UserInput kind (we dropped the old "tainted-but-
478 // cleansed" representation, which let an unrelated cleanser mask a
479 // concatenated sibling — see the fail-open regression below).
480 let s = lower_statement_body("v_safe := DBMS_ASSERT.SIMPLE_SQL_NAME(p_user_table);");
481 let env = analyze_flow(&s, &src(&["p_user_table"]));
482 let f = env.get("v_safe").unwrap();
483 assert!(!f.taint.flags_alarm(), "sanitized value must not alarm");
484 assert!(
485 !f.taint.kinds.contains(&TaintKind::UserInput),
486 "the sanitizer consumes the argument's taint"
487 );
488 }
489
490 #[test]
491 fn dbms_assert_does_not_cleanse_a_concatenated_sibling() {
492 // SEC001 fail-open regression: a DBMS_ASSERT cleanse on ONE operand must
493 // NOT zero the injection alarm for tainted input concatenated ALONGSIDE it.
494 // `DBMS_ASSERT.ENQUOTE_LITERAL('x') || p_user` interpolates raw p_user.
495 let s = lower_statement_body("v_sql := DBMS_ASSERT.ENQUOTE_LITERAL('x') || p_user;");
496 let env = analyze_flow(&s, &src(&["p_user"]));
497 let f = env.get("v_sql").unwrap();
498 assert!(
499 f.taint.kinds.contains(&TaintKind::UserInput),
500 "the uncleansed sibling p_user must remain tainted"
501 );
502 assert!(
503 f.taint.cleansed_by.is_empty(),
504 "the sibling assert's cleanser must not leak onto the whole expression"
505 );
506 assert!(
507 f.taint.flags_alarm(),
508 "raw user input concatenated with a sanitized literal must still alarm"
509 );
510 }
511
512 #[test]
513 fn taint_flows_through_concatenation() {
514 let s = lower_statement_body("v_sql := 'SELECT * FROM ' || p_tab;");
515 let env = analyze_flow(&s, &src(&["p_tab"]));
516 assert!(
517 env.get("v_sql")
518 .unwrap()
519 .taint
520 .kinds
521 .contains(&TaintKind::UserInput)
522 );
523 }
524
525 #[test]
526 fn bind_ref_is_bind_taint() {
527 let s = lower_statement_body("v_x := :1;");
528 let env = analyze_flow(&s, &src(&[]));
529 assert!(
530 env.get("v_x")
531 .unwrap()
532 .taint
533 .kinds
534 .contains(&TaintKind::BindVariable)
535 );
536 }
537
538 #[test]
539 fn string_literal_assignment_records_shape() {
540 let s = lower_statement_body("v_msg := 'hello';");
541 let env = analyze_flow(&s, &src(&[]));
542 let literal = env.get("v_msg").and_then(|flow| match &flow.string_shape {
543 Some(StringShape::Literal { value }) => Some(value.as_str()),
544 _ => None,
545 });
546 assert_eq!(literal, Some("hello"));
547 }
548
549 #[test]
550 fn if_branch_assignments_both_recorded() {
551 let s = lower_statement_body("IF flag THEN v_x := p_a; ELSE v_x := 0; END IF;");
552 let env = analyze_flow(&s, &src(&["p_a"]));
553 // May-analysis: v_x carries the union of both branches'
554 // flow, so the tainted branch taints it.
555 assert!(
556 env.get("v_x")
557 .unwrap()
558 .taint
559 .kinds
560 .contains(&TaintKind::UserInput)
561 );
562 }
563
564 #[test]
565 fn loop_body_assignment_recorded() {
566 let s = lower_statement_body("FOR i IN 1..10 LOOP v_acc := v_acc + p_in; END LOOP;");
567 let env = analyze_flow(&s, &src(&["p_in"]));
568 assert!(
569 env.get("v_acc")
570 .unwrap()
571 .taint
572 .kinds
573 .contains(&TaintKind::UserInput)
574 );
575 }
576
577 #[test]
578 fn untainted_name_not_flagged() {
579 let s = lower_statement_body("v_x := v_y + 1;");
580 let env = analyze_flow(&s, &src(&["p_user"]));
581 assert!(!env.get("v_x").unwrap().taint.flags_alarm());
582 }
583
584 #[test]
585 fn branch_merge_sibling_cleanse_does_not_mask_live_kind() {
586 // Regression for oracle-qm3q.26 (cleanser-union fail-open across a
587 // branch join). One arm sanitises `v` with DBMS_ASSERT; the OTHER arm
588 // assigns raw `p_user`. `merge_into` unions the cleanser from the THEN
589 // arm with the live UserInput kind from the ELSE arm — but because
590 // `kinds` tracks only LIVE (uncleansed) taint and `flags_alarm` no
591 // longer depends on `cleansed_by`, the uncleansed ELSE path still
592 // alarms. (Under the old "tainted-but-cleansed" model the recorded
593 // DbmsAssert cleanser would have masked the live ELSE-path kind — a
594 // SEC001 fail-open.)
595 let s = lower_statement_body(
596 "IF c THEN v := DBMS_ASSERT.SIMPLE_SQL_NAME(p_user); ELSE v := p_user; END IF;",
597 );
598 let env = analyze_flow(&s, &src(&["p_user"]));
599 let f = env.get("v").unwrap();
600 assert!(
601 f.taint.kinds.contains(&TaintKind::UserInput),
602 "the uncleansed ELSE-path UserInput kind must survive the branch join"
603 );
604 assert!(
605 f.taint.cleansed_by.contains(&TaintCleanser::DbmsAssert),
606 "the THEN-path cleanser is still recorded for reporting"
607 );
608 assert!(
609 f.taint.flags_alarm(),
610 "a sibling cleanse on one branch must NOT mask the live kind on the other"
611 );
612 }
613
614 #[test]
615 fn case_insensitive_source_match() {
616 let s = lower_statement_body("v_x := P_USER;");
617 let env = analyze_flow(&s, &src(&["p_user"]));
618 assert!(
619 env.get("V_X")
620 .unwrap()
621 .taint
622 .kinds
623 .contains(&TaintKind::UserInput)
624 );
625 }
626
627 #[test]
628 fn empty_body_yields_empty_env() {
629 let env = analyze_flow(&[], &src(&[]));
630 assert!(env.is_empty());
631 }
632
633 #[test]
634 fn two_hop_local_laundering_propagates_taint() {
635 // Regression for oracle-qm3q.20 (transitive intra-procedural taint).
636 // `v_tmp` launders `p_user`; `v_sql := v_tmp` must inherit the taint so
637 // an EXECUTE IMMEDIATE built from v_sql is still flagged. Before the
638 // use-def fix, expr_flow only consulted the static `sources` set and
639 // never the live env, so v_sql came out clean (a SEC001 false negative).
640 let s = lower_statement_body("v_tmp := p_user; v_sql := v_tmp;");
641 let env = analyze_flow(&s, &src(&["p_user"]));
642 assert!(
643 env.get("v_tmp")
644 .unwrap()
645 .taint
646 .kinds
647 .contains(&TaintKind::UserInput),
648 "the first hop is tainted from the source"
649 );
650 let sql = env.get("v_sql").unwrap();
651 assert!(
652 sql.taint.kinds.contains(&TaintKind::UserInput),
653 "taint laundered through v_tmp must reach v_sql"
654 );
655 assert!(sql.taint.flags_alarm(), "the laundered value still alarms");
656 }
657
658 #[test]
659 fn n_hop_local_laundering_propagates_taint() {
660 // Deeper chain: p_user -> a -> b -> c. Each hop must carry the taint
661 // forward through the live env.
662 let s = lower_statement_body("v_a := p_user; v_b := v_a; v_c := v_b;");
663 let env = analyze_flow(&s, &src(&["p_user"]));
664 for name in ["v_a", "v_b", "v_c"] {
665 assert!(
666 env.get(name)
667 .unwrap()
668 .taint
669 .kinds
670 .contains(&TaintKind::UserInput),
671 "{name} must be tainted along the laundering chain"
672 );
673 }
674 }
675
676 #[test]
677 fn cleansed_local_then_reused_stays_clean() {
678 // The dual of laundering: once a local is sanitised by DBMS_ASSERT,
679 // reusing it must NOT resurrect a live UserInput kind. The transitive
680 // env-consult inherits cleansed_by (for reporting) but no live kind,
681 // because the sanitiser already drained the kinds it consumed.
682 let s =
683 lower_statement_body("v_tmp := DBMS_ASSERT.SIMPLE_SQL_NAME(p_user); v_sql := v_tmp;");
684 let env = analyze_flow(&s, &src(&["p_user"]));
685 let sql = env.get("v_sql").unwrap();
686 assert!(
687 !sql.taint.kinds.contains(&TaintKind::UserInput),
688 "a reused sanitised local carries no live taint"
689 );
690 assert!(
691 !sql.taint.flags_alarm(),
692 "reusing a sanitised value must not alarm"
693 );
694 assert!(
695 sql.taint.cleansed_by.contains(&TaintCleanser::DbmsAssert),
696 "the cleanser is carried forward for reporting"
697 );
698 }
699
700 #[test]
701 fn taint_laundered_through_local_into_concatenation_alarms() {
702 // Combine transitivity with the sibling-cleanse guard: stage raw user
703 // input in a local, then concatenate it into a dynamic-SQL string.
704 let s = lower_statement_body("v_t := p_user; v_sql := 'SELECT * FROM ' || v_t;");
705 let env = analyze_flow(&s, &src(&["p_user"]));
706 let sql = env.get("v_sql").unwrap();
707 assert!(
708 sql.taint.kinds.contains(&TaintKind::UserInput),
709 "laundered taint concatenated into SQL must remain tainted"
710 );
711 assert!(sql.taint.flags_alarm());
712 }
713
714 // oracle-rwjl.3: a verb-prefixed local (`return_val`) used to be swallowed
715 // by classify() (→ Statement::Return), dropping the assignment from
716 // flow_intra::walk so taint laundered through it never reached the sink.
717 // Now it is a real Assignment, so v_sql inherits p_user's taint.
718 #[test]
719 fn verb_prefixed_local_laundering_propagates_taint() {
720 let s = lower_statement_body("return_val := p_user; v_sql := return_val;");
721 let env = analyze_flow(&s, &src(&["p_user"]));
722 let rv = env
723 .get("return_val")
724 .expect("the verb-prefixed local must be recorded as an assignment");
725 assert!(
726 rv.taint.kinds.contains(&TaintKind::UserInput),
727 "return_val must inherit p_user's taint"
728 );
729 let sql = env.get("v_sql").unwrap();
730 assert!(
731 sql.taint.kinds.contains(&TaintKind::UserInput),
732 "taint laundered through the verb-prefixed local must reach v_sql"
733 );
734 assert!(sql.taint.flags_alarm());
735 }
736
737 // oracle-rwjl.4: DBMS_ASSERT.NOOP is Oracle's documented identity
738 // pass-through — it performs NO validation, so it must NOT cleanse. Raw
739 // user input wrapped in NOOP and concatenated into dynamic SQL must still
740 // alarm (the old uniform `starts_with("DBMS_ASSERT.")` reported it clean —
741 // a SEC001 fail-open).
742 #[test]
743 fn dbms_assert_noop_is_not_a_sanitizer() {
744 let s = lower_statement_body("v_sql := 'SELECT * FROM ' || DBMS_ASSERT.NOOP(p_user);");
745 let env = analyze_flow(&s, &src(&["p_user"]));
746 let f = env.get("v_sql").unwrap();
747 assert!(
748 f.taint.kinds.contains(&TaintKind::UserInput),
749 "NOOP performs no validation; its argument's taint must survive"
750 );
751 assert!(
752 f.taint.flags_alarm(),
753 "user input wrapped in DBMS_ASSERT.NOOP must still alarm"
754 );
755 }
756
757 // oracle-rwjl.4 (direct, not just concatenated): a bare NOOP wrap is also
758 // transparent.
759 #[test]
760 fn dbms_assert_noop_direct_assignment_stays_tainted() {
761 let s = lower_statement_body("v_sql := DBMS_ASSERT.NOOP(p_user);");
762 let env = analyze_flow(&s, &src(&["p_user"]));
763 let f = env.get("v_sql").unwrap();
764 assert!(
765 f.taint.kinds.contains(&TaintKind::UserInput),
766 "NOOP does not consume taint"
767 );
768 assert!(f.taint.flags_alarm());
769 }
770
771 // oracle-rwjl.4: a REAL validating sanitizer with a SYS schema prefix must
772 // still be recognised as a cleanser (the old `starts_with` missed the
773 // prefix and over-reported a genuinely safe value).
774 #[test]
775 fn sys_prefixed_dbms_assert_sanitizer_cleanses() {
776 let s = lower_statement_body("v_safe := SYS.DBMS_ASSERT.SIMPLE_SQL_NAME(p_tab);");
777 let env = analyze_flow(&s, &src(&["p_tab"]));
778 let f = env.get("v_safe").unwrap();
779 assert!(
780 !f.taint.flags_alarm(),
781 "a schema-prefixed real sanitizer must still cleanse"
782 );
783 assert!(
784 !f.taint.kinds.contains(&TaintKind::UserInput),
785 "the sanitizer consumes the argument's taint"
786 );
787 }
788
// oracle-lokg.2: reproduces the exact crash shape found in the bundled
// public fixture. A `SELECT … FOR UPDATE;` body fragment leaves behind
// the bare token `FOR UPDATE`. The text-scanner's `classify_loop` takes
// `FOR …` for a FOR-loop, finds neither a word-bounded `IN` nor an
// `END LOOP`, and falls back to a `BareLoop` whose `body_text` is
// *the same string* `FOR UPDATE`. Re-lowering that text yields the
// identical, non-shrinking `BareLoop`, so before the depth guard
// existed `walk` recursed without bound and aborted the whole
// `analyze_flow` (SIGABRT / "stack overflow"; MAX_PASSES=64 bounds only
// the OUTER fixpoint, not the per-pass recursion). The analysis must
// now terminate and report the truncation honestly (R13).
#[test]
fn non_shrinking_for_update_does_not_stack_overflow_and_reports_limit() {
    // Hand-build the self-reproducing statement instead of lowering it.
    let malformed = Statement::BareLoop {
        body_text: String::from("FOR UPDATE"),
    };
    let stmts = vec![malformed];

    let (env, outcome) = analyze_flow_bounded(&stmts, &src(&[]));
    let cap_fired = outcome.limit_hit;
    assert!(
        cap_fired,
        "the non-shrinking `FOR UPDATE` BareLoop must trip the \
         bounded depth cap, outcome={outcome:?}"
    );
    assert!(outcome.truncated_bodies >= 1);

    // The malformed fragment contains no recoverable assignment.
    assert!(env.is_empty());

    // The back-compat wrapper only has to return (no panic / abort,
    // no unbounded recursion); its result is deliberately discarded.
    let _ = analyze_flow(&stmts, &src(&[]));
}
818
// oracle-lokg.2: the same shape, but reached through the lowering path
// rather than a hand-built `Statement`. This proves that the end-to-end
// public API `analyze_flow(&lower_statement_body("FOR UPDATE"), …)`
// terminates.
#[test]
fn analyze_flow_over_lowered_for_update_terminates() {
    let lowered = lower_statement_body("FOR UPDATE");
    let no_sources = TaintSources::default();
    // The env contents are intentionally not asserted: the only
    // requirement is that the call returns at all (before the guard
    // this aborted the process).
    let _ = analyze_flow(&lowered, &no_sources).is_empty();
}
830
// oracle-lokg.2: a genuinely deep, linear nesting chain has to stop at
// the depth cap with a clean typed truncation outcome rather than
// overflow the stack. Every level is a `BareLoop` wrapping the next,
// so the re-lowered slice shrinks by one level per recursion — yet
// without the cap a sufficiently deep chain would still exhaust the
// native stack. DEPTH sits well above `MAX_RELOWER_DEPTH` (128) so the
// cap is guaranteed to fire while the per-level re-lowering scan stays
// cheap; the same guard bounds the recursion to 128 frames no matter
// how deep the input is.
#[test]
fn deep_nested_loop_chain_degrades_to_limit_not_overflow() {
    const DEPTH: usize = 1_000;
    // Compile-time invariant: the chain must be deeper than the cap,
    // otherwise the truncation would not be guaranteed to fire.
    const _: () = assert!(DEPTH > crate::MAX_RELOWER_DEPTH);

    // Assemble the chain from three pieces — DEPTH `LOOP ` openers, the
    // innermost assignment, DEPTH `END LOOP; ` closers — each allocated
    // once, so there is no quadratic string re-allocation.
    let openers = "LOOP ".repeat(DEPTH);
    let closers = "END LOOP; ".repeat(DEPTH);
    let body = format!("{openers}v_x := p_user; {closers}");

    let stmts = lower_statement_body(&body);
    let sources = src(&["p_user"]);
    let (_, outcome) = analyze_flow_bounded(&stmts, &sources);
    let cap_fired = outcome.limit_hit;
    assert!(
        cap_fired,
        "a {DEPTH}-deep nested LOOP chain must trip the depth cap, \
         outcome={outcome:?}"
    );
}
865
// oracle-hrzg.2: taint routed through an anonymous BEGIN…END sub-block
// has to arrive at the assigned name. Before `walk` gained its
// NestedBlock arm, the `_ => {}` catch-all discarded the sub-block
// altogether, so `v_sql` came back UNtainted (FLOW-001 fail-open →
// SEC001 misses the injection once wired).
#[test]
fn nested_begin_block_launders_taint_into_assignment() {
    let lowered = lower_statement_body("BEGIN v_sql := p_user; END;");
    let sources = src(&["p_user"]);
    let flow_env = analyze_flow(&lowered, &sources);

    // The assignment inside the sub-block must have been recorded.
    let sql_flow = flow_env
        .get("v_sql")
        .expect("the nested-block assignment to v_sql must be recorded");

    let tainted_by_user = sql_flow.taint.kinds.contains(&TaintKind::UserInput);
    assert!(
        tainted_by_user,
        "taint laundered through a BEGIN…END sub-block must reach v_sql"
    );

    let alarms = sql_flow.taint.flags_alarm();
    assert!(alarms, "the laundered value still alarms");
}
884
// oracle-hrzg.2: the same scenario wrapped in DECLARE…END, the other
// anonymous-block shape the classifier emits as NestedBlock.
#[test]
fn nested_declare_block_launders_taint_into_assignment() {
    let lowered = lower_statement_body("DECLARE v_x NUMBER; BEGIN v_sql := p_user; END;");
    let sources = src(&["p_user"]);
    let flow_env = analyze_flow(&lowered, &sources);

    // The assignment inside the DECLARE wrapper must have been recorded.
    let f = flow_env
        .get("v_sql")
        .expect("the DECLARE-wrapped assignment to v_sql must be recorded");

    let tainted_by_user = f.taint.kinds.contains(&TaintKind::UserInput);
    assert!(
        tainted_by_user,
        "taint laundered through a DECLARE…END sub-block must reach v_sql"
    );
    assert!(f.taint.flags_alarm());
}
900
// oracle-hrzg.2: a deeply nested chain of anonymous blocks has to stop
// at the MAX_RELOWER_DEPTH cap (honest typed truncation) instead of
// overflowing the stack — the same posture as the loop-chain guard.
// Every level wraps the next in `BEGIN … END;`, so the stripped slice
// shrinks by one level per recursion.
#[test]
fn deep_nested_block_chain_degrades_to_limit_not_overflow() {
    const DEPTH: usize = 1_000;
    // Compile-time invariant: the chain must be deeper than the cap.
    const _: () = assert!(DEPTH > crate::MAX_RELOWER_DEPTH);

    // DEPTH `BEGIN ` openers, the innermost assignment, then DEPTH
    // `END; ` closers.
    let openers = "BEGIN ".repeat(DEPTH);
    let closers = "END; ".repeat(DEPTH);
    let body = format!("{openers}v_x := p_user; {closers}");

    let stmts = lower_statement_body(&body);
    let sources = src(&["p_user"]);
    let (_, outcome) = analyze_flow_bounded(&stmts, &sources);
    let cap_fired = outcome.limit_hit;
    assert!(
        cap_fired,
        "a {DEPTH}-deep nested BEGIN chain must trip the depth cap, \
         outcome={outcome:?}"
    );
}
926
// oracle-hrzg.5: a parenthesised concatenation operand, as in
// `'SELECT … ' || (p_user)`, has to keep p_user's taint — the paren
// group is unwrapped before the `||` split. Before the
// `recognise_paren_group` recognizer existed, `(p_user)` lowered to
// `Raw{UnrecognizedShape}` and contributed zero taint, so the byte-
// identical un-parenthesised form alarmed while this one did not
// (SEC001 fail-open on a no-obfuscation code shape).
#[test]
fn parenthesised_concat_operand_keeps_taint() {
    let lowered = lower_statement_body("v_sql := 'SELECT * FROM ' || (p_user);");
    let sources = src(&["p_user"]);
    let flow_env = analyze_flow(&lowered, &sources);
    let f = flow_env.get("v_sql").unwrap();

    let tainted_by_user = f.taint.kinds.contains(&TaintKind::UserInput);
    assert!(
        tainted_by_user,
        "a parenthesised tainted operand must remain tainted"
    );
    assert!(f.taint.flags_alarm());
}
945
// oracle-hrzg.5: when the whole RHS is one parenthesised group,
// `('SELECT …' || p_user)`, the group is unwrapped first and the inner
// `||` then splits normally, so the taint survives.
#[test]
fn whole_rhs_paren_group_keeps_taint() {
    let lowered = lower_statement_body("v_sql := ('SELECT * FROM ' || p_user);");
    let sources = src(&["p_user"]);
    let flow_env = analyze_flow(&lowered, &sources);
    let f = flow_env.get("v_sql").unwrap();

    let tainted_by_user = f.taint.kinds.contains(&TaintKind::UserInput);
    assert!(
        tainted_by_user,
        "a whole-RHS parenthesised group must preserve inner taint"
    );
    assert!(f.taint.flags_alarm());
}
960
// oracle-hrzg.5: a bare `(p_user)` group is a Name, and therefore
// taints exactly like the un-parenthesised reference.
#[test]
fn bare_paren_group_is_tainted_name() {
    let lowered = lower_statement_body("v_sql := (p_user);");
    let sources = src(&["p_user"]);
    let flow_env = analyze_flow(&lowered, &sources);
    let f = flow_env.get("v_sql").unwrap();

    // Both the taint kind and the alarm must match the plain reference.
    assert!(f.taint.kinds.contains(&TaintKind::UserInput));
    assert!(f.taint.flags_alarm());
}
971}