plsql_ir/
flow_intra.rs

1//! Intra-procedural assignment + expression flow.
2//!
3//! Walks a lowered statement body and propagates [`ValueFlow`]
4//! facts (FLOW-001) through assignments and expressions inside a
5//! single routine. The pass is deliberately a *may*-analysis
6//! over a flat statement list: it does not model branch joins
7//! precisely (that needs a CFG, scheduled for a later pass) —
8//! it conservatively merges every assignment's RHS flow into the
9//! LHS via `ValueSet::join`, which is sound for taint /
10//! string-shape over-approximation.
11//!
12//! Taint is *use-def transitive*: an RHS that references a local
13//! already tainted earlier in the body inherits that taint, so
14//! laundering through intermediates (`v_tmp := p_user;
15//! v_sql := v_tmp;`) cannot escape the analysis. The walk is
16//! iterated to a fixpoint over the finite taint lattice so a name
17//! tainted only on a later pass (e.g. across a loop back-edge) is
18//! still captured.
19//!
20//! Outputs a `FlowEnv` mapping each assigned name to its
21//! accumulated `ValueFlow`. SAST consumes this to answer "does
22//! tainted input reach a dynamic-SQL sink without a cleanser?".
23//!
24//! ## /oracle evidence
25//!
26//! * `DATABASE-REFERENCE.md` PL/SQL Language Reference — the
27//!   assignment + parameter-mode chapters define how a value
28//!   enters / moves through a routine.
29//! * `LOW-LEVEL-CATALOGS.md` Supplied Package Buckets —
30//!   `DBMS_ASSERT` is the cleanser that resets a name's taint.
31
32use std::collections::BTreeMap;
33
34use crate::expr::Expr;
35use crate::flow::{StringShape, TaintCleanser, TaintKind, ValueFlow};
36use crate::stmt::Statement;
37
38/// Per-routine flow environment: name (upper-cased) → flow.
39#[derive(Clone, Debug, Default, PartialEq, Eq)]
40pub struct FlowEnv {
41    map: BTreeMap<String, ValueFlow>,
42}
43
44impl FlowEnv {
45    #[must_use]
46    pub fn get(&self, name: &str) -> Option<&ValueFlow> {
47        self.map.get(&name.to_ascii_uppercase())
48    }
49
50    /// Iterate every name (upper-cased) the environment tracks.
51    /// Used by the FLOW-005 query facade to enumerate tainted
52    /// names without exposing the inner map.
53    pub fn iter_names(&self) -> impl Iterator<Item = String> + '_ {
54        self.map.keys().cloned()
55    }
56
57    /// Iterate every tracked name with its aggregate flow state.
58    /// Fact projection uses this to materialize the flow lattice into
59    /// normalized [`FactStore`](crate::FactStore) rows without exposing
60    /// mutation of the environment.
61    pub fn iter(&self) -> impl Iterator<Item = (&str, &ValueFlow)> + '_ {
62        self.map.iter().map(|(name, flow)| (name.as_str(), flow))
63    }
64
65    #[must_use]
66    pub fn len(&self) -> usize {
67        self.map.len()
68    }
69
70    #[must_use]
71    pub fn is_empty(&self) -> bool {
72        self.map.is_empty()
73    }
74
75    fn merge_into(&mut self, name: &str, flow: ValueFlow) {
76        let key = name.to_ascii_uppercase();
77        let entry = self.map.entry(key).or_default();
78        // Taint kinds accumulate (union) across the branch arms a may-analysis
79        // folds into one env. `cleansed_by` also accumulates, but ONLY for
80        // reporting: the alarm reads `kinds` (live, uncleansed taint), so a
81        // cleanser recorded on one arm cannot mask a live kind contributed by a
82        // sibling arm. (Under the former "tainted-but-cleansed" model this union
83        // was a fail-open at branch joins — oracle-qm3q.26; the live-kinds model
84        // from oracle-qm3q.1 makes the join sound without needing CFG-precise
85        // path-intersection of cleansers.)
86        for k in flow.taint.kinds {
87            if !entry.taint.kinds.contains(&k) {
88                entry.taint.kinds.push(k);
89            }
90        }
91        for c in flow.taint.cleansed_by {
92            if !entry.taint.cleansed_by.contains(&c) {
93                entry.taint.cleansed_by.push(c);
94            }
95        }
96        // Value set joins (lattice over-approx).
97        let prev = std::mem::take(&mut entry.value_set);
98        entry.value_set = prev.join(flow.value_set);
99        // Constant: if both sides agree keep it, else drop to None.
100        if entry.constant != flow.constant {
101            entry.constant = None;
102        }
103        // String shape: keep the more-specific one only if equal.
104        if entry.string_shape != flow.string_shape {
105            entry.string_shape = flow.string_shape.or(entry.string_shape.take());
106        }
107    }
108}
109
/// Taint declarations supplied by the caller for one routine.
///
/// Each list holds bare names considered tainted on entry (e.g. public
/// IN parameters, binds). While an expression is analysed, the first
/// segment of every referenced name is compared ASCII-case-insensitively
/// against these lists; a hit taints the expression's flow.
#[derive(Clone, Debug, Default)]
pub struct TaintSources {
    /// Names whose references taint a flow with `UserInput`.
    pub user_input_names: Vec<String>,
    /// Names whose references taint a flow with `BindVariable`.
    pub bind_names: Vec<String>,
}
120
121/// Run intra-procedural flow over `stmts`. `sources` declares
122/// which bare names are tainted on entry (public params, binds).
123///
124/// Taint propagates transitively through assignments: an RHS that
125/// references a previously-tainted *local* (`v_sql := v_tmp` after
126/// `v_tmp := p_user`) inherits that local's live taint, so
127/// multi-hop laundering through intermediate variables cannot
128/// escape the analysis. Because branches and loops can re-read a
129/// name that is only tainted on a later pass, `walk` is iterated to
130/// a fixpoint over the (finite) taint lattice before the env is
131/// returned.
132///
133/// Back-compat wrapper over [`analyze_flow_bounded`]: the per-pass
134/// re-lowering recursion is depth-guarded so a non-shrinking
135/// malformed body (e.g. the bare token `FOR UPDATE` that a
136/// `SELECT … FOR UPDATE;` fragment leaves behind, which classifies
137/// as a `BareLoop` whose `body_text` re-lowers to the *identical*
138/// `BareLoop`) terminates instead of overflowing the stack /
139/// aborting the process (R13). Callers that need to surface the
140/// typed degradation (`outcome.limit_hit`) should call
141/// [`analyze_flow_bounded`] directly.
142#[must_use]
143pub fn analyze_flow(stmts: &[Statement], sources: &TaintSources) -> FlowEnv {
144    analyze_flow_bounded(stmts, sources).0
145}
146
147/// Depth-bounded variant of [`analyze_flow`]. Returns the flow
148/// environment plus a [`RecursionOutcome`] recording whether (and
149/// how often) a nested re-lowered body was abandoned at the
150/// recursion-depth cap rather than walked unbounded. The caller is
151/// responsible for emitting an honest typed diagnostic when
152/// `outcome.limit_hit` (R13 — never silently truncate, never
153/// stack-overflow on a non-shrinking malformed slice).
154#[must_use]
155pub fn analyze_flow_bounded(
156    stmts: &[Statement],
157    sources: &TaintSources,
158) -> (FlowEnv, crate::RecursionOutcome) {
159    let mut env = FlowEnv::default();
160    let mut outcome = crate::RecursionOutcome::default();
161    // Iterate to a fixpoint: `merge_into` is monotone (it only ever
162    // unions kinds/cleansers and joins value-sets upward), so the
163    // finite lattice guarantees the env stops growing. The cap is a
164    // belt-and-suspenders bound (never expected to bind) so a
165    // pathological body can never spin forever.
166    const MAX_PASSES: usize = 64;
167    for _ in 0..MAX_PASSES {
168        let before = env.clone();
169        // Re-accumulate the truncation outcome each pass over a
170        // *fresh* outcome so the count reflects one pass, not the
171        // sum across passes; the env still folds monotonically.
172        let mut pass_outcome = crate::RecursionOutcome::default();
173        walk(stmts, sources, &mut env, 0, &mut pass_outcome);
174        outcome.absorb(pass_outcome);
175        if env == before {
176            break;
177        }
178    }
179    (env, outcome)
180}
181
182fn walk(
183    stmts: &[Statement],
184    sources: &TaintSources,
185    env: &mut FlowEnv,
186    depth: usize,
187    outcome: &mut crate::RecursionOutcome,
188) {
189    // Recurse into a re-lowered control-flow body only while we
190    // have depth budget left. At the cap we record the truncation
191    // and stop descending — never silently drop, never recurse
192    // unbounded (which stack-overflows on a non-shrinking malformed
193    // slice such as the bare `FOR UPDATE` token). Mirrors
194    // `calls.rs::walk_call_sites` / `dml_edges.rs::walk_table_accesses`.
195    macro_rules! recurse_body {
196        ($text:expr) => {{
197            if depth + 1 >= crate::MAX_RELOWER_DEPTH {
198                outcome.note_truncated();
199            } else {
200                let lowered = crate::lower_statement_body($text);
201                walk(&lowered, sources, env, depth + 1, outcome);
202            }
203        }};
204    }
205    for s in stmts {
206        match s {
207            Statement::Assignment { target, rhs_text } => {
208                let rhs_expr = crate::expr::lower_expression(rhs_text);
209                // Read the live env (use-def aware) so taint already
210                // accumulated on a referenced local flows into the RHS.
211                let flow = expr_flow(&rhs_expr, sources, env);
212                env.merge_into(target, flow);
213            }
214            Statement::If {
215                arms,
216                else_body_text,
217            } => {
218                for arm in arms {
219                    recurse_body!(&arm.body_text);
220                }
221                if let Some(eb) = else_body_text {
222                    recurse_body!(eb);
223                }
224            }
225            Statement::ForLoop { body_text, .. }
226            | Statement::WhileLoop { body_text, .. }
227            | Statement::BareLoop { body_text } => {
228                recurse_body!(body_text);
229            }
230            Statement::NestedBlock { body_text } => {
231                // Anonymous `BEGIN … END` / `DECLARE … END` sub-block: a
232                // value laundered through it (`BEGIN v_sql := p_user; END;`)
233                // must still taint `v_sql`, or the FLOW-001 pass fails open
234                // for that name and SEC001 misses the injection. Strip the
235                // wrapper and re-lower the inner statements, mirroring
236                // `calls.rs::walk_call_sites` / `dml_edges.rs`. Only recurse
237                // when the stripped slice differs from the original so the
238                // depth-guarded `recurse_body!` cannot spin on a non-stripping
239                // slice (the cap already bounds a non-shrinking one). A block
240                // with no strippable wrapper carries no recoverable
241                // assignment, so it is left untouched.
242                let inner = crate::calls::strip_block_wrapper(body_text);
243                if inner != body_text.as_str() {
244                    recurse_body!(inner);
245                }
246            }
247            _ => {}
248        }
249    }
250}
251
252/// Compute the `ValueFlow` of an expression. Taint flows from any
253/// referenced source name OR any previously-tainted local recorded
254/// in `env` (use-def transitivity); a `DBMS_ASSERT.*` call cleanses.
255fn expr_flow(expr: &Expr, sources: &TaintSources, env: &FlowEnv) -> ValueFlow {
256    let mut flow = ValueFlow::default();
257    collect_expr_flow(expr, sources, env, &mut flow);
258    flow
259}
260
/// Is `path` (an already-upper-cased dotted call path) a *validating*
/// `DBMS_ASSERT` entry point — i.e. one that actually rejects unsafe input
/// and so cleanses the taint of its argument?
///
/// Accepted shapes are exactly `DBMS_ASSERT.<fn>` and
/// `SYS.DBMS_ASSERT.<fn>`, where `<fn>` is in the validator allowlist.
/// Everything else returns `false` and is treated as transparent by the
/// caller, so its argument's taint still reaches the sink.
///
/// Gaps this predicate closes:
///
/// 1. **`DBMS_ASSERT.NOOP` is NOT a sanitizer.** Oracle documents NOOP as an
///    identity pass-through that performs no validation and returns its
///    argument unchanged. A uniform `path.starts_with("DBMS_ASSERT.")` guard
///    would match it, so `EXECUTE IMMEDIATE DBMS_ASSERT.NOOP(p_user)` would
///    be reported clean — a SQL-injection fail-open in the flagship SEC001
///    rule (oracle-rwjl.4). NOOP and any unrecognized entry point fail the
///    allowlist.
/// 2. **A schema prefix must not make a real sanitizer transparent.**
///    `SYS.DBMS_ASSERT.SIMPLE_SQL_NAME(...)` is the same package, so the
///    `SYS` owner prefix is tolerated (oracle-rwjl.4).
/// 3. **An arbitrary prefix must not impersonate the sanitizer.** Matching
///    only the trailing two segments would also accept
///    `OTHER.DBMS_ASSERT.SIMPLE_SQL_NAME`, deeper paths, or a path with an
///    empty leading segment. A package named `DBMS_ASSERT` in a schema
///    other than `SYS` is not the Oracle-supplied validator, so trusting it
///    would be a fail-open; such paths are rejected.
///
/// The allowlist mirrors the validating set enumerated in
/// `plsql-symbols/src/dynamic_sql.rs` (which lists NOOP separately, only for
/// textual detection — never as a validator).
fn is_dbms_assert_sanitizer(path: &str) -> bool {
    const VALIDATORS: &[&str] = &[
        "SIMPLE_SQL_NAME",
        "QUALIFIED_SQL_NAME",
        "SCHEMA_NAME",
        "ENQUOTE_NAME",
        "SQL_OBJECT_NAME",
        "ENQUOTE_LITERAL",
    ];
    let segs: Vec<&str> = path.split('.').collect();
    // Only the bare package or the `SYS`-owned package qualifies; the final
    // segment must then be a validating function. Any other arity or prefix
    // falls through to `false` (fail closed).
    match segs.as_slice() {
        ["DBMS_ASSERT", func] | ["SYS", "DBMS_ASSERT", func] => VALIDATORS.contains(func),
        _ => false,
    }
}
301
302fn collect_expr_flow(expr: &Expr, sources: &TaintSources, env: &FlowEnv, flow: &mut ValueFlow) {
303    match expr {
304        Expr::Name(n) => {
305            let head = n.parts.first().map(String::as_str).unwrap_or_default();
306            if sources
307                .user_input_names
308                .iter()
309                .any(|s| s.eq_ignore_ascii_case(head))
310                && !flow.taint.kinds.contains(&TaintKind::UserInput)
311            {
312                flow.taint.kinds.push(TaintKind::UserInput);
313            }
314            if sources
315                .bind_names
316                .iter()
317                .any(|s| s.eq_ignore_ascii_case(head))
318                && !flow.taint.kinds.contains(&TaintKind::BindVariable)
319            {
320                flow.taint.kinds.push(TaintKind::BindVariable);
321            }
322            // Use-def transitivity: a reference to a previously-assigned
323            // local inherits that local's accumulated flow, so taint
324            // laundered through an intermediate variable
325            // (`v_tmp := p_user; v_sql := v_tmp;`) still reaches the sink.
326            // Only LIVE kinds carry the alarm; `cleansed_by` is unioned for
327            // reporting (a recorded cleanser never masks a live kind — see
328            // `flags_alarm`). String shape is preserved only when the parent
329            // has none yet.
330            if let Some(prev) = env.get(head) {
331                for k in &prev.taint.kinds {
332                    if !flow.taint.kinds.contains(k) {
333                        flow.taint.kinds.push(*k);
334                    }
335                }
336                for c in &prev.taint.cleansed_by {
337                    if !flow.taint.cleansed_by.contains(c) {
338                        flow.taint.cleansed_by.push(*c);
339                    }
340                }
341                if flow.string_shape.is_none() {
342                    flow.string_shape = prev.string_shape.clone();
343                }
344            }
345        }
346        Expr::BindRef(_) if !flow.taint.kinds.contains(&TaintKind::BindVariable) => {
347            flow.taint.kinds.push(TaintKind::BindVariable);
348        }
349        Expr::StringLit(s) if flow.string_shape.is_none() => {
350            flow.string_shape = Some(StringShape::Literal { value: s.clone() });
351        }
352        Expr::Call { callee, args } => {
353            let path = callee.parts.join(".").to_ascii_uppercase();
354            if is_dbms_assert_sanitizer(&path) {
355                // A `DBMS_ASSERT.*` call SANITIZES its argument: the value it
356                // returns is safe to interpolate. The cleansing therefore binds to
357                // the call's *argument subtree*, NOT to the enclosing expression.
358                // Compute the args in an ISOLATED sub-flow and drop their taint
359                // (kinds + cleansers) — it is consumed by the sanitizer — so the
360                // call contributes nothing injectable to the parent. Only taint
361                // that flows AROUND the call (e.g. a concatenated sibling) reaches
362                // the parent and can still alarm.
363                //
364                // The old code pushed `DbmsAssert` onto the *shared* parent flow
365                // and recursed the args into it, so a cleanse on one operand zeroed
366                // the alarm for an unrelated sibling — e.g.
367                // `DBMS_ASSERT.ENQUOTE_LITERAL('x') || p_user` came out
368                // {UserInput, cleansed:DbmsAssert} → flags_alarm=false (fail-open).
369                let mut sanitized = ValueFlow::default();
370                for a in args {
371                    collect_expr_flow(a, sources, env, &mut sanitized);
372                }
373                // The sanitizer CONSUMES its argument's live taint: record the
374                // cleanser (for reporting) and DROP the kinds — they are no longer
375                // injectable. `kinds` holds only *live* (uncleansed) taint, so the
376                // dropped kinds simply never enter the enclosing `flow`. Only taint
377                // that flows AROUND the call (a concatenated sibling) reaches it.
378                if !sanitized.taint.kinds.is_empty()
379                    && !flow.taint.cleansed_by.contains(&TaintCleanser::DbmsAssert)
380                {
381                    flow.taint.cleansed_by.push(TaintCleanser::DbmsAssert);
382                }
383                // Carry forward only non-taint shape info; the result is clean.
384                if flow.string_shape.is_none() {
385                    flow.string_shape = sanitized.string_shape;
386                }
387            } else {
388                // A non-sanitizing call is transparent to taint: its arguments'
389                // taint flows through to the enclosing expression.
390                for a in args {
391                    collect_expr_flow(a, sources, env, flow);
392                }
393            }
394        }
395        Expr::Binary { lhs, rhs, .. } => {
396            collect_expr_flow(lhs, sources, env, flow);
397            collect_expr_flow(rhs, sources, env, flow);
398        }
399        Expr::Unary { operand, .. } => collect_expr_flow(operand, sources, env, flow),
400        Expr::Raw { .. } => {
401            // The recognizer could not lower this sub-expression (an
402            // unrecognized shape like a SQL `CASE` expression, an
403            // unbalanced/unterminated fragment, or a depth-limit-collapsed
404            // concat tail). Any user-tainted operand inside it is invisible to
405            // this collector, so treating the value as clean would be a silent
406            // taint fail-open (R13). Fail CLOSED: mark the value Unanalyzable so
407            // a downstream dynamic-SQL sink flags it, and force the string shape
408            // opaque so it can never be mistaken for a provably-constant literal.
409            if !flow.taint.kinds.contains(&TaintKind::Unanalyzable) {
410                flow.taint.kinds.push(TaintKind::Unanalyzable);
411            }
412            if flow.string_shape.is_none() {
413                flow.string_shape = Some(StringShape::FullyOpaque);
414            }
415        }
416        _ => {}
417    }
418}
419
420#[cfg(test)]
421mod tests {
422    use super::*;
423    use crate::lower_statement_body;
424
425    fn src(user: &[&str]) -> TaintSources {
426        TaintSources {
427            user_input_names: user.iter().map(|s| s.to_string()).collect(),
428            bind_names: vec![],
429        }
430    }
431
432    #[test]
433    fn assignment_from_constant_has_no_taint() {
434        let s = lower_statement_body("v_x := 42;");
435        let env = analyze_flow(&s, &src(&[]));
436        assert!(!env.get("v_x").unwrap().taint.flags_alarm());
437    }
438
439    #[test]
440    fn assignment_from_user_input_is_tainted() {
441        let s = lower_statement_body("v_sql := p_user_table;");
442        let env = analyze_flow(&s, &src(&["p_user_table"]));
443        let f = env.get("v_sql").unwrap();
444        assert!(f.taint.kinds.contains(&TaintKind::UserInput));
445        assert!(f.taint.flags_alarm());
446    }
447
448    #[test]
449    fn unlowerable_case_expression_rhs_fails_closed_as_unanalyzable() {
450        // oracle-qo1v.2: a SQL CASE expression on an assignment RHS is not a
451        // recognized Expr shape, so it lowers to Expr::Raw and the user-tainted
452        // operand (p_user) inside it is invisible to the taint collector. The
453        // old catch-all dropped it silently (taint fail-open). The collector now
454        // fails CLOSED: the value is marked Unanalyzable (raises the alarm so a
455        // downstream EXECUTE IMMEDIATE is flagged) and forced to an opaque string
456        // shape so it can never be read as a provably-constant literal.
457        let s = lower_statement_body("v_sql := CASE WHEN cond THEN p_user ELSE 'x' END;");
458        let env = analyze_flow(&s, &src(&["p_user"]));
459        let f = env.get("v_sql").expect("v_sql flow recorded");
460        assert!(
461            f.taint.kinds.contains(&TaintKind::Unanalyzable),
462            "un-lowerable CASE RHS must be marked Unanalyzable: {:?}",
463            f.taint
464        );
465        assert!(f.taint.flags_alarm(), "fail closed: must raise the alarm");
466        assert!(
467            matches!(f.string_shape, Some(StringShape::FullyOpaque)),
468            "un-lowerable value must not be mistaken for a constant literal: {:?}",
469            f.string_shape
470        );
471    }
472
473    #[test]
474    fn dbms_assert_call_cleanses_its_argument() {
475        // DBMS_ASSERT.* sanitizes its argument: the result is a clean value with no
476        // alarm. The arg's taint is consumed by the sanitizer, so the result no
477        // longer carries the UserInput kind (we dropped the old "tainted-but-
478        // cleansed" representation, which let an unrelated cleanser mask a
479        // concatenated sibling — see the fail-open regression below).
480        let s = lower_statement_body("v_safe := DBMS_ASSERT.SIMPLE_SQL_NAME(p_user_table);");
481        let env = analyze_flow(&s, &src(&["p_user_table"]));
482        let f = env.get("v_safe").unwrap();
483        assert!(!f.taint.flags_alarm(), "sanitized value must not alarm");
484        assert!(
485            !f.taint.kinds.contains(&TaintKind::UserInput),
486            "the sanitizer consumes the argument's taint"
487        );
488    }
489
490    #[test]
491    fn dbms_assert_does_not_cleanse_a_concatenated_sibling() {
492        // SEC001 fail-open regression: a DBMS_ASSERT cleanse on ONE operand must
493        // NOT zero the injection alarm for tainted input concatenated ALONGSIDE it.
494        // `DBMS_ASSERT.ENQUOTE_LITERAL('x') || p_user` interpolates raw p_user.
495        let s = lower_statement_body("v_sql := DBMS_ASSERT.ENQUOTE_LITERAL('x') || p_user;");
496        let env = analyze_flow(&s, &src(&["p_user"]));
497        let f = env.get("v_sql").unwrap();
498        assert!(
499            f.taint.kinds.contains(&TaintKind::UserInput),
500            "the uncleansed sibling p_user must remain tainted"
501        );
502        assert!(
503            f.taint.cleansed_by.is_empty(),
504            "the sibling assert's cleanser must not leak onto the whole expression"
505        );
506        assert!(
507            f.taint.flags_alarm(),
508            "raw user input concatenated with a sanitized literal must still alarm"
509        );
510    }
511
512    #[test]
513    fn taint_flows_through_concatenation() {
514        let s = lower_statement_body("v_sql := 'SELECT * FROM ' || p_tab;");
515        let env = analyze_flow(&s, &src(&["p_tab"]));
516        assert!(
517            env.get("v_sql")
518                .unwrap()
519                .taint
520                .kinds
521                .contains(&TaintKind::UserInput)
522        );
523    }
524
525    #[test]
526    fn bind_ref_is_bind_taint() {
527        let s = lower_statement_body("v_x := :1;");
528        let env = analyze_flow(&s, &src(&[]));
529        assert!(
530            env.get("v_x")
531                .unwrap()
532                .taint
533                .kinds
534                .contains(&TaintKind::BindVariable)
535        );
536    }
537
538    #[test]
539    fn string_literal_assignment_records_shape() {
540        let s = lower_statement_body("v_msg := 'hello';");
541        let env = analyze_flow(&s, &src(&[]));
542        let literal = env.get("v_msg").and_then(|flow| match &flow.string_shape {
543            Some(StringShape::Literal { value }) => Some(value.as_str()),
544            _ => None,
545        });
546        assert_eq!(literal, Some("hello"));
547    }
548
549    #[test]
550    fn if_branch_assignments_both_recorded() {
551        let s = lower_statement_body("IF flag THEN v_x := p_a; ELSE v_x := 0; END IF;");
552        let env = analyze_flow(&s, &src(&["p_a"]));
553        // May-analysis: v_x carries the union of both branches'
554        // flow, so the tainted branch taints it.
555        assert!(
556            env.get("v_x")
557                .unwrap()
558                .taint
559                .kinds
560                .contains(&TaintKind::UserInput)
561        );
562    }
563
564    #[test]
565    fn loop_body_assignment_recorded() {
566        let s = lower_statement_body("FOR i IN 1..10 LOOP v_acc := v_acc + p_in; END LOOP;");
567        let env = analyze_flow(&s, &src(&["p_in"]));
568        assert!(
569            env.get("v_acc")
570                .unwrap()
571                .taint
572                .kinds
573                .contains(&TaintKind::UserInput)
574        );
575    }
576
577    #[test]
578    fn untainted_name_not_flagged() {
579        let s = lower_statement_body("v_x := v_y + 1;");
580        let env = analyze_flow(&s, &src(&["p_user"]));
581        assert!(!env.get("v_x").unwrap().taint.flags_alarm());
582    }
583
584    #[test]
585    fn branch_merge_sibling_cleanse_does_not_mask_live_kind() {
586        // Regression for oracle-qm3q.26 (cleanser-union fail-open across a
587        // branch join). One arm sanitises `v` with DBMS_ASSERT; the OTHER arm
588        // assigns raw `p_user`. `merge_into` unions the cleanser from the THEN
589        // arm with the live UserInput kind from the ELSE arm — but because
590        // `kinds` tracks only LIVE (uncleansed) taint and `flags_alarm` no
591        // longer depends on `cleansed_by`, the uncleansed ELSE path still
592        // alarms. (Under the old "tainted-but-cleansed" model the recorded
593        // DbmsAssert cleanser would have masked the live ELSE-path kind — a
594        // SEC001 fail-open.)
595        let s = lower_statement_body(
596            "IF c THEN v := DBMS_ASSERT.SIMPLE_SQL_NAME(p_user); ELSE v := p_user; END IF;",
597        );
598        let env = analyze_flow(&s, &src(&["p_user"]));
599        let f = env.get("v").unwrap();
600        assert!(
601            f.taint.kinds.contains(&TaintKind::UserInput),
602            "the uncleansed ELSE-path UserInput kind must survive the branch join"
603        );
604        assert!(
605            f.taint.cleansed_by.contains(&TaintCleanser::DbmsAssert),
606            "the THEN-path cleanser is still recorded for reporting"
607        );
608        assert!(
609            f.taint.flags_alarm(),
610            "a sibling cleanse on one branch must NOT mask the live kind on the other"
611        );
612    }
613
614    #[test]
615    fn case_insensitive_source_match() {
616        let s = lower_statement_body("v_x := P_USER;");
617        let env = analyze_flow(&s, &src(&["p_user"]));
618        assert!(
619            env.get("V_X")
620                .unwrap()
621                .taint
622                .kinds
623                .contains(&TaintKind::UserInput)
624        );
625    }
626
627    #[test]
628    fn empty_body_yields_empty_env() {
629        let env = analyze_flow(&[], &src(&[]));
630        assert!(env.is_empty());
631    }
632
633    #[test]
634    fn two_hop_local_laundering_propagates_taint() {
635        // Regression for oracle-qm3q.20 (transitive intra-procedural taint).
636        // `v_tmp` launders `p_user`; `v_sql := v_tmp` must inherit the taint so
637        // an EXECUTE IMMEDIATE built from v_sql is still flagged. Before the
638        // use-def fix, expr_flow only consulted the static `sources` set and
639        // never the live env, so v_sql came out clean (a SEC001 false negative).
640        let s = lower_statement_body("v_tmp := p_user; v_sql := v_tmp;");
641        let env = analyze_flow(&s, &src(&["p_user"]));
642        assert!(
643            env.get("v_tmp")
644                .unwrap()
645                .taint
646                .kinds
647                .contains(&TaintKind::UserInput),
648            "the first hop is tainted from the source"
649        );
650        let sql = env.get("v_sql").unwrap();
651        assert!(
652            sql.taint.kinds.contains(&TaintKind::UserInput),
653            "taint laundered through v_tmp must reach v_sql"
654        );
655        assert!(sql.taint.flags_alarm(), "the laundered value still alarms");
656    }
657
658    #[test]
659    fn n_hop_local_laundering_propagates_taint() {
660        // Deeper chain: p_user -> a -> b -> c. Each hop must carry the taint
661        // forward through the live env.
662        let s = lower_statement_body("v_a := p_user; v_b := v_a; v_c := v_b;");
663        let env = analyze_flow(&s, &src(&["p_user"]));
664        for name in ["v_a", "v_b", "v_c"] {
665            assert!(
666                env.get(name)
667                    .unwrap()
668                    .taint
669                    .kinds
670                    .contains(&TaintKind::UserInput),
671                "{name} must be tainted along the laundering chain"
672            );
673        }
674    }
675
676    #[test]
677    fn cleansed_local_then_reused_stays_clean() {
678        // The dual of laundering: once a local is sanitised by DBMS_ASSERT,
679        // reusing it must NOT resurrect a live UserInput kind. The transitive
680        // env-consult inherits cleansed_by (for reporting) but no live kind,
681        // because the sanitiser already drained the kinds it consumed.
682        let s =
683            lower_statement_body("v_tmp := DBMS_ASSERT.SIMPLE_SQL_NAME(p_user); v_sql := v_tmp;");
684        let env = analyze_flow(&s, &src(&["p_user"]));
685        let sql = env.get("v_sql").unwrap();
686        assert!(
687            !sql.taint.kinds.contains(&TaintKind::UserInput),
688            "a reused sanitised local carries no live taint"
689        );
690        assert!(
691            !sql.taint.flags_alarm(),
692            "reusing a sanitised value must not alarm"
693        );
694        assert!(
695            sql.taint.cleansed_by.contains(&TaintCleanser::DbmsAssert),
696            "the cleanser is carried forward for reporting"
697        );
698    }
699
700    #[test]
701    fn taint_laundered_through_local_into_concatenation_alarms() {
702        // Combine transitivity with the sibling-cleanse guard: stage raw user
703        // input in a local, then concatenate it into a dynamic-SQL string.
704        let s = lower_statement_body("v_t := p_user; v_sql := 'SELECT * FROM ' || v_t;");
705        let env = analyze_flow(&s, &src(&["p_user"]));
706        let sql = env.get("v_sql").unwrap();
707        assert!(
708            sql.taint.kinds.contains(&TaintKind::UserInput),
709            "laundered taint concatenated into SQL must remain tainted"
710        );
711        assert!(sql.taint.flags_alarm());
712    }
713
714    // oracle-rwjl.3: a verb-prefixed local (`return_val`) used to be swallowed
715    // by classify() (→ Statement::Return), dropping the assignment from
716    // flow_intra::walk so taint laundered through it never reached the sink.
717    // Now it is a real Assignment, so v_sql inherits p_user's taint.
718    #[test]
719    fn verb_prefixed_local_laundering_propagates_taint() {
720        let s = lower_statement_body("return_val := p_user; v_sql := return_val;");
721        let env = analyze_flow(&s, &src(&["p_user"]));
722        let rv = env
723            .get("return_val")
724            .expect("the verb-prefixed local must be recorded as an assignment");
725        assert!(
726            rv.taint.kinds.contains(&TaintKind::UserInput),
727            "return_val must inherit p_user's taint"
728        );
729        let sql = env.get("v_sql").unwrap();
730        assert!(
731            sql.taint.kinds.contains(&TaintKind::UserInput),
732            "taint laundered through the verb-prefixed local must reach v_sql"
733        );
734        assert!(sql.taint.flags_alarm());
735    }
736
737    // oracle-rwjl.4: DBMS_ASSERT.NOOP is Oracle's documented identity
738    // pass-through — it performs NO validation, so it must NOT cleanse. Raw
739    // user input wrapped in NOOP and concatenated into dynamic SQL must still
740    // alarm (the old uniform `starts_with("DBMS_ASSERT.")` reported it clean —
741    // a SEC001 fail-open).
742    #[test]
743    fn dbms_assert_noop_is_not_a_sanitizer() {
744        let s = lower_statement_body("v_sql := 'SELECT * FROM ' || DBMS_ASSERT.NOOP(p_user);");
745        let env = analyze_flow(&s, &src(&["p_user"]));
746        let f = env.get("v_sql").unwrap();
747        assert!(
748            f.taint.kinds.contains(&TaintKind::UserInput),
749            "NOOP performs no validation; its argument's taint must survive"
750        );
751        assert!(
752            f.taint.flags_alarm(),
753            "user input wrapped in DBMS_ASSERT.NOOP must still alarm"
754        );
755    }
756
757    // oracle-rwjl.4 (direct, not just concatenated): a bare NOOP wrap is also
758    // transparent.
759    #[test]
760    fn dbms_assert_noop_direct_assignment_stays_tainted() {
761        let s = lower_statement_body("v_sql := DBMS_ASSERT.NOOP(p_user);");
762        let env = analyze_flow(&s, &src(&["p_user"]));
763        let f = env.get("v_sql").unwrap();
764        assert!(
765            f.taint.kinds.contains(&TaintKind::UserInput),
766            "NOOP does not consume taint"
767        );
768        assert!(f.taint.flags_alarm());
769    }
770
771    // oracle-rwjl.4: a REAL validating sanitizer with a SYS schema prefix must
772    // still be recognised as a cleanser (the old `starts_with` missed the
773    // prefix and over-reported a genuinely safe value).
774    #[test]
775    fn sys_prefixed_dbms_assert_sanitizer_cleanses() {
776        let s = lower_statement_body("v_safe := SYS.DBMS_ASSERT.SIMPLE_SQL_NAME(p_tab);");
777        let env = analyze_flow(&s, &src(&["p_tab"]));
778        let f = env.get("v_safe").unwrap();
779        assert!(
780            !f.taint.flags_alarm(),
781            "a schema-prefixed real sanitizer must still cleanse"
782        );
783        assert!(
784            !f.taint.kinds.contains(&TaintKind::UserInput),
785            "the sanitizer consumes the argument's taint"
786        );
787    }
788
789    // oracle-lokg.2: the exact crash shape from the bundled public
790    // fixture. A `SELECT … FOR UPDATE;` body fragment leaves the bare
791    // token `FOR UPDATE`; the text-scanner's `classify_loop` treats
792    // `FOR …` as a FOR-loop, finds no word-bounded `IN` and no
793    // `END LOOP`, and falls back to a `BareLoop` whose `body_text` is
794    // *the same string* `FOR UPDATE`. Re-lowering it yields the
795    // identical non-shrinking `BareLoop` → before the depth guard
796    // `walk` recursed unbounded and aborted the whole `analyze_flow`
797    // (SIGABRT / "stack overflow"; MAX_PASSES=64 bounds only the OUTER
798    // fixpoint, not the per-pass recursion). It must now terminate and
799    // report the truncation honestly (R13).
800    #[test]
801    fn non_shrinking_for_update_does_not_stack_overflow_and_reports_limit() {
802        let stmts = vec![Statement::BareLoop {
803            body_text: "FOR UPDATE".to_string(),
804        }];
805        let (env, outcome) = analyze_flow_bounded(&stmts, &src(&[]));
806        assert!(
807            outcome.limit_hit,
808            "the non-shrinking `FOR UPDATE` BareLoop must trip the \
809             bounded depth cap, outcome={outcome:?}"
810        );
811        assert!(outcome.truncated_bodies >= 1);
812        // No assignment can be recovered from the malformed fragment.
813        assert!(env.is_empty());
814        // The back-compat wrapper must also simply terminate
815        // (no panic / abort) rather than recurse unbounded.
816        let _ = analyze_flow(&stmts, &src(&[]));
817    }
818
819    // oracle-lokg.2: the same shape arrived at via the lowering path
820    // (not a hand-built `Statement`), proving the end-to-end public API
821    // `analyze_flow(&lower_statement_body("FOR UPDATE"), …)` terminates.
822    #[test]
823    fn analyze_flow_over_lowered_for_update_terminates() {
824        let stmts = lower_statement_body("FOR UPDATE");
825        let env = analyze_flow(&stmts, &TaintSources::default());
826        // We do not assert the env contents — only that the call
827        // returned at all (before the guard this aborted the process).
828        let _ = env.is_empty();
829    }
830
831    // oracle-lokg.2: a genuinely deep linear nesting chain must
832    // terminate at the depth cap with a clean typed truncation outcome
833    // instead of overflowing the stack. Each level is a `BareLoop`
834    // wrapping the next, so the re-lowered slice shrinks one level per
835    // recursion — but without the cap a sufficiently deep chain would
836    // overflow the native stack. DEPTH is set well above
837    // `MAX_RELOWER_DEPTH` (128) so the cap is guaranteed to fire while
838    // keeping the per-level re-lowering scan cheap; the same guard
839    // bounds the recursion to 128 frames no matter how deep the input.
840    #[test]
841    fn deep_nested_loop_chain_degrades_to_limit_not_overflow() {
842        const DEPTH: usize = 1_000;
843        // Compile-time invariant: DEPTH must exceed the cap so the
844        // truncation is guaranteed to fire.
845        const _: () = assert!(DEPTH > crate::MAX_RELOWER_DEPTH);
846        // Build the chain with a single linear pass (no quadratic
847        // string re-allocation): DEPTH `LOOP ` openers, the innermost
848        // assignment, then DEPTH ` END LOOP;` closers.
849        let mut body = String::with_capacity(DEPTH * 16 + 32);
850        for _ in 0..DEPTH {
851            body.push_str("LOOP ");
852        }
853        body.push_str("v_x := p_user; ");
854        for _ in 0..DEPTH {
855            body.push_str("END LOOP; ");
856        }
857        let stmts = lower_statement_body(&body);
858        let (_, outcome) = analyze_flow_bounded(&stmts, &src(&["p_user"]));
859        assert!(
860            outcome.limit_hit,
861            "a {DEPTH}-deep nested LOOP chain must trip the depth cap, \
862             outcome={outcome:?}"
863        );
864    }
865
866    // oracle-hrzg.2: taint laundered through an anonymous BEGIN…END
867    // sub-block must still reach the assigned name. Before the
868    // NestedBlock arm in `walk`, the `_ => {}` catch-all dropped the
869    // sub-block entirely, so `v_sql` came back UNtainted (FLOW-001
870    // fail-open → SEC001 misses the injection once wired).
871    #[test]
872    fn nested_begin_block_launders_taint_into_assignment() {
873        let s = lower_statement_body("BEGIN v_sql := p_user; END;");
874        let env = analyze_flow(&s, &src(&["p_user"]));
875        let f = env
876            .get("v_sql")
877            .expect("the nested-block assignment to v_sql must be recorded");
878        assert!(
879            f.taint.kinds.contains(&TaintKind::UserInput),
880            "taint laundered through a BEGIN…END sub-block must reach v_sql"
881        );
882        assert!(f.taint.flags_alarm(), "the laundered value still alarms");
883    }
884
885    // oracle-hrzg.2: the same, via a DECLARE…END wrapper (the other
886    // anonymous-block shape the classifier emits as NestedBlock).
887    #[test]
888    fn nested_declare_block_launders_taint_into_assignment() {
889        let s = lower_statement_body("DECLARE v_x NUMBER; BEGIN v_sql := p_user; END;");
890        let env = analyze_flow(&s, &src(&["p_user"]));
891        let f = env
892            .get("v_sql")
893            .expect("the DECLARE-wrapped assignment to v_sql must be recorded");
894        assert!(
895            f.taint.kinds.contains(&TaintKind::UserInput),
896            "taint laundered through a DECLARE…END sub-block must reach v_sql"
897        );
898        assert!(f.taint.flags_alarm());
899    }
900
901    // oracle-hrzg.2: a deeply nested chain of anonymous blocks must
902    // terminate at the MAX_RELOWER_DEPTH cap (honest typed truncation)
903    // rather than overflowing the stack — same posture as the loop-chain
904    // guard. Each level wraps the next in `BEGIN … END;` so the stripped
905    // slice shrinks one level per recursion.
906    #[test]
907    fn deep_nested_block_chain_degrades_to_limit_not_overflow() {
908        const DEPTH: usize = 1_000;
909        const _: () = assert!(DEPTH > crate::MAX_RELOWER_DEPTH);
910        let mut body = String::with_capacity(DEPTH * 12 + 32);
911        for _ in 0..DEPTH {
912            body.push_str("BEGIN ");
913        }
914        body.push_str("v_x := p_user; ");
915        for _ in 0..DEPTH {
916            body.push_str("END; ");
917        }
918        let stmts = lower_statement_body(&body);
919        let (_, outcome) = analyze_flow_bounded(&stmts, &src(&["p_user"]));
920        assert!(
921            outcome.limit_hit,
922            "a {DEPTH}-deep nested BEGIN chain must trip the depth cap, \
923             outcome={outcome:?}"
924        );
925    }
926
927    // oracle-hrzg.5: a parenthesised concatenation operand
928    // `'SELECT … ' || (p_user)` must keep p_user's taint — the paren
929    // group is unwrapped before the `||` split. Before the
930    // `recognise_paren_group` recognizer, `(p_user)` lowered to
931    // `Raw{UnrecognizedShape}`, contributing zero taint, and the byte-
932    // identical un-parenthesised form alarmed while this one did not
933    // (SEC001 fail-open on a no-obfuscation code shape).
934    #[test]
935    fn parenthesised_concat_operand_keeps_taint() {
936        let s = lower_statement_body("v_sql := 'SELECT * FROM ' || (p_user);");
937        let env = analyze_flow(&s, &src(&["p_user"]));
938        let f = env.get("v_sql").unwrap();
939        assert!(
940            f.taint.kinds.contains(&TaintKind::UserInput),
941            "a parenthesised tainted operand must remain tainted"
942        );
943        assert!(f.taint.flags_alarm());
944    }
945
946    // oracle-hrzg.5: a whole-RHS parenthesised group
947    // `('SELECT …' || p_user)` is unwrapped first, then the inner `||`
948    // splits normally so the taint survives.
949    #[test]
950    fn whole_rhs_paren_group_keeps_taint() {
951        let s = lower_statement_body("v_sql := ('SELECT * FROM ' || p_user);");
952        let env = analyze_flow(&s, &src(&["p_user"]));
953        let f = env.get("v_sql").unwrap();
954        assert!(
955            f.taint.kinds.contains(&TaintKind::UserInput),
956            "a whole-RHS parenthesised group must preserve inner taint"
957        );
958        assert!(f.taint.flags_alarm());
959    }
960
961    // oracle-hrzg.5: a bare `(p_user)` group is a Name, so it taints
962    // identically to the un-parenthesised reference.
963    #[test]
964    fn bare_paren_group_is_tainted_name() {
965        let s = lower_statement_body("v_sql := (p_user);");
966        let env = analyze_flow(&s, &src(&["p_user"]));
967        let f = env.get("v_sql").unwrap();
968        assert!(f.taint.kinds.contains(&TaintKind::UserInput));
969        assert!(f.taint.flags_alarm());
970    }
971}
plsql_ir/flow_intra.rs

plsql_ir/
flow_intra.rs