Skip to main content

plsql_ir/
flow.rs

1//! Value-flow, taint, constant, value-set, and string-shape
2//! models.
3//!
4//! Downstream SAST and lineage layers reason about *how* values
5//! propagate, not just *whether* a name binds. This module
6//! defines the shapes those passes share so they all speak the
7//! same vocabulary:
8//!
9//! * [`TaintKind`] — the family of taint a value carries
10//!   (user-supplied, dynamic-SQL, db-link, file-system, …).
11//! * [`ConstantValue`] — when a value is provably constant, its
12//!   wire form (number / string / bool / null).
13//! * [`ValueSet`] — abstract domain summarising the set of values
14//!   a name might hold (Top / `OneOf` / `Range` / `Bottom`).
//! * [`StringShape`] — abstract domain for string values
//!   (literal / interpolated-with-fix / fully-opaque / empty).
17//! * [`ValueFlow`] — the per-name aggregate the passes return.
18//!
19//! Population happens in the intra- / inter-procedural flow passes.
20//! This module ships the types + serde + small helpers so the
21//! consumers (SAST, bindings, doc) program against a stable surface
22//! today.
23//!
24//! ## /oracle evidence
25//!
26//! * `DATABASE-REFERENCE.md` PL/SQL Language Reference — the
27//!   bind-variable + parameter-mode chapters drive how taint
28//!   enters a routine. `DBMS_ASSERT` (see
29//!   `LOW-LEVEL-CATALOGS.md` supplied-packages) is the
30//!   sanctioned cleanser.
31
32use serde::{Deserialize, Serialize};
33
/// Per-name aggregate flow report.
///
/// Bundles the four views a flow pass can record about one name: its taint
/// state, its constant value (if any), its abstract value set, and its
/// abstract string shape (if any). `ValueFlow::default()` is the
/// "nothing known" report: empty taint, no constant, [`ValueSet::Top`], and
/// no string shape.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct ValueFlow {
    /// Live taint kinds plus the sanitisers recorded for the value.
    pub taint: Taint,
    /// Constant wire form of the value; `None` when no constant is recorded.
    pub constant: Option<ConstantValue>,
    /// Abstract set of values the name might hold; defaults to `Top`.
    pub value_set: ValueSet,
    /// Abstract shape of the value as a string; `None` when no shape is
    /// recorded.
    pub string_shape: Option<StringShape>,
}
42
/// Taint state. `kinds` lists the *live* (uncleansed) taint sources that
/// flow into the value — a bound sanitiser (e.g. a `DBMS_ASSERT.*` call)
/// removes the kinds it cleanses, so a sanitized value carries no live kind.
/// `cleansed_by` records which sanitisers fired anywhere in the value's
/// derivation (kept for reporting, not for the alarm). SAST emits a finding
/// iff `kinds` is non-empty. Tracking *live* kinds (rather than all-seen
/// kinds gated on an empty `cleansed_by`) binds cleansing to the sanitized
/// sub-expression, so taint concatenated alongside a sanitized operand still
/// alarms (e.g. `DBMS_ASSERT.ENQUOTE_LITERAL('x') || p_user`).
///
/// The derived `Default` is the untainted state: both lists empty.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Taint {
    /// Live (uncleansed) taint kinds; non-empty means the value alarms.
    pub kinds: Vec<TaintKind>,
    /// Sanitisers seen in the derivation; informational only, never
    /// consulted by [`Taint::flags_alarm`].
    pub cleansed_by: Vec<TaintCleanser>,
}
57
/// Family of taint source a value can carry; stored in [`Taint::kinds`].
///
/// Serialised as the snake_case variant name (e.g. `user_input`,
/// `dynamic_sql`), so renaming a variant changes the wire format.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TaintKind {
    /// Value came from an IN parameter of a public routine.
    UserInput,
    /// Value came from a bind variable.
    BindVariable,
    /// Value came from `EXECUTE IMMEDIATE` / `OPEN FOR <expr>`
    /// dynamic SQL substitution.
    DynamicSql,
    /// Value came from a remote `name@dblink` reference.
    DbLink,
    /// Value came from a file-system read (`UTL_FILE`).
    FileSystem,
    /// Value came from `UTL_HTTP` / `UTL_TCP` / `UTL_SMTP`.
    Network,
    /// Value came from the OS environment (`DBMS_SYSTEM`,
    /// `SYS_CONTEXT('USERENV', …)`).
    Environment,
    /// Value came from an Oracle scheduler argument
    /// (`DBMS_SCHEDULER.SET_JOB_ARGUMENT_VALUE`).
    SchedulerArgument,
    /// Value flowed through a sub-expression the recognizer could not lower
    /// (`Expr::Raw` — an unrecognized shape such as a SQL `CASE` expression, an
    /// unbalanced/unterminated fragment, or a depth-limit-collapsed concat
    /// tail). The analyzer cannot prove the value safe, so it fails CLOSED:
    /// the value is treated as potentially-injectable rather than silently
    /// dropped (R13 — never swallow a blind spot). Carrying this as a live
    /// taint kind makes a downstream dynamic-SQL sink flag it instead of
    /// reading the un-lowered value as clean (oracle-qo1v.2).
    Unanalyzable,
}
90
/// Sanitiser (or neutralising condition) recorded in
/// [`Taint::cleansed_by`].
///
/// Serialised as the snake_case variant name (e.g. `dbms_assert`), so
/// renaming a variant changes the wire format.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TaintCleanser {
    /// One of the `DBMS_ASSERT.*` sanitisers (per SYM-005).
    DbmsAssert,
    /// `SYS.UTL_RAW.CAST_TO_RAW` / equivalent hex-encode.
    HexEncode,
    /// Operator wrote a literal-only string — no taint flow.
    LiteralOnly,
    /// `DBMS_OUTPUT.PUT_LINE` consumer — taint does not flow
    /// back into the database (terminal sink).
    OutputSink,
    /// Caller explicitly annotated the value as cleansed via a
    /// project-local convention (e.g. comment marker).
    OperatorAttested,
}
107
/// When the value is provably constant, its wire form.
///
/// The enum uses serde's *internally tagged* representation
/// (`#[serde(tag = "kind")]`): the variant name is written as a `kind` field
/// alongside the variant's own fields, e.g. `{"kind":"int","value":"1"}`.
/// That representation cannot carry a newtype variant whose payload is a bare
/// `String` / primitive, which is why every payload-bearing variant uses a
/// struct-form `value` field instead.
///
/// Numeric literals are kept as `String` so the source text is preserved
/// verbatim rather than re-parsed.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum ConstantValue {
    /// Integer literal preserved verbatim.
    Int { value: String },
    /// Floating-point or fixed-point literal preserved verbatim.
    Float { value: String },
    /// String literal body, doubled-`''` already de-escaped.
    Str { value: String },
    /// Boolean literal.
    Bool { value: bool },
    /// `NULL` literal.
    Null,
}
126
/// Abstract domain summarising the set of values a name might
/// hold. The lattice is `Bottom < Range / OneOf < Top` —
/// passes refine `Top` toward the more specific variants as
/// they accumulate evidence.
///
/// Serialised internally tagged: the snake_case variant name is written as a
/// `kind` field next to the variant's own fields. The derived `Default` is
/// [`ValueSet::Top`].
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum ValueSet {
    /// No information yet — could be anything.
    #[default]
    Top,
    /// Value is one of a finite set of constants.
    OneOf { values: Vec<ConstantValue> },
    /// Numeric range `[lo, hi]` inclusive — `lo` / `hi` carry the
    /// constant's wire form so `Range` covers integers, floats,
    /// and bounded enums.
    Range {
        /// Inclusive lower bound.
        lo: ConstantValue,
        /// Inclusive upper bound.
        hi: ConstantValue,
    },
    /// Empty set — the value is provably unreachable.
    Bottom,
}
149
/// Abstract domain for string values. Powers SAST rules around
/// dynamic-SQL composition + URL / file-path opening.
///
/// Serialised internally tagged: the snake_case variant name is written as a
/// `kind` field (`literal`, `interpolated_with_fix`, `fully_opaque`,
/// `empty`) next to the variant's own fields.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum StringShape {
    /// String is a single literal.
    Literal { value: String },
    /// String is built from `literal_prefix` + a runtime
    /// expression + `literal_suffix`. Either prefix / suffix may
    /// be empty.
    InterpolatedWithFix {
        /// Fixed text preceding the runtime expression (may be empty).
        literal_prefix: String,
        /// Fixed text following the runtime expression (may be empty).
        literal_suffix: String,
    },
    /// String is a concat of constants and runtime expressions
    /// with no usable fixed substring on either end.
    FullyOpaque,
    /// String is empty.
    Empty,
}
170
171impl Taint {
172    /// True iff the value carries any *live* (uncleansed) taint kind.
173    /// `kinds` already excludes anything a bound sanitiser consumed (see the
174    /// struct doc), so the alarm is a simple non-emptiness check — no longer
175    /// gated on `cleansed_by`, which a sibling cleanse used to satisfy and
176    /// thereby mask a concatenated tainted operand (the SEC001 fail-open).
177    #[must_use]
178    pub fn flags_alarm(&self) -> bool {
179        !self.kinds.is_empty()
180    }
181}
182
183impl ValueSet {
184    /// Merge two `ValueSet`s with the lattice join. Top
185    /// dominates; Bottom yields the other side; two `OneOf`s
186    /// union their value lists.
187    #[must_use]
188    pub fn join(self, other: ValueSet) -> ValueSet {
189        match (self, other) {
190            (ValueSet::Top, _) | (_, ValueSet::Top) => ValueSet::Top,
191            (ValueSet::Bottom, x) | (x, ValueSet::Bottom) => x,
192            (ValueSet::OneOf { mut values }, ValueSet::OneOf { values: other }) => {
193                for v in other {
194                    if !values.contains(&v) {
195                        values.push(v);
196                    }
197                }
198                ValueSet::OneOf { values }
199            }
200            // Range + OneOf / Range + Range → Top (over-approx).
201            // Callers needing tighter joins can specialise.
202            _ => ValueSet::Top,
203        }
204    }
205}
206
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an integer constant in its wire form.
    fn int(v: &str) -> ConstantValue {
        ConstantValue::Int { value: v.into() }
    }

    #[test]
    fn taint_flags_alarm_when_no_cleanser() {
        let t = Taint {
            kinds: vec![TaintKind::UserInput],
            cleansed_by: vec![],
        };
        assert!(t.flags_alarm());
    }

    #[test]
    fn taint_does_not_flag_when_cleansed() {
        // A value sanitised by a bound cleanser carries NO live kind: the
        // cleanser drained the kinds it consumed. `cleansed_by` is retained only
        // for reporting and does not by itself suppress the alarm.
        let t = Taint {
            kinds: vec![],
            cleansed_by: vec![TaintCleanser::DbmsAssert],
        };
        assert!(!t.flags_alarm());
    }

    #[test]
    fn taint_flags_when_live_kind_present_despite_a_recorded_cleanser() {
        // Regression for the SEC001 fail-open: a cleanser recorded somewhere in
        // the derivation must NOT mask a live (uncleansed) kind from a sibling.
        let t = Taint {
            kinds: vec![TaintKind::UserInput],
            cleansed_by: vec![TaintCleanser::DbmsAssert],
        };
        assert!(t.flags_alarm());
    }

    #[test]
    fn taint_default_no_alarm() {
        assert!(!Taint::default().flags_alarm());
    }

    #[test]
    fn value_set_top_dominates_join() {
        // Checked in both operand orders: the join must be symmetric here.
        let one_of = ValueSet::OneOf {
            values: vec![int("1")],
        };
        assert_eq!(ValueSet::Top.join(one_of.clone()), ValueSet::Top);
        assert_eq!(one_of.join(ValueSet::Top), ValueSet::Top);
    }

    #[test]
    fn value_set_bottom_yields_other_side() {
        // Bottom is the identity on either side, and the surviving operand
        // must come back unchanged — not merely with the same length.
        let one_of = ValueSet::OneOf {
            values: vec![int("7")],
        };
        assert_eq!(ValueSet::Bottom.join(one_of.clone()), one_of);
        assert_eq!(one_of.clone().join(ValueSet::Bottom), one_of);
    }

    #[test]
    fn one_of_join_unions_values_dedup() {
        let a = ValueSet::OneOf {
            values: vec![int("1"), int("2")],
        };
        let b = ValueSet::OneOf {
            values: vec![int("2"), int("3")],
        };
        // Left operand's values first, then the new values from the right.
        assert_eq!(
            a.join(b),
            ValueSet::OneOf {
                values: vec![int("1"), int("2"), int("3")],
            }
        );
    }

    #[test]
    fn range_plus_one_of_widens_to_top() {
        let range = ValueSet::Range {
            lo: int("0"),
            hi: int("10"),
        };
        let one_of = ValueSet::OneOf {
            values: vec![int("5")],
        };
        assert_eq!(range.clone().join(one_of.clone()), ValueSet::Top);
        assert_eq!(one_of.join(range), ValueSet::Top);
    }

    #[test]
    fn differing_ranges_widen_to_top() {
        let narrow = ValueSet::Range {
            lo: int("0"),
            hi: int("10"),
        };
        let wide = ValueSet::Range {
            lo: int("0"),
            hi: int("20"),
        };
        assert_eq!(narrow.join(wide), ValueSet::Top);
    }

    #[test]
    fn string_shape_variants_serialise_snake_case() {
        // Pin the exact internally-tagged wire form, not just a substring.
        let lit = StringShape::Literal {
            value: "hello".into(),
        };
        assert_eq!(
            serde_json::to_string(&lit).unwrap(),
            r#"{"kind":"literal","value":"hello"}"#
        );
        assert_eq!(
            serde_json::to_string(&StringShape::FullyOpaque).unwrap(),
            r#"{"kind":"fully_opaque"}"#
        );
    }

    #[test]
    fn value_flow_default_is_top_no_taint_no_constant() {
        let v = ValueFlow::default();
        assert_eq!(v.value_set, ValueSet::Top);
        assert!(v.constant.is_none());
        assert!(v.string_shape.is_none());
        assert!(v.taint.kinds.is_empty());
        assert!(v.taint.cleansed_by.is_empty());
    }

    #[test]
    fn value_flow_serde_round_trip() {
        let v = ValueFlow {
            taint: Taint {
                kinds: vec![TaintKind::UserInput, TaintKind::DynamicSql],
                cleansed_by: vec![TaintCleanser::DbmsAssert],
            },
            constant: Some(ConstantValue::Str {
                value: "hello".into(),
            }),
            value_set: ValueSet::OneOf {
                values: vec![int("1")],
            },
            string_shape: Some(StringShape::InterpolatedWithFix {
                literal_prefix: "SELECT * FROM ".into(),
                literal_suffix: " WHERE id = 1".into(),
            }),
        };
        let json = serde_json::to_string(&v).unwrap();
        let back: ValueFlow = serde_json::from_str(&json).unwrap();
        assert_eq!(back, v);
        assert!(json.contains("\"user_input\""));
        assert!(json.contains("\"dbms_assert\""));
    }
}