plsql_ir/flow.rs
1//! Value-flow, taint, constant, value-set, and string-shape
2//! models.
3//!
4//! Downstream SAST and lineage layers reason about *how* values
5//! propagate, not just *whether* a name binds. This module
6//! defines the shapes those passes share so they all speak the
7//! same vocabulary:
8//!
9//! * [`TaintKind`] — the family of taint a value carries
10//! (user-supplied, dynamic-SQL, db-link, file-system, …).
11//! * [`ConstantValue`] — when a value is provably constant, its
12//! wire form (number / string / bool / null).
13//! * [`ValueSet`] — abstract domain summarising the set of values
14//! a name might hold (Top / `OneOf` / `Range` / `Bottom`).
//! * [`StringShape`] — abstract domain for string values
//!   (literal / interpolated-with-prefix-and-suffix / fully-opaque / empty).
17//! * [`ValueFlow`] — the per-name aggregate the passes return.
18//!
19//! Population happens in the intra- / inter-procedural flow passes.
20//! This module ships the types + serde + small helpers so the
21//! consumers (SAST, bindings, doc) program against a stable surface
22//! today.
23//!
24//! ## /oracle evidence
25//!
26//! * `DATABASE-REFERENCE.md` PL/SQL Language Reference — the
27//! bind-variable + parameter-mode chapters drive how taint
28//! enters a routine. `DBMS_ASSERT` (see
29//! `LOW-LEVEL-CATALOGS.md` supplied-packages) is the
30//! sanctioned cleanser.
31
32use serde::{Deserialize, Serialize};
33
/// Per-name aggregate flow report.
///
/// Bundles the taint, constant, value-set, and string-shape facts recorded
/// for a single name. The derived `Default` is the "nothing known" report:
/// an empty [`Taint`], no constant, [`ValueSet::Top`], and no string shape.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct ValueFlow {
    /// Live taint kinds plus the cleansers recorded for the value.
    pub taint: Taint,
    /// Constant wire form of the value; `None` when no constant is recorded.
    pub constant: Option<ConstantValue>,
    /// Abstract summary of the set of values the name might hold.
    pub value_set: ValueSet,
    /// Abstract string shape of the value; `None` when no shape is recorded.
    pub string_shape: Option<StringShape>,
}
42
/// Taint state. `kinds` lists the *live* (uncleansed) taint sources that
/// flow into the value — a bound sanitiser (e.g. a `DBMS_ASSERT.*` call)
/// removes the kinds it cleanses, so a sanitized value carries no live kind.
/// `cleansed_by` records which sanitisers fired anywhere in the value's
/// derivation (kept for reporting, not for the alarm). SAST emits a finding
/// iff `kinds` is non-empty. Tracking *live* kinds (rather than all-seen
/// kinds gated on an empty `cleansed_by`) binds cleansing to the sanitized
/// sub-expression, so taint concatenated alongside a sanitized operand still
/// alarms (e.g. `DBMS_ASSERT.ENQUOTE_LITERAL('x') || p_user`).
///
/// The derived `Default` has both lists empty, i.e. no taint and no alarm.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Taint {
    /// Live (uncleansed) taint kinds; any entry here raises the alarm.
    pub kinds: Vec<TaintKind>,
    /// Sanitisers seen in the value's derivation; reporting only, never
    /// consulted when deciding whether to alarm.
    pub cleansed_by: Vec<TaintCleanser>,
}
57
/// Family of taint source a value carries.
///
/// Variants serialise as snake_case strings (e.g. `UserInput` becomes
/// `"user_input"`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TaintKind {
    /// Value came from an IN parameter of a public routine.
    UserInput,
    /// Value came from a bind variable.
    BindVariable,
    /// Value came from `EXECUTE IMMEDIATE` / `OPEN FOR <expr>`
    /// dynamic SQL substitution.
    DynamicSql,
    /// Value came from a remote `name@dblink` reference.
    DbLink,
    /// Value came from a file-system read (`UTL_FILE`).
    FileSystem,
    /// Value came from `UTL_HTTP` / `UTL_TCP` / `UTL_SMTP`.
    Network,
    /// Value came from the OS environment (`DBMS_SYSTEM`,
    /// `SYS_CONTEXT('USERENV', …)`).
    Environment,
    /// Value came from an Oracle scheduler argument
    /// (`DBMS_SCHEDULER.SET_JOB_ARGUMENT_VALUE`).
    SchedulerArgument,
    /// Value flowed through a sub-expression the recognizer could not lower
    /// (`Expr::Raw` — an unrecognized shape such as a SQL `CASE` expression, an
    /// unbalanced/unterminated fragment, or a depth-limit-collapsed concat
    /// tail). The analyzer cannot prove the value safe, so it fails CLOSED:
    /// the value is treated as potentially-injectable rather than silently
    /// dropped (R13 — never swallow a blind spot). Carrying this as a live
    /// taint kind makes a downstream dynamic-SQL sink flag it instead of
    /// reading the un-lowered value as clean (oracle-qo1v.2).
    Unanalyzable,
}
90
/// Sanitiser (or other reason) recorded as having cleansed a value.
///
/// Entries are kept in [`Taint::cleansed_by`] for reporting. Variants
/// serialise as snake_case strings (e.g. `DbmsAssert` becomes
/// `"dbms_assert"`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TaintCleanser {
    /// One of the `DBMS_ASSERT.*` sanitisers (per SYM-005).
    DbmsAssert,
    /// `SYS.UTL_RAW.CAST_TO_RAW` / equivalent hex-encode.
    HexEncode,
    /// Operator wrote a literal-only string — no taint flow.
    LiteralOnly,
    /// `DBMS_OUTPUT.PUT_LINE` consumer — taint does not flow
    /// back into the database (terminal sink).
    OutputSink,
    /// Caller explicitly annotated the value as cleansed via a
    /// project-local convention (e.g. comment marker).
    OperatorAttested,
}
107
/// When the value is provably constant, its wire form.
///
/// The enum uses serde's *internally tagged* representation
/// (`tag = "kind"`): the variant name is written as a `kind` field next to
/// the variant's own fields. That representation cannot encode newtype
/// variants carrying `String` / primitive payloads, so every payload-bearing
/// variant uses struct-form fields instead.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum ConstantValue {
    /// Integer literal preserved verbatim.
    Int { value: String },
    /// Floating-point or fixed-point literal preserved verbatim.
    Float { value: String },
    /// String literal body, doubled-`''` already de-escaped.
    Str { value: String },
    /// Boolean literal.
    Bool { value: bool },
    /// `NULL` literal.
    Null,
}
126
/// Abstract domain summarising the set of values a name might
/// hold. The lattice is `Bottom < Range / OneOf < Top` —
/// passes refine `Top` toward the more specific variants as
/// they accumulate evidence.
///
/// The derived `Default` is [`ValueSet::Top`]. Serialised with serde's
/// internally tagged representation: the snake_case variant name is written
/// as a `kind` field.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum ValueSet {
    /// No information yet — could be anything.
    #[default]
    Top,
    /// Value is one of a finite set of constants.
    OneOf { values: Vec<ConstantValue> },
    /// Numeric range `[lo, hi]` inclusive — `lo` / `hi` carry the
    /// constant's wire form so `Range` covers integers, floats,
    /// and bounded enums.
    Range {
        /// Inclusive lower bound.
        lo: ConstantValue,
        /// Inclusive upper bound.
        hi: ConstantValue,
    },
    /// Empty set — the value is provably unreachable.
    Bottom,
}
149
/// Abstract domain for string values. Powers SAST rules around
/// dynamic-SQL composition + URL / file-path opening.
///
/// Serialised with serde's internally tagged representation: the snake_case
/// variant name is written as a `kind` field (e.g. `"kind":"literal"`).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum StringShape {
    /// String is a single literal.
    Literal { value: String },
    /// String is built from `literal_prefix` + a runtime
    /// expression + `literal_suffix`. Either prefix / suffix may
    /// be empty.
    InterpolatedWithFix {
        /// Fixed text that precedes the runtime expression.
        literal_prefix: String,
        /// Fixed text that follows the runtime expression.
        literal_suffix: String,
    },
    /// String is a concat of constants and runtime expressions
    /// with no usable fixed substring on either end.
    FullyOpaque,
    /// String is empty.
    Empty,
}
170
171impl Taint {
172 /// True iff the value carries any *live* (uncleansed) taint kind.
173 /// `kinds` already excludes anything a bound sanitiser consumed (see the
174 /// struct doc), so the alarm is a simple non-emptiness check — no longer
175 /// gated on `cleansed_by`, which a sibling cleanse used to satisfy and
176 /// thereby mask a concatenated tainted operand (the SEC001 fail-open).
177 #[must_use]
178 pub fn flags_alarm(&self) -> bool {
179 !self.kinds.is_empty()
180 }
181}
182
183impl ValueSet {
184 /// Merge two `ValueSet`s with the lattice join. Top
185 /// dominates; Bottom yields the other side; two `OneOf`s
186 /// union their value lists.
187 #[must_use]
188 pub fn join(self, other: ValueSet) -> ValueSet {
189 match (self, other) {
190 (ValueSet::Top, _) | (_, ValueSet::Top) => ValueSet::Top,
191 (ValueSet::Bottom, x) | (x, ValueSet::Bottom) => x,
192 (ValueSet::OneOf { mut values }, ValueSet::OneOf { values: other }) => {
193 for v in other {
194 if !values.contains(&v) {
195 values.push(v);
196 }
197 }
198 ValueSet::OneOf { values }
199 }
200 // Range + OneOf / Range + Range → Top (over-approx).
201 // Callers needing tighter joins can specialise.
202 _ => ValueSet::Top,
203 }
204 }
205}
206
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an integer constant in its verbatim wire form.
    fn int(value: &str) -> ConstantValue {
        ConstantValue::Int {
            value: value.into(),
        }
    }

    /// Shorthand for a `OneOf` set over integer constants.
    fn one_of(values: &[&str]) -> ValueSet {
        ValueSet::OneOf {
            values: values.iter().map(|v| int(v)).collect(),
        }
    }

    #[test]
    fn taint_flags_alarm_when_no_cleanser() {
        let t = Taint {
            kinds: vec![TaintKind::UserInput],
            cleansed_by: vec![],
        };
        assert!(t.flags_alarm());
    }

    #[test]
    fn taint_does_not_flag_when_cleansed() {
        // A value sanitised by a bound cleanser carries NO live kind: the
        // cleanser drained the kinds it consumed. `cleansed_by` is retained only
        // for reporting and does not by itself suppress the alarm.
        let t = Taint {
            kinds: vec![],
            cleansed_by: vec![TaintCleanser::DbmsAssert],
        };
        assert!(!t.flags_alarm());
    }

    #[test]
    fn taint_flags_when_live_kind_present_despite_a_recorded_cleanser() {
        // Regression for the SEC001 fail-open: a cleanser recorded somewhere in
        // the derivation must NOT mask a live (uncleansed) kind from a sibling.
        let t = Taint {
            kinds: vec![TaintKind::UserInput],
            cleansed_by: vec![TaintCleanser::DbmsAssert],
        };
        assert!(t.flags_alarm());
    }

    #[test]
    fn taint_default_no_alarm() {
        assert!(!Taint::default().flags_alarm());
    }

    #[test]
    fn value_set_top_dominates_join() {
        // `Top` must win regardless of which operand it is.
        assert_eq!(ValueSet::Top.join(one_of(&["1"])), ValueSet::Top);
        assert_eq!(one_of(&["1"]).join(ValueSet::Top), ValueSet::Top);
    }

    #[test]
    fn value_set_bottom_yields_other_side() {
        // `Bottom` is the identity on either side; compare the whole value,
        // not just its length, so a corrupted payload is caught too.
        assert_eq!(ValueSet::Bottom.join(one_of(&["7"])), one_of(&["7"]));
        assert_eq!(one_of(&["7"]).join(ValueSet::Bottom), one_of(&["7"]));
    }

    #[test]
    fn one_of_join_unions_values_dedup() {
        // Left operand's values come first; the duplicate "2" appears once.
        let joined = one_of(&["1", "2"]).join(one_of(&["2", "3"]));
        assert_eq!(joined, one_of(&["1", "2", "3"]));
    }

    #[test]
    fn range_plus_one_of_widens_to_top() {
        let range = ValueSet::Range {
            lo: int("0"),
            hi: int("10"),
        };
        assert_eq!(range.clone().join(one_of(&["5"])), ValueSet::Top);
        assert_eq!(one_of(&["5"]).join(range), ValueSet::Top);
    }

    #[test]
    fn distinct_ranges_widen_to_top() {
        let narrow = ValueSet::Range {
            lo: int("0"),
            hi: int("10"),
        };
        let wide = ValueSet::Range {
            lo: int("0"),
            hi: int("20"),
        };
        assert_eq!(narrow.join(wide), ValueSet::Top);
    }

    #[test]
    fn string_shape_variants_serialise_snake_case() {
        let lit = StringShape::Literal {
            value: "hello".into(),
        };
        let json = serde_json::to_string(&lit).expect("StringShape::Literal serialises");
        assert!(json.contains("\"kind\":\"literal\""));
        let opaque = StringShape::FullyOpaque;
        assert!(
            serde_json::to_string(&opaque)
                .expect("StringShape::FullyOpaque serialises")
                .contains("\"fully_opaque\"")
        );
    }

    #[test]
    fn value_flow_default_is_top_no_taint_no_constant() {
        let v = ValueFlow::default();
        assert_eq!(v.value_set, ValueSet::Top);
        assert!(v.constant.is_none());
        assert!(v.string_shape.is_none());
        assert!(v.taint.kinds.is_empty());
        assert!(v.taint.cleansed_by.is_empty());
    }

    #[test]
    fn value_flow_serde_round_trip() {
        let v = ValueFlow {
            taint: Taint {
                kinds: vec![TaintKind::UserInput, TaintKind::DynamicSql],
                cleansed_by: vec![TaintCleanser::DbmsAssert],
            },
            constant: Some(ConstantValue::Str {
                value: "hello".into(),
            }),
            value_set: one_of(&["1"]),
            string_shape: Some(StringShape::InterpolatedWithFix {
                literal_prefix: "SELECT * FROM ".into(),
                literal_suffix: " WHERE id = 1".into(),
            }),
        };
        let json = serde_json::to_string(&v).expect("ValueFlow serialises");
        let back: ValueFlow = serde_json::from_str(&json).expect("ValueFlow deserialises");
        assert_eq!(back, v);
        assert!(json.contains("\"user_input\""));
        assert!(json.contains("\"dbms_assert\""));
    }
}
353}