// plsql_ir/flow_query.rs

//! Taint-path + string-shape query API.
//!
//! The SAST layer (Layer 3) and the dynamic-SQL consumers need
//! to ask "is this name tainted, by what, and was it cleansed?"
//! and "what's the string shape of this name?" — but Layer 2
//! (this crate) must not depend on Layer 3. So the query surface
//! lives here, on top of the FLOW-002 [`FlowEnv`] +
//! FLOW-003 [`InterFlowResult`], and Layer 3 consumes it.
//!
//! The API is read-only and allocation-light: every query takes
//! a name + the analysis outputs and returns a small typed
//! answer the SAST rule pack can pattern-match on.
//!
//! ## /oracle evidence
//!
//! * `DATABASE-REFERENCE.md` PL/SQL Language Reference — the
//!   taint sources (bind variables, IN parameters) and the
//!   `DBMS_ASSERT` cleanser come straight from the language +
//!   supplied-package references; this module only re-projects
//!   the flow facts those passes already computed.
21
22use serde::{Deserialize, Serialize};
23
24use crate::flow::{StringShape, TaintCleanser, TaintKind};
25use crate::flow_inter::InterFlowResult;
26use crate::flow_intra::FlowEnv;
27
28/// Answer to "is this name tainted?".
29#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
30pub struct TaintAnswer {
31    /// True when at least one taint kind has no matching cleanser.
32    pub is_tainted: bool,
33    pub kinds: Vec<TaintKind>,
34    pub cleansed_by: Vec<TaintCleanser>,
35}
36
37/// Read-only query facade over the flow analysis outputs.
38#[derive(Clone, Copy, Debug)]
39pub struct FlowQuery<'a> {
40    env: &'a FlowEnv,
41    inter: Option<&'a InterFlowResult>,
42}
43
44impl<'a> FlowQuery<'a> {
45    #[must_use]
46    pub fn new(env: &'a FlowEnv) -> Self {
47        Self { env, inter: None }
48    }
49
50    /// Attach inter-procedural results so call-site result taint
51    /// is folded into `taint_of`.
52    #[must_use]
53    pub fn with_inter(mut self, inter: &'a InterFlowResult) -> Self {
54        self.inter = Some(inter);
55        self
56    }
57
58    /// Taint verdict for `name`. Folds in any inter-procedural
59    /// propagated-return taint whose `caller` matches `name`
60    /// (the call-site assignment target).
61    #[must_use]
62    pub fn taint_of(&self, name: &str) -> TaintAnswer {
63        let mut kinds: Vec<TaintKind> = Vec::new();
64        let mut cleansed: Vec<TaintCleanser> = Vec::new();
65        if let Some(f) = self.env.get(name) {
66            kinds.extend(f.taint.kinds.iter().copied());
67            cleansed.extend(f.taint.cleansed_by.iter().copied());
68        }
69        if let Some(inter) = self.inter {
70            for pr in &inter.propagated_returns {
71                if pr.caller.eq_ignore_ascii_case(name) {
72                    for k in &pr.result_taint {
73                        if !kinds.contains(k) {
74                            kinds.push(*k);
75                        }
76                    }
77                }
78            }
79        }
80        // `kinds` holds only live (uncleansed) taint — a bound sanitiser already
81        // dropped what it consumed — so a non-empty `kinds` is the alarm. We do
82        // NOT additionally require `cleansed.is_empty()`: a cleanser recorded for
83        // one operand must not suppress a live kind from a concatenated sibling.
84        let is_tainted = !kinds.is_empty();
85        TaintAnswer {
86            is_tainted,
87            kinds,
88            cleansed_by: cleansed,
89        }
90    }
91
92    /// True iff `name` carries `kind` (regardless of cleansing).
93    #[must_use]
94    pub fn has_taint_kind(&self, name: &str, kind: TaintKind) -> bool {
95        self.taint_of(name).kinds.contains(&kind)
96    }
97
98    /// String shape of `name`, if the flow pass computed one.
99    #[must_use]
100    pub fn string_shape_of(&self, name: &str) -> Option<StringShape> {
101        self.env.get(name).and_then(|f| f.string_shape.clone())
102    }
103
104    /// True iff `name` reaches a dynamic-SQL sink while tainted
105    /// AND no cleanser fired — the canonical SAST injection
106    /// predicate. `is_dynamic_sink` is supplied by the Layer 3
107    /// caller (which knows the sink set) so Layer 2 stays
108    /// independent of the SAST rule pack.
109    #[must_use]
110    pub fn taint_reaches_sink(&self, name: &str, is_dynamic_sink: bool) -> bool {
111        is_dynamic_sink && self.taint_of(name).is_tainted
112    }
113
114    /// Every name in the environment that is currently tainted
115    /// (uncleansed). Sorted for deterministic reports.
116    #[must_use]
117    pub fn tainted_names(&self) -> Vec<String> {
118        let mut out: Vec<String> = self
119            .env
120            .iter_names()
121            .filter(|n| self.taint_of(n).is_tainted)
122            .collect();
123        out.sort();
124        out
125    }
126}
127
#[cfg(test)]
mod tests {
    use super::*;
    use crate::flow_intra::{TaintSources, analyze_flow};
    use crate::lower_statement_body;

    /// Lower `src` and run the intra-procedural flow pass over it, with
    /// `user` registered as user-input names and no bind names.
    fn env(src: &str, user: &[&str]) -> FlowEnv {
        let lowered = lower_statement_body(src);
        let sources = TaintSources {
            user_input_names: user.iter().map(|s| (*s).to_owned()).collect(),
            bind_names: Vec::new(),
        };
        analyze_flow(&lowered, &sources)
    }

    #[test]
    fn taint_of_reports_tainted_user_input() {
        let flow = env("v_sql := p_user;", &["p_user"]);
        let answer = FlowQuery::new(&flow).taint_of("v_sql");
        assert!(answer.is_tainted);
        assert!(answer.kinds.contains(&TaintKind::UserInput));
    }

    #[test]
    fn taint_of_clean_name_is_not_tainted() {
        let flow = env("v_x := 42;", &[]);
        let answer = FlowQuery::new(&flow).taint_of("v_x");
        assert!(!answer.is_tainted);
    }

    #[test]
    fn cleansed_name_not_flagged() {
        let flow = env("v_s := DBMS_ASSERT.SIMPLE_SQL_NAME(p_user);", &["p_user"]);
        let answer = FlowQuery::new(&flow).taint_of("v_s");
        assert!(!answer.is_tainted);
        assert!(answer.cleansed_by.contains(&TaintCleanser::DbmsAssert));
    }

    #[test]
    fn sanitized_value_carries_no_live_taint_kind() {
        // Once DBMS_ASSERT has sanitised the whole value, no live taint kind
        // is left on it: the name is not flagged and is not reported as a
        // UserInput carrier. `cleansed_by` still notes that the cleanser
        // fired (covered by `cleansed_name_not_flagged`). The older
        // "tainted-but-cleansed" representation allowed a cleanse on one
        // operand to mask a tainted operand concatenated next to it.
        let flow = env("v_s := DBMS_ASSERT.SIMPLE_SQL_NAME(p_user);", &["p_user"]);
        let query = FlowQuery::new(&flow);
        assert!(!query.has_taint_kind("v_s", TaintKind::UserInput));
        assert!(!query.taint_of("v_s").is_tainted);
    }

    #[test]
    fn string_shape_query_returns_literal() {
        let flow = env("v_msg := 'hello';", &[]);
        let shape = FlowQuery::new(&flow).string_shape_of("v_msg");
        match shape {
            Some(StringShape::Literal { value }) => assert_eq!(value, "hello"),
            unexpected => panic!("{unexpected:?}"),
        }
    }

    #[test]
    fn taint_reaches_sink_predicate() {
        let flow = env("v_sql := p_user;", &["p_user"]);
        let query = FlowQuery::new(&flow);
        // Same tainted name: flagged only when the caller says it is a sink.
        assert!(query.taint_reaches_sink("v_sql", true));
        assert!(!query.taint_reaches_sink("v_sql", false));
    }

    #[test]
    fn tainted_names_sorted_and_filtered() {
        let flow = env("z := p_a; a := p_a; clean := 1;", &["p_a"]);
        let names = FlowQuery::new(&flow).tainted_names();
        for expected in ["A", "Z"] {
            assert!(names.iter().any(|n| n == expected));
        }
        assert!(!names.iter().any(|n| n == "CLEAN"));
        // Sorted: every adjacent pair is in non-decreasing order.
        assert!(names.windows(2).all(|pair| pair[0] <= pair[1]));
    }

    #[test]
    fn inter_procedural_return_taint_folded_in() {
        use crate::flow_inter::{InterFlowResult, PropagatedReturn};
        let flow = env("v_x := 0;", &[]);
        let propagated = PropagatedReturn {
            caller: "v_x".into(),
            callee: "tainted_fn".into(),
            result_taint: vec![TaintKind::DbLink],
        };
        let inter = InterFlowResult {
            propagated_returns: vec![propagated],
            unknowns: vec![],
        };
        let answer = FlowQuery::new(&flow).with_inter(&inter).taint_of("v_x");
        assert!(answer.kinds.contains(&TaintKind::DbLink));
        assert!(answer.is_tainted);
    }

    #[test]
    fn answer_serde_round_trip() {
        let flow = env("v_sql := p_user;", &["p_user"]);
        let original = FlowQuery::new(&flow).taint_of("v_sql");
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: TaintAnswer = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }
}