Skip to main content

plsql_ir/
flow_inter.rs

1//! Single-hop inter-procedural parameter/return flow.
2//!
3//! FLOW-002 propagates taint within one routine. This pass joins
4//! routines: when routine A calls routine B, the taint of A's
5//! actual arguments flows into B's formal parameters, and B's
6//! return taint flows back to A's call-site assignment.
7//!
8//! Each call edge is resolved **once** against the callee's
9//! [`RoutineFlowSummary`] — a single hop. The pass does NOT follow
10//! transitive chains (A→B→C) or iterate a recursive frontier;
11//! multi-hop following is a future pass and is intentionally out of
12//! scope here. A direct self-call (A→A) and any call whose callee
13//! summary is missing (external package, db-link, dynamic dispatch)
14//! are recorded as conservative [`FlowUnknownFact`]s so R13
15//! reporting never silently drops the boundary.
16//!
17//! Routine summaries are supplied by the caller as
18//! [`RoutineFlowSummary`] records (param taint sensitivity +
19//! return taint) so this module stays free of a hard
20//! `plsql-symbols` dependency.
21//!
22//! ## /oracle evidence
23//!
24//! * `DATABASE-REFERENCE.md` PL/SQL Language Reference —
25//!   parameter modes (IN copies in, OUT copies back, IN OUT
26//!   both) define the flow direction across a call boundary.
27//! * `LOW-LEVEL-CATALOGS.md` — `ALL_ARGUMENTS` is the
28//!   server-side authority for a routine's formal-parameter
29//!   list when the source summary is unavailable.
30
31use std::collections::BTreeMap;
32
33use serde::{Deserialize, Serialize};
34
35use crate::flow::TaintKind;
36
37/// Per-routine flow summary the caller supplies. `param_taints`
38/// maps a 0-based parameter index to the taint kinds that param
39/// propagates into the body; `returns_taint` is the taint a
40/// caller should attribute to the call's result.
41#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
42pub struct RoutineFlowSummary {
43    pub logical_id: String,
44    pub param_taints: BTreeMap<usize, Vec<TaintKind>>,
45    pub returns_taint: Vec<TaintKind>,
46}
47
48/// A call site to resolve: `caller` invokes `callee` with the
49/// taint kinds of each positional actual argument.
50#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
51pub struct CallEdgeFlow {
52    pub caller: String,
53    pub callee: String,
54    /// Taint kinds of each positional actual argument.
55    pub actual_arg_taints: Vec<Vec<TaintKind>>,
56}
57
58/// Conservative boundary record (R13). Emitted whenever the pass
59/// cannot resolve a call: missing callee summary, or a direct
60/// recursion (self-call).
61#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
62pub struct FlowUnknownFact {
63    pub at_caller: String,
64    pub callee: String,
65    pub reason: FlowUnknownReason,
66}
67
68#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
69#[serde(rename_all = "snake_case")]
70pub enum FlowUnknownReason {
71    /// No `RoutineFlowSummary` for the callee (external package,
72    /// db-link, runtime dispatch).
73    MissingCalleeSummary,
74    /// Callee is the caller itself (direct recursion); the
75    /// single-hop pass does not unfold the cycle.
76    RecursionCycle,
77}
78
79/// Result of an inter-procedural propagation run.
80#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
81pub struct InterFlowResult {
82    /// Taint attributed to each caller's call-site result, keyed
83    /// by `(caller, callee)`.
84    pub propagated_returns: Vec<PropagatedReturn>,
85    pub unknowns: Vec<FlowUnknownFact>,
86}
87
88#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
89pub struct PropagatedReturn {
90    pub caller: String,
91    pub callee: String,
92    pub result_taint: Vec<TaintKind>,
93}
94
95/// Propagate taint across `call_edges` using the supplied
96/// `summaries`. Each edge is resolved one hop against its callee
97/// summary; a direct self-call and a missing summary surface as
98/// `FlowUnknownFact` (R13).
99#[must_use]
100pub fn propagate_inter(
101    call_edges: &[CallEdgeFlow],
102    summaries: &[RoutineFlowSummary],
103) -> InterFlowResult {
104    let by_id: BTreeMap<&str, &RoutineFlowSummary> = summaries
105        .iter()
106        .map(|s| (s.logical_id.as_str(), s))
107        .collect();
108    let mut result = InterFlowResult::default();
109
110    for edge in call_edges {
111        resolve_edge(edge, &by_id, &mut result);
112    }
113    result
114}
115
116fn resolve_edge(
117    edge: &CallEdgeFlow,
118    by_id: &BTreeMap<&str, &RoutineFlowSummary>,
119    result: &mut InterFlowResult,
120) {
121    // Single-hop: a callee that is the caller itself is a direct
122    // recursion the pass does not unfold. (Transitive cycles A→B→A
123    // are out of scope until multi-hop following lands.)
124    if edge.callee == edge.caller {
125        result.unknowns.push(FlowUnknownFact {
126            at_caller: edge.caller.clone(),
127            callee: edge.callee.clone(),
128            reason: FlowUnknownReason::RecursionCycle,
129        });
130        return;
131    }
132    let Some(summary) = by_id.get(edge.callee.as_str()) else {
133        result.unknowns.push(FlowUnknownFact {
134            at_caller: edge.caller.clone(),
135            callee: edge.callee.clone(),
136            reason: FlowUnknownReason::MissingCalleeSummary,
137        });
138        return;
139    };
140
141    // The callee's return taint = its declared return taint, plus
142    // any taint that an actual argument introduces into a
143    // taint-sensitive parameter.
144    let mut result_taint: Vec<TaintKind> = summary.returns_taint.clone();
145    for (idx, actual) in edge.actual_arg_taints.iter().enumerate() {
146        if let Some(param_kinds) = summary.param_taints.get(&idx)
147            && !param_kinds.is_empty()
148        {
149            // Param is taint-propagating: the actual's taint flows
150            // through to the result.
151            for k in actual {
152                if !result_taint.contains(k) {
153                    result_taint.push(*k);
154                }
155            }
156        }
157    }
158    result.propagated_returns.push(PropagatedReturn {
159        caller: edge.caller.clone(),
160        callee: edge.callee.clone(),
161        result_taint,
162    });
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: summary for routine `id` built from `(index, kinds)`
    /// parameter entries and a declared return taint.
    fn summ(
        id: &str,
        params: &[(usize, &[TaintKind])],
        ret: &[TaintKind],
    ) -> RoutineFlowSummary {
        RoutineFlowSummary {
            logical_id: id.to_owned(),
            param_taints: params
                .iter()
                .map(|(idx, kinds)| (*idx, kinds.to_vec()))
                .collect(),
            returns_taint: ret.to_vec(),
        }
    }

    /// Fixture: call edge `caller → callee` with the given per-argument
    /// taint kinds.
    fn edge(caller: &str, callee: &str, actuals: &[&[TaintKind]]) -> CallEdgeFlow {
        CallEdgeFlow {
            caller: caller.to_owned(),
            callee: callee.to_owned(),
            actual_arg_taints: actuals.iter().map(|kinds| kinds.to_vec()).collect(),
        }
    }

    #[test]
    fn taint_flows_through_propagating_param_to_result() {
        let r = propagate_inter(
            &[edge("a", "b", &[&[TaintKind::UserInput]])],
            &[summ("b", &[(0, &[TaintKind::UserInput])], &[])],
        );
        assert_eq!(r.propagated_returns.len(), 1);
        let taint = &r.propagated_returns[0].result_taint;
        assert!(taint.contains(&TaintKind::UserInput));
        assert!(r.unknowns.is_empty());
    }

    #[test]
    fn non_propagating_param_does_not_taint_result() {
        // `b` has no `param_taints` entry for index 0, so the param is inert.
        let r = propagate_inter(
            &[edge("a", "b", &[&[TaintKind::UserInput]])],
            &[summ("b", &[], &[])],
        );
        assert!(r.propagated_returns[0].result_taint.is_empty());
    }

    #[test]
    fn declared_return_taint_always_present() {
        let r = propagate_inter(
            &[edge("a", "b", &[])],
            &[summ("b", &[], &[TaintKind::DbLink])],
        );
        let taint = &r.propagated_returns[0].result_taint;
        assert!(taint.contains(&TaintKind::DbLink));
    }

    #[test]
    fn missing_summary_records_unknown() {
        let r = propagate_inter(&[edge("a", "external_pkg.proc", &[])], &[]);
        assert_eq!(r.unknowns.len(), 1);
        assert_eq!(
            r.unknowns[0].reason,
            FlowUnknownReason::MissingCalleeSummary
        );
    }

    #[test]
    fn direct_recursion_records_cycle_unknown() {
        let r = propagate_inter(&[edge("rec", "rec", &[])], &[summ("rec", &[], &[])]);
        assert_eq!(r.unknowns[0].reason, FlowUnknownReason::RecursionCycle);
    }

    #[test]
    fn multiple_taint_kinds_union_into_result() {
        let r = propagate_inter(
            &[edge(
                "a",
                "b",
                &[&[TaintKind::UserInput, TaintKind::BindVariable]],
            )],
            &[summ("b", &[(0, &[TaintKind::UserInput])], &[])],
        );
        let t = &r.propagated_returns[0].result_taint;
        assert!(t.contains(&TaintKind::UserInput));
        assert!(t.contains(&TaintKind::BindVariable));
    }

    #[test]
    fn result_taint_dedupes() {
        // `returns_taint` already carries UserInput; the actual adds it again.
        let r = propagate_inter(
            &[edge("a", "b", &[&[TaintKind::UserInput]])],
            &[summ(
                "b",
                &[(0, &[TaintKind::UserInput])],
                &[TaintKind::UserInput],
            )],
        );
        let count = r.propagated_returns[0]
            .result_taint
            .iter()
            .filter(|k| **k == TaintKind::UserInput)
            .count();
        assert_eq!(count, 1);
    }

    #[test]
    fn serde_round_trip() {
        let r = propagate_inter(&[edge("a", "missing", &[])], &[]);
        let json = serde_json::to_string(&r).unwrap();
        let back: InterFlowResult = serde_json::from_str(&json).unwrap();
        assert_eq!(back, r);
        assert!(json.contains("missing_callee_summary"));
    }

    #[test]
    fn chain_is_resolved_single_hop_not_transitively() {
        // Each edge is resolved ONCE against its callee summary; A→B→C is
        // not followed transitively. Given a→b and b→c where c returns
        // DbLink, we expect two independent single-hop records, and c's
        // DbLink must NOT show up on a's record. This pins the documented
        // single-hop scope (it replaced the former
        // `depth_cap_fires_when_chain_exceeds_max` 6==6 tautology, which
        // faked coverage of a depth cap no input could ever trigger).
        let edges = [
            edge("a", "b", &[&[TaintKind::UserInput]]),
            edge("b", "c", &[&[TaintKind::UserInput]]),
        ];
        let summaries = [
            summ("b", &[(0, &[TaintKind::UserInput])], &[]),
            summ("c", &[], &[TaintKind::DbLink]),
        ];
        let r = propagate_inter(&edges, &summaries);
        assert!(r.unknowns.is_empty());
        assert_eq!(r.propagated_returns.len(), 2);

        let a_rec = r
            .propagated_returns
            .iter()
            .find(|p| p.caller == "a")
            .expect("a→b record present");
        assert!(
            a_rec.result_taint.contains(&TaintKind::UserInput),
            "a→b folds the actual's UserInput through b's propagating param"
        );
        assert!(
            !a_rec.result_taint.contains(&TaintKind::DbLink),
            "single-hop: c's DbLink must NOT transitively reach a"
        );

        let b_rec = r
            .propagated_returns
            .iter()
            .find(|p| p.caller == "b")
            .expect("b→c record present");
        assert!(
            b_rec.result_taint.contains(&TaintKind::DbLink),
            "b→c carries c's declared return taint to b"
        );
    }

    #[test]
    fn distinct_caller_callee_with_same_name_in_two_edges_is_not_a_cycle() {
        // Guard against over-eager cycle detection: a→b and b→a are two
        // distinct single-hop edges (mutual recursion at the chain level),
        // but NEITHER is a direct self-call, so both resolve and neither
        // is flagged as a cycle.
        let edges = [edge("a", "b", &[]), edge("b", "a", &[])];
        let summaries = [summ("a", &[], &[]), summ("b", &[], &[])];
        let r = propagate_inter(&edges, &summaries);
        assert!(
            r.unknowns.is_empty(),
            "mutual edges are single-hop resolvable, not direct self-recursion"
        );
        assert_eq!(r.propagated_returns.len(), 2);
    }
}