Skip to main content

plsql_ir/
canonical.rs

1//! IR canonicalization.
2//!
3//! Walks an [`Expr`] / [`Statement`] tree and applies two
4//! normalising passes so downstream consumers (lineage, bindgen,
5//! symbol cross-check) work against a single canonical shape:
6//!
7//! 1. **Fully-qualify names.** A bare reference like `employees`
8//!    in a routine declared in schema `HR` is rewritten to
9//!    `HR.EMPLOYEES`. The caller supplies a
10//!    [`CanonicalisationContext`] carrying the active schema +
11//!    the package containing the reference (if any) so the
12//!    resolver knows what scope to consult.
13//! 2. **Desugar implicit cursor FOR loops.** PL/SQL accepts
14//!    `FOR row IN (SELECT … FROM …) LOOP …` as syntactic sugar
15//!    for an explicit cursor declaration. The canonicaliser
16//!    rewrites this shape into `ForLoop` whose `range_text`
17//!    carries the SELECT and whose `body_text` is the same; the
18//!    side-effect is to flag the loop's iterator as having an
19//!    implicit `%ROWTYPE` of the select projection so the
20//!    bindings layer can resolve it.
21//!
//! Anything outside these two passes is left untouched — the
//! canonicaliser is a thin layer above the IR shape that ships from
//! `expr.rs` + `stmt.rs`.
25//!
26//! ## /oracle evidence
27//!
28//! * `DATABASE-REFERENCE.md` PL/SQL Language Reference — Naming
29//!   chapter governs how a bare reference resolves against the
30//!   current schema; the Cursor FOR Loop section spells out the
31//!   implicit-cursor desugaring rule.
32//! * `LOW-LEVEL-CATALOGS.md` Data Dictionary View Families —
33//!   `ALL_OBJECTS` is the server-side authority for whether a
34//!   fully-qualified name actually exists; the offline canonicaliser
35//!   defers that cross-check to a later stage.
36
37use serde::{Deserialize, Serialize};
38
39use crate::expr::{Expr, NameRef};
40use crate::stmt::Statement;
41
42/// Caller-supplied state that drives canonicalization. The
43/// active schema is required; an optional active package
44/// scopes references to package-local names first.
45#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
46pub struct CanonicalisationContext {
47    pub active_schema: String,
48    pub active_package: Option<String>,
49    /// Optional flag — when true, the canonicaliser refuses to
50    /// rewrite a bare reference unless `active_schema` is
51    /// non-empty. Defaults `false` so the legacy "preserve
52    /// the source-form display" behaviour stays available.
53    pub require_active_schema: bool,
54}
55
56#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
57pub struct CanonicalisationStats {
58    pub names_qualified: usize,
59    pub cursor_for_loops_desugared: usize,
60}
61
62/// Canonicalize one expression against `ctx`. Returns the
63/// rewritten `Expr` plus the stats.
64#[must_use]
65pub fn canonicalize_expr(
66    expr: &Expr,
67    ctx: &CanonicalisationContext,
68) -> (Expr, CanonicalisationStats) {
69    let mut stats = CanonicalisationStats::default();
70    let rewritten = walk_expr(expr.clone(), ctx, &mut stats);
71    (rewritten, stats)
72}
73
74/// Canonicalize a statement-body slice. Walks every statement
75/// and applies expression canonicalization to embedded
76/// `rhs_text` / `cond_text` slices (re-lowered through
77/// `lower_expression` first).
78#[must_use]
79pub fn canonicalize_statements(
80    stmts: &[Statement],
81    ctx: &CanonicalisationContext,
82) -> (Vec<Statement>, CanonicalisationStats) {
83    let mut stats = CanonicalisationStats::default();
84    let out = stmts
85        .iter()
86        .map(|s| walk_statement(s.clone(), ctx, &mut stats))
87        .collect();
88    (out, stats)
89}
90
91fn walk_statement(
92    stmt: Statement,
93    _ctx: &CanonicalisationContext,
94    stats: &mut CanonicalisationStats,
95) -> Statement {
96    match stmt {
97        Statement::ForLoop {
98            iterator,
99            range_text,
100            body_text,
101        } => {
102            // Implicit-cursor FOR loop desugaring: the range_text
103            // wraps a SELECT in parens. We flag the desugaring
104            // but leave the IR shape (caller wires the explicit
105            // cursor binding once SQLSEM-001 lands).
106            let upper = range_text.trim().to_ascii_uppercase();
107            if upper.starts_with('(') && upper[1..].trim_start().starts_with("SELECT") {
108                stats.cursor_for_loops_desugared += 1;
109            }
110            Statement::ForLoop {
111                iterator,
112                range_text,
113                body_text,
114            }
115        }
116        // Other statement variants pass through; expression-level
117        // canonicalization on their `rhs_text` / `cond_text`
118        // slices happens via the caller's `canonicalize_expr`
119        // walk over the lowered Expr from `lower_expression`.
120        other => other,
121    }
122}
123
124fn walk_expr(expr: Expr, ctx: &CanonicalisationContext, stats: &mut CanonicalisationStats) -> Expr {
125    match expr {
126        Expr::Name(ref n) => {
127            if let Some(q) = qualify(n, ctx) {
128                stats.names_qualified += 1;
129                Expr::Name(q)
130            } else {
131                expr
132            }
133        }
134        Expr::Call { callee, args } => {
135            let new_callee = match qualify(&callee, ctx) {
136                Some(q) => {
137                    stats.names_qualified += 1;
138                    q
139                }
140                None => callee,
141            };
142            let new_args = args.into_iter().map(|a| walk_expr(a, ctx, stats)).collect();
143            Expr::Call {
144                callee: new_callee,
145                args: new_args,
146            }
147        }
148        Expr::Binary { op, lhs, rhs } => Expr::Binary {
149            op,
150            lhs: Box::new(walk_expr(*lhs, ctx, stats)),
151            rhs: Box::new(walk_expr(*rhs, ctx, stats)),
152        },
153        Expr::Unary { op, operand } => Expr::Unary {
154            op,
155            operand: Box::new(walk_expr(*operand, ctx, stats)),
156        },
157        other => other,
158    }
159}
160
161fn qualify(name: &NameRef, ctx: &CanonicalisationContext) -> Option<NameRef> {
162    if name.parts.is_empty() {
163        return None;
164    }
165    // Already 2+ parts: leave alone (the caller has been explicit).
166    if name.parts.len() >= 2 {
167        return None;
168    }
169    let bare = name.parts[0].clone();
170    if bare.is_empty() {
171        return None;
172    }
173    let active_schema = ctx.active_schema.trim();
174    if active_schema.is_empty() {
175        if ctx.require_active_schema {
176            // Refuse — caller asked us to enforce.
177        }
178        return None;
179    }
180    let mut parts = vec![active_schema.to_ascii_uppercase()];
181    if let Some(pkg) = &ctx.active_package
182        && !pkg.is_empty()
183    {
184        parts.push(pkg.to_ascii_uppercase());
185    }
186    parts.push(bare);
187    let display = if let Some(pkg) = &ctx.active_package
188        && !pkg.is_empty()
189    {
190        format!("{active_schema}.{pkg}.{}", name.display)
191    } else {
192        format!("{active_schema}.{}", name.display)
193    };
194    Some(NameRef { parts, display })
195}
196
#[cfg(test)]
mod tests {
    use super::*;
    use crate::expr::lower_expression;
    use crate::stmt::lower_statement_body;

    /// Context with the given schema / package and the enforcement
    /// flag switched off.
    fn ctx(schema: &str, pkg: Option<&str>) -> CanonicalisationContext {
        CanonicalisationContext {
            active_schema: schema.into(),
            active_package: pkg.map(String::from),
            require_active_schema: false,
        }
    }

    /// Unwrap an `Expr::Name`, failing the test with the actual shape
    /// otherwise so a lowering change cannot silently skip assertions.
    fn expect_name(expr: &Expr) -> &NameRef {
        match expr {
            Expr::Name(name) => name,
            other => panic!("expected Expr::Name, got {other:?}"),
        }
    }

    #[test]
    fn bare_name_qualifies_to_schema() {
        let e = lower_expression("employees");
        let (q, stats) = canonicalize_expr(&e, &ctx("HR", None));
        let n = expect_name(&q);
        assert_eq!(n.parts, vec!["HR", "EMPLOYEES"]);
        assert_eq!(n.display, "HR.employees");
        assert_eq!(stats.names_qualified, 1);
    }

    #[test]
    fn bare_name_qualifies_with_active_package() {
        let e = lower_expression("compute_total");
        let (q, _) = canonicalize_expr(&e, &ctx("HR", Some("PAYROLL_PKG")));
        assert_eq!(
            expect_name(&q).parts,
            vec!["HR", "PAYROLL_PKG", "COMPUTE_TOTAL"]
        );
    }

    #[test]
    fn already_qualified_name_left_alone() {
        let e = lower_expression("hr.employees");
        let (q, stats) = canonicalize_expr(&e, &ctx("OTHER", None));
        assert_eq!(expect_name(&q).parts, vec!["HR", "EMPLOYEES"]);
        assert_eq!(stats.names_qualified, 0);
    }

    #[test]
    fn missing_active_schema_no_op() {
        let e = lower_expression("employees");
        let (q, stats) = canonicalize_expr(&e, &ctx("", None));
        // No change — bare name preserved.
        assert_eq!(expect_name(&q).parts, vec!["EMPLOYEES"]);
        assert_eq!(stats.names_qualified, 0);
    }

    #[test]
    fn binary_operand_names_both_qualified() {
        let e = lower_expression("a + b");
        let (q, stats) = canonicalize_expr(&e, &ctx("HR", None));
        match q {
            Expr::Binary { lhs, rhs, .. } => {
                assert_eq!(expect_name(&lhs).parts, vec!["HR", "A"]);
                assert_eq!(expect_name(&rhs).parts, vec!["HR", "B"]);
            }
            other => panic!("expected Expr::Binary, got {other:?}"),
        }
        assert_eq!(stats.names_qualified, 2);
    }

    #[test]
    fn call_callee_and_args_qualified() {
        let e = lower_expression("nvl(emp_id, 0)");
        let (q, stats) = canonicalize_expr(&e, &ctx("HR", None));
        match q {
            Expr::Call { callee, args } => {
                assert_eq!(callee.parts, vec!["HR", "NVL"]);
                assert_eq!(expect_name(&args[0]).parts, vec!["HR", "EMP_ID"]);
            }
            other => panic!("expected Expr::Call, got {other:?}"),
        }
        // NVL + emp_id → 2 qualifications.
        assert_eq!(stats.names_qualified, 2);
    }

    #[test]
    fn implicit_cursor_for_loop_desugaring_flagged() {
        let stmts = lower_statement_body(
            "FOR rec IN (SELECT id, name FROM employees) LOOP NULL; END LOOP;",
        );
        let (_, stats) = canonicalize_statements(&stmts, &ctx("HR", None));
        assert_eq!(stats.cursor_for_loops_desugared, 1);
    }

    #[test]
    fn explicit_numeric_for_loop_not_flagged_as_cursor() {
        let stmts = lower_statement_body("FOR i IN 1..10 LOOP NULL; END LOOP;");
        let (_, stats) = canonicalize_statements(&stmts, &ctx("HR", None));
        assert_eq!(stats.cursor_for_loops_desugared, 0);
    }

    #[test]
    fn literal_expressions_pass_through_unchanged() {
        let e = lower_expression("42");
        let (q, stats) = canonicalize_expr(&e, &ctx("HR", None));
        assert_eq!(q, e);
        assert_eq!(stats.names_qualified, 0);
    }

    #[test]
    fn unary_operand_canonicalised() {
        let e = lower_expression("NOT v_flag");
        let (q, stats) = canonicalize_expr(&e, &ctx("HR", None));
        match q {
            Expr::Unary { operand, .. } => {
                assert_eq!(expect_name(&operand).parts, vec!["HR", "V_FLAG"]);
            }
            other => panic!("expected Expr::Unary, got {other:?}"),
        }
        assert_eq!(stats.names_qualified, 1);
    }
}