Skip to main content

plsql_ir/
sql_sem.rs

1//! Semantic model for embedded SQL statements.
2//!
3//! `plsql_ir::Statement::Sql` carries the raw SQL text. This
4//! module adds the typed structure downstream lineage needs:
5//! tables referenced, columns read / written, projection items,
6//! and alias scope. Together these form [`SqlStatementModel`] —
7//! one per embedded SQL statement — and [`SqlSemanticModel`] —
8//! the per-package aggregate the lineage layer consumes.
9//!
10//! Population happens in two passes:
11//!
12//! 1. A heuristic recogniser (out of scope for this module) walks
13//!    the raw SQL and emits the structural pieces.
14//! 2. The IR canonicaliser is responsible for
15//!    fully-qualifying every `TableUse.table` and
16//!    `ColumnUse.column` reference once the alias scope has
17//!    been resolved.
18//!
19//! This module ships only the types + the constructor helpers so
20//! the downstream consumers (lineage, doc, bindings) can program
21//! against a stable surface today.
22//!
23//! ## /oracle evidence
24//!
25//! * `DATABASE-REFERENCE.md` PL/SQL Language Reference — the
26//!   embedded-SQL grammar plus the column / table / alias
27//!   semantics come from the SQL Language Reference chapter
28//!   the PL/SQL Language Reference defers to.
29//! * `LOW-LEVEL-CATALOGS.md` Data Dictionary View Families —
30//!   `ALL_TAB_COLUMNS` is the server-side authority later
31//!   passes use to cross-check `ColumnUse.column` against
32//!   the table's declared columns.
33
34use serde::{Deserialize, Serialize};
35
36/// One embedded SQL statement seen from inside a PL/SQL routine
37/// body. Carries the SQL verb (already in `Statement::Sql`), the
38/// list of tables touched, the columns read / written, the
39/// projection (for SELECT) and the alias-to-table map.
40#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
41pub struct SqlStatementModel {
42    pub verb: SqlSemanticVerb,
43    pub tables: Vec<TableUse>,
44    pub reads: Vec<ColumnUse>,
45    pub writes: Vec<ColumnUse>,
46    pub projection: Vec<ProjectionItem>,
47    pub alias_scope: AliasScope,
48}
49
50/// Aggregate over every embedded SQL statement found in a
51/// routine body / package. The lineage layer consumes the
52/// aggregate; the doc + bindings layers consume the per-statement
53/// model.
54#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
55pub struct SqlSemanticModel {
56    pub statements: Vec<SqlStatementModel>,
57}
58
59impl SqlSemanticModel {
60    /// Append a statement model. Returns the position so callers
61    /// can correlate the model with their source-statement
62    /// pointers.
63    pub fn push(&mut self, m: SqlStatementModel) -> usize {
64        let pos = self.statements.len();
65        self.statements.push(m);
66        pos
67    }
68
69    /// Iterator over every (statement_index, statement) pair.
70    pub fn iter(&self) -> impl Iterator<Item = (usize, &SqlStatementModel)> {
71        self.statements.iter().enumerate()
72    }
73
74    /// Sum of unique `(schema, table)` references across every
75    /// statement in the model.
76    #[must_use]
77    pub fn distinct_tables(&self) -> Vec<(String, String)> {
78        let mut out = std::collections::BTreeSet::new();
79        for s in &self.statements {
80            for t in &s.tables {
81                out.insert((t.schema.clone(), t.table.clone()));
82            }
83        }
84        out.into_iter().collect()
85    }
86}
87
88/// SQL verb classification — distinct from `plsql_ir::SqlVerb`
89/// because the semantic model needs to express MERGE's
90/// dual-update + insert nature.
91#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
92#[serde(rename_all = "snake_case")]
93pub enum SqlSemanticVerb {
94    #[default]
95    Select,
96    Insert,
97    Update,
98    Delete,
99    MergeUpdate,
100    MergeInsert,
101    MergeDelete,
102}
103
104/// One referenced table / view / synonym. `alias` is set when
105/// the FROM clause supplied one; otherwise it's the empty string.
106#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
107pub struct TableUse {
108    pub schema: String,
109    pub table: String,
110    pub alias: String,
111    pub usage: TableUsageKind,
112}
113
114#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
115#[serde(rename_all = "snake_case")]
116pub enum TableUsageKind {
117    /// Read-side reference (FROM clause, subquery, USING clause).
118    Read,
119    /// Write-side reference (INSERT INTO / UPDATE / DELETE FROM /
120    /// MERGE INTO).
121    Write,
122    /// Both: a MERGE INTO target that the same statement also
123    /// reads from in the USING clause.
124    ReadWrite,
125}
126
127/// One referenced column. `qualifier` is the alias / table that
128/// scopes the reference; empty when the source SQL referenced
129/// the column bare (an alias-scope resolver will rewrite later).
130#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
131pub struct ColumnUse {
132    pub qualifier: String,
133    pub column: String,
134    /// Column resolution state — drives lineage's
135    /// `ColumnAccessResult::resolution_error`.
136    pub resolution: ColumnResolution,
137}
138
139#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
140#[serde(rename_all = "snake_case")]
141pub enum ColumnResolution {
142    /// Alias-scope resolver mapped the column to a known table.
143    Resolved,
144    /// Star expansion (`*` / `t.*`) — resolved structurally but
145    /// the column list is the table's full projection.
146    StarExpansion,
147    /// Resolver could not find the column on any in-scope table.
148    Unresolved,
149    /// Resolver hasn't run yet; default state right after the
150    /// recogniser populates the model.
151    #[default]
152    Pending,
153}
154
155/// One item in a SELECT's projection list. `alias` is the SQL
156/// alias (after `AS`) if present; `expression_text` carries the
157/// raw expression so downstream readers can re-parse it via
158/// `plsql_ir::lower_expression`.
159#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
160pub struct ProjectionItem {
161    pub alias: String,
162    pub expression_text: String,
163    /// True when this item is a literal star (`*`) or a
164    /// qualified star (`t.*`).
165    pub is_star: bool,
166}
167
168/// Map of alias → fully-qualified table. The lineage resolver
169/// consults this to rewrite bare `col` into `<schema>.<table>.<col>`.
170#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
171pub struct AliasScope {
172    pub bindings: Vec<AliasBinding>,
173}
174
175#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
176pub struct AliasBinding {
177    pub alias: String,
178    pub schema: String,
179    pub table: String,
180}
181
182impl AliasScope {
183    /// Add a binding. Later bindings shadow earlier ones with the
184    /// same alias (Oracle behaviour on duplicate alias).
185    pub fn bind(&mut self, alias: &str, schema: &str, table: &str) {
186        self.bindings.retain(|b| b.alias != alias);
187        self.bindings.push(AliasBinding {
188            alias: alias.into(),
189            schema: schema.into(),
190            table: table.into(),
191        });
192    }
193
194    /// Look up the fully-qualified target for `alias`, returning
195    /// `(schema, table)` if bound. Lookup is case-insensitive on
196    /// the alias key.
197    #[must_use]
198    pub fn resolve(&self, alias: &str) -> Option<(&str, &str)> {
199        let needle = alias.to_ascii_uppercase();
200        self.bindings
201            .iter()
202            .rev()
203            .find(|b| b.alias.eq_ignore_ascii_case(&needle))
204            .map(|b| (b.schema.as_str(), b.table.as_str()))
205    }
206}
207
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `TableUse` from borrowed strings.
    fn table(schema: &str, name: &str, alias: &str, usage: TableUsageKind) -> TableUse {
        TableUse {
            schema: schema.into(),
            table: name.into(),
            alias: alias.into(),
            usage,
        }
    }

    /// Builds a `ColumnUse` whose resolution is explicitly `Pending`.
    fn col(qual: &str, name: &str) -> ColumnUse {
        ColumnUse {
            qualifier: qual.into(),
            column: name.into(),
            resolution: ColumnResolution::Pending,
        }
    }

    /// A default statement model is a SELECT with every collection empty.
    #[test]
    fn default_model_is_empty_select() {
        let m = SqlStatementModel::default();
        assert_eq!(m.verb, SqlSemanticVerb::Select);
        assert!(m.tables.is_empty());
        assert!(m.reads.is_empty());
        assert!(m.writes.is_empty());
        assert!(m.projection.is_empty());
        assert!(m.alias_scope.bindings.is_empty());
    }

    /// `push` appends and reports the index of the appended statement.
    #[test]
    fn push_returns_position_and_appends() {
        let mut m = SqlSemanticModel::default();
        let p0 = m.push(SqlStatementModel::default());
        let p1 = m.push(SqlStatementModel::default());
        assert_eq!(p0, 0);
        assert_eq!(p1, 1);
        assert_eq!(m.statements.len(), 2);
    }

    /// The same table referenced by two statements is reported once.
    #[test]
    fn distinct_tables_dedupes_across_statements() {
        let mut model = SqlSemanticModel::default();
        let mut s = SqlStatementModel::default();
        s.tables
            .push(table("HR", "EMPLOYEES", "e", TableUsageKind::Read));
        model.push(s.clone());
        model.push(s); // duplicate
        // Check the content, not just the count.
        assert_eq!(
            model.distinct_tables(),
            vec![("HR".to_string(), "EMPLOYEES".to_string())]
        );
    }

    /// Different tables in the same schema are both kept, in sorted order.
    #[test]
    fn distinct_tables_keeps_distinct_schema_table_pairs() {
        let mut model = SqlSemanticModel::default();
        let mut s1 = SqlStatementModel::default();
        s1.tables
            .push(table("HR", "EMPLOYEES", "", TableUsageKind::Read));
        let mut s2 = SqlStatementModel::default();
        s2.tables
            .push(table("HR", "DEPARTMENTS", "", TableUsageKind::Read));
        model.push(s1);
        model.push(s2);
        let distinct = model.distinct_tables();
        assert_eq!(distinct.len(), 2);
        // The result comes out of a `BTreeSet`, so it is sorted regardless
        // of the order in which the statements were pushed.
        assert_eq!(
            distinct,
            vec![
                ("HR".to_string(), "DEPARTMENTS".to_string()),
                ("HR".to_string(), "EMPLOYEES".to_string()),
            ]
        );
    }

    /// Bound aliases resolve (ignoring case); unknown aliases do not.
    #[test]
    fn alias_scope_bind_and_resolve() {
        let mut scope = AliasScope::default();
        scope.bind("e", "HR", "EMPLOYEES");
        scope.bind("d", "HR", "DEPARTMENTS");
        assert_eq!(scope.resolve("e"), Some(("HR", "EMPLOYEES")));
        // Case-insensitive lookup.
        assert_eq!(scope.resolve("E"), Some(("HR", "EMPLOYEES")));
        assert_eq!(scope.resolve("d"), Some(("HR", "DEPARTMENTS")));
        assert_eq!(scope.resolve("x"), None);
    }

    /// Re-binding an alias replaces the earlier binding.
    #[test]
    fn alias_scope_shadows_duplicate_alias() {
        let mut scope = AliasScope::default();
        scope.bind("t", "HR", "EMPLOYEES");
        scope.bind("t", "HR", "DEPARTMENTS");
        // Latest binding wins.
        assert_eq!(scope.resolve("t"), Some(("HR", "DEPARTMENTS")));
        // And only one binding remains.
        assert_eq!(scope.bindings.len(), 1);
    }

    /// `ColumnResolution::default()` is `Pending`.
    #[test]
    fn column_resolution_default_is_pending() {
        // Exercise the derived `Default` itself. Going through `col` alone
        // would only echo the value the helper sets explicitly.
        assert_eq!(ColumnResolution::default(), ColumnResolution::Pending);
        let c = col("e", "salary");
        assert_eq!(c.resolution, ColumnResolution::default());
    }

    /// Projection items keep the alias and star flag they were built with.
    #[test]
    fn projection_item_carries_alias_and_star_flag() {
        let p = ProjectionItem {
            alias: "name_lower".into(),
            expression_text: "LOWER(e.name)".into(),
            is_star: false,
        };
        assert_eq!(p.alias, "name_lower");
        assert!(!p.is_star);
        let star = ProjectionItem {
            alias: String::new(),
            expression_text: "*".into(),
            is_star: true,
        };
        assert!(star.is_star);
    }

    /// MERGE verbs are distinct values.
    #[test]
    fn merge_verbs_are_distinct_from_select() {
        assert_ne!(SqlSemanticVerb::MergeUpdate, SqlSemanticVerb::Select);
        assert_ne!(SqlSemanticVerb::MergeInsert, SqlSemanticVerb::MergeUpdate);
    }

    /// A populated model survives a JSON round trip and uses snake_case tags.
    #[test]
    fn round_trip_through_serde() {
        let mut model = SqlSemanticModel::default();
        let mut s = SqlStatementModel {
            verb: SqlSemanticVerb::Update,
            ..SqlStatementModel::default()
        };
        s.tables
            .push(table("HR", "EMPLOYEES", "e", TableUsageKind::Write));
        s.writes.push(col("e", "salary"));
        s.alias_scope.bind("e", "HR", "EMPLOYEES");
        model.push(s);
        let json = serde_json::to_string(&model).unwrap();
        let back: SqlSemanticModel = serde_json::from_str(&json).unwrap();
        assert_eq!(back, model);
        // Snake-case wire tags.
        assert!(json.contains("\"verb\":\"update\""));
    }

    /// `iter` yields the statements with consecutive indices from zero.
    #[test]
    fn iter_yields_each_statement_with_index() {
        let mut model = SqlSemanticModel::default();
        model.push(SqlStatementModel::default());
        model.push(SqlStatementModel::default());
        model.push(SqlStatementModel::default());
        let collected: Vec<usize> = model.iter().map(|(i, _)| i).collect();
        assert_eq!(collected, vec![0, 1, 2]);
    }
}
353}