Skip to main content

plsql_ir/
column_edges.rs

1//! Column-level edge extraction.
2//!
3//! Consumes a populated [`SqlStatementModel`] (SQLSEM-002 tables
4//! plus SQLSEM-003 columns) and emits typed column-granular
5//! dependency edges. Five edge kinds, picked from the column's
6//! [`ColumnResolution`] and whether its table is known:
7//!
8//! * `ReadsColumn` — resolved read of a specific column.
9//! * `WritesColumn` — resolved write of a specific column.
10//! * `DerivesColumn` — the column feeds a projection expression
11//!   or star expansion (the value is derived, not a direct
12//!   column-to-column dependency).
13//! * `ReadsUnknownColumnOfTable` — read of an unresolved column
14//!   while the table is known (bare column, multi-table scope).
15//! * `WritesUnknownColumnOfTable` — same, on the write side.
16//!
17//! This is the typed counterpart to SQLSEM-004's string-encoded
18//! `ReadsColumn:<marker>` facts; lineage consumes these enums
19//! directly without re-parsing the marker.
20//!
21//! ## /oracle evidence
22//!
23//! * `DATABASE-REFERENCE.md` PL/SQL Language Reference — the
24//!   column reference + projection grammar.
25//! * `LOW-LEVEL-CATALOGS.md` Data Dictionary View Families —
26//!   `ALL_TAB_COLUMNS` resolves an `Unknown*` edge into a
27//!   concrete `ReadsColumn` / `WritesColumn` once the catalog
28//!   is available (the unknown-of-table variant preserves the
29//!   table so that upgrade is possible).
30
31use serde::{Deserialize, Serialize};
32
33use crate::sql_sem::{ColumnResolution, SqlStatementModel};
34
35/// One column-granular dependency edge.
36#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
37pub struct ColumnEdge {
38    pub kind: ColumnEdgeKind,
39    /// Resolved `schema.table` when known; the bare qualifier or
40    /// `?` when the column couldn't be attributed to one table.
41    pub table: String,
42    pub column: String,
43}
44
45#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
46#[serde(rename_all = "snake_case")]
47pub enum ColumnEdgeKind {
48    ReadsColumn,
49    WritesColumn,
50    DerivesColumn,
51    ReadsUnknownColumnOfTable,
52    WritesUnknownColumnOfTable,
53}
54
55/// Extract typed column edges from one statement model.
56#[must_use]
57pub fn extract_column_edges(model: &SqlStatementModel) -> Vec<ColumnEdge> {
58    let mut out: Vec<ColumnEdge> = Vec::new();
59    let single = if model.tables.len() == 1 {
60        let t = &model.tables[0];
61        Some(qualify(&t.schema, &t.table))
62    } else {
63        None
64    };
65
66    for c in &model.reads {
67        let table = resolve_table(model, &c.qualifier, single.as_deref());
68        let kind = match c.resolution {
69            ColumnResolution::Resolved => ColumnEdgeKind::ReadsColumn,
70            ColumnResolution::StarExpansion => ColumnEdgeKind::DerivesColumn,
71            ColumnResolution::Unresolved | ColumnResolution::Pending => {
72                ColumnEdgeKind::ReadsUnknownColumnOfTable
73            }
74        };
75        push(&mut out, kind, table, &c.column);
76    }
77    for c in &model.writes {
78        let table = resolve_table(model, &c.qualifier, single.as_deref());
79        let kind = match c.resolution {
80            ColumnResolution::Resolved => ColumnEdgeKind::WritesColumn,
81            ColumnResolution::StarExpansion => ColumnEdgeKind::DerivesColumn,
82            ColumnResolution::Unresolved | ColumnResolution::Pending => {
83                ColumnEdgeKind::WritesUnknownColumnOfTable
84            }
85        };
86        push(&mut out, kind, table, &c.column);
87    }
88    out
89}
90
91/// Extract column edges for every statement in a model.
92#[must_use]
93pub fn extract_column_edges_for_model(model: &crate::sql_sem::SqlSemanticModel) -> Vec<ColumnEdge> {
94    let mut out = Vec::new();
95    for (_, s) in model.iter() {
96        out.extend(extract_column_edges(s));
97    }
98    out
99}
100
101fn resolve_table(model: &SqlStatementModel, qualifier: &str, single: Option<&str>) -> String {
102    if !qualifier.is_empty() {
103        if let Some((schema, table)) = model.alias_scope.resolve(qualifier) {
104            return qualify(schema, table);
105        }
106        return qualifier.to_string();
107    }
108    single
109        .map(str::to_string)
110        .unwrap_or_else(|| "?".to_string())
111}
112
113fn qualify(schema: &str, table: &str) -> String {
114    if schema.is_empty() {
115        table.to_ascii_lowercase()
116    } else {
117        format!(
118            "{}.{}",
119            schema.to_ascii_lowercase(),
120            table.to_ascii_lowercase()
121        )
122    }
123}
124
125fn push(out: &mut Vec<ColumnEdge>, kind: ColumnEdgeKind, table: String, column: &str) {
126    let edge = ColumnEdge {
127        kind,
128        table,
129        column: column.to_ascii_uppercase(),
130    };
131    if !out.contains(&edge) {
132        out.push(edge);
133    }
134}
135
136#[cfg(test)]
137mod tests {
138    use super::*;
139    use crate::sql_columns::extract_columns;
140    use crate::sql_resolve::resolve_sql;
141
142    fn edges(raw: &str) -> Vec<ColumnEdge> {
143        let mut m = resolve_sql(raw);
144        extract_columns(&mut m, raw);
145        extract_column_edges(&m)
146    }
147
148    #[test]
149    fn resolved_select_column_is_reads_column() {
150        let e = edges("SELECT salary INTO v FROM employees");
151        assert!(e.iter().any(|x| x.kind == ColumnEdgeKind::ReadsColumn
152            && x.column == "SALARY"
153            && x.table == "employees"));
154    }
155
156    #[test]
157    fn resolved_update_column_is_writes_column() {
158        let e = edges("UPDATE employees e SET e.salary = 1 WHERE e.id = 2");
159        assert!(
160            e.iter()
161                .any(|x| x.kind == ColumnEdgeKind::WritesColumn && x.column == "SALARY")
162        );
163    }
164
165    #[test]
166    fn star_projection_is_derives_column() {
167        let e = edges("SELECT * INTO r FROM employees");
168        assert!(e.iter().any(|x| x.kind == ColumnEdgeKind::DerivesColumn));
169    }
170
171    #[test]
172    fn ambiguous_read_is_unknown_column_of_table() {
173        let e = edges("SELECT amount INTO v FROM orders o, payments p WHERE o.id = p.oid");
174        assert!(
175            e.iter().any(
176                |x| x.kind == ColumnEdgeKind::ReadsUnknownColumnOfTable && x.column == "AMOUNT"
177            )
178        );
179    }
180
181    #[test]
182    fn ambiguous_write_is_unknown_write_column() {
183        // UPDATE with no alias + a second FROM-style table is rare;
184        // simulate via a bare SET column where scope is multi.
185        let raw = "UPDATE t1 SET val = (SELECT x FROM t2) WHERE id = 1";
186        let mut m = resolve_sql(raw);
187        extract_columns(&mut m, raw);
188        let e = extract_column_edges(&m);
189        assert!(e.iter().any(|x| matches!(
190            x.kind,
191            ColumnEdgeKind::WritesColumn | ColumnEdgeKind::WritesUnknownColumnOfTable
192        )));
193    }
194
195    #[test]
196    fn qualified_column_resolves_table_via_alias_scope() {
197        let e = edges("SELECT e.salary INTO v FROM hr.employees e");
198        let c = e
199            .iter()
200            .find(|x| x.column == "SALARY" && x.kind == ColumnEdgeKind::ReadsColumn)
201            .unwrap();
202        assert_eq!(c.table, "hr.employees");
203    }
204
205    #[test]
206    fn unbound_qualifier_kept_as_table_string() {
207        let e = edges("SELECT zzz.col INTO v FROM hr.employees e");
208        let c = e.iter().find(|x| x.column == "COL").unwrap();
209        assert_eq!(c.kind, ColumnEdgeKind::ReadsUnknownColumnOfTable);
210        assert_eq!(c.table, "zzz");
211    }
212
213    #[test]
214    fn duplicate_edges_dedupe() {
215        let e = edges("SELECT id, id INTO a, b FROM t");
216        let id_edges = e
217            .iter()
218            .filter(|x| x.column == "ID" && x.kind == ColumnEdgeKind::ReadsColumn)
219            .count();
220        assert_eq!(id_edges, 1);
221    }
222
223    #[test]
224    fn model_wide_extraction_covers_all_statements() {
225        let mut model = crate::sql_sem::SqlSemanticModel::default();
226        let r1 = "SELECT a INTO v FROM t1";
227        let r2 = "INSERT INTO t2 (b) VALUES (1)";
228        let mut m1 = resolve_sql(r1);
229        extract_columns(&mut m1, r1);
230        let mut m2 = resolve_sql(r2);
231        extract_columns(&mut m2, r2);
232        model.push(m1);
233        model.push(m2);
234        let e = extract_column_edges_for_model(&model);
235        assert!(e.iter().any(|x| x.column == "A"));
236        assert!(e.iter().any(|x| x.column == "B"));
237    }
238
239    #[test]
240    fn serde_round_trip_with_snake_case_kind() {
241        let e = edges("SELECT salary INTO v FROM employees");
242        let json = serde_json::to_string(&e[0]).unwrap();
243        let back: ColumnEdge = serde_json::from_str(&json).unwrap();
244        assert_eq!(back, e[0]);
245        assert!(json.contains("reads_column"));
246    }
247}