Skip to main content

plsql_ir/
sql_fact_emit.rs

//! Emit SQL table/column-use facts with precision markers.
//!
//! Walks a populated [`SqlStatementModel`] (tables + columns
//! filled by SQLSEM-002 / SQLSEM-003) and emits normalized
//! [`Fact`]s into a [`FactStore`]. Every fact carries a precision
//! marker so the lineage layer can weight the edge:
//!
//! * `exact` — table/column resolved against a single bound
//!   alias or single-table scope.
//! * `expression` — column came from a projection expression
//!   (function call / arithmetic) or a `*` expansion rather than
//!   a bare reference.
//! * `unknown` — bare column with ambiguous (multi-table) scope
//!   or a qualifier that didn't bind.
//!
//! The marker is encoded into the `DependencyEdge.edge_kind`
//! string (`ReadsColumn:exact`, `WritesColumn:unknown`, …) so it
//! survives the FACT-001 wire shape without a schema change.
//!
//! ## /oracle evidence
//!
//! * `DATABASE-REFERENCE.md` PL/SQL Language Reference — the
//!   table/column reference grammar.
//! * `LOW-LEVEL-CATALOGS.md` Data Dictionary View Families —
//!   `ALL_TAB_COLUMNS` / `ALL_DEPENDENCIES` are the server-side
//!   mirrors; the precision marker records how confident the
//!   source-only pass is before that cross-check.
27
28use crate::fact::{FactPayload, FactProvenance, FactStore};
29use crate::sql_sem::{ColumnResolution, ColumnUse, SqlStatementModel, TableUsageKind};
30
31/// Emit table-level + column-level use facts for one statement.
32/// `owner_logical_id` is the routine the statement lives in (the
33/// `from` side of every edge). Returns the post-dedup count of
34/// facts added.
35pub fn emit_sql_use_facts(
36    store: &mut FactStore,
37    prov: &FactProvenance,
38    owner_logical_id: &str,
39    model: &SqlStatementModel,
40) -> usize {
41    let before = store.len();
42
43    // Table-level edges.
44    for t in &model.tables {
45        let target = qualify(&t.schema, &t.table);
46        let kind = match t.usage {
47            TableUsageKind::Read => "Reads",
48            TableUsageKind::Write => "Writes",
49            TableUsageKind::ReadWrite => "ReadsWrites",
50        };
51        push_edge(store, prov, owner_logical_id, &target, kind);
52    }
53
54    // Column-level edges with precision markers.
55    for c in &model.reads {
56        emit_column(store, prov, owner_logical_id, model, c, "ReadsColumn");
57    }
58    for c in &model.writes {
59        emit_column(store, prov, owner_logical_id, model, c, "WritesColumn");
60    }
61
62    store.len() - before
63}
64
65/// Emit use facts for every statement in a `SqlSemanticModel`.
66pub fn emit_sql_use_facts_for_model(
67    store: &mut FactStore,
68    prov: &FactProvenance,
69    owner_logical_id: &str,
70    model: &crate::sql_sem::SqlSemanticModel,
71) -> usize {
72    let before = store.len();
73    for (_, s) in model.iter() {
74        emit_sql_use_facts(store, prov, owner_logical_id, s);
75    }
76    store.len() - before
77}
78
79fn emit_column(
80    store: &mut FactStore,
81    prov: &FactProvenance,
82    owner: &str,
83    model: &SqlStatementModel,
84    c: &ColumnUse,
85    base_kind: &str,
86) {
87    let marker = precision_marker(c);
88    // Resolve the column's table via the alias scope when the
89    // qualifier is bound; otherwise leave the qualifier as-is so
90    // the catalog cross-check can finish the job.
91    let target = if c.qualifier.is_empty() {
92        // single-table scope: attribute to the lone table.
93        if model.tables.len() == 1 {
94            let t = &model.tables[0];
95            format!("{}.{}", qualify(&t.schema, &t.table), c.column)
96        } else {
97            format!("?.{}", c.column)
98        }
99    } else if let Some((schema, table)) = model.alias_scope.resolve(&c.qualifier) {
100        format!("{}.{}", qualify(schema, table), c.column)
101    } else {
102        format!("{}.{}", c.qualifier, c.column)
103    };
104    push_edge(
105        store,
106        prov,
107        owner,
108        &target,
109        &format!("{base_kind}:{marker}"),
110    );
111}
112
113fn precision_marker(c: &ColumnUse) -> &'static str {
114    match c.resolution {
115        ColumnResolution::Resolved => "exact",
116        ColumnResolution::StarExpansion => "expression",
117        ColumnResolution::Unresolved => "unknown",
118        ColumnResolution::Pending => "unknown",
119    }
120}
121
/// Build the lowercased logical name of a table.
///
/// Returns `schema.table` when `schema` is non-empty and just
/// `table` otherwise. Both parts are ASCII-lowercased; non-ASCII
/// characters are left untouched.
fn qualify(schema: &str, table: &str) -> String {
    let table = table.to_ascii_lowercase();
    if schema.is_empty() {
        table
    } else {
        format!("{}.{table}", schema.to_ascii_lowercase())
    }
}
133
134fn push_edge(store: &mut FactStore, prov: &FactProvenance, from: &str, to: &str, edge_kind: &str) {
135    let f = crate::fact::mint_fact(
136        prov.clone(),
137        FactPayload::DependencyEdge {
138            from_logical_id: from.to_string(),
139            to_logical_id: to.to_string(),
140            edge_kind: edge_kind.to_string(),
141        },
142    );
143    store.push(f);
144}
145
#[cfg(test)]
mod tests {
    use super::*;
    use crate::fact::FactKind;
    use crate::sql_columns::extract_columns;
    use crate::sql_resolve::resolve_sql;

    /// Minimal provenance shared by every test.
    fn prov() -> FactProvenance {
        FactProvenance {
            component: "plsql-ir".into(),
            component_version: "0.1.0".into(),
            run_id: String::new(),
            source_logical_id: None,
            source_file: None,
        }
    }

    /// Resolve `raw` and run column extraction over it, yielding a
    /// model ready to be emitted.
    fn model_for(raw: &str) -> SqlStatementModel {
        let mut m = resolve_sql(raw);
        extract_columns(&mut m, raw);
        m
    }

    /// Collect the `edge_kind` of every dependency edge in `store`.
    fn edge_kinds(store: &FactStore) -> Vec<String> {
        store
            .by_kind(FactKind::DependencyEdge)
            .filter_map(|f| match &f.payload {
                FactPayload::DependencyEdge { edge_kind, .. } => Some(edge_kind.clone()),
                _ => None,
            })
            .collect()
    }

    /// A single-table SELECT yields a table read and an exact column read.
    #[test]
    fn select_emits_reads_table_and_exact_columns() {
        let m = model_for("SELECT salary INTO v FROM employees");
        let mut store = FactStore::default();
        let n = emit_sql_use_facts(&mut store, &prov(), "hr.run", &m);
        assert!(n >= 2);
        let kinds = edge_kinds(&store);
        assert!(kinds.iter().any(|k| k == "Reads"));
        assert!(kinds.iter().any(|k| k == "ReadsColumn:exact"));
    }

    /// A bare column in a two-table scope is marked `unknown`.
    #[test]
    fn ambiguous_column_marked_unknown() {
        let m = model_for("SELECT amount INTO v FROM orders o, payments p WHERE o.id = p.oid");
        let mut store = FactStore::default();
        emit_sql_use_facts(&mut store, &prov(), "hr.run", &m);
        let kinds = edge_kinds(&store);
        assert!(kinds.iter().any(|k| k == "ReadsColumn:unknown"));
    }

    /// `SELECT *` is marked `expression`.
    #[test]
    fn star_projection_marked_expression() {
        let m = model_for("SELECT * INTO r FROM employees");
        let mut store = FactStore::default();
        emit_sql_use_facts(&mut store, &prov(), "hr.run", &m);
        let kinds = edge_kinds(&store);
        assert!(kinds.iter().any(|k| k == "ReadsColumn:expression"));
    }

    /// INSERT yields a table write plus column writes.
    #[test]
    fn insert_emits_writes_table_and_columns() {
        let m = model_for("INSERT INTO audit (event_id, ts) VALUES (1, SYSDATE)");
        let mut store = FactStore::default();
        emit_sql_use_facts(&mut store, &prov(), "hr.run", &m);
        let kinds = edge_kinds(&store);
        assert!(kinds.iter().any(|k| k == "Writes"));
        assert!(kinds.iter().any(|k| k.starts_with("WritesColumn:")));
    }

    /// MERGE yields a read-write edge for the target and a read for the source.
    #[test]
    fn merge_emits_readswrites_table_edge() {
        let m = model_for(
            "MERGE INTO target t USING source s ON (t.id = s.id) WHEN MATCHED THEN UPDATE SET t.v = s.v",
        );
        let mut store = FactStore::default();
        emit_sql_use_facts(&mut store, &prov(), "hr.run", &m);
        let kinds = edge_kinds(&store);
        assert!(kinds.iter().any(|k| k == "ReadsWrites"));
        assert!(kinds.iter().any(|k| k == "Reads"));
    }

    /// An alias-qualified column targets the aliased schema/table.
    #[test]
    fn column_target_resolves_through_alias_scope() {
        let m = model_for("SELECT e.salary INTO v FROM hr.employees e");
        let mut store = FactStore::default();
        emit_sql_use_facts(&mut store, &prov(), "hr.run", &m);
        let targets: Vec<String> = store
            .by_kind(FactKind::DependencyEdge)
            .filter_map(|f| match &f.payload {
                FactPayload::DependencyEdge {
                    to_logical_id,
                    edge_kind,
                    ..
                } if edge_kind.starts_with("ReadsColumn") => Some(to_logical_id.clone()),
                _ => None,
            })
            .collect();
        assert!(targets.iter().any(|t| t == "hr.employees.SALARY"));
    }

    /// Emitting the same statement twice adds nothing the second time.
    #[test]
    fn facts_dedupe_on_repeat_emit() {
        let m = model_for("SELECT salary INTO v FROM employees");
        let mut store = FactStore::default();
        emit_sql_use_facts(&mut store, &prov(), "hr.run", &m);
        let after_first = store.len();
        let n2 = emit_sql_use_facts(&mut store, &prov(), "hr.run", &m);
        assert_eq!(n2, 0);
        assert_eq!(store.len(), after_first);
    }

    /// The model-wide entry point emits facts for each pushed statement.
    #[test]
    fn model_wide_emit_covers_every_statement() {
        let mut model = crate::sql_sem::SqlSemanticModel::default();
        model.push(model_for("SELECT id INTO v FROM t1"));
        model.push(model_for("INSERT INTO t2 (c) VALUES (1)"));
        let mut store = FactStore::default();
        let n = emit_sql_use_facts_for_model(&mut store, &prov(), "hr.run", &model);
        assert!(n >= 4);
        let kinds = edge_kinds(&store);
        assert!(kinds.iter().any(|k| k == "Reads"));
        assert!(kinds.iter().any(|k| k == "Writes"));
    }

    /// Every `ColumnResolution` variant maps to its expected marker.
    #[test]
    fn precision_marker_maps_all_resolutions() {
        let mk = |r| ColumnUse {
            qualifier: String::new(),
            column: "C".into(),
            resolution: r,
        };
        assert_eq!(precision_marker(&mk(ColumnResolution::Resolved)), "exact");
        assert_eq!(
            precision_marker(&mk(ColumnResolution::StarExpansion)),
            "expression"
        );
        assert_eq!(
            precision_marker(&mk(ColumnResolution::Unresolved)),
            "unknown"
        );
        assert_eq!(precision_marker(&mk(ColumnResolution::Pending)), "unknown");
    }

    /// A default (empty) statement model produces no facts.
    #[test]
    fn empty_model_emits_nothing() {
        let m = SqlStatementModel::default();
        let mut store = FactStore::default();
        let n = emit_sql_use_facts(&mut store, &prov(), "hr.run", &m);
        assert_eq!(n, 0);
    }
}