Skip to main content

plsql_ir/
lib.rs

1#![forbid(unsafe_code)]
2
3//! Typed semantic intermediate representation (IR) for `plsql-intelligence`.
4//!
5//! The IR is one step removed from the raw AST emitted by `plsql-parser`:
6//! the AST is syntactic, the IR is semantic. Downstream product surfaces
7//! (lineage, SAST, docs, bindgen, CI/CD) consume the IR rather than
8//! re-walking ASTs, so name resolution, overload selection, and Oracle
9//! catalog cross-checking happen in one place.
10//!
//! This crate root defines the top-level container types:
//! [`SemanticModel`], [`FileModel`], and [`SchemaModel`].
//! The [`decl`] module adds the [`Declaration`] enum and its variant
//! payloads. Statement lowering lives in the [`stmt`] module (see
//! [`lower_statement_body`]).
15
16pub mod calls;
17pub mod canonical;
18pub mod column_edges;
19pub mod decl;
20pub mod dml_edges;
21pub mod expr;
22pub mod fact;
23pub mod fact_emit;
24pub mod flow;
25pub mod flow_inter;
26pub mod flow_intra;
27pub mod flow_query;
28pub mod lower;
29pub mod recursion_guard;
30pub mod sql_columns;
31pub mod sql_fact_emit;
32pub mod sql_resolve;
33pub mod sql_sem;
34pub mod stmt;
35pub mod table_stub;
36
/// Returns `true` when `b` may appear in a PL/SQL identifier, i.e. it is an
/// ASCII letter, an ASCII digit, or one of `_`, `$`, `#`.
///
/// Shared by the byte-level lexical extractors (`dml_edges`, `sql_resolve`)
/// that tokenize embedded SQL; it replaces the identical copies each of them
/// used to carry (oracle-687a.7).
#[must_use]
pub(crate) fn is_ident_byte(b: u8) -> bool {
    matches!(
        b,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_' | b'$' | b'#'
    )
}
44
45pub use calls::{CallContext, CallSite, extract_call_sites, extract_call_sites_bounded};
46pub use canonical::{
47    CanonicalisationContext, CanonicalisationStats, canonicalize_expr, canonicalize_statements,
48};
49pub use column_edges::{
50    ColumnEdge, ColumnEdgeKind, extract_column_edges, extract_column_edges_for_model,
51};
52pub use dml_edges::{
53    AccessKind, TableAccess, extract_table_accesses, extract_table_accesses_bounded,
54};
55pub use expr::{Expr, NameRef, UnknownExprReason, lower_expression};
56pub use fact::{Fact, FactId, FactKind, FactPayload, FactProvenance, FactStore, mint_fact};
57pub use fact_emit::{
58    emit_call_facts, emit_declaration_facts, emit_declarations_from, emit_dynamic_sql_facts,
59    emit_flow_env_facts, emit_flow_facts, emit_privilege_facts, emit_reference_facts,
60    emit_unknown_facts,
61};
62pub use flow::{ConstantValue, StringShape, Taint, TaintCleanser, TaintKind, ValueFlow, ValueSet};
63pub use flow_inter::{
64    CallEdgeFlow, FlowUnknownFact, InterFlowResult, RoutineFlowSummary, propagate_inter,
65};
66pub use flow_intra::{FlowEnv, TaintSources, analyze_flow, analyze_flow_bounded};
67pub use flow_query::{FlowQuery, TaintAnswer};
68pub use lower::{LoweredFile, lower_top_level};
69pub use recursion_guard::{MAX_RELOWER_DEPTH, RecursionOutcome};
70pub use sql_columns::{extract_columns, extract_columns_for_model};
71pub use sql_fact_emit::{emit_sql_use_facts, emit_sql_use_facts_for_model};
72pub use sql_resolve::resolve_sql;
73pub use sql_sem::{
74    AliasBinding, AliasScope, ColumnResolution, ColumnUse, ProjectionItem, SqlSemanticModel,
75    SqlSemanticVerb, SqlStatementModel, TableUsageKind, TableUse,
76};
77pub use stmt::{IfArm, SqlVerb, Statement, UnknownStatementReason, lower_statement_body};
78pub use table_stub::DeclLike;
79
80pub use decl::{
81    AnchoredType, ColumnDecl, CursorDecl, DeclCommon, DeclKind, Declaration, FunctionDecl,
82    IndexDecl, PackageDecl, ParamDecl, ParamMode, ProcedureDecl, SequenceDecl, SynonymDecl,
83    TableDecl, TriggerDecl, TypeDecl, TypeRef, VariableDecl, ViewDecl,
84};
85
86use std::collections::{BTreeMap, HashMap};
87
88use plsql_catalog::{CatalogSnapshot, SynonymName};
89use plsql_core::{Diagnostic, FileId, ObjectId, ObjectName, SchemaName};
90use plsql_privileges::PrivilegeModel;
91use serde::{Deserialize, Serialize};
92use tracing::instrument;
93
// Generates a `u64`-backed identifier newtype.
//
// `$id` names the generated tuple struct and `$summary` is installed as its
// rustdoc text through `#[doc = ...]`. The struct derives the usual value
// traits plus serde's `Serialize`/`Deserialize`, and `#[serde(transparent)]`
// makes it serialize as the bare inner number. Two inherent methods are
// emitted: `new` to wrap a raw value and `get` to read it back.
macro_rules! numeric_id {
    ($id:ident, $summary:expr) => {
        #[doc = $summary]
        #[derive(
            Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize,
            Deserialize,
        )]
        #[serde(transparent)]
        pub struct $id(u64);

        impl $id {
            /// Wraps `raw` in this identifier type.
            #[must_use]
            #[instrument(level = "trace")]
            pub fn new(raw: u64) -> Self {
                $id(raw)
            }

            /// Returns the wrapped `u64`.
            #[must_use]
            #[instrument(level = "trace", skip(self))]
            pub fn get(self) -> u64 {
                let Self(raw) = self;
                raw
            }
        }
    };
}
128
// Expands through `numeric_id!` into `pub struct DeclId(u64)` with `new`/`get`
// methods; the string argument becomes the type's rustdoc text.
// NOTE(review): the doc string says the `Declaration` enum "lands in
// `PLSQL-IR-002`", but this file already re-exports `Declaration` from
// `decl` — the wording looks stale; confirm and update.
numeric_id!(
    DeclId,
    "Stable identity for a semantic declaration (procedure, function, package, type, variable, parameter, cursor, table, view, column, sequence, synonym, index, trigger). The concrete [`Declaration`] enum lands in `PLSQL-IR-002`."
);
// Expands through `numeric_id!` into `pub struct StatementId(u64)` with
// `new`/`get` methods; the string argument becomes the type's rustdoc text.
// NOTE(review): the doc string speaks of the statement enum and the
// embedded-SQL view as future work, but this file already re-exports
// `Statement` from `stmt` and `SqlSemanticModel` from `sql_sem` — the wording
// looks stale; confirm and update.
numeric_id!(
    StatementId,
    "Stable identity for an IR statement node. The statement enum lands in `PLSQL-IR-004`; the embedded-SQL view in `PLSQL-SQLSEM-001`."
);
137
/// Top-level container produced by Layer 2 semantic analysis.
///
/// One `SemanticModel` summarizes a complete analysis run over a project:
/// every file's top-level declarations and statements, every schema's
/// objects and synonyms, an optional catalog snapshot, the privilege
/// model, and any diagnostics raised while constructing the IR. Layer 2.5
/// orchestration (`plsql-engine`) embeds this inside `AnalysisRun`, and
/// every product surface consumes it from there.
///
/// Spec: plan.md §9.2.1.
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct SemanticModel {
    /// Per-source-file IR views. [`SemanticModel::file`] returns the first
    /// entry whose `file_id` matches.
    pub files: Vec<FileModel>,
    /// Per-schema IR views, keyed by schema name.
    pub schemas: BTreeMap<SchemaName, SchemaModel>,
    /// Catalog snapshot, or `None` when the model carries none.
    pub catalog: Option<CatalogSnapshot>,
    /// Privilege model for the analysis run.
    pub privileges: PrivilegeModel,
    /// Diagnostics raised while constructing the IR.
    pub diagnostics: Vec<Diagnostic>,
}
156
157impl SemanticModel {
158    #[must_use]
159    #[instrument(level = "trace")]
160    pub fn new() -> Self {
161        Self::default()
162    }
163
164    #[must_use]
165    #[instrument(level = "trace", skip(self))]
166    pub fn file(&self, file_id: FileId) -> Option<&FileModel> {
167        self.files.iter().find(|f| f.file_id == file_id)
168    }
169
170    #[must_use]
171    #[instrument(level = "trace", skip(self))]
172    pub fn schema(&self, name: SchemaName) -> Option<&SchemaModel> {
173        self.schemas.get(&name)
174    }
175}
176
/// Per-source-file IR view.
///
/// `top_level` holds the declarations parsed from this file (package
/// specs, package bodies, standalone routines, types, tables, views,
/// triggers, ...). `statements` holds the file-scoped statements
/// (anonymous blocks, DDL, SQL\*Plus-significant directives). The
/// declarations themselves live in a future `DeclTable`; this struct
/// only carries the IDs into it.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct FileModel {
    /// Identity of the source file this view describes.
    pub file_id: FileId,
    /// IDs of the file's top-level declarations.
    pub top_level: Vec<DeclId>,
    /// IDs of the file-scoped statements.
    pub statements: Vec<StatementId>,
}
191
192impl FileModel {
193    #[must_use]
194    #[instrument(level = "trace")]
195    pub fn new(file_id: FileId) -> Self {
196        Self {
197            file_id,
198            top_level: Vec::new(),
199            statements: Vec::new(),
200        }
201    }
202}
203
/// Per-schema IR view.
///
/// `objects` maps an object name (table, view, package, type, sequence,
/// ...) inside this schema to its catalog/IR identity. `synonyms` maps a
/// synonym name to the object it resolves to; private synonyms live with
/// the owning schema, public synonyms live in the synthetic `PUBLIC`
/// schema in [`SemanticModel::schemas`]. The exact semantics of an
/// `ObjectId` (catalog-only vs source-only vs both) is recorded in
/// [the catalog](catalog) and [this crate](self).
// NOTE(review): the two links above had empty labels in the original comment;
// the labels here are placeholders — restore the intended references.
// NOTE(review): both maps are `HashMap`s, whose iteration order is
// unspecified, so the order of their serialized entries may differ between
// runs (`SemanticModel::schemas` uses a `BTreeMap`) — confirm whether any
// consumer needs a stable order.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct SchemaModel {
    /// Name of the schema this view describes.
    pub name: SchemaName,
    /// Object name to identity, for objects inside this schema.
    pub objects: HashMap<ObjectName, ObjectId>,
    /// Synonym name to the identity of the object it resolves to.
    pub synonyms: HashMap<SynonymName, ObjectId>,
}
219
220impl SchemaModel {
221    #[must_use]
222    #[instrument(level = "trace")]
223    pub fn new(name: SchemaName) -> Self {
224        Self {
225            name,
226            objects: HashMap::new(),
227            synonyms: HashMap::new(),
228        }
229    }
230}
231
#[cfg(test)]
mod tests {
    use super::*;
    use plsql_core::SymbolId;

    /// A freshly built model has no files, schemas, catalog, or diagnostics.
    #[test]
    fn semantic_model_defaults_are_empty() {
        let fresh = SemanticModel::new();
        assert!(fresh.files.is_empty());
        assert!(fresh.schemas.is_empty());
        assert!(fresh.catalog.is_none());
        assert!(fresh.diagnostics.is_empty());
    }

    /// IDs pushed into a `FileModel` are retained alongside its `file_id`.
    #[test]
    fn file_model_tracks_decls_and_statements() {
        let mut view = FileModel::new(FileId::new(7));
        view.top_level.push(DeclId::new(1));
        view.top_level.push(DeclId::new(2));
        view.statements.push(StatementId::new(10));

        assert_eq!(view.file_id, FileId::new(7));
        assert_eq!(view.top_level.len(), 2);
        assert_eq!(view.statements, vec![StatementId::new(10)]);
    }

    /// Objects and synonyms inserted into a `SchemaModel` can be read back by key.
    #[test]
    fn schema_model_indexes_objects_and_synonyms() {
        let owner = SchemaName::from(SymbolId::new(1));
        let mut view = SchemaModel::new(owner);

        let table = ObjectName::from(SymbolId::new(2));
        view.objects.insert(table, ObjectId::new(42));
        let alias = SynonymName::from(SymbolId::new(3));
        view.synonyms.insert(alias, ObjectId::new(42));

        assert_eq!(view.name, owner);
        assert_eq!(view.objects.get(&table), Some(&ObjectId::new(42)));
        assert_eq!(view.synonyms.get(&alias), Some(&ObjectId::new(42)));
    }

    /// `file` and `schema` find what was stored and miss what was not.
    #[test]
    fn semantic_model_lookups_round_trip() {
        let mut model = SemanticModel::new();
        let owner = SchemaName::from(SymbolId::new(1));
        let stored_file = FileModel::new(FileId::new(11));
        model.files.push(stored_file);
        model.schemas.insert(owner, SchemaModel::new(owner));

        assert!(model.file(FileId::new(11)).is_some());
        assert!(model.schema(owner).is_some());
        assert!(model.file(FileId::new(12)).is_none());
    }

    /// The ID newtypes serialize as bare JSON numbers.
    #[test]
    fn ids_are_numeric_and_serialize_transparently() {
        let decl_json = serde_json::to_string(&DeclId::new(99)).unwrap();
        assert_eq!(decl_json, "99");
        let stmt_json = serde_json::to_string(&StatementId::new(7)).unwrap();
        assert_eq!(stmt_json, "7");
    }
}