Skip to main content

plsql_parser/
ast.rs

1//! Concrete syntax tree and abstract syntax tree types.
2//!
3//! These types define the public AST / CST surface for the parser frontend.
4//! Node hierarchies will be expanded over time, but the structural
5//! definitions — [`ConcreteSyntaxTree`], [`Ast`], [`TokenTape`], [`TriviaTable`]
6//! — are settled here.
7//!
8//! # Lossless vs lossy
9//!
10//! - [`ConcreteSyntaxTree`] is **lossless**: every delimiter, keyword, and
11//!   trivia is represented with byte-offset spans.  Round-tripping goes
12//!   through the CST / token tape.
13//!
14//! - [`Ast`] is **lossy** (semantic): whitespace, comments, and exact
15//!   delimiter positions are not preserved.  Pretty-printing from the AST
16//!   produces *equivalent* but not *byte-identical* output.
17//!
18//! # Spanned invariant
19//!
20//! Every AST node **MUST** carry a source [`Span`].  The [`Spanned`] trait
21//! formalises this requirement.  All new AST node types must implement
22//! [`Spanned`].  This is enforced by code review, not by a compile-time
23//! lint (Rust's type system cannot express "every variant of an enum has
24//! a `span` field").
25
26use std::collections::BTreeMap;
27
28use plsql_core::Span;
29use serde::{Deserialize, Serialize};
30
31use crate::tokens::{TokenTape, TriviaTable};
32
33// ---------------------------------------------------------------------------
34// Spanned trait (PLSQL-PARSE-010)
35// ---------------------------------------------------------------------------
36
37/// Every AST node must implement this trait.
38///
39/// The trait returns the node's source [`Span`] — the byte-offset range in the
40/// original source file that this node corresponds to.  This is a hard
41/// requirement for provenance tracking (R12) and diagnostic quality (every
42/// diagnostic has a non-empty `Span` pointing to the offending source range,
43/// plan §7.6).
44///
45/// # Contract
46///
47/// - `span()` MUST return the tightest bounding span that covers all tokens
48///   belonging to this node.
49/// - For nodes that span multiple non-contiguous ranges (e.g., a package spec
50///   with a separate body), `span()` returns the *primary* span (the spec
51///   keyword range).  Related spans are carried via `SpanLabel` in the
52///   `Evidence` or `Diagnostic` types.
53pub trait Spanned {
54    /// The source span of this AST node.
55    fn span(&self) -> Span;
56}
57
58// ---------------------------------------------------------------------------
59// CST node identifiers
60// ---------------------------------------------------------------------------
61
62/// Opaque identifier for a node in the [`ConcreteSyntaxTree`].
63///
64/// These are backend-local indices; they are NOT stable across parse
65/// invocations or backends.
66#[derive(
67    Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize,
68)]
69#[serde(transparent)]
70pub struct CstNodeId(pub u32);
71
72// ---------------------------------------------------------------------------
73// SourceMap
74// ---------------------------------------------------------------------------
75
76/// Maps [`CstNodeId`]s to their source [`Span`]s.
77///
78/// This is a side-table rather than embedding spans in every CST node, so
79/// the node arena stays compact and span lookups are O(log n).
80#[derive(Clone, Debug, Default, Serialize, Deserialize)]
81pub struct SourceMap {
82    inner: BTreeMap<u32, Span>,
83}
84
85impl SourceMap {
86    #[must_use]
87    pub fn new() -> Self {
88        Self::default()
89    }
90
91    /// Record the span for a given CST node.
92    pub fn insert(&mut self, node: CstNodeId, span: Span) {
93        self.inner.insert(node.0, span);
94    }
95
96    /// Look up the span for a given CST node.
97    #[must_use]
98    pub fn get(&self, node: CstNodeId) -> Option<&Span> {
99        self.inner.get(&node.0)
100    }
101
102    /// Number of entries.
103    #[must_use]
104    pub fn len(&self) -> usize {
105        self.inner.len()
106    }
107
108    #[must_use]
109    pub fn is_empty(&self) -> bool {
110        self.inner.is_empty()
111    }
112}
113
114// ---------------------------------------------------------------------------
115// ConcreteSyntaxTree
116// ---------------------------------------------------------------------------
117
118/// The lossless concrete syntax tree produced by a [`ParseBackend`].
119///
120/// The CST preserves every token and trivia element with source spans.
121/// Combined with the [`TokenTape`] and [`TriviaTable`], it supports
122/// byte-for-byte source reconstruction.
123#[derive(Clone, Debug, Default, Serialize, Deserialize)]
124pub struct ConcreteSyntaxTree {
125    /// The root node of the CST.
126    pub root: CstNodeId,
127    /// The lossless token tape.
128    pub token_tape: TokenTape,
129    /// Trivia (whitespace, comments) associated with tokens.
130    pub trivia: TriviaTable,
131    /// Maps CST node IDs to source spans.
132    pub source_map: SourceMap,
133}
134
135impl ConcreteSyntaxTree {
136    /// Create a new empty CST.
137    #[must_use]
138    pub fn new() -> Self {
139        Self::default()
140    }
141
142    /// Reconstruct the original source text from the CST.
143    ///
144    /// This is the lossless round-trip operation.
145    #[must_use]
146    pub fn reconstruct(&self) -> String {
147        self.token_tape.reconstruct(&self.trivia)
148    }
149}
150
151// ---------------------------------------------------------------------------
152// SourceFile / Ast
153// ---------------------------------------------------------------------------
154
155/// A single parsed source file (the root of the typed AST).
156///
157/// The `declarations` vector holds top-level PL/SQL declarations (packages,
158/// procedures, functions, triggers, views, types, DDL statements) discovered
159/// in the file.  Each carries a name and source span.
160#[derive(Clone, Debug, Default, Serialize, Deserialize)]
161pub struct SourceFile {
162    /// Byte span covering the entire file.
163    pub span: Span,
164    /// Top-level declarations discovered in the file.
165    pub declarations: Vec<AstDecl>,
166}
167
168impl Spanned for SourceFile {
169    fn span(&self) -> Span {
170        self.span
171    }
172}
173
174/// A top-level PL/SQL declaration.
175///
176/// Variants cover the full set of top-level constructs the parser must
177/// recognize (plan §7.2).  The `Unknown` variant satisfies R13 — no
178/// uncertainty is silently dropped.
179///
180/// **Every variant MUST carry a `span` field** (Spanned invariant).
181#[derive(Clone, Debug, Serialize, Deserialize)]
182pub enum AstDecl {
183    /// A PL/SQL package specification.
184    PackageSpec { name: String, span: Span },
185    /// A PL/SQL package body.
186    PackageBody { name: String, span: Span },
187    /// A standalone procedure.
188    Procedure { name: String, span: Span },
189    /// A standalone function.
190    Function { name: String, span: Span },
191    /// A trigger.
192    Trigger { name: String, span: Span },
193    /// A view.
194    View { name: String, span: Span },
195    /// A type specification.
196    TypeSpec { name: String, span: Span },
197    /// A type body.
198    TypeBody { name: String, span: Span },
199    /// A DDL statement (CREATE / ALTER / DROP / GRANT).
200    ///
201    /// `antlr_rule_path` is a bounded, `>`-joined path of ANTLR
202    /// *grammar rule names* (never source text or identifiers)
203    /// identifying the grammar position the DDL was recognised at.
204    /// `None` when the declaration did not originate from a real
205    /// ANTLR parse tree (e.g. the text-scanner fallback). It is a
206    /// plain `String`, so no ANTLR generated type crosses the crate
207    /// boundary (R20).
208    Ddl {
209        kind: String,
210        span: Span,
211        #[serde(default)]
212        antlr_rule_path: Option<String>,
213    },
214    /// A declaration the backend could not classify (R13).
215    ///
216    /// `antlr_rule_path` — see [`AstDecl::Ddl`].
217    Unknown {
218        span: Span,
219        #[serde(default)]
220        antlr_rule_path: Option<String>,
221    },
222}
223
224impl Spanned for AstDecl {
225    fn span(&self) -> Span {
226        match self {
227            Self::PackageSpec { span, .. }
228            | Self::PackageBody { span, .. }
229            | Self::Procedure { span, .. }
230            | Self::Function { span, .. }
231            | Self::Trigger { span, .. }
232            | Self::View { span, .. }
233            | Self::TypeSpec { span, .. }
234            | Self::TypeBody { span, .. }
235            | Self::Ddl { span, .. }
236            | Self::Unknown { span, .. } => *span,
237        }
238    }
239}
240
241/// A statement inside a routine / anonymous block body.
242///
243/// This is the **syntactic** projection of a statement body — one
244/// step before `plsql_ir::Statement` (the semantic IR). The
245/// parser frontend only recognises the shape; name resolution +
246/// flow happen in Layer 2. `Unknown` satisfies R13.
247///
248/// Every variant carries a `span` (Spanned invariant).
249#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
250pub enum AstStatement {
251    /// `NULL;`
252    Null { span: Span },
253    /// `target := <rhs>;` — RHS kept as raw text for the IR
254    /// lowering layer to re-parse.
255    Assignment {
256        target: String,
257        rhs_text: String,
258        span: Span,
259    },
260    /// `IF … THEN … [ELSIF …] [ELSE …] END IF;` — the body
261    /// slices are raw text the IR layer recurses into.
262    If { cond_text: String, span: Span },
263    /// Any loop form (`LOOP` / `FOR … LOOP` / `WHILE … LOOP`).
264    Loop { header_text: String, span: Span },
265    /// `RAISE [exception];`
266    Raise {
267        exception: Option<String>,
268        span: Span,
269    },
270    /// `RETURN [expr];`
271    Return {
272        value_text: Option<String>,
273        span: Span,
274    },
275    /// `EXECUTE IMMEDIATE '<sql>' [USING …];`
276    ExecuteImmediate {
277        sql_text: String,
278        has_using: bool,
279        span: Span,
280    },
281    /// An embedded SQL DML statement (`SELECT`/`INSERT`/`UPDATE`/
282    /// `DELETE`/`MERGE`). `raw_text` is the verbatim statement source
283    /// slice so the IR layer can recover table/column read/write
284    /// dependencies. Empty when the backend could only classify the
285    /// verb.
286    Sql {
287        verb: String,
288        raw_text: String,
289        span: Span,
290    },
291    /// A procedure / function call statement.
292    Call { callee: String, span: Span },
293    /// A statement the backend could not classify (R13).
294    Unknown { span: Span },
295}
296
297impl Spanned for AstStatement {
298    fn span(&self) -> Span {
299        match self {
300            Self::Null { span }
301            | Self::Assignment { span, .. }
302            | Self::If { span, .. }
303            | Self::Loop { span, .. }
304            | Self::Raise { span, .. }
305            | Self::Return { span, .. }
306            | Self::ExecuteImmediate { span, .. }
307            | Self::Sql { span, .. }
308            | Self::Call { span, .. }
309            | Self::Unknown { span } => *span,
310        }
311    }
312}
313
314/// A PL/SQL expression node.
315///
316/// The **syntactic** expression projection — binary ops,
317/// function / procedure calls, cursor + attribute references,
318/// literals, bind / substitution placeholders. One step before
319/// `plsql_ir::Expr` (the semantic IR). `Unknown` satisfies R13.
320///
321/// Every variant carries a `span` (Spanned invariant).
322#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
323pub enum AstExpr {
324    /// A literal (number / string / `NULL` / `TRUE` / `FALSE`),
325    /// kept verbatim so the IR can re-classify precisely.
326    Literal { text: String, span: Span },
327    /// A dotted name reference (`a`, `pkg.fn`, `t.col%TYPE`,
328    /// `c%ROWTYPE`, `:new.id`).
329    Name { path: String, span: Span },
330    /// Bind placeholder (`:1`, `:name`).
331    Bind { name: String, span: Span },
332    /// Substitution variable (`&v`, `&&v`).
333    Substitution {
334        name: String,
335        sticky: bool,
336        span: Span,
337    },
338    /// A call `callee(<args-text>)` — args kept as raw text for
339    /// the IR layer to split + recurse.
340    Call {
341        callee: String,
342        args_text: String,
343        span: Span,
344    },
345    /// Binary op at the top level. Operand slices are raw text.
346    Binary {
347        op: String,
348        lhs_text: String,
349        rhs_text: String,
350        span: Span,
351    },
352    /// Unary op (`NOT` / `-` / `+`).
353    Unary {
354        op: String,
355        operand_text: String,
356        span: Span,
357    },
358    /// An expression the backend could not classify (R13).
359    Unknown { text: String, span: Span },
360}
361
362impl Spanned for AstExpr {
363    fn span(&self) -> Span {
364        match self {
365            Self::Literal { span, .. }
366            | Self::Name { span, .. }
367            | Self::Bind { span, .. }
368            | Self::Substitution { span, .. }
369            | Self::Call { span, .. }
370            | Self::Binary { span, .. }
371            | Self::Unary { span, .. }
372            | Self::Unknown { span, .. } => *span,
373        }
374    }
375}
376
377/// A type declaration.
378///
379/// The **syntactic** projection of `CREATE TYPE … AS OBJECT`,
380/// `TABLE OF` / `VARRAY` collection types, and PL/SQL
381/// `TYPE … IS RECORD` declarations. Attribute / element text is
382/// kept raw for the bindgen layer to resolve. `Unknown` satisfies
383/// R13; every variant is Spanned.
384#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
385pub enum AstTypeDecl {
386    /// `CREATE [OR REPLACE] TYPE <name> AS OBJECT ( … )`.
387    Object {
388        name: String,
389        attributes_text: String,
390        span: Span,
391    },
392    /// `… AS TABLE OF <elem>` (nested table) or
393    /// `… AS VARRAY(n) OF <elem>`.
394    Collection {
395        name: String,
396        element_text: String,
397        is_varray: bool,
398        span: Span,
399    },
400    /// `TYPE <name> IS RECORD ( … )` (PL/SQL record).
401    Record {
402        name: String,
403        fields_text: String,
404        span: Span,
405    },
406    /// A type declaration the backend could not classify (R13).
407    Unknown { text: String, span: Span },
408}
409
410impl Spanned for AstTypeDecl {
411    fn span(&self) -> Span {
412        match self {
413            Self::Object { span, .. }
414            | Self::Collection { span, .. }
415            | Self::Record { span, .. }
416            | Self::Unknown { span, .. } => *span,
417        }
418    }
419}
420
421/// The typed abstract syntax tree.
422///
423/// This is a **semantic** projection — it is NOT required to preserve
424/// whitespace, comments, or exact delimiter positions.  Pretty-printing
425/// from the AST produces *equivalent* but not *byte-identical* output.
426#[derive(Clone, Debug, Default, Serialize, Deserialize)]
427pub struct Ast {
428    /// The root source-file node.
429    pub root: SourceFile,
430    /// Source map for AST nodes (maps node IDs to spans).
431    pub source_map: SourceMap,
432    /// Body statements for each top-level declaration, in parallel
433    /// with `root.declarations`.  `body_statements[i]` is the lowered
434    /// body of `root.declarations[i]`.  Declarations with no body
435    /// (e.g., package specs, views, DDL) carry an empty inner vec.
436    /// Defaulting to empty so existing callers that produce an `Ast`
437    /// without body lowering remain valid (backward-compatible; R13).
438    #[serde(default)]
439    pub body_statements: Vec<Vec<AstStatement>>,
440}
441
442impl Ast {
443    /// Create a new empty AST.
444    #[must_use]
445    pub fn new() -> Self {
446        Self::default()
447    }
448}
449
450// ---------------------------------------------------------------------------
451// Tests
452// ---------------------------------------------------------------------------
453
#[cfg(test)]
mod tests {
    use super::*;
    use plsql_core::{FileId, Position};

    /// Test helper: a span in `FileId` 0 whose start and end positions are
    /// built from `offset` and `offset + len` respectively.
    fn span(offset: u32, len: u32) -> Span {
        Span::new(
            FileId::new(0),
            Position::new(1, 1, offset),
            Position::new(1, 1, offset + len),
        )
    }

    // -----------------------------------------------------------------------
    // SourceMap / tree construction
    // -----------------------------------------------------------------------

    #[test]
    fn source_map_insert_and_get() {
        let mut sm = SourceMap::new();
        let id = CstNodeId(42);
        let s = span(10, 5);
        sm.insert(id, s);
        assert_eq!(sm.get(id), Some(&s));
        assert_eq!(sm.get(CstNodeId(99)), None);
    }

    #[test]
    fn source_map_len() {
        let mut sm = SourceMap::new();
        assert!(sm.is_empty());
        sm.insert(CstNodeId(0), span(0, 1));
        sm.insert(CstNodeId(1), span(1, 1));
        assert_eq!(sm.len(), 2);
        assert!(!sm.is_empty());
    }

    #[test]
    fn cst_default_has_empty_source_map() {
        let cst = ConcreteSyntaxTree::new();
        assert!(cst.source_map.is_empty());
    }

    #[test]
    fn ast_default_has_empty_source_map() {
        let ast = Ast::new();
        assert!(ast.source_map.is_empty());
    }

    #[test]
    fn source_map_serializes_round_trip() {
        let mut sm = SourceMap::new();
        sm.insert(CstNodeId(1), span(0, 10));
        sm.insert(CstNodeId(5), span(20, 30));
        let json = serde_json::to_string(&sm).unwrap();
        let back: SourceMap = serde_json::from_str(&json).unwrap();
        assert_eq!(back.len(), 2);
        // Check every entry, not just the first, so a key/value mix-up in
        // the round trip cannot slip through.
        assert_eq!(back.get(CstNodeId(1)), Some(&span(0, 10)));
        assert_eq!(back.get(CstNodeId(5)), Some(&span(20, 30)));
    }

    // -----------------------------------------------------------------------
    // Spanned trait tests (PLSQL-PARSE-010)
    // -----------------------------------------------------------------------

    #[test]
    fn source_file_is_spanned() {
        let s = span(0, 100);
        let sf = SourceFile {
            span: s,
            declarations: Vec::new(),
        };
        assert_eq!(sf.span(), s);
    }

    #[test]
    fn ast_decl_all_variants_are_spanned() {
        let s = span(10, 20);
        let decls = [
            AstDecl::PackageSpec {
                name: "pkg".into(),
                span: s,
            },
            AstDecl::PackageBody {
                name: "pkg".into(),
                span: s,
            },
            AstDecl::Procedure {
                name: "p".into(),
                span: s,
            },
            AstDecl::Function {
                name: "f".into(),
                span: s,
            },
            AstDecl::Trigger {
                name: "t".into(),
                span: s,
            },
            AstDecl::View {
                name: "v".into(),
                span: s,
            },
            AstDecl::TypeSpec {
                name: "ty".into(),
                span: s,
            },
            AstDecl::TypeBody {
                name: "ty".into(),
                span: s,
            },
            AstDecl::Ddl {
                kind: "CREATE".into(),
                span: s,
                antlr_rule_path: None,
            },
            AstDecl::Unknown {
                span: s,
                antlr_rule_path: None,
            },
        ];

        for decl in &decls {
            assert_eq!(
                decl.span(),
                s,
                "Spanned::span() returned wrong span for {:?}",
                decl
            );
        }
    }

    #[test]
    fn ast_statement_all_variants_are_spanned() {
        let s = span(3, 7);
        let stmts = [
            AstStatement::Null { span: s },
            AstStatement::Assignment {
                target: "x".into(),
                rhs_text: "1".into(),
                span: s,
            },
            AstStatement::If {
                cond_text: "x > 0".into(),
                span: s,
            },
            AstStatement::Loop {
                header_text: "FOR i IN 1..10".into(),
                span: s,
            },
            AstStatement::Raise {
                exception: None,
                span: s,
            },
            AstStatement::Return {
                value_text: Some("x".into()),
                span: s,
            },
            AstStatement::ExecuteImmediate {
                sql_text: "SELECT 1 FROM dual".into(),
                has_using: false,
                span: s,
            },
            AstStatement::Sql {
                verb: "SELECT".into(),
                raw_text: String::new(),
                span: s,
            },
            AstStatement::Call {
                callee: "pkg.proc".into(),
                span: s,
            },
            AstStatement::Unknown { span: s },
        ];

        for stmt in &stmts {
            assert_eq!(
                stmt.span(),
                s,
                "Spanned::span() returned wrong span for {:?}",
                stmt
            );
        }
    }

    #[test]
    fn ast_expr_all_variants_are_spanned() {
        let s = span(5, 9);
        let exprs = [
            AstExpr::Literal {
                text: "42".into(),
                span: s,
            },
            AstExpr::Name {
                path: "pkg.fn".into(),
                span: s,
            },
            AstExpr::Bind {
                name: "id".into(),
                span: s,
            },
            AstExpr::Substitution {
                name: "v".into(),
                sticky: true,
                span: s,
            },
            AstExpr::Call {
                callee: "f".into(),
                args_text: "a, b".into(),
                span: s,
            },
            AstExpr::Binary {
                op: "+".into(),
                lhs_text: "a".into(),
                rhs_text: "b".into(),
                span: s,
            },
            AstExpr::Unary {
                op: "NOT".into(),
                operand_text: "a".into(),
                span: s,
            },
            AstExpr::Unknown {
                text: "?".into(),
                span: s,
            },
        ];

        for expr in &exprs {
            assert_eq!(
                expr.span(),
                s,
                "Spanned::span() returned wrong span for {:?}",
                expr
            );
        }
    }

    #[test]
    fn ast_type_decl_all_variants_are_spanned() {
        let s = span(1, 4);
        let type_decls = [
            AstTypeDecl::Object {
                name: "obj_t".into(),
                attributes_text: "id NUMBER".into(),
                span: s,
            },
            AstTypeDecl::Collection {
                name: "tab_t".into(),
                element_text: "NUMBER".into(),
                is_varray: false,
                span: s,
            },
            AstTypeDecl::Record {
                name: "rec_t".into(),
                fields_text: "id NUMBER".into(),
                span: s,
            },
            AstTypeDecl::Unknown {
                text: "?".into(),
                span: s,
            },
        ];

        for type_decl in &type_decls {
            assert_eq!(
                type_decl.span(),
                s,
                "Spanned::span() returned wrong span for {:?}",
                type_decl
            );
        }
    }

    #[test]
    fn spanned_trait_is_object_safe() {
        // Verify that Spanned can be used as a trait object
        fn take_spanned(node: &dyn Spanned) -> Span {
            node.span()
        }
        let s = span(0, 50);
        let sf = SourceFile {
            span: s,
            declarations: Vec::new(),
        };
        assert_eq!(take_spanned(&sf), s);
    }
}