plsql_parser/ast.rs
1//! Concrete syntax tree and abstract syntax tree types.
2//!
3//! These types define the public AST / CST surface for the parser frontend.
4//! Node hierarchies will be expanded over time, but the structural
5//! definitions — [`ConcreteSyntaxTree`], [`Ast`], [`TokenTape`], [`TriviaTable`]
6//! — are settled here.
7//!
8//! # Lossless vs lossy
9//!
10//! - [`ConcreteSyntaxTree`] is **lossless**: every delimiter, keyword, and
11//! trivia is represented with byte-offset spans. Round-tripping goes
12//! through the CST / token tape.
13//!
14//! - [`Ast`] is **lossy** (semantic): whitespace, comments, and exact
15//! delimiter positions are not preserved. Pretty-printing from the AST
16//! produces *equivalent* but not *byte-identical* output.
17//!
18//! # Spanned invariant
19//!
20//! Every AST node **MUST** carry a source [`Span`]. The [`Spanned`] trait
21//! formalises this requirement. All new AST node types must implement
22//! [`Spanned`]. This is enforced by code review, not by a compile-time
23//! lint (Rust's type system cannot express "every variant of an enum has
24//! a `span` field").
25
26use std::collections::BTreeMap;
27
28use plsql_core::Span;
29use serde::{Deserialize, Serialize};
30
31use crate::tokens::{TokenTape, TriviaTable};
32
33// ---------------------------------------------------------------------------
34// Spanned trait (PLSQL-PARSE-010)
35// ---------------------------------------------------------------------------
36
/// Every AST node must implement this trait.
///
/// The trait returns the node's source [`Span`] — the byte-offset range in the
/// original source file that this node corresponds to. This is a hard
/// requirement for provenance tracking (R12) and diagnostic quality (every
/// diagnostic has a non-empty `Span` pointing to the offending source range,
/// plan §7.6).
///
/// The trait has a single non-generic `&self` method, so it can be used as a
/// trait object (`&dyn Spanned`).
///
/// # Contract
///
/// - `span()` MUST return the tightest bounding span that covers all tokens
///   belonging to this node.
/// - For nodes that span multiple non-contiguous ranges (e.g., a package spec
///   with a separate body), `span()` returns the *primary* span (the spec
///   keyword range). Related spans are carried via `SpanLabel` in the
///   `Evidence` or `Diagnostic` types.
pub trait Spanned {
    /// The source span of this AST node.
    ///
    /// The span is returned by value; the implementations in this module
    /// copy the node's stored `span` field.
    fn span(&self) -> Span;
}
57
58// ---------------------------------------------------------------------------
59// CST node identifiers
60// ---------------------------------------------------------------------------
61
/// Opaque identifier for a node in the [`ConcreteSyntaxTree`].
///
/// These are backend-local indices; they are NOT stable across parse
/// invocations or backends.
///
/// `#[serde(transparent)]` makes the identifier serialize as its bare `u32`
/// rather than as a wrapper structure. The derived `Default` is
/// `CstNodeId(0)`.
#[derive(
    Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize,
)]
#[serde(transparent)]
pub struct CstNodeId(pub u32);
71
72// ---------------------------------------------------------------------------
73// SourceMap
74// ---------------------------------------------------------------------------
75
/// Maps [`CstNodeId`]s to their source [`Span`]s.
///
/// This is a side-table rather than embedding spans in every CST node, so
/// the node arena stays compact and span lookups are O(log n).
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct SourceMap {
    /// Spans keyed by the raw `u32` wrapped by each [`CstNodeId`].
    /// Private; the public API is the set of methods on `SourceMap`.
    inner: BTreeMap<u32, Span>,
}
84
85impl SourceMap {
86 #[must_use]
87 pub fn new() -> Self {
88 Self::default()
89 }
90
91 /// Record the span for a given CST node.
92 pub fn insert(&mut self, node: CstNodeId, span: Span) {
93 self.inner.insert(node.0, span);
94 }
95
96 /// Look up the span for a given CST node.
97 #[must_use]
98 pub fn get(&self, node: CstNodeId) -> Option<&Span> {
99 self.inner.get(&node.0)
100 }
101
102 /// Number of entries.
103 #[must_use]
104 pub fn len(&self) -> usize {
105 self.inner.len()
106 }
107
108 #[must_use]
109 pub fn is_empty(&self) -> bool {
110 self.inner.is_empty()
111 }
112}
113
114// ---------------------------------------------------------------------------
115// ConcreteSyntaxTree
116// ---------------------------------------------------------------------------
117
/// The lossless concrete syntax tree produced by a `ParseBackend`.
///
/// The CST preserves every token and trivia element with source spans.
/// Combined with the [`TokenTape`] and [`TriviaTable`], it supports
/// byte-for-byte source reconstruction.
///
/// The derived `Default` yields a tree whose `root` is `CstNodeId(0)` and
/// whose `source_map` is empty, so the root of a default tree has no
/// recorded span.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct ConcreteSyntaxTree {
    /// The root node of the CST.
    pub root: CstNodeId,
    /// The lossless token tape.
    pub token_tape: TokenTape,
    /// Trivia (whitespace, comments) associated with tokens.
    pub trivia: TriviaTable,
    /// Maps CST node IDs to source spans.
    pub source_map: SourceMap,
}
134
135impl ConcreteSyntaxTree {
136 /// Create a new empty CST.
137 #[must_use]
138 pub fn new() -> Self {
139 Self::default()
140 }
141
142 /// Reconstruct the original source text from the CST.
143 ///
144 /// This is the lossless round-trip operation.
145 #[must_use]
146 pub fn reconstruct(&self) -> String {
147 self.token_tape.reconstruct(&self.trivia)
148 }
149}
150
151// ---------------------------------------------------------------------------
152// SourceFile / Ast
153// ---------------------------------------------------------------------------
154
/// A single parsed source file (the root of the typed AST).
///
/// The `declarations` vector holds top-level PL/SQL declarations (packages,
/// procedures, functions, triggers, views, types, DDL statements) discovered
/// in the file. Every declaration carries a source span; the named variants
/// of [`AstDecl`] also carry a name, while `Ddl` carries a `kind` and
/// `Unknown` carries neither.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct SourceFile {
    /// Byte span covering the entire file.
    pub span: Span,
    /// Top-level declarations discovered in the file.
    pub declarations: Vec<AstDecl>,
}
167
168impl Spanned for SourceFile {
169 fn span(&self) -> Span {
170 self.span
171 }
172}
173
174/// A top-level PL/SQL declaration.
175///
176/// Variants cover the full set of top-level constructs the parser must
177/// recognize (plan §7.2). The `Unknown` variant satisfies R13 — no
178/// uncertainty is silently dropped.
179///
180/// **Every variant MUST carry a `span` field** (Spanned invariant).
181#[derive(Clone, Debug, Serialize, Deserialize)]
182pub enum AstDecl {
183 /// A PL/SQL package specification.
184 PackageSpec { name: String, span: Span },
185 /// A PL/SQL package body.
186 PackageBody { name: String, span: Span },
187 /// A standalone procedure.
188 Procedure { name: String, span: Span },
189 /// A standalone function.
190 Function { name: String, span: Span },
191 /// A trigger.
192 Trigger { name: String, span: Span },
193 /// A view.
194 View { name: String, span: Span },
195 /// A type specification.
196 TypeSpec { name: String, span: Span },
197 /// A type body.
198 TypeBody { name: String, span: Span },
199 /// A DDL statement (CREATE / ALTER / DROP / GRANT).
200 ///
201 /// `antlr_rule_path` is a bounded, `>`-joined path of ANTLR
202 /// *grammar rule names* (never source text or identifiers)
203 /// identifying the grammar position the DDL was recognised at.
204 /// `None` when the declaration did not originate from a real
205 /// ANTLR parse tree (e.g. the text-scanner fallback). It is a
206 /// plain `String`, so no ANTLR generated type crosses the crate
207 /// boundary (R20).
208 Ddl {
209 kind: String,
210 span: Span,
211 #[serde(default)]
212 antlr_rule_path: Option<String>,
213 },
214 /// A declaration the backend could not classify (R13).
215 ///
216 /// `antlr_rule_path` — see [`AstDecl::Ddl`].
217 Unknown {
218 span: Span,
219 #[serde(default)]
220 antlr_rule_path: Option<String>,
221 },
222}
223
224impl Spanned for AstDecl {
225 fn span(&self) -> Span {
226 match self {
227 Self::PackageSpec { span, .. }
228 | Self::PackageBody { span, .. }
229 | Self::Procedure { span, .. }
230 | Self::Function { span, .. }
231 | Self::Trigger { span, .. }
232 | Self::View { span, .. }
233 | Self::TypeSpec { span, .. }
234 | Self::TypeBody { span, .. }
235 | Self::Ddl { span, .. }
236 | Self::Unknown { span, .. } => *span,
237 }
238 }
239}
240
/// A statement inside a routine / anonymous block body.
///
/// This is the **syntactic** projection of a statement body — one
/// step before `plsql_ir::Statement` (the semantic IR). The
/// parser frontend only recognises the shape; name resolution +
/// flow happen in Layer 2. `Unknown` satisfies R13.
///
/// Every variant carries a `span` (Spanned invariant).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum AstStatement {
    /// `NULL;`
    Null { span: Span },
    /// `target := <rhs>;` — RHS kept as raw text for the IR
    /// lowering layer to re-parse.
    Assignment {
        target: String,
        rhs_text: String,
        span: Span,
    },
    /// `IF … THEN … [ELSIF …] [ELSE …] END IF;` — the body
    /// slices are raw text the IR layer recurses into.
    ///
    /// NOTE(review): this variant stores only `cond_text`; the branch
    /// bodies are not fields here — confirm where the IR layer obtains them.
    If { cond_text: String, span: Span },
    /// Any loop form (`LOOP` / `FOR … LOOP` / `WHILE … LOOP`).
    Loop { header_text: String, span: Span },
    /// `RAISE [exception];`
    ///
    /// `exception` is optional, matching the optional name in the syntax.
    Raise {
        exception: Option<String>,
        span: Span,
    },
    /// `RETURN [expr];`
    ///
    /// `value_text` is optional, matching the optional expression in the
    /// syntax.
    Return {
        value_text: Option<String>,
        span: Span,
    },
    /// `EXECUTE IMMEDIATE '<sql>' [USING …];`
    ExecuteImmediate {
        sql_text: String,
        has_using: bool,
        span: Span,
    },
    /// An embedded SQL DML statement (`SELECT`/`INSERT`/`UPDATE`/
    /// `DELETE`/`MERGE`). `raw_text` is the verbatim statement source
    /// slice so the IR layer can recover table/column read/write
    /// dependencies. Empty when the backend could only classify the
    /// verb.
    Sql {
        verb: String,
        raw_text: String,
        span: Span,
    },
    /// A procedure / function call statement.
    Call { callee: String, span: Span },
    /// A statement the backend could not classify (R13).
    Unknown { span: Span },
}
296
297impl Spanned for AstStatement {
298 fn span(&self) -> Span {
299 match self {
300 Self::Null { span }
301 | Self::Assignment { span, .. }
302 | Self::If { span, .. }
303 | Self::Loop { span, .. }
304 | Self::Raise { span, .. }
305 | Self::Return { span, .. }
306 | Self::ExecuteImmediate { span, .. }
307 | Self::Sql { span, .. }
308 | Self::Call { span, .. }
309 | Self::Unknown { span } => *span,
310 }
311 }
312}
313
/// A PL/SQL expression node.
///
/// The **syntactic** expression projection — binary ops,
/// function / procedure calls, cursor + attribute references,
/// literals, bind / substitution placeholders. One step before
/// `plsql_ir::Expr` (the semantic IR). `Unknown` satisfies R13.
///
/// Every variant carries a `span` (Spanned invariant).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum AstExpr {
    /// A literal (number / string / `NULL` / `TRUE` / `FALSE`),
    /// kept verbatim so the IR can re-classify precisely.
    Literal { text: String, span: Span },
    /// A dotted name reference (`a`, `pkg.fn`, `t.col%TYPE`,
    /// `c%ROWTYPE`, `:new.id`).
    Name { path: String, span: Span },
    /// Bind placeholder (`:1`, `:name`).
    Bind { name: String, span: Span },
    /// Substitution variable (`&v`, `&&v`).
    ///
    /// NOTE(review): `sticky` presumably distinguishes the `&&v` form from
    /// `&v` — confirm against the code that constructs this variant.
    Substitution {
        name: String,
        sticky: bool,
        span: Span,
    },
    /// A call `callee(<args-text>)` — args kept as raw text for
    /// the IR layer to split + recurse.
    Call {
        callee: String,
        args_text: String,
        span: Span,
    },
    /// Binary op at the top level. Operand slices are raw text.
    Binary {
        op: String,
        lhs_text: String,
        rhs_text: String,
        span: Span,
    },
    /// Unary op (`NOT` / `-` / `+`). The operand is kept as raw text.
    Unary {
        op: String,
        operand_text: String,
        span: Span,
    },
    /// An expression the backend could not classify (R13).
    Unknown { text: String, span: Span },
}
361
362impl Spanned for AstExpr {
363 fn span(&self) -> Span {
364 match self {
365 Self::Literal { span, .. }
366 | Self::Name { span, .. }
367 | Self::Bind { span, .. }
368 | Self::Substitution { span, .. }
369 | Self::Call { span, .. }
370 | Self::Binary { span, .. }
371 | Self::Unary { span, .. }
372 | Self::Unknown { span, .. } => *span,
373 }
374 }
375}
376
/// A type declaration.
///
/// The **syntactic** projection of `CREATE TYPE … AS OBJECT`,
/// `TABLE OF` / `VARRAY` collection types, and PL/SQL
/// `TYPE … IS RECORD` declarations. Attribute / element text is
/// kept raw for the bindgen layer to resolve. `Unknown` satisfies
/// R13; every variant is Spanned.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum AstTypeDecl {
    /// `CREATE [OR REPLACE] TYPE <name> AS OBJECT ( … )`.
    ///
    /// `attributes_text` holds the attribute list as raw text.
    Object {
        name: String,
        attributes_text: String,
        span: Span,
    },
    /// `… AS TABLE OF <elem>` (nested table) or
    /// `… AS VARRAY(n) OF <elem>`.
    ///
    /// `is_varray` tells the two forms apart; `element_text` holds the
    /// element type as raw text.
    Collection {
        name: String,
        element_text: String,
        is_varray: bool,
        span: Span,
    },
    /// `TYPE <name> IS RECORD ( … )` (PL/SQL record).
    ///
    /// `fields_text` holds the field list as raw text.
    Record {
        name: String,
        fields_text: String,
        span: Span,
    },
    /// A type declaration the backend could not classify (R13).
    Unknown { text: String, span: Span },
}
409
410impl Spanned for AstTypeDecl {
411 fn span(&self) -> Span {
412 match self {
413 Self::Object { span, .. }
414 | Self::Collection { span, .. }
415 | Self::Record { span, .. }
416 | Self::Unknown { span, .. } => *span,
417 }
418 }
419}
420
/// The typed abstract syntax tree.
///
/// This is a **semantic** projection — it is NOT required to preserve
/// whitespace, comments, or exact delimiter positions. Pretty-printing
/// from the AST produces *equivalent* but not *byte-identical* output.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct Ast {
    /// The root source-file node.
    pub root: SourceFile,
    /// Source map for AST nodes (maps node IDs to spans).
    /// The map is keyed by [`CstNodeId`].
    pub source_map: SourceMap,
    /// Body statements for each top-level declaration, in parallel
    /// with `root.declarations`. `body_statements[i]` is the lowered
    /// body of `root.declarations[i]`. Declarations with no body
    /// (e.g., package specs, views, DDL) carry an empty inner vec.
    /// Defaulting to empty so existing callers that produce an `Ast`
    /// without body lowering remain valid (backward-compatible; R13).
    ///
    /// Because of that default, this vector can be empty (and therefore
    /// shorter than `root.declarations`) when no body lowering was done.
    /// NOTE(review): consumers should look bodies up with checked indexing
    /// rather than assuming the two vectors have equal length.
    #[serde(default)]
    pub body_statements: Vec<Vec<AstStatement>>,
}
441
442impl Ast {
443 /// Create a new empty AST.
444 #[must_use]
445 pub fn new() -> Self {
446 Self::default()
447 }
448}
449
450// ---------------------------------------------------------------------------
451// Tests
452// ---------------------------------------------------------------------------
453
#[cfg(test)]
mod tests {
    use super::*;
    use plsql_core::{FileId, Position};

    /// Test fixture: a span in file 0 whose start and end positions differ
    /// only in the third `Position::new` argument (`offset` versus
    /// `offset + len`).
    fn span(offset: u32, len: u32) -> Span {
        Span::new(
            FileId::new(0),
            Position::new(1, 1, offset),
            Position::new(1, 1, offset + len),
        )
    }

    /// Assert that every node in `nodes` reports `expected` through
    /// [`Spanned::span`], naming the offending node on failure.
    fn assert_all_spanned<T: Spanned + std::fmt::Debug>(nodes: &[T], expected: Span) {
        for node in nodes {
            assert_eq!(
                node.span(),
                expected,
                "Spanned::span() returned wrong span for {:?}",
                node
            );
        }
    }

    #[test]
    fn source_map_insert_and_get() {
        let mut sm = SourceMap::new();
        let id = CstNodeId(42);
        let s = span(10, 5);
        sm.insert(id, s);
        assert_eq!(sm.get(id), Some(&s));
        assert_eq!(sm.get(CstNodeId(99)), None);
    }

    #[test]
    fn source_map_len() {
        let mut sm = SourceMap::new();
        assert!(sm.is_empty());
        sm.insert(CstNodeId(0), span(0, 1));
        sm.insert(CstNodeId(1), span(1, 1));
        assert_eq!(sm.len(), 2);
        assert!(!sm.is_empty());
    }

    #[test]
    fn cst_default_has_empty_source_map() {
        let cst = ConcreteSyntaxTree::new();
        assert!(cst.source_map.is_empty());
    }

    #[test]
    fn ast_default_has_empty_source_map() {
        let ast = Ast::new();
        assert!(ast.source_map.is_empty());
    }

    #[test]
    fn source_map_serializes_round_trip() {
        let mut sm = SourceMap::new();
        sm.insert(CstNodeId(1), span(0, 10));
        sm.insert(CstNodeId(5), span(20, 30));
        let json = serde_json::to_string(&sm).unwrap();
        let back: SourceMap = serde_json::from_str(&json).unwrap();
        assert_eq!(back.len(), 2);
        assert_eq!(back.get(CstNodeId(1)), Some(&span(0, 10)));
    }

    /// `body_statements` is `#[serde(default)]`: input that predates the
    /// field (and so omits it) must still deserialize, with an empty vector.
    #[test]
    fn ast_deserializes_without_body_statements() {
        let ast = Ast {
            root: SourceFile {
                span: span(0, 10),
                declarations: Vec::new(),
            },
            ..Ast::new()
        };
        let mut value = serde_json::to_value(&ast).unwrap();
        let removed = value
            .as_object_mut()
            .expect("Ast serializes as a JSON object")
            .remove("body_statements");
        assert!(removed.is_some(), "field should be present before removal");

        let back: Ast = serde_json::from_value(value).unwrap();
        assert!(back.body_statements.is_empty());
        assert_eq!(back.root.span(), span(0, 10));
    }

    // -----------------------------------------------------------------------
    // Spanned trait tests (PLSQL-PARSE-010)
    // -----------------------------------------------------------------------

    #[test]
    fn source_file_is_spanned() {
        let s = span(0, 100);
        let sf = SourceFile {
            span: s,
            declarations: Vec::new(),
        };
        assert_eq!(sf.span(), s);
    }

    #[test]
    fn ast_decl_all_variants_are_spanned() {
        let s = span(10, 20);
        let decls = vec![
            AstDecl::PackageSpec {
                name: "pkg".into(),
                span: s,
            },
            AstDecl::PackageBody {
                name: "pkg".into(),
                span: s,
            },
            AstDecl::Procedure {
                name: "p".into(),
                span: s,
            },
            AstDecl::Function {
                name: "f".into(),
                span: s,
            },
            AstDecl::Trigger {
                name: "t".into(),
                span: s,
            },
            AstDecl::View {
                name: "v".into(),
                span: s,
            },
            AstDecl::TypeSpec {
                name: "ty".into(),
                span: s,
            },
            AstDecl::TypeBody {
                name: "ty".into(),
                span: s,
            },
            AstDecl::Ddl {
                kind: "CREATE".into(),
                span: s,
                antlr_rule_path: None,
            },
            AstDecl::Unknown {
                span: s,
                antlr_rule_path: None,
            },
        ];

        assert_all_spanned(&decls, s);
    }

    #[test]
    fn ast_statement_all_variants_are_spanned() {
        let s = span(5, 15);
        let statements = vec![
            AstStatement::Null { span: s },
            AstStatement::Assignment {
                target: "x".into(),
                rhs_text: "1".into(),
                span: s,
            },
            AstStatement::If {
                cond_text: "x > 0".into(),
                span: s,
            },
            AstStatement::Loop {
                header_text: "FOR i IN 1..10".into(),
                span: s,
            },
            AstStatement::Raise {
                exception: None,
                span: s,
            },
            AstStatement::Return {
                value_text: Some("x".into()),
                span: s,
            },
            AstStatement::ExecuteImmediate {
                sql_text: "SELECT 1 FROM dual".into(),
                has_using: false,
                span: s,
            },
            AstStatement::Sql {
                verb: "SELECT".into(),
                raw_text: String::new(),
                span: s,
            },
            AstStatement::Call {
                callee: "pkg.proc".into(),
                span: s,
            },
            AstStatement::Unknown { span: s },
        ];

        assert_all_spanned(&statements, s);
    }

    #[test]
    fn ast_expr_all_variants_are_spanned() {
        let s = span(3, 7);
        let exprs = vec![
            AstExpr::Literal {
                text: "42".into(),
                span: s,
            },
            AstExpr::Name {
                path: "pkg.fn".into(),
                span: s,
            },
            AstExpr::Bind {
                name: "1".into(),
                span: s,
            },
            AstExpr::Substitution {
                name: "v".into(),
                sticky: false,
                span: s,
            },
            AstExpr::Call {
                callee: "f".into(),
                args_text: "a, b".into(),
                span: s,
            },
            AstExpr::Binary {
                op: "+".into(),
                lhs_text: "a".into(),
                rhs_text: "b".into(),
                span: s,
            },
            AstExpr::Unary {
                op: "NOT".into(),
                operand_text: "a".into(),
                span: s,
            },
            AstExpr::Unknown {
                text: "?".into(),
                span: s,
            },
        ];

        assert_all_spanned(&exprs, s);
    }

    #[test]
    fn ast_type_decl_all_variants_are_spanned() {
        let s = span(0, 40);
        let type_decls = vec![
            AstTypeDecl::Object {
                name: "obj_t".into(),
                attributes_text: "id NUMBER".into(),
                span: s,
            },
            AstTypeDecl::Collection {
                name: "tab_t".into(),
                element_text: "NUMBER".into(),
                is_varray: false,
                span: s,
            },
            AstTypeDecl::Record {
                name: "rec_t".into(),
                fields_text: "id NUMBER".into(),
                span: s,
            },
            AstTypeDecl::Unknown {
                text: "?".into(),
                span: s,
            },
        ];

        assert_all_spanned(&type_decls, s);
    }

    #[test]
    fn spanned_trait_is_object_safe() {
        // Verify that Spanned can be used as a trait object
        fn take_spanned(node: &dyn Spanned) -> Span {
            node.span()
        }
        let s = span(0, 50);
        let sf = SourceFile {
            span: s,
            declarations: Vec::new(),
        };
        assert_eq!(take_spanned(&sf), s);
    }
}
593}