Skip to main content

plsql_parser/
lib.rs

1#![forbid(unsafe_code)]
2
3//! PL/SQL parser frontend.
4//!
5//! This crate defines the backend-independent parsing API that all downstream
6//! crates consume.  No ANTLR-generated types or grammar rule names escape this
7//! boundary (R2 / R20).
8//!
9//! # Design
10//!
11//! A [`ParseBackend`] implementation converts raw source text into a
12//! [`BackendParseResult`] containing the lossless **token tape**, a **CST**
13//! (concrete syntax tree), and a typed **AST** (abstract syntax tree).
14//!
15//! The public [`parse_file`] / [`parse_with_backend`] functions wrap
16//! [`BackendParseResult`] into a [`ParseResult`] that pairs the output with
17//! the originating [`FileId`].
18//!
19//! # Lossless contract
20//!
21//! The token tape is the source of truth for round-tripping.  Every token and
22//! trivia element carries a byte-offset span.  The AST is a *semantic*
23//! projection — it is NOT required to preserve whitespace or comments.
24
25pub mod ast;
26pub mod dialect;
27pub mod tokens;
28pub mod visit;
29
30use plsql_core::{Diagnostic, FileId};
31use serde::{Deserialize, Serialize};
32use tracing::instrument;
33
34pub use dialect::{
35    UNSUPPORTED_DIALECT_FEATURE_CODE, unsupported_dialect_feature_diagnostic,
36    unsupported_dialect_feature_remediation,
37};
38
39pub use ast::{
40    Ast, AstDecl, AstExpr, AstStatement, AstTypeDecl, ConcreteSyntaxTree, CstNodeId, SourceFile,
41    SourceMap, Spanned,
42};
43pub use tokens::{Token, TokenKind, TokenTape, Trivia, TriviaTable};
44
45// ---------------------------------------------------------------------------
46// ParseOptions
47// ---------------------------------------------------------------------------
48
49/// Configuration knobs passed to every parse invocation.
50#[derive(Clone, Debug, Serialize, Deserialize)]
51pub struct ParseOptions {
52    /// Which Oracle version to target (affects feature-gating in later passes).
53    pub oracle_version: OracleTargetVersion,
54    /// Whether the backend should attempt error recovery on syntax errors.
55    pub recovery: RecoveryMode,
56}
57
58impl Default for ParseOptions {
59    fn default() -> Self {
60        Self {
61            oracle_version: OracleTargetVersion::Oracle19c,
62            recovery: RecoveryMode::RecoverAtStatementBoundary,
63        }
64    }
65}
66
67/// Simplified Oracle version targeting for the parser.
68///
69/// This is intentionally *not* the same as `plsql_core::OracleVersion` — the
70/// parser uses a smaller enum that only covers what the grammar supports.
71/// Full version/feature policy lives in `AnalysisProfile`.
72#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Hash, Serialize, Deserialize)]
73pub enum OracleTargetVersion {
74    Oracle11g,
75    Oracle12c,
76    #[default]
77    Oracle19c,
78    Oracle21c,
79    Oracle23ai,
80    Oracle26ai,
81}
82
83/// Error-recovery strategy.
84#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Hash, Serialize, Deserialize)]
85pub enum RecoveryMode {
86    /// Stop at the first syntax error.
87    FailFast,
88    /// Skip to the next statement boundary (`;` or `/`) and continue.
89    #[default]
90    RecoverAtStatementBoundary,
91    /// Aggressively recover at any plausible boundary (for corpus fuzzing).
92    AggressiveRecovery,
93}
94
95// ---------------------------------------------------------------------------
96// ParseMetrics
97// ---------------------------------------------------------------------------
98
99/// Observability counters emitted alongside every parse result.
100#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
101pub struct ParseMetrics {
102    /// Total tokens produced by the lexer.
103    pub total_tokens: u64,
104    /// Number of trivia elements (whitespace, comments) captured.
105    pub trivia_count: u64,
106    /// Number of diagnostics emitted.
107    pub diagnostic_count: u64,
108    /// Number of recovery sites used (0 for a clean parse).
109    pub recovery_count: u64,
110    /// Number of bytes in the original source.
111    pub source_bytes: u64,
112}
113
114// ---------------------------------------------------------------------------
115// BackendParseResult
116// ---------------------------------------------------------------------------
117
118/// Raw output from a [`ParseBackend`] implementation.
119///
120/// This is the backend's *internal* result type.  The public API wraps it in
121/// [`ParseResult`], which adds the originating `FileId`.
122#[derive(Debug)]
123pub struct BackendParseResult {
124    /// The lossless concrete syntax tree.
125    pub cst: ConcreteSyntaxTree,
126    /// The typed abstract syntax tree (semantic projection).
127    pub ast: Ast,
128    /// Diagnostics emitted during lexing and parsing.
129    pub diagnostics: Vec<Diagnostic>,
130    /// Observability counters.
131    pub metrics: ParseMetrics,
132    /// `true` if error recovery was used at least once.
133    pub recovered: bool,
134}
135
136// ---------------------------------------------------------------------------
137// ParseResult
138// ---------------------------------------------------------------------------
139
140/// Public-facing parse result, paired with the file that produced it.
141#[derive(Debug)]
142pub struct ParseResult {
143    /// Which file this result came from.
144    pub file_id: FileId,
145    /// The lossless concrete syntax tree.
146    pub cst: ConcreteSyntaxTree,
147    /// The typed abstract syntax tree.
148    pub ast: Ast,
149    /// Diagnostics emitted during lexing and parsing.
150    pub diagnostics: Vec<Diagnostic>,
151    /// Observability counters.
152    pub metrics: ParseMetrics,
153    /// `true` if error recovery was used at least once.
154    pub recovered: bool,
155}
156
157impl ParseResult {
158    /// Returns `true` if the parse completed without any diagnostics at
159    /// [`Severity::Error`](plsql_core::Severity::Error) or above.
160    #[must_use]
161    #[instrument(level = "trace", skip(self))]
162    pub fn is_clean(&self) -> bool {
163        !self
164            .diagnostics
165            .iter()
166            .any(|d| d.severity >= plsql_core::Severity::Error)
167    }
168
169    /// Returns `true` if error recovery was used.
170    #[must_use]
171    #[instrument(level = "trace", skip(self))]
172    pub fn was_recovered(&self) -> bool {
173        self.recovered
174    }
175}
176
177// ---------------------------------------------------------------------------
178// ParseBackend trait
179// ---------------------------------------------------------------------------
180
181/// Backend-independent parser interface (R2 / R20).
182///
183/// Parser backends implement this trait behind the R20 isolation boundary.
184/// Backend-internal types (ANTLR parse trees, grammar rule names) are strictly
185/// private to the implementing crate.
186///
187/// The conformance test suite in `tests/conformance.rs` validates that all
188/// backends behave identically on a canonical fixture set.
189pub trait ParseBackend: Send + Sync {
190    /// Human-readable backend name (e.g. `"antlr4rust"`).
191    fn name(&self) -> &'static str;
192
193    /// Parse the given source text and return a [`BackendParseResult`].
194    ///
195    /// # Contract
196    ///
197    /// - MUST NOT panic on any input (adversarial or otherwise).
198    /// - MUST populate `cst.token_tape` such that `reconstruct(tape) == input`
199    ///   byte-for-byte (the lossless round-trip property).
200    /// - MUST emit at least one diagnostic per syntax error encountered.
201    /// - MUST set `recovered = true` if recovery was used.
202    fn parse(&self, input: &str, file_id: FileId, opts: &ParseOptions) -> BackendParseResult;
203}
204
205// ---------------------------------------------------------------------------
206// Public convenience functions
207// ---------------------------------------------------------------------------
208
209/// Parse a single file with the given backend and options.
210#[instrument(level = "debug", skip(backend, opts))]
211pub fn parse_with_backend<B: ParseBackend>(
212    input: &str,
213    file_id: FileId,
214    backend: &B,
215    opts: &ParseOptions,
216) -> ParseResult {
217    let span = tracing::info_span!("parse_with_backend", backend = backend.name());
218    let _enter = span.enter();
219
220    let backend_result = backend.parse(input, file_id, opts);
221
222    ParseResult {
223        file_id,
224        cst: backend_result.cst,
225        ast: backend_result.ast,
226        diagnostics: backend_result.diagnostics,
227        metrics: backend_result.metrics,
228        recovered: backend_result.recovered,
229    }
230}
231
232/// Parse a single file with the given backend, using default [`ParseOptions`].
233///
234/// This is a thin convenience wrapper over [`parse_with_backend`] for the
235/// common case where the caller does not need to customize parse options
236/// (Oracle 19c target, statement-boundary recovery).
237///
238/// A backend is supplied explicitly: this crate is the *backend-independent*
239/// parsing surface (R2 / R20) and intentionally has no knowledge of any
240/// concrete backend. Callers that need a zero-configuration entry point
241/// construct their chosen backend once and pass it here.
242///
243/// ```
244/// # use plsql_parser::{parse_file, ParseBackend, BackendParseResult,
245/// #     ParseOptions, ParseMetrics, Ast, ConcreteSyntaxTree};
246/// # use plsql_core::FileId;
247/// # struct MyBackend;
248/// # impl ParseBackend for MyBackend {
249/// #     fn name(&self) -> &'static str { "doc" }
250/// #     fn parse(&self, _i: &str, _f: FileId, _o: &ParseOptions) -> BackendParseResult {
251/// #         BackendParseResult {
252/// #             cst: ConcreteSyntaxTree::new(), ast: Ast::new(),
253/// #             diagnostics: Vec::new(), metrics: ParseMetrics::default(),
254/// #             recovered: false,
255/// #         }
256/// #     }
257/// # }
258/// let result = parse_file("BEGIN NULL; END;", FileId::new(1), &MyBackend);
259/// assert!(result.is_clean());
260/// ```
261#[instrument(level = "debug", skip(backend))]
262pub fn parse_file<B: ParseBackend>(input: &str, file_id: FileId, backend: &B) -> ParseResult {
263    parse_with_backend(input, file_id, backend, &ParseOptions::default())
264}
265
266// ---------------------------------------------------------------------------
267// Tests
268// ---------------------------------------------------------------------------
269
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Mutex;

    /// Asserts that `opts` carries the documented defaults: Oracle 19c and
    /// statement-boundary recovery.
    #[track_caller]
    fn assert_default_options(opts: &ParseOptions) {
        assert_eq!(opts.oracle_version, OracleTargetVersion::Oracle19c);
        assert_eq!(opts.recovery, RecoveryMode::RecoverAtStatementBoundary);
    }

    #[test]
    fn parse_options_default_is_19c_with_recovery() {
        assert_default_options(&ParseOptions::default());
    }

    #[test]
    fn parse_options_round_trips_through_json() {
        let encoded = serde_json::to_string(&ParseOptions::default()).unwrap();
        let decoded: ParseOptions = serde_json::from_str(&encoded).unwrap();
        assert_default_options(&decoded);
    }

    #[test]
    fn parse_metrics_default_is_zero() {
        let ParseMetrics {
            total_tokens,
            trivia_count,
            diagnostic_count,
            recovery_count,
            source_bytes,
        } = ParseMetrics::default();

        assert_eq!(total_tokens, 0);
        assert_eq!(trivia_count, 0);
        assert_eq!(diagnostic_count, 0);
        assert_eq!(recovery_count, 0);
        assert_eq!(source_bytes, 0);
    }

    // -----------------------------------------------------------------
    // parse_file — convenience entry point over an explicit backend
    // -----------------------------------------------------------------

    /// Minimal in-test [`ParseBackend`] that logs every [`ParseOptions`] it
    /// receives, so tests can check what `parse_file` actually forwards.
    ///
    /// Each parse returns an empty CST and AST, no diagnostics, and metrics
    /// whose only populated counter is `source_bytes`.
    #[derive(Default)]
    struct RecordingBackend {
        seen_opts: Mutex<Vec<ParseOptions>>,
    }

    impl RecordingBackend {
        fn new() -> Self {
            Self::default()
        }

        /// Snapshot of the options recorded so far, in call order.
        fn recorded(&self) -> Vec<ParseOptions> {
            self.seen_opts.lock().expect("opts mutex poisoned").clone()
        }
    }

    impl ParseBackend for RecordingBackend {
        fn name(&self) -> &'static str {
            "recording"
        }

        fn parse(&self, input: &str, _file_id: FileId, opts: &ParseOptions) -> BackendParseResult {
            {
                let mut log = self.seen_opts.lock().expect("opts mutex poisoned");
                log.push(opts.clone());
            }

            let metrics = ParseMetrics {
                source_bytes: input.len() as u64,
                ..ParseMetrics::default()
            };

            BackendParseResult {
                cst: ConcreteSyntaxTree::new(),
                ast: Ast::new(),
                diagnostics: Vec::new(),
                metrics,
                recovered: false,
            }
        }
    }

    #[test]
    fn parse_file_forwards_default_parse_options() {
        let backend = RecordingBackend::new();
        let _ = parse_file("BEGIN NULL; END;", FileId::new(1), &backend);

        let calls = backend.recorded();
        assert_eq!(calls.len(), 1, "backend must be invoked exactly once");
        assert_default_options(&calls[0]);
    }

    #[test]
    fn parse_file_pairs_result_with_its_file_id() {
        let expected_id = FileId::new(42);
        let outcome = parse_file("SELECT 1 FROM dual;", FileId::new(42), &RecordingBackend::new());
        assert_eq!(outcome.file_id, expected_id);
    }

    #[test]
    fn parse_file_propagates_backend_metrics() {
        let source = "CREATE PACKAGE p IS END;";
        let outcome = parse_file(source, FileId::new(7), &RecordingBackend::new());

        assert_eq!(outcome.metrics.source_bytes, source.len() as u64);
        assert!(
            outcome.is_clean(),
            "a clean recording parse carries no error diagnostics"
        );
        assert!(!outcome.was_recovered());
    }

    #[test]
    fn parse_file_handles_empty_input_without_panicking() {
        let outcome = parse_file("", FileId::new(0), &RecordingBackend::new());

        assert_eq!(outcome.metrics.source_bytes, 0);
        assert_eq!(outcome.file_id, FileId::new(0));
    }
}