plsql_parser/lib.rs
1#![forbid(unsafe_code)]
2
3//! PL/SQL parser frontend.
4//!
5//! This crate defines the backend-independent parsing API that all downstream
6//! crates consume. No ANTLR-generated types or grammar rule names escape this
7//! boundary (R2 / R20).
8//!
9//! # Design
10//!
11//! A [`ParseBackend`] implementation converts raw source text into a
12//! [`BackendParseResult`] containing the lossless **token tape**, a **CST**
13//! (concrete syntax tree), and a typed **AST** (abstract syntax tree).
14//!
15//! The public [`parse_file`] / [`parse_with_backend`] functions wrap
16//! [`BackendParseResult`] into a [`ParseResult`] that pairs the output with
17//! the originating [`FileId`].
18//!
19//! # Lossless contract
20//!
21//! The token tape is the source of truth for round-tripping. Every token and
22//! trivia element carries a byte-offset span. The AST is a *semantic*
23//! projection — it is NOT required to preserve whitespace or comments.
24
25pub mod ast;
26pub mod dialect;
27pub mod tokens;
28pub mod visit;
29
30use plsql_core::{Diagnostic, FileId};
31use serde::{Deserialize, Serialize};
32use tracing::instrument;
33
34pub use dialect::{
35 UNSUPPORTED_DIALECT_FEATURE_CODE, unsupported_dialect_feature_diagnostic,
36 unsupported_dialect_feature_remediation,
37};
38
39pub use ast::{
40 Ast, AstDecl, AstExpr, AstStatement, AstTypeDecl, ConcreteSyntaxTree, CstNodeId, SourceFile,
41 SourceMap, Spanned,
42};
43pub use tokens::{Token, TokenKind, TokenTape, Trivia, TriviaTable};
44
45// ---------------------------------------------------------------------------
46// ParseOptions
47// ---------------------------------------------------------------------------
48
49/// Configuration knobs passed to every parse invocation.
50#[derive(Clone, Debug, Serialize, Deserialize)]
51pub struct ParseOptions {
52 /// Which Oracle version to target (affects feature-gating in later passes).
53 pub oracle_version: OracleTargetVersion,
54 /// Whether the backend should attempt error recovery on syntax errors.
55 pub recovery: RecoveryMode,
56}
57
58impl Default for ParseOptions {
59 fn default() -> Self {
60 Self {
61 oracle_version: OracleTargetVersion::Oracle19c,
62 recovery: RecoveryMode::RecoverAtStatementBoundary,
63 }
64 }
65}
66
67/// Simplified Oracle version targeting for the parser.
68///
69/// This is intentionally *not* the same as `plsql_core::OracleVersion` — the
70/// parser uses a smaller enum that only covers what the grammar supports.
71/// Full version/feature policy lives in `AnalysisProfile`.
72#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Hash, Serialize, Deserialize)]
73pub enum OracleTargetVersion {
74 Oracle11g,
75 Oracle12c,
76 #[default]
77 Oracle19c,
78 Oracle21c,
79 Oracle23ai,
80 Oracle26ai,
81}
82
83/// Error-recovery strategy.
84#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Hash, Serialize, Deserialize)]
85pub enum RecoveryMode {
86 /// Stop at the first syntax error.
87 FailFast,
88 /// Skip to the next statement boundary (`;` or `/`) and continue.
89 #[default]
90 RecoverAtStatementBoundary,
91 /// Aggressively recover at any plausible boundary (for corpus fuzzing).
92 AggressiveRecovery,
93}
94
95// ---------------------------------------------------------------------------
96// ParseMetrics
97// ---------------------------------------------------------------------------
98
99/// Observability counters emitted alongside every parse result.
100#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
101pub struct ParseMetrics {
102 /// Total tokens produced by the lexer.
103 pub total_tokens: u64,
104 /// Number of trivia elements (whitespace, comments) captured.
105 pub trivia_count: u64,
106 /// Number of diagnostics emitted.
107 pub diagnostic_count: u64,
108 /// Number of recovery sites used (0 for a clean parse).
109 pub recovery_count: u64,
110 /// Number of bytes in the original source.
111 pub source_bytes: u64,
112}
113
114// ---------------------------------------------------------------------------
115// BackendParseResult
116// ---------------------------------------------------------------------------
117
/// Raw output from a [`ParseBackend`] implementation.
///
/// This is the backend's *internal* result type. The public API wraps it in
/// [`ParseResult`], which carries the same five fields plus the originating
/// `FileId`.
#[derive(Debug)]
pub struct BackendParseResult {
    /// The lossless concrete syntax tree.
    pub cst: ConcreteSyntaxTree,
    /// The typed abstract syntax tree (semantic projection).
    pub ast: Ast,
    /// Diagnostics emitted during lexing and parsing.
    pub diagnostics: Vec<Diagnostic>,
    /// Observability counters.
    pub metrics: ParseMetrics,
    /// `true` if error recovery was used at least once.
    ///
    // NOTE(review): presumably expected to agree with
    // `metrics.recovery_count > 0`; nothing here enforces it — confirm
    // backends keep the two in sync.
    pub recovered: bool,
}
135
136// ---------------------------------------------------------------------------
137// ParseResult
138// ---------------------------------------------------------------------------
139
/// Public-facing parse result, paired with the file that produced it.
///
/// Field-for-field this mirrors [`BackendParseResult`], with `file_id` added
/// so downstream consumers can tell which file the output belongs to.
#[derive(Debug)]
pub struct ParseResult {
    /// Which file this result came from.
    pub file_id: FileId,
    /// The lossless concrete syntax tree.
    pub cst: ConcreteSyntaxTree,
    /// The typed abstract syntax tree.
    pub ast: Ast,
    /// Diagnostics emitted during lexing and parsing.
    pub diagnostics: Vec<Diagnostic>,
    /// Observability counters.
    pub metrics: ParseMetrics,
    /// `true` if error recovery was used at least once.
    /// Also exposed through [`ParseResult::was_recovered`].
    pub recovered: bool,
}
156
157impl ParseResult {
158 /// Returns `true` if the parse completed without any diagnostics at
159 /// [`Severity::Error`](plsql_core::Severity::Error) or above.
160 #[must_use]
161 #[instrument(level = "trace", skip(self))]
162 pub fn is_clean(&self) -> bool {
163 !self
164 .diagnostics
165 .iter()
166 .any(|d| d.severity >= plsql_core::Severity::Error)
167 }
168
169 /// Returns `true` if error recovery was used.
170 #[must_use]
171 #[instrument(level = "trace", skip(self))]
172 pub fn was_recovered(&self) -> bool {
173 self.recovered
174 }
175}
176
177// ---------------------------------------------------------------------------
178// ParseBackend trait
179// ---------------------------------------------------------------------------
180
/// Backend-independent parser interface (R2 / R20).
///
/// Parser backends implement this trait behind the R20 isolation boundary.
/// Backend-internal types (ANTLR parse trees, grammar rule names) are strictly
/// private to the implementing crate.
///
/// The `Send + Sync` supertraits mean a backend value can be shared between
/// threads; `parse` takes `&self`, so implementations needing mutable state
/// must use interior mutability.
///
/// The conformance test suite in `tests/conformance.rs` validates that all
/// backends behave identically on a canonical fixture set.
pub trait ParseBackend: Send + Sync {
    /// Human-readable backend name (e.g. `"antlr4rust"`).
    fn name(&self) -> &'static str;

    /// Parse the given source text and return a [`BackendParseResult`].
    ///
    /// `input` is the complete source text, `file_id` identifies the file it
    /// came from, and `opts` carries the target version and recovery mode.
    ///
    /// # Contract
    ///
    /// - MUST NOT panic on any input (adversarial or otherwise).
    /// - MUST populate `cst.token_tape` such that `reconstruct(tape) == input`
    ///   byte-for-byte (the lossless round-trip property).
    /// - MUST emit at least one diagnostic per syntax error encountered.
    /// - MUST set `recovered = true` if recovery was used.
    fn parse(&self, input: &str, file_id: FileId, opts: &ParseOptions) -> BackendParseResult;
}
204
205// ---------------------------------------------------------------------------
206// Public convenience functions
207// ---------------------------------------------------------------------------
208
209/// Parse a single file with the given backend and options.
210#[instrument(level = "debug", skip(backend, opts))]
211pub fn parse_with_backend<B: ParseBackend>(
212 input: &str,
213 file_id: FileId,
214 backend: &B,
215 opts: &ParseOptions,
216) -> ParseResult {
217 let span = tracing::info_span!("parse_with_backend", backend = backend.name());
218 let _enter = span.enter();
219
220 let backend_result = backend.parse(input, file_id, opts);
221
222 ParseResult {
223 file_id,
224 cst: backend_result.cst,
225 ast: backend_result.ast,
226 diagnostics: backend_result.diagnostics,
227 metrics: backend_result.metrics,
228 recovered: backend_result.recovered,
229 }
230}
231
232/// Parse a single file with the given backend, using default [`ParseOptions`].
233///
234/// This is a thin convenience wrapper over [`parse_with_backend`] for the
235/// common case where the caller does not need to customize parse options
236/// (Oracle 19c target, statement-boundary recovery).
237///
238/// A backend is supplied explicitly: this crate is the *backend-independent*
239/// parsing surface (R2 / R20) and intentionally has no knowledge of any
240/// concrete backend. Callers that need a zero-configuration entry point
241/// construct their chosen backend once and pass it here.
242///
243/// ```
244/// # use plsql_parser::{parse_file, ParseBackend, BackendParseResult,
245/// # ParseOptions, ParseMetrics, Ast, ConcreteSyntaxTree};
246/// # use plsql_core::FileId;
247/// # struct MyBackend;
248/// # impl ParseBackend for MyBackend {
249/// # fn name(&self) -> &'static str { "doc" }
250/// # fn parse(&self, _i: &str, _f: FileId, _o: &ParseOptions) -> BackendParseResult {
251/// # BackendParseResult {
252/// # cst: ConcreteSyntaxTree::new(), ast: Ast::new(),
253/// # diagnostics: Vec::new(), metrics: ParseMetrics::default(),
254/// # recovered: false,
255/// # }
256/// # }
257/// # }
258/// let result = parse_file("BEGIN NULL; END;", FileId::new(1), &MyBackend);
259/// assert!(result.is_clean());
260/// ```
261#[instrument(level = "debug", skip(backend))]
262pub fn parse_file<B: ParseBackend>(input: &str, file_id: FileId, backend: &B) -> ParseResult {
263 parse_with_backend(input, file_id, backend, &ParseOptions::default())
264}
265
266// ---------------------------------------------------------------------------
267// Tests
268// ---------------------------------------------------------------------------
269
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Mutex;

    /// The stock options target 19c and recover at statement boundaries.
    #[test]
    fn parse_options_default_is_19c_with_recovery() {
        let ParseOptions {
            oracle_version,
            recovery,
        } = ParseOptions::default();

        assert_eq!(oracle_version, OracleTargetVersion::Oracle19c);
        assert_eq!(recovery, RecoveryMode::RecoverAtStatementBoundary);
    }

    /// Serialising and deserialising the options must not change them.
    #[test]
    fn parse_options_round_trips_through_json() {
        let encoded = serde_json::to_string(&ParseOptions::default()).unwrap();
        let decoded: ParseOptions = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.oracle_version, OracleTargetVersion::Oracle19c);
        assert_eq!(decoded.recovery, RecoveryMode::RecoverAtStatementBoundary);
    }

    /// Every counter of a default `ParseMetrics` starts at zero.
    #[test]
    fn parse_metrics_default_is_zero() {
        let metrics = ParseMetrics::default();
        let counters = [
            ("total_tokens", metrics.total_tokens),
            ("trivia_count", metrics.trivia_count),
            ("diagnostic_count", metrics.diagnostic_count),
            ("recovery_count", metrics.recovery_count),
            ("source_bytes", metrics.source_bytes),
        ];

        for (name, value) in counters {
            assert_eq!(value, 0, "{name} must default to zero");
        }
    }

    // -----------------------------------------------------------------
    // parse_file — convenience entry point over an explicit backend
    // -----------------------------------------------------------------

    /// In-test [`ParseBackend`] that logs each [`ParseOptions`] it receives,
    /// letting tests check what `parse_file` actually hands to the backend.
    #[derive(Default)]
    struct RecordingBackend {
        seen_opts: Mutex<Vec<ParseOptions>>,
    }

    impl RecordingBackend {
        /// Creates a backend with an empty options log.
        fn new() -> Self {
            Self::default()
        }
    }

    impl ParseBackend for RecordingBackend {
        fn name(&self) -> &'static str {
            "recording"
        }

        /// Logs `opts`, then returns an empty, clean result whose only
        /// non-default metric is the input length in bytes.
        fn parse(&self, input: &str, _file_id: FileId, opts: &ParseOptions) -> BackendParseResult {
            {
                // Scope the guard so the lock is released before the result
                // is built.
                let mut log = self.seen_opts.lock().expect("opts mutex poisoned");
                log.push(opts.clone());
            }

            let metrics = ParseMetrics {
                source_bytes: input.len() as u64,
                ..ParseMetrics::default()
            };

            BackendParseResult {
                cst: ConcreteSyntaxTree::new(),
                ast: Ast::new(),
                diagnostics: Vec::new(),
                metrics,
                recovered: false,
            }
        }
    }

    /// `parse_file` must call the backend once, with the default options.
    #[test]
    fn parse_file_forwards_default_parse_options() {
        let backend = RecordingBackend::new();
        let _ = parse_file("BEGIN NULL; END;", FileId::new(1), &backend);

        let seen = backend.seen_opts.lock().expect("opts mutex poisoned");
        assert_eq!(seen.len(), 1, "backend must be invoked exactly once");

        let forwarded = &seen[0];
        assert_eq!(forwarded.oracle_version, OracleTargetVersion::Oracle19c);
        assert_eq!(forwarded.recovery, RecoveryMode::RecoverAtStatementBoundary);
    }

    /// The returned result carries the `FileId` the caller passed in.
    #[test]
    fn parse_file_pairs_result_with_its_file_id() {
        let backend = RecordingBackend::new();
        let expected_id = FileId::new(42);

        let result = parse_file("SELECT 1 FROM dual;", FileId::new(42), &backend);

        assert_eq!(result.file_id, expected_id);
    }

    /// Metrics and the recovery flag from the backend reach the caller intact.
    #[test]
    fn parse_file_propagates_backend_metrics() {
        let input = "CREATE PACKAGE p IS END;";
        let backend = RecordingBackend::new();

        let result = parse_file(input, FileId::new(7), &backend);

        assert_eq!(result.metrics.source_bytes, input.len() as u64);
        assert!(
            result.is_clean(),
            "a clean recording parse carries no error diagnostics"
        );
        assert!(!result.was_recovered());
    }

    /// Empty input is a valid input and must flow through unchanged.
    #[test]
    fn parse_file_handles_empty_input_without_panicking() {
        let backend = RecordingBackend::new();

        let result = parse_file("", FileId::new(0), &backend);

        assert_eq!(result.metrics.source_bytes, 0);
        assert_eq!(result.file_id, FileId::new(0));
    }
}
380}