syntaqlite_syntax/parser/mod.rs
1// Copyright 2025 The syntaqlite Authors. All rights reserved.
2// Licensed under the Apache License, Version 2.0.
3
4use std::cell::RefCell;
5use std::ffi::CStr;
6use std::marker::PhantomData;
7use std::ptr::NonNull;
8use std::rc::Rc;
9
10use crate::any::{AnyNodeTag, AnyTokenType};
11use crate::ast::{AnyNodeId, ArenaNode, GrammarNodeType, GrammarTokenType, RawNodeList};
12use crate::grammar::{AnyGrammar, TypedGrammar};
13
14mod config;
15mod ffi;
16mod incremental;
17#[cfg(feature = "sqlite")]
18mod session;
19mod types;
20
21pub use config::ParserConfig;
22#[cfg(feature = "sqlite")]
23pub use incremental::IncrementalParseSession;
24pub use incremental::{AnyIncrementalParseSession, TypedIncrementalParseSession};
25#[cfg(feature = "sqlite")]
26pub use session::{ParseError, ParseSession, ParsedStatement, Parser, ParserToken};
27pub use types::{
28 AnyParserToken, Comment, CommentKind, CommentSpan, CompletionContext, MacroRegion,
29 ParseOutcome, ParserTokenFlags, TypedParserToken,
30};
31
/// Indicates whether parsing can continue after an error.
///
/// The enum is `#[repr(u32)]` with explicit discriminants (`Recovered = 1`,
/// `Fatal = 2`).
// NOTE(review): the fixed discriminants presumably mirror constants on the C
// side — confirm before renumbering or reordering variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum ParseErrorKind {
    /// Parsing recovered to the next statement boundary.
    ///
    /// In plain terms: this statement had a syntax error, but the parser was
    /// still able to skip forward (usually to the next `;`) and continue with
    /// later statements.
    ///
    /// The current statement can include `Error` AST nodes where invalid input
    /// was skipped.
    ///
    /// A partial AST may still be available for diagnostics.
    Recovered = 1,
    /// Parsing could not recover for this statement/input.
    ///
    /// In plain terms: the parser hit a syntax error and could not find a safe
    /// point to continue from.
    ///
    /// No reliable tree is available, and callers should usually stop reading
    /// further results from this session.
    Fatal = 2,
}
56
/// Parser API parameterized by grammar type `G`.
///
/// Primarily for library/framework code over generated grammars.
///
/// - Use this when grammar type is known at compile time.
/// - Use top-level [`Parser`] for typical `SQLite` SQL app code.
///
/// Only one session can be active at a time: starting a session takes the
/// parser state out of `inner`, and the session puts it back when dropped.
pub struct TypedParser<G: TypedGrammar> {
    /// Shared slot holding the parser state. `None` while a session has the
    /// state checked out.
    inner: Rc<RefCell<Option<ParserInner>>>,
    /// Grammar-erased handle; cloned into every session this parser starts.
    grammar: AnyGrammar,
    /// Ties the parser to grammar type `G` without storing a `G`.
    _marker: PhantomData<G>,
}
68
69impl<G: TypedGrammar> TypedParser<G> {
70 /// Create a parser for grammar `G` with default [`ParserConfig`].
71 pub fn new(grammar: G) -> Self {
72 Self::with_config(grammar, &ParserConfig::default())
73 }
74
75 /// Create a parser for grammar `G` with custom [`ParserConfig`].
76 ///
77 /// # Panics
78 /// Panics if parser allocation fails (out of memory).
79 pub fn with_config(grammar: G, config: &ParserConfig) -> Self {
80 let grammar_raw: AnyGrammar = grammar.into();
81 // SAFETY: create(NULL, grammar_raw.inner) allocates a new parser with
82 // default malloc/free. The C side copies the grammar.
83 let mut raw = NonNull::new(unsafe { CParser::create(std::ptr::null(), grammar_raw.inner) })
84 .expect("parser allocation failed");
85
86 // SAFETY: raw is freshly created (not sealed), so these calls always return 0.
87 unsafe {
88 raw.as_mut().set_trace(u32::from(config.trace()));
89 raw.as_mut()
90 .set_collect_tokens(u32::from(config.collect_tokens()));
91 raw.as_mut()
92 .set_macro_fallback(u32::from(config.macro_fallback()));
93 }
94
95 TypedParser {
96 inner: Rc::new(RefCell::new(Some(ParserInner {
97 raw,
98 source_buf: Vec::new(),
99 }))),
100 grammar: grammar_raw,
101 _marker: PhantomData,
102 }
103 }
104
105 /// Parse a SQL script and return a typed statement session.
106 ///
107 /// # Examples
108 ///
109 /// ```rust
110 /// use syntaqlite_syntax::typed::{grammar, TypedParser};
111 /// use syntaqlite_syntax::ParseOutcome;
112 ///
113 /// let parser = TypedParser::new(grammar());
114 /// let mut session = parser.parse("SELECT 1;");
115 /// let stmt = match session.next() {
116 /// ParseOutcome::Ok(stmt) => stmt,
117 /// ParseOutcome::Done => panic!("expected statement"),
118 /// ParseOutcome::Err(err) => panic!("unexpected parse error: {err}"),
119 /// };
120 /// assert!(stmt.root().is_some());
121 /// ```
122 ///
123 /// # Panics
124 ///
125 /// Panics if another session from this parser is still active.
126 /// Drop the previous session before starting a new one.
127 pub fn parse(&self, source: &str) -> TypedParseSession<G> {
128 let mut inner = self
129 .inner
130 .borrow_mut()
131 .take()
132 .expect("TypedParser::parse called while a session is still active");
133 // SAFETY: inner.raw is valid (owned via ParserInner); source is
134 // copied into source_buf which will be owned by the session.
135 unsafe { reset_parser(inner.raw.as_ptr(), &mut inner.source_buf, source) };
136 TypedParseSession {
137 grammar: self.grammar.clone(),
138 inner: Some(inner),
139 slot: Rc::clone(&self.inner),
140 _marker: PhantomData,
141 }
142 }
143
144 /// Start incremental parsing for grammar `G`.
145 ///
146 /// Use this when tokens arrive over time (editor completion, interactive
147 /// parsing, macro-expansion pipelines).
148 ///
149 /// # Examples
150 ///
151 /// ```rust
152 /// use syntaqlite_syntax::typed::{grammar, TypedParser};
153 /// use syntaqlite_syntax::TokenType;
154 ///
155 /// let parser = TypedParser::new(grammar());
156 /// let mut session = parser.incremental_parse("SELECT 1");
157 ///
158 /// let _ = session.feed_token(TokenType::Select, 0..6);
159 /// let _ = session.feed_token(TokenType::Integer, 7..8);
160 /// let _ = session.finish();
161 /// ```
162 ///
163 /// # Panics
164 ///
165 /// Panics if another session from this parser is still active.
166 /// Drop the previous session before starting a new one.
167 /// Register a template macro with the parser.
168 ///
169 /// The macro `name` will be expanded when `name!(args)` is encountered
170 /// during batch parsing (`parse()`). The `body` uses `$param` placeholders
171 /// that are substituted with the corresponding arguments.
172 ///
173 /// # Panics
174 ///
175 /// Panics if another session from this parser is still active.
176 pub fn register_macro(&mut self, name: &str, params: &[&str], body: &str) {
177 let mut inner_ref = self.inner.borrow_mut();
178 let inner = inner_ref
179 .as_mut()
180 .expect("register_macro called while a session is still active");
181 // The C side uses strlen() on param names, so they must be NUL-terminated.
182 let param_cstrings: Vec<std::ffi::CString> = params
183 .iter()
184 .map(|p| std::ffi::CString::new(*p).expect("param name must not contain NUL"))
185 .collect();
186 let param_ptrs: Vec<*const std::ffi::c_char> =
187 param_cstrings.iter().map(|c| c.as_ptr()).collect();
188 // SAFETY: inner.raw is valid; all string pointers are valid for the
189 // duration of the C call (which copies them).
190 #[expect(clippy::cast_possible_truncation)]
191 unsafe {
192 inner.raw.as_mut().register_macro(
193 name.as_ptr().cast(),
194 name.len() as u32,
195 param_ptrs.as_ptr(),
196 params.len() as u32,
197 body.as_ptr().cast(),
198 body.len() as u32,
199 );
200 }
201 }
202
203 /// Deregister a macro by name.
204 ///
205 /// Returns `true` if the macro was found and removed.
206 ///
207 /// # Panics
208 ///
209 /// Panics if another session from this parser is still active.
210 pub fn deregister_macro(&mut self, name: &str) -> bool {
211 let mut inner_ref = self.inner.borrow_mut();
212 let inner = inner_ref
213 .as_mut()
214 .expect("deregister_macro called while a session is still active");
215 #[expect(clippy::cast_possible_truncation)]
216 // SAFETY: inner.raw is valid; name pointer is valid for the C call duration.
217 let rc = unsafe {
218 inner
219 .raw
220 .as_mut()
221 .deregister_macro(name.as_ptr().cast(), name.len() as u32)
222 };
223 rc == 0
224 }
225
226 /// Start incremental parsing for grammar `G`.
227 ///
228 /// Use this when tokens arrive over time (editor completion, interactive
229 /// parsing, macro-expansion pipelines).
230 ///
231 /// # Panics
232 ///
233 /// Panics if another session from this parser is still active.
234 /// Drop the previous session before starting a new one.
235 pub fn incremental_parse(&self, source: &str) -> TypedIncrementalParseSession<G> {
236 let mut inner = self
237 .inner
238 .borrow_mut()
239 .take()
240 .expect("TypedParser::incremental_parse called while a session is still active");
241 // SAFETY: inner.raw is valid (owned via ParserInner); source is
242 // copied into source_buf.
243 unsafe { reset_parser(inner.raw.as_ptr(), &mut inner.source_buf, source) };
244 let c_source_ptr =
245 NonNull::new(inner.source_buf.as_mut_ptr()).expect("source_buf is non-empty");
246 TypedIncrementalParseSession::new(
247 c_source_ptr,
248 self.grammar.clone(),
249 inner,
250 Rc::clone(&self.inner),
251 )
252 }
253}
254
/// Cursor over statements parsed by a [`TypedParser`].
///
/// Designed for multi-statement SQL input.
///
/// - Iterates statement-by-statement.
/// - Surfaces failures per statement.
/// - Can continue after recoverable errors.
///
/// While the session is alive it owns the parser state; dropping the session
/// returns that state to the parser it came from.
pub struct TypedParseSession<G: TypedGrammar> {
    /// Grammar handle cloned from the parser; cloned again into each
    /// statement result.
    grammar: AnyGrammar,
    /// Checked-out parser state. Returned to `slot` on drop.
    inner: Option<ParserInner>,
    /// Slot to return `inner` to when this session is dropped.
    slot: Rc<RefCell<Option<ParserInner>>>,
    /// Ties the session to grammar type `G` without storing a `G`.
    _marker: PhantomData<G>,
}
270
271impl<G: TypedGrammar> Drop for TypedParseSession<G> {
272 fn drop(&mut self) {
273 if let Some(inner) = self.inner.take() {
274 *self.slot.borrow_mut() = Some(inner);
275 }
276 }
277}
278
279impl<G: TypedGrammar> TypedParseSession<G> {
280 /// Register a template macro with the parser during an active session.
281 ///
282 /// This is used by the formatter to auto-register macros defined by
283 /// `CREATE PERFETTO MACRO` statements so that subsequent macro calls
284 /// are expanded correctly.
285 ///
286 /// # Panics
287 ///
288 /// Panics if the session has already finished.
289 pub fn register_macro(&mut self, name: &str, params: &[&str], body: &str) {
290 let inner = self
291 .inner
292 .as_mut()
293 .expect("register_macro called on finished session");
294 let param_cstrings: Vec<std::ffi::CString> = params
295 .iter()
296 .map(|p| std::ffi::CString::new(*p).expect("param name must not contain NUL"))
297 .collect();
298 let param_ptrs: Vec<*const std::ffi::c_char> =
299 param_cstrings.iter().map(|c| c.as_ptr()).collect();
300 // SAFETY: inner.raw is valid; all string pointers are valid for the
301 // duration of the C call (which copies them).
302 #[expect(clippy::cast_possible_truncation)]
303 unsafe {
304 inner.raw.as_mut().register_macro(
305 name.as_ptr().cast(),
306 name.len() as u32,
307 param_ptrs.as_ptr(),
308 params.len() as u32,
309 body.as_ptr().cast(),
310 body.len() as u32,
311 );
312 }
313 }
314
315 /// Parse and return the next statement as a tri-state outcome.
316 ///
317 /// Mirrors C parser return codes directly:
318 /// - [`ParseOutcome::Done`] -> `SYNTAQLITE_PARSE_DONE`
319 /// - [`ParseOutcome::Ok`] -> `SYNTAQLITE_PARSE_OK`
320 /// - [`ParseOutcome::Err`] -> `SYNTAQLITE_PARSE_ERROR`
321 ///
322 /// Use [`ParseOutcome::transpose`] for `?`-friendly
323 /// `Result<Option<_>, _>` control flow.
324 ///
325 /// # Panics
326 ///
327 /// Panics if called after the session is finished.
328 #[expect(clippy::should_implement_trait)]
329 pub fn next(&mut self) -> ParseOutcome<TypedParsedStatement<'_, G>, TypedParseError<'_, G>> {
330 // SAFETY: raw is valid and exclusively borrowed via &mut self.
331 let rc = unsafe {
332 self.inner
333 .as_mut()
334 .expect("inner is Some while session is not finished")
335 .raw
336 .as_mut()
337 .next()
338 };
339
340 if rc == ffi::PARSE_DONE {
341 return ParseOutcome::Done;
342 }
343
344 let inner = self
345 .inner
346 .as_ref()
347 .expect("inner is Some while session is not finished");
348 let source_len = inner.source_buf.len().saturating_sub(1);
349 // SAFETY: source_buf was populated from valid UTF-8 (&str) in
350 // reset_parser. The first source_len bytes are the original source.
351 let source = unsafe { std::str::from_utf8_unchecked(&inner.source_buf[..source_len]) };
352 // SAFETY: inner.raw is valid (owned via ParserInner, not yet destroyed).
353 let result =
354 unsafe { TypedParsedStatement::new(inner.raw.as_ptr(), source, self.grammar.clone()) };
355 if rc == ffi::PARSE_OK {
356 ParseOutcome::Ok(result)
357 } else {
358 // ERROR (may still carry a recovery tree)
359 ParseOutcome::Err(TypedParseError(result))
360 }
361 }
362
363 /// Original SQL source bound to this session.
364 ///
365 /// # Panics
366 ///
367 /// Panics only if session invariants were violated.
368 pub fn source(&self) -> &str {
369 let inner = self
370 .inner
371 .as_ref()
372 .expect("inner is Some while session is not finished");
373 let source_len = inner.source_buf.len().saturating_sub(1);
374 // SAFETY: source_buf was populated from valid UTF-8 (&str) in
375 // reset_parser.
376 unsafe { std::str::from_utf8_unchecked(&inner.source_buf[..source_len]) }
377 }
378
379 /// Get a grammar-agnostic view of this session's current arena state.
380 ///
381 /// Allows reading node data and source text after all statements have been
382 /// consumed via [`next`](Self::next). The returned
383 /// result borrows from `&self` and is valid as long as this session is alive.
384 ///
385 /// # Panics
386 /// Panics only if session invariants were violated.
387 pub fn arena_result(&self) -> AnyParsedStatement<'_> {
388 let inner = self
389 .inner
390 .as_ref()
391 .expect("inner is Some while session is alive");
392 let source_len = inner.source_buf.len().saturating_sub(1);
393 // SAFETY: source_buf was populated from valid UTF-8 (&str) in
394 // reset_parser; inner.raw is valid (owned via ParserInner).
395 let source = unsafe { std::str::from_utf8_unchecked(&inner.source_buf[..source_len]) };
396 // SAFETY: inner.raw is valid for 'self; source is valid UTF-8 for 'self.
397 unsafe { AnyParsedStatement::new(inner.raw.as_ptr(), source, self.grammar.clone()) }
398 }
399}
400
/// Parser alias for grammar-independent code that picks grammar at runtime.
///
/// This is [`TypedParser`] instantiated with the grammar-erased
/// [`AnyGrammar`].
pub type AnyParser = TypedParser<AnyGrammar>;

/// Session alias paired with [`AnyParser`].
///
/// This is [`TypedParseSession`] instantiated with [`AnyGrammar`].
pub type AnyParseSession = TypedParseSession<AnyGrammar>;
406
/// Grammar-erased view of a parsed statement.
///
/// Cheap to borrow — holds a raw parser pointer, source reference, and grammar
/// handle. Nodes and lists store `&'a AnyParsedStatement<'a>` rather than an
/// owned copy, making them `Copy` and eliminating grammar-handle clones.
///
/// The view does not own the parser: `raw` must stay valid for `'a`, as
/// required by the `unsafe` constructor.
#[derive(Clone)]
pub struct AnyParsedStatement<'a> {
    /// Non-null pointer to the C parser whose result state this view reads.
    pub(crate) raw: NonNull<CParser>,
    /// Source text that spans and tokens are sliced from.
    pub(crate) source: &'a str,
    /// Grammar handle used for reflective queries (field metadata, list tags).
    pub(crate) grammar: AnyGrammar,
}
418
419impl<'a> AnyParsedStatement<'a> {
420 /// Construct from raw parts.
421 ///
422 /// # Safety
423 /// `raw` must be a valid, non-null parser pointer that remains valid for `'a`.
424 pub(crate) unsafe fn new(raw: *mut CParser, source: &'a str, grammar: AnyGrammar) -> Self {
425 AnyParsedStatement {
426 // SAFETY: caller guarantees raw is non-null.
427 raw: unsafe { NonNull::new_unchecked(raw) },
428 source,
429 grammar,
430 }
431 }
432
433 /// Root node ID for the current statement (`AnyNodeId::NULL` if absent).
434 pub fn root_id(&self) -> AnyNodeId {
435 // SAFETY: self.raw is a valid, non-null parser pointer for lifetime 'a.
436 AnyNodeId(unsafe { self.raw.as_ref().result_root() })
437 }
438
439 /// Macro expansion call-site spans recorded during parsing.
440 pub fn macro_regions(&self) -> impl Iterator<Item = MacroRegion> + use<'_> {
441 // SAFETY: self.raw is valid for 'a; the slice lives for the parser lifetime.
442 let raw: &[ffi::CMacroRegion] = unsafe { self.raw.as_ref().result_macros() };
443 raw.iter().map(|r| MacroRegion {
444 call_offset: r.call_offset,
445 call_length: r.call_length,
446 })
447 }
448
449 /// The source text bound to this result.
450 pub fn source(&self) -> &'a str {
451 self.source
452 }
453
454 /// Raw token spans `(offset, length)` for all collected tokens.
455 ///
456 /// Returns an empty iterator if `collect_tokens` was not enabled.
457 /// Always non-empty when the result comes from [`TypedParser::incremental_parse`],
458 /// which unconditionally enables token collection.
459 pub fn token_spans(&self) -> impl Iterator<Item = (u32, u32)> + use<'_> {
460 // SAFETY: self.raw is valid for 'a; the returned slice lives for 'a.
461 let raw: &[ffi::CParserToken] = unsafe { self.raw.as_ref().result_tokens() };
462 raw.iter().map(|t| (t.offset, t.length))
463 }
464
465 /// Lightweight comment descriptors without source text borrows.
466 ///
467 /// Returns an empty iterator if `collect_tokens` was not enabled.
468 pub fn comment_spans(&self) -> impl Iterator<Item = CommentSpan> + use<'_> {
469 // SAFETY: self.raw is valid for 'a; the returned slice lives for 'a.
470 let raw: &[ffi::CComment] = unsafe { self.raw.as_ref().result_comments() };
471 raw.iter().map(|c| {
472 let kind = match c.kind {
473 ffi::CCommentKind::LineComment => CommentKind::Line,
474 ffi::CCommentKind::BlockComment => CommentKind::Block,
475 };
476 CommentSpan::new(c.offset, c.length, kind)
477 })
478 }
479
480 /// Extract reflective node data (`tag` + field values) for `id`.
481 pub fn extract_fields(
482 &self,
483 id: AnyNodeId,
484 ) -> Option<(AnyNodeTag, crate::ast::NodeFields<'a>)> {
485 let (ptr, tag) = self.node_ptr(id)?;
486 let mut fields = crate::ast::NodeFields::new();
487 for meta in self.grammar.field_meta(tag) {
488 // SAFETY: ptr is a valid arena node pointer valid for 'a;
489 // meta describes a field within that node's struct layout.
490 let val = unsafe { extract_field_value(ptr, &meta, self.source) };
491 fields.push(val);
492 }
493 Some((tag, fields))
494 }
495
496 /// Return child node IDs if `id` is a list node.
497 pub fn list_children(&self, id: AnyNodeId) -> Option<&'a [AnyNodeId]> {
498 let (_, tag) = self.node_ptr(id)?;
499 if !self.grammar.is_list(tag) {
500 return None;
501 }
502 #[expect(clippy::redundant_closure_for_method_calls)]
503 self.resolve_list(id).map(|l| l.children())
504 }
505
506 /// Iterate direct child node IDs for the node at `id`.
507 pub fn child_node_ids(&self, id: AnyNodeId) -> impl Iterator<Item = AnyNodeId> + '_ {
508 let mut out = Vec::new();
509 if let Some((_, fields)) = self.extract_fields(id) {
510 for i in 0..fields.len() {
511 if let crate::ast::FieldValue::NodeId(child_id) = fields[i] {
512 if child_id.is_null() {
513 continue;
514 }
515 if let Some(children) = self.list_children(child_id) {
516 out.extend(children.iter().copied().filter(|id| !id.is_null()));
517 } else {
518 out.push(child_id);
519 }
520 }
521 }
522 }
523 out.into_iter()
524 }
525
526 /// Resolve a `AnyNodeId` to a typed reference, validating the tag.
527 pub(crate) fn resolve_as<T: ArenaNode>(&self, id: AnyNodeId) -> Option<&'a T> {
528 let (ptr, tag) = self.node_ptr(id)?;
529 if tag.0 != T::TAG {
530 return None;
531 }
532 // SAFETY: tag matches T::TAG, confirming the arena node has type T.
533 // ptr is valid for 'a. T is #[repr(C)] with a u32 tag as its first
534 // field, matching the arena layout.
535 Some(unsafe { &*ptr.cast::<T>() })
536 }
537
538 /// Resolve a `AnyNodeId` as a [`RawNodeList`] (for list nodes).
539 pub(crate) fn resolve_list(&self, id: AnyNodeId) -> Option<&'a RawNodeList> {
540 let (ptr, _) = self.node_ptr(id)?;
541 // SAFETY: ptr is valid for 'a. List nodes have RawNodeList layout.
542 #[expect(clippy::cast_ptr_alignment)]
543 Some(unsafe { &*ptr.cast::<RawNodeList>() })
544 }
545
546 /// Get a raw pointer to a node in the arena. Returns `(pointer, tag)`.
547 pub(crate) fn node_ptr(&self, id: AnyNodeId) -> Option<(*const u8, AnyNodeTag)> {
548 if id.is_null() {
549 return None;
550 }
551 // SAFETY: self.raw is valid for 'a. The returned pointer is
552 // null-checked; all arena nodes start with a u32 tag.
553 unsafe {
554 let ptr = self.raw.as_ref().node(id.0);
555 if ptr.is_null() {
556 return None;
557 }
558 let tag = AnyNodeTag(*ptr);
559 Some((ptr.cast::<u8>(), tag))
560 }
561 }
562
563 /// Return the root node as an [`AnyNode`](crate::ast::AnyNode), or `None`
564 /// if the parse result has no root (e.g. empty input or fatal parse error).
565 ///
566 /// When the `serde` feature is enabled, the returned
567 /// [`AnyNode`](crate::ast::AnyNode) implements `serde::Serialize` using
568 /// the same structure as `dump_node`.
569 pub fn root_node(&self) -> Option<crate::ast::AnyNode<'_>> {
570 let id = self.root_id();
571 if id.is_null() {
572 return None;
573 }
574 Some(crate::ast::AnyNode {
575 id,
576 stmt_result: self,
577 })
578 }
579
580 /// Dump an AST node tree as indented text into `out`.
581 pub(crate) fn dump_node(&self, id: AnyNodeId, out: &mut String, indent: usize) {
582 unsafe extern "C" {
583 fn free(ptr: *mut std::ffi::c_void);
584 }
585 // SAFETY: raw is valid; dump_node returns a malloc'd NUL-terminated string.
586 #[expect(clippy::cast_possible_truncation)]
587 unsafe {
588 let ptr = self.raw.as_ref().dump_node(id.0, indent as u32);
589 if !ptr.is_null() {
590 out.push_str(&CStr::from_ptr(ptr).to_string_lossy());
591 free(ptr.cast::<std::ffi::c_void>());
592 }
593 }
594 }
595}
596
/// Parse result for one statement from a [`TypedParseSession`].
///
/// Main hand-off point to:
///
/// - AST traversal (`root()`).
/// - Token/comment-aware tooling (`tokens()`, `comments()`).
/// - Grammar-agnostic pipelines (`erase()`).
#[derive(Clone)]
pub struct TypedParsedStatement<'a, G: TypedGrammar> {
    /// Grammar-erased view that holds the actual state (parser pointer,
    /// source, grammar handle).
    pub(crate) any: AnyParsedStatement<'a>,
    /// Ties the result to grammar type `G` without storing a `G`.
    _marker: PhantomData<G>,
}
609
610impl<'a, G: TypedGrammar> TypedParsedStatement<'a, G> {
611 /// Construct from raw parts.
612 ///
613 /// # Safety
614 /// `raw` must be a valid, non-null parser pointer that remains valid for `'a`.
615 pub(crate) unsafe fn new(raw: *mut CParser, source: &'a str, grammar: AnyGrammar) -> Self {
616 TypedParsedStatement {
617 any: AnyParsedStatement {
618 // SAFETY: caller guarantees raw is non-null.
619 raw: unsafe { NonNull::new_unchecked(raw) },
620 source,
621 grammar,
622 },
623 _marker: PhantomData,
624 }
625 }
626
627 /// Convert to the grammar-agnostic [`AnyParsedStatement`] view.
628 pub fn erase(self) -> AnyParsedStatement<'a> {
629 self.any
630 }
631
632 /// Typed AST root for this statement, if available.
633 ///
634 /// Borrows `self` for `'a` so that returned nodes can hold `&'a AnyParsedStatement<'a>`
635 /// without cloning. Drop the returned node to release the borrow.
636 pub fn root(&'a self) -> Option<G::Node<'a>> {
637 // SAFETY: self.any.raw is a valid, non-null parser pointer for lifetime 'a.
638 let id = AnyNodeId(unsafe { self.any.raw.as_ref().result_root() });
639 if id.is_null() {
640 return None;
641 }
642 G::Node::from_result(&self.any, id)
643 }
644
645 /// Dump the AST as indented text into `out`.
646 pub fn dump(&self, out: &mut String, indent: usize) {
647 self.any.dump_node(self.any.root_id(), out, indent);
648 }
649
650 /// Serialize the AST to a JSON string using the `serde-json` feature.
651 ///
652 /// The JSON structure mirrors the text dump format: nodes become
653 /// `{"type":"NodeName","field":value,...}` and lists become
654 /// `{"type":"ListName","count":N,"children":[...]}`.
655 ///
656 /// # Errors
657 /// Returns `Err` if JSON serialization fails.
658 #[cfg(feature = "serde-json")]
659 pub fn dump_json(&self) -> Result<String, serde_json::Error> {
660 serde_json::to_string(&self.any.root_node())
661 }
662
663 /// The source text bound to this result.
664 pub fn source(&self) -> &'a str {
665 self.any.source
666 }
667
668 /// Macro expansion call-site spans recorded during parsing.
669 ///
670 /// Each [`MacroRegion`] describes a byte range in the original source
671 /// that was identified as a macro invocation (e.g. `name!(args)`).
672 /// Populated automatically when the grammar's `macro_style` is set.
673 pub fn macro_regions(&self) -> impl Iterator<Item = MacroRegion> + use<'_, 'a, G> {
674 self.any.macro_regions()
675 }
676
677 /// Statement-local token stream for this parse result.
678 ///
679 /// Requires `collect_tokens: true` and skips unknown token ordinals for `G`.
680 pub fn tokens(&self) -> impl Iterator<Item = TypedParserToken<'a, G>> {
681 let source = self.any.source;
682 // SAFETY: self.any.raw is valid for 'a; the returned slice lives for 'a.
683 let raw: &'a [ffi::CParserToken] = unsafe { self.any.raw.as_ref().result_tokens() };
684 raw.iter().filter_map(move |t| {
685 let token_type = G::Token::from_token_type(AnyTokenType(t.type_))?;
686 let text = &source[t.offset as usize..(t.offset + t.length) as usize];
687 Some(TypedParserToken::new(
688 text,
689 token_type,
690 ParserTokenFlags::from_raw(t.flags),
691 t.offset,
692 t.length,
693 ))
694 })
695 }
696
697 /// Comments attached to this statement.
698 ///
699 /// Requires `collect_tokens: true` in [`ParserConfig`].
700 pub fn comments(&self) -> impl Iterator<Item = Comment<'a>> {
701 let source = self.any.source;
702 // SAFETY: self.any.raw is valid for 'a; the returned slice lives for 'a.
703 let raw: &'a [ffi::CComment] = unsafe { self.any.raw.as_ref().result_comments() };
704 raw.iter().map(move |c| {
705 let text = &source[c.offset as usize..(c.offset + c.length) as usize];
706 let kind = match c.kind {
707 ffi::CCommentKind::LineComment => CommentKind::Line,
708 ffi::CCommentKind::BlockComment => CommentKind::Block,
709 };
710 Comment::new(text, kind, c.offset, c.length)
711 })
712 }
713
714 // ── Result accessors (mirror syntaqlite_result_*) ──────────────────────
715
716 /// Human-readable error message, or `None`.
717 pub(crate) fn error_msg(&self) -> Option<&str> {
718 // SAFETY: self.any.raw is a valid, non-null parser pointer for lifetime 'a.
719 unsafe {
720 let ptr = self.any.raw.as_ref().result_error_msg();
721 if ptr.is_null() {
722 None
723 } else {
724 Some(CStr::from_ptr(ptr).to_str().unwrap_or("parse error"))
725 }
726 }
727 }
728
729 /// Byte offset of the error token, or `None` if unknown.
730 pub(crate) fn error_offset(&self) -> Option<usize> {
731 // SAFETY: self.any.raw is a valid, non-null parser pointer for lifetime 'a.
732 let v = unsafe { self.any.raw.as_ref().result_error_offset() };
733 if v == 0xFFFF_FFFF {
734 None
735 } else {
736 Some(v as usize)
737 }
738 }
739
740 /// Byte length of the error token, or `None` if unknown.
741 pub(crate) fn error_length(&self) -> Option<usize> {
742 // SAFETY: self.any.raw is a valid, non-null parser pointer for lifetime 'a.
743 let v = unsafe { self.any.raw.as_ref().result_error_length() };
744 if v == 0 { None } else { Some(v as usize) }
745 }
746
747 /// Error classification for the current result.
748 pub(crate) fn error_kind(&self) -> ParseErrorKind {
749 // SAFETY: self.any.raw is a valid, non-null parser pointer for lifetime 'a.
750 let recovery_root = AnyNodeId(unsafe { self.any.raw.as_ref().result_recovery_root() });
751 if recovery_root.is_null() {
752 ParseErrorKind::Fatal
753 } else {
754 ParseErrorKind::Recovered
755 }
756 }
757
758 /// Typed recovery AST root for this statement, if available.
759 pub(crate) fn recovery_root(&'a self) -> Option<G::Node<'a>> {
760 // SAFETY: self.any.raw is a valid, non-null parser pointer for lifetime 'a.
761 let id = AnyNodeId(unsafe { self.any.raw.as_ref().result_recovery_root() });
762 if id.is_null() {
763 return None;
764 }
765 G::Node::from_result(&self.any, id)
766 }
767}
768
769/// Extract a single [`crate::ast::FieldValue`] from a raw arena node pointer.
770///
771/// # Safety
772/// `ptr` must point to a valid arena node struct whose field at `meta.offset()`
773/// has the type indicated by `meta.kind()`, and must be valid for lifetime `'a`.
774#[expect(clippy::cast_ptr_alignment)]
775unsafe fn extract_field_value<'a>(
776 ptr: *const u8,
777 meta: &crate::grammar::FieldMeta<'_>,
778 source: &'a str,
779) -> crate::ast::FieldValue<'a> {
780 use crate::ast::{FieldValue, SourceSpan};
781 use crate::grammar::FieldKind;
782 // SAFETY: covered by function-level contract; ptr and meta are consistent.
783 unsafe {
784 let field_ptr = ptr.add(meta.offset() as usize);
785 match meta.kind() {
786 FieldKind::NodeId => FieldValue::NodeId(AnyNodeId(*(field_ptr.cast::<u32>()))),
787 FieldKind::Span => {
788 let span = &*(field_ptr.cast::<SourceSpan>());
789 if span.length == 0 {
790 FieldValue::Span("")
791 } else {
792 FieldValue::Span(span.as_str(source))
793 }
794 }
795 FieldKind::Bool => FieldValue::Bool(*(field_ptr.cast::<u32>()) != 0),
796 FieldKind::Flags => FieldValue::Flags(*field_ptr),
797 FieldKind::Enum => FieldValue::Enum(*(field_ptr.cast::<u32>())),
798 }
799 }
800}
801
/// Parse failure for a single statement in grammar `G`.
///
/// Designed for diagnostics:
///
/// - Message text (`message()`).
/// - Optional source location (`offset()`, `length()`).
/// - Severity/recovery status (`kind()`).
/// - Optional recovery tree (`recovery_root()`).
///
/// Recovery model:
///
/// - `Recovered`: this statement is invalid, but the parser skipped ahead
///   (usually to the next `;`) so it can continue with later statements.
/// - The returned `recovery_root()` can still be useful for diagnostics, but may
///   contain error placeholders where input was skipped.
/// - `Fatal`: the parser could not find a safe point to continue from.
///
/// Internally this wraps the same [`TypedParsedStatement`] view that a
/// successful parse would return; every accessor reads through it.
pub struct TypedParseError<'a, G: TypedGrammar>(TypedParsedStatement<'a, G>);
819
820impl<'a, G: TypedGrammar> TypedParseError<'a, G> {
821 pub(crate) fn new(result: TypedParsedStatement<'a, G>) -> Self {
822 TypedParseError(result)
823 }
824
825 /// Whether parsing recovered to a statement boundary.
826 pub fn kind(&self) -> ParseErrorKind {
827 self.0.error_kind()
828 }
829
830 /// True if this error was recovered and yielded a partial tree.
831 pub fn is_recovered(&self) -> bool {
832 self.kind() == ParseErrorKind::Recovered
833 }
834
835 /// True if this error is fatal (unrecoverable).
836 pub fn is_fatal(&self) -> bool {
837 self.kind() == ParseErrorKind::Fatal
838 }
839
840 /// Human-readable diagnostic text.
841 pub fn message(&self) -> &str {
842 self.0.error_msg().unwrap_or("parse error")
843 }
844 /// Returns the byte offset of the error token, or `None` if unknown.
845 pub fn offset(&self) -> Option<usize> {
846 self.0.error_offset()
847 }
848 /// Returns the byte length of the error token, or `None` if unknown.
849 pub fn length(&self) -> Option<usize> {
850 self.0.error_length()
851 }
852 /// The partial recovery tree, if error recovery produced one.
853 pub fn recovery_root(&'a self) -> Option<G::Node<'a>> {
854 self.0.recovery_root()
855 }
856
857 /// The source text bound to this result.
858 pub fn parse_source(&self) -> &'a str {
859 self.0.source()
860 }
861
862 /// Tokens collected during the (partial) parse, if `collect_tokens` was enabled.
863 pub fn tokens(&self) -> impl Iterator<Item = TypedParserToken<'a, G>> {
864 self.0.tokens()
865 }
866
867 /// Comments collected during the (partial) parse, if `collect_tokens` was enabled.
868 pub fn comments(&self) -> impl Iterator<Item = Comment<'a>> {
869 self.0.comments()
870 }
871}
872
873impl<G: TypedGrammar> std::fmt::Debug for TypedParseError<'_, G> {
874 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
875 f.debug_struct("TypedParseError")
876 .field("kind", &self.kind())
877 .field("message", &self.message())
878 .field("offset", &self.offset())
879 .field("length", &self.length())
880 .finish()
881 }
882}
883
884impl<G: TypedGrammar> std::fmt::Display for TypedParseError<'_, G> {
885 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
886 write!(f, "{}", self.message())
887 }
888}
889
// Marker impl: all `Error` methods keep their defaults, so no underlying
// `source()` is reported. `Debug` and `Display` are implemented above.
impl<G: TypedGrammar> std::error::Error for TypedParseError<'_, G> {}
891
/// Parse-error alias for grammar-independent pipelines.
///
/// This is [`TypedParseError`] instantiated with [`AnyGrammar`].
pub type AnyParseError<'a> = TypedParseError<'a, AnyGrammar>;
894
895// ── Crate-internal ───────────────────────────────────────────────────────────
896
/// Holds the C parser handle and mutable state. Checked out by sessions at
/// runtime and returned on [`Drop`].
///
/// Dropping a `ParserInner` destroys the C parser it points to.
pub(crate) struct ParserInner {
    /// Parser handle obtained from `CParser::create`; destroyed on drop.
    pub(crate) raw: NonNull<CParser>,
    /// Copy of the current source text followed by a NUL byte; filled in by
    /// `reset_parser`.
    pub(crate) source_buf: Vec<u8>,
}
903
904impl Drop for ParserInner {
905 fn drop(&mut self) {
906 // SAFETY: self.raw was allocated by CParser::create and has not been
907 // freed (Drop runs exactly once).
908 unsafe { CParser::destroy(self.raw.as_ptr()) }
909 }
910}
911
912/// Copy source into `source_buf` (with null terminator) and reset the C parser.
913///
914/// # Safety
915/// `raw` must be a valid parser pointer owned by the caller.
916pub(crate) unsafe fn reset_parser(raw: *mut CParser, source_buf: &mut Vec<u8>, source: &str) {
917 source_buf.clear();
918 source_buf.reserve(source.len() + 1);
919 source_buf.extend_from_slice(source.as_bytes());
920 source_buf.push(0);
921
922 // source_buf has at least one byte (the null terminator just pushed).
923 let c_source_ptr = source_buf.as_ptr();
924 // SAFETY: raw is valid (caller owns it); c_source_ptr points to
925 // source_buf which is null-terminated.
926 #[expect(clippy::cast_possible_truncation)]
927 unsafe {
928 (*raw).reset(c_source_ptr.cast(), source.len() as u32);
929 }
930}
931
932pub(crate) use ffi::CParser;