Skip to main content

syntaqlite_syntax/parser/
mod.rs

1// Copyright 2025 The syntaqlite Authors. All rights reserved.
2// Licensed under the Apache License, Version 2.0.
3
4use std::cell::RefCell;
5use std::ffi::CStr;
6use std::marker::PhantomData;
7use std::ptr::NonNull;
8use std::rc::Rc;
9
10use crate::any::{AnyNodeTag, AnyTokenType};
11use crate::ast::{AnyNodeId, ArenaNode, GrammarNodeType, GrammarTokenType, RawNodeList};
12use crate::grammar::{AnyGrammar, TypedGrammar};
13
14mod config;
15mod ffi;
16mod incremental;
17#[cfg(feature = "sqlite")]
18mod session;
19mod types;
20
21pub use config::ParserConfig;
22#[cfg(feature = "sqlite")]
23pub use incremental::IncrementalParseSession;
24pub use incremental::{AnyIncrementalParseSession, TypedIncrementalParseSession};
25#[cfg(feature = "sqlite")]
26pub use session::{ParseError, ParseSession, ParsedStatement, Parser, ParserToken};
27pub use types::{
28    AnyParserToken, Comment, CommentKind, CommentSpan, CompletionContext, MacroRegion,
29    ParseOutcome, ParserTokenFlags, TypedParserToken,
30};
31
/// Indicates whether parsing can continue after an error.
///
/// Within this module the kind is derived from the parse result: a result
/// that carries a recovery root is reported as [`ParseErrorKind::Recovered`],
/// otherwise it is [`ParseErrorKind::Fatal`].
// NOTE(review): the explicit `u32` discriminants presumably mirror constants
// on the C side — confirm before renumbering or reordering variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum ParseErrorKind {
    /// Parsing recovered to the next statement boundary.
    ///
    /// In plain terms: this statement had a syntax error, but the parser was
    /// still able to skip forward (usually to the next `;`) and continue with
    /// later statements.
    ///
    /// The current statement can include `Error` AST nodes where invalid input
    /// was skipped.
    ///
    /// A partial AST may still be available for diagnostics.
    Recovered = 1,
    /// Parsing could not recover for this statement/input.
    ///
    /// In plain terms: the parser hit a syntax error and could not find a safe
    /// point to continue from.
    ///
    /// No reliable tree is available, and callers should usually stop reading
    /// further results from this session.
    Fatal = 2,
}
56
/// Parser API parameterized by grammar type `G`.
///
/// Primarily for library/framework code over generated grammars.
///
/// - Use this when grammar type is known at compile time.
/// - Use top-level [`Parser`] for typical `SQLite` SQL app code.
///
/// Only one session can be active at a time: starting a session takes the
/// parser state out of the shared slot, and the session puts it back when it
/// is dropped.
pub struct TypedParser<G: TypedGrammar> {
    /// Shared slot for the parser state. `None` while a session has the
    /// state checked out; sessions hold a clone of this `Rc` so they can
    /// return the state on drop.
    inner: Rc<RefCell<Option<ParserInner>>>,
    /// Type-erased grammar handle; cloned into every session and result.
    grammar: AnyGrammar,
    /// Ties the parser to `G` at the type level without storing a `G`.
    _marker: PhantomData<G>,
}
68
69impl<G: TypedGrammar> TypedParser<G> {
70    /// Create a parser for grammar `G` with default [`ParserConfig`].
71    pub fn new(grammar: G) -> Self {
72        Self::with_config(grammar, &ParserConfig::default())
73    }
74
75    /// Create a parser for grammar `G` with custom [`ParserConfig`].
76    ///
77    /// # Panics
78    /// Panics if parser allocation fails (out of memory).
79    pub fn with_config(grammar: G, config: &ParserConfig) -> Self {
80        let grammar_raw: AnyGrammar = grammar.into();
81        // SAFETY: create(NULL, grammar_raw.inner) allocates a new parser with
82        // default malloc/free. The C side copies the grammar.
83        let mut raw = NonNull::new(unsafe { CParser::create(std::ptr::null(), grammar_raw.inner) })
84            .expect("parser allocation failed");
85
86        // SAFETY: raw is freshly created (not sealed), so these calls always return 0.
87        unsafe {
88            raw.as_mut().set_trace(u32::from(config.trace()));
89            raw.as_mut()
90                .set_collect_tokens(u32::from(config.collect_tokens()));
91            raw.as_mut()
92                .set_macro_fallback(u32::from(config.macro_fallback()));
93        }
94
95        TypedParser {
96            inner: Rc::new(RefCell::new(Some(ParserInner {
97                raw,
98                source_buf: Vec::new(),
99            }))),
100            grammar: grammar_raw,
101            _marker: PhantomData,
102        }
103    }
104
105    /// Parse a SQL script and return a typed statement session.
106    ///
107    /// # Examples
108    ///
109    /// ```rust
110    /// use syntaqlite_syntax::typed::{grammar, TypedParser};
111    /// use syntaqlite_syntax::ParseOutcome;
112    ///
113    /// let parser = TypedParser::new(grammar());
114    /// let mut session = parser.parse("SELECT 1;");
115    /// let stmt = match session.next() {
116    ///     ParseOutcome::Ok(stmt) => stmt,
117    ///     ParseOutcome::Done => panic!("expected statement"),
118    ///     ParseOutcome::Err(err) => panic!("unexpected parse error: {err}"),
119    /// };
120    /// assert!(stmt.root().is_some());
121    /// ```
122    ///
123    /// # Panics
124    ///
125    /// Panics if another session from this parser is still active.
126    /// Drop the previous session before starting a new one.
127    pub fn parse(&self, source: &str) -> TypedParseSession<G> {
128        let mut inner = self
129            .inner
130            .borrow_mut()
131            .take()
132            .expect("TypedParser::parse called while a session is still active");
133        // SAFETY: inner.raw is valid (owned via ParserInner); source is
134        // copied into source_buf which will be owned by the session.
135        unsafe { reset_parser(inner.raw.as_ptr(), &mut inner.source_buf, source) };
136        TypedParseSession {
137            grammar: self.grammar.clone(),
138            inner: Some(inner),
139            slot: Rc::clone(&self.inner),
140            _marker: PhantomData,
141        }
142    }
143
144    /// Start incremental parsing for grammar `G`.
145    ///
146    /// Use this when tokens arrive over time (editor completion, interactive
147    /// parsing, macro-expansion pipelines).
148    ///
149    /// # Examples
150    ///
151    /// ```rust
152    /// use syntaqlite_syntax::typed::{grammar, TypedParser};
153    /// use syntaqlite_syntax::TokenType;
154    ///
155    /// let parser = TypedParser::new(grammar());
156    /// let mut session = parser.incremental_parse("SELECT 1");
157    ///
158    /// let _ = session.feed_token(TokenType::Select, 0..6);
159    /// let _ = session.feed_token(TokenType::Integer, 7..8);
160    /// let _ = session.finish();
161    /// ```
162    ///
163    /// # Panics
164    ///
165    /// Panics if another session from this parser is still active.
166    /// Drop the previous session before starting a new one.
167    /// Register a template macro with the parser.
168    ///
169    /// The macro `name` will be expanded when `name!(args)` is encountered
170    /// during batch parsing (`parse()`). The `body` uses `$param` placeholders
171    /// that are substituted with the corresponding arguments.
172    ///
173    /// # Panics
174    ///
175    /// Panics if another session from this parser is still active.
176    pub fn register_macro(&mut self, name: &str, params: &[&str], body: &str) {
177        let mut inner_ref = self.inner.borrow_mut();
178        let inner = inner_ref
179            .as_mut()
180            .expect("register_macro called while a session is still active");
181        // The C side uses strlen() on param names, so they must be NUL-terminated.
182        let param_cstrings: Vec<std::ffi::CString> = params
183            .iter()
184            .map(|p| std::ffi::CString::new(*p).expect("param name must not contain NUL"))
185            .collect();
186        let param_ptrs: Vec<*const std::ffi::c_char> =
187            param_cstrings.iter().map(|c| c.as_ptr()).collect();
188        // SAFETY: inner.raw is valid; all string pointers are valid for the
189        // duration of the C call (which copies them).
190        #[expect(clippy::cast_possible_truncation)]
191        unsafe {
192            inner.raw.as_mut().register_macro(
193                name.as_ptr().cast(),
194                name.len() as u32,
195                param_ptrs.as_ptr(),
196                params.len() as u32,
197                body.as_ptr().cast(),
198                body.len() as u32,
199            );
200        }
201    }
202
203    /// Deregister a macro by name.
204    ///
205    /// Returns `true` if the macro was found and removed.
206    ///
207    /// # Panics
208    ///
209    /// Panics if another session from this parser is still active.
210    pub fn deregister_macro(&mut self, name: &str) -> bool {
211        let mut inner_ref = self.inner.borrow_mut();
212        let inner = inner_ref
213            .as_mut()
214            .expect("deregister_macro called while a session is still active");
215        #[expect(clippy::cast_possible_truncation)]
216        // SAFETY: inner.raw is valid; name pointer is valid for the C call duration.
217        let rc = unsafe {
218            inner
219                .raw
220                .as_mut()
221                .deregister_macro(name.as_ptr().cast(), name.len() as u32)
222        };
223        rc == 0
224    }
225
226    /// Start incremental parsing for grammar `G`.
227    ///
228    /// Use this when tokens arrive over time (editor completion, interactive
229    /// parsing, macro-expansion pipelines).
230    ///
231    /// # Panics
232    ///
233    /// Panics if another session from this parser is still active.
234    /// Drop the previous session before starting a new one.
235    pub fn incremental_parse(&self, source: &str) -> TypedIncrementalParseSession<G> {
236        let mut inner = self
237            .inner
238            .borrow_mut()
239            .take()
240            .expect("TypedParser::incremental_parse called while a session is still active");
241        // SAFETY: inner.raw is valid (owned via ParserInner); source is
242        // copied into source_buf.
243        unsafe { reset_parser(inner.raw.as_ptr(), &mut inner.source_buf, source) };
244        let c_source_ptr =
245            NonNull::new(inner.source_buf.as_mut_ptr()).expect("source_buf is non-empty");
246        TypedIncrementalParseSession::new(
247            c_source_ptr,
248            self.grammar.clone(),
249            inner,
250            Rc::clone(&self.inner),
251        )
252    }
253}
254
/// Cursor over statements parsed by a [`TypedParser`].
///
/// Designed for multi-statement SQL input.
///
/// - Iterates statement-by-statement.
/// - Surfaces failures per statement.
/// - Can continue after recoverable errors.
///
/// While a session exists it owns the parser state; the state goes back to
/// the originating parser when the session is dropped.
pub struct TypedParseSession<G: TypedGrammar> {
    /// Type-erased grammar handle, cloned into each statement result.
    grammar: AnyGrammar,
    /// Checked-out parser state. Returned to `slot` on drop.
    inner: Option<ParserInner>,
    /// Slot to return `inner` to when this session is dropped.
    slot: Rc<RefCell<Option<ParserInner>>>,
    /// Ties the session to `G` at the type level without storing a `G`.
    _marker: PhantomData<G>,
}
270
271impl<G: TypedGrammar> Drop for TypedParseSession<G> {
272    fn drop(&mut self) {
273        if let Some(inner) = self.inner.take() {
274            *self.slot.borrow_mut() = Some(inner);
275        }
276    }
277}
278
279impl<G: TypedGrammar> TypedParseSession<G> {
280    /// Register a template macro with the parser during an active session.
281    ///
282    /// This is used by the formatter to auto-register macros defined by
283    /// `CREATE PERFETTO MACRO` statements so that subsequent macro calls
284    /// are expanded correctly.
285    ///
286    /// # Panics
287    ///
288    /// Panics if the session has already finished.
289    pub fn register_macro(&mut self, name: &str, params: &[&str], body: &str) {
290        let inner = self
291            .inner
292            .as_mut()
293            .expect("register_macro called on finished session");
294        let param_cstrings: Vec<std::ffi::CString> = params
295            .iter()
296            .map(|p| std::ffi::CString::new(*p).expect("param name must not contain NUL"))
297            .collect();
298        let param_ptrs: Vec<*const std::ffi::c_char> =
299            param_cstrings.iter().map(|c| c.as_ptr()).collect();
300        // SAFETY: inner.raw is valid; all string pointers are valid for the
301        // duration of the C call (which copies them).
302        #[expect(clippy::cast_possible_truncation)]
303        unsafe {
304            inner.raw.as_mut().register_macro(
305                name.as_ptr().cast(),
306                name.len() as u32,
307                param_ptrs.as_ptr(),
308                params.len() as u32,
309                body.as_ptr().cast(),
310                body.len() as u32,
311            );
312        }
313    }
314
315    /// Parse and return the next statement as a tri-state outcome.
316    ///
317    /// Mirrors C parser return codes directly:
318    /// - [`ParseOutcome::Done`]  -> `SYNTAQLITE_PARSE_DONE`
319    /// - [`ParseOutcome::Ok`]    -> `SYNTAQLITE_PARSE_OK`
320    /// - [`ParseOutcome::Err`]   -> `SYNTAQLITE_PARSE_ERROR`
321    ///
322    /// Use [`ParseOutcome::transpose`] for `?`-friendly
323    /// `Result<Option<_>, _>` control flow.
324    ///
325    /// # Panics
326    ///
327    /// Panics if called after the session is finished.
328    #[expect(clippy::should_implement_trait)]
329    pub fn next(&mut self) -> ParseOutcome<TypedParsedStatement<'_, G>, TypedParseError<'_, G>> {
330        // SAFETY: raw is valid and exclusively borrowed via &mut self.
331        let rc = unsafe {
332            self.inner
333                .as_mut()
334                .expect("inner is Some while session is not finished")
335                .raw
336                .as_mut()
337                .next()
338        };
339
340        if rc == ffi::PARSE_DONE {
341            return ParseOutcome::Done;
342        }
343
344        let inner = self
345            .inner
346            .as_ref()
347            .expect("inner is Some while session is not finished");
348        let source_len = inner.source_buf.len().saturating_sub(1);
349        // SAFETY: source_buf was populated from valid UTF-8 (&str) in
350        // reset_parser. The first source_len bytes are the original source.
351        let source = unsafe { std::str::from_utf8_unchecked(&inner.source_buf[..source_len]) };
352        // SAFETY: inner.raw is valid (owned via ParserInner, not yet destroyed).
353        let result =
354            unsafe { TypedParsedStatement::new(inner.raw.as_ptr(), source, self.grammar.clone()) };
355        if rc == ffi::PARSE_OK {
356            ParseOutcome::Ok(result)
357        } else {
358            // ERROR (may still carry a recovery tree)
359            ParseOutcome::Err(TypedParseError(result))
360        }
361    }
362
363    /// Original SQL source bound to this session.
364    ///
365    /// # Panics
366    ///
367    /// Panics only if session invariants were violated.
368    pub fn source(&self) -> &str {
369        let inner = self
370            .inner
371            .as_ref()
372            .expect("inner is Some while session is not finished");
373        let source_len = inner.source_buf.len().saturating_sub(1);
374        // SAFETY: source_buf was populated from valid UTF-8 (&str) in
375        // reset_parser.
376        unsafe { std::str::from_utf8_unchecked(&inner.source_buf[..source_len]) }
377    }
378
379    /// Get a grammar-agnostic view of this session's current arena state.
380    ///
381    /// Allows reading node data and source text after all statements have been
382    /// consumed via [`next`](Self::next). The returned
383    /// result borrows from `&self` and is valid as long as this session is alive.
384    ///
385    /// # Panics
386    /// Panics only if session invariants were violated.
387    pub fn arena_result(&self) -> AnyParsedStatement<'_> {
388        let inner = self
389            .inner
390            .as_ref()
391            .expect("inner is Some while session is alive");
392        let source_len = inner.source_buf.len().saturating_sub(1);
393        // SAFETY: source_buf was populated from valid UTF-8 (&str) in
394        // reset_parser; inner.raw is valid (owned via ParserInner).
395        let source = unsafe { std::str::from_utf8_unchecked(&inner.source_buf[..source_len]) };
396        // SAFETY: inner.raw is valid for 'self; source is valid UTF-8 for 'self.
397        unsafe { AnyParsedStatement::new(inner.raw.as_ptr(), source, self.grammar.clone()) }
398    }
399}
400
/// Parser alias for grammar-independent code that picks grammar at runtime.
///
/// This is [`TypedParser`] instantiated with the type-erased [`AnyGrammar`].
pub type AnyParser = TypedParser<AnyGrammar>;

/// Session alias paired with [`AnyParser`].
///
/// This is [`TypedParseSession`] instantiated with the type-erased [`AnyGrammar`].
pub type AnyParseSession = TypedParseSession<AnyGrammar>;
406
/// Grammar-erased view of a parsed statement.
///
/// Cheap to borrow — holds a raw parser pointer, source reference, and grammar
/// handle. Nodes and lists store `&'a AnyParsedStatement<'a>` rather than an
/// owned copy, making them `Copy` and eliminating grammar-handle clones.
#[derive(Clone)]
pub struct AnyParsedStatement<'a> {
    /// Non-null pointer to the C parser whose results are being read; the
    /// constructor's contract requires it to stay valid for `'a`.
    pub(crate) raw: NonNull<CParser>,
    /// Source text the parse result refers to.
    pub(crate) source: &'a str,
    /// Grammar handle used for reflective lookups (field metadata, list tags).
    pub(crate) grammar: AnyGrammar,
}
418
419impl<'a> AnyParsedStatement<'a> {
420    /// Construct from raw parts.
421    ///
422    /// # Safety
423    /// `raw` must be a valid, non-null parser pointer that remains valid for `'a`.
424    pub(crate) unsafe fn new(raw: *mut CParser, source: &'a str, grammar: AnyGrammar) -> Self {
425        AnyParsedStatement {
426            // SAFETY: caller guarantees raw is non-null.
427            raw: unsafe { NonNull::new_unchecked(raw) },
428            source,
429            grammar,
430        }
431    }
432
433    /// Root node ID for the current statement (`AnyNodeId::NULL` if absent).
434    pub fn root_id(&self) -> AnyNodeId {
435        // SAFETY: self.raw is a valid, non-null parser pointer for lifetime 'a.
436        AnyNodeId(unsafe { self.raw.as_ref().result_root() })
437    }
438
439    /// Macro expansion call-site spans recorded during parsing.
440    pub fn macro_regions(&self) -> impl Iterator<Item = MacroRegion> + use<'_> {
441        // SAFETY: self.raw is valid for 'a; the slice lives for the parser lifetime.
442        let raw: &[ffi::CMacroRegion] = unsafe { self.raw.as_ref().result_macros() };
443        raw.iter().map(|r| MacroRegion {
444            call_offset: r.call_offset,
445            call_length: r.call_length,
446        })
447    }
448
449    /// The source text bound to this result.
450    pub fn source(&self) -> &'a str {
451        self.source
452    }
453
454    /// Raw token spans `(offset, length)` for all collected tokens.
455    ///
456    /// Returns an empty iterator if `collect_tokens` was not enabled.
457    /// Always non-empty when the result comes from [`TypedParser::incremental_parse`],
458    /// which unconditionally enables token collection.
459    pub fn token_spans(&self) -> impl Iterator<Item = (u32, u32)> + use<'_> {
460        // SAFETY: self.raw is valid for 'a; the returned slice lives for 'a.
461        let raw: &[ffi::CParserToken] = unsafe { self.raw.as_ref().result_tokens() };
462        raw.iter().map(|t| (t.offset, t.length))
463    }
464
465    /// Lightweight comment descriptors without source text borrows.
466    ///
467    /// Returns an empty iterator if `collect_tokens` was not enabled.
468    pub fn comment_spans(&self) -> impl Iterator<Item = CommentSpan> + use<'_> {
469        // SAFETY: self.raw is valid for 'a; the returned slice lives for 'a.
470        let raw: &[ffi::CComment] = unsafe { self.raw.as_ref().result_comments() };
471        raw.iter().map(|c| {
472            let kind = match c.kind {
473                ffi::CCommentKind::LineComment => CommentKind::Line,
474                ffi::CCommentKind::BlockComment => CommentKind::Block,
475            };
476            CommentSpan::new(c.offset, c.length, kind)
477        })
478    }
479
480    /// Extract reflective node data (`tag` + field values) for `id`.
481    pub fn extract_fields(
482        &self,
483        id: AnyNodeId,
484    ) -> Option<(AnyNodeTag, crate::ast::NodeFields<'a>)> {
485        let (ptr, tag) = self.node_ptr(id)?;
486        let mut fields = crate::ast::NodeFields::new();
487        for meta in self.grammar.field_meta(tag) {
488            // SAFETY: ptr is a valid arena node pointer valid for 'a;
489            // meta describes a field within that node's struct layout.
490            let val = unsafe { extract_field_value(ptr, &meta, self.source) };
491            fields.push(val);
492        }
493        Some((tag, fields))
494    }
495
496    /// Return child node IDs if `id` is a list node.
497    pub fn list_children(&self, id: AnyNodeId) -> Option<&'a [AnyNodeId]> {
498        let (_, tag) = self.node_ptr(id)?;
499        if !self.grammar.is_list(tag) {
500            return None;
501        }
502        #[expect(clippy::redundant_closure_for_method_calls)]
503        self.resolve_list(id).map(|l| l.children())
504    }
505
506    /// Iterate direct child node IDs for the node at `id`.
507    pub fn child_node_ids(&self, id: AnyNodeId) -> impl Iterator<Item = AnyNodeId> + '_ {
508        let mut out = Vec::new();
509        if let Some((_, fields)) = self.extract_fields(id) {
510            for i in 0..fields.len() {
511                if let crate::ast::FieldValue::NodeId(child_id) = fields[i] {
512                    if child_id.is_null() {
513                        continue;
514                    }
515                    if let Some(children) = self.list_children(child_id) {
516                        out.extend(children.iter().copied().filter(|id| !id.is_null()));
517                    } else {
518                        out.push(child_id);
519                    }
520                }
521            }
522        }
523        out.into_iter()
524    }
525
526    /// Resolve a `AnyNodeId` to a typed reference, validating the tag.
527    pub(crate) fn resolve_as<T: ArenaNode>(&self, id: AnyNodeId) -> Option<&'a T> {
528        let (ptr, tag) = self.node_ptr(id)?;
529        if tag.0 != T::TAG {
530            return None;
531        }
532        // SAFETY: tag matches T::TAG, confirming the arena node has type T.
533        // ptr is valid for 'a. T is #[repr(C)] with a u32 tag as its first
534        // field, matching the arena layout.
535        Some(unsafe { &*ptr.cast::<T>() })
536    }
537
538    /// Resolve a `AnyNodeId` as a [`RawNodeList`] (for list nodes).
539    pub(crate) fn resolve_list(&self, id: AnyNodeId) -> Option<&'a RawNodeList> {
540        let (ptr, _) = self.node_ptr(id)?;
541        // SAFETY: ptr is valid for 'a. List nodes have RawNodeList layout.
542        #[expect(clippy::cast_ptr_alignment)]
543        Some(unsafe { &*ptr.cast::<RawNodeList>() })
544    }
545
546    /// Get a raw pointer to a node in the arena. Returns `(pointer, tag)`.
547    pub(crate) fn node_ptr(&self, id: AnyNodeId) -> Option<(*const u8, AnyNodeTag)> {
548        if id.is_null() {
549            return None;
550        }
551        // SAFETY: self.raw is valid for 'a. The returned pointer is
552        // null-checked; all arena nodes start with a u32 tag.
553        unsafe {
554            let ptr = self.raw.as_ref().node(id.0);
555            if ptr.is_null() {
556                return None;
557            }
558            let tag = AnyNodeTag(*ptr);
559            Some((ptr.cast::<u8>(), tag))
560        }
561    }
562
563    /// Return the root node as an [`AnyNode`](crate::ast::AnyNode), or `None`
564    /// if the parse result has no root (e.g. empty input or fatal parse error).
565    ///
566    /// When the `serde` feature is enabled, the returned
567    /// [`AnyNode`](crate::ast::AnyNode) implements `serde::Serialize` using
568    /// the same structure as `dump_node`.
569    pub fn root_node(&self) -> Option<crate::ast::AnyNode<'_>> {
570        let id = self.root_id();
571        if id.is_null() {
572            return None;
573        }
574        Some(crate::ast::AnyNode {
575            id,
576            stmt_result: self,
577        })
578    }
579
580    /// Dump an AST node tree as indented text into `out`.
581    pub(crate) fn dump_node(&self, id: AnyNodeId, out: &mut String, indent: usize) {
582        unsafe extern "C" {
583            fn free(ptr: *mut std::ffi::c_void);
584        }
585        // SAFETY: raw is valid; dump_node returns a malloc'd NUL-terminated string.
586        #[expect(clippy::cast_possible_truncation)]
587        unsafe {
588            let ptr = self.raw.as_ref().dump_node(id.0, indent as u32);
589            if !ptr.is_null() {
590                out.push_str(&CStr::from_ptr(ptr).to_string_lossy());
591                free(ptr.cast::<std::ffi::c_void>());
592            }
593        }
594    }
595}
596
/// Parse result for one statement from a [`TypedParseSession`].
///
/// Main hand-off point to:
///
/// - AST traversal (`root()`).
/// - Token/comment-aware tooling (`tokens()`, `comments()`).
/// - Grammar-agnostic pipelines (`erase()`).
#[derive(Clone)]
pub struct TypedParsedStatement<'a, G: TypedGrammar> {
    /// Grammar-erased view holding the parser pointer, source and grammar
    /// handle; the typed accessors read through it.
    pub(crate) any: AnyParsedStatement<'a>,
    /// Ties the result to `G` at the type level without storing a `G`.
    _marker: PhantomData<G>,
}
609
610impl<'a, G: TypedGrammar> TypedParsedStatement<'a, G> {
611    /// Construct from raw parts.
612    ///
613    /// # Safety
614    /// `raw` must be a valid, non-null parser pointer that remains valid for `'a`.
615    pub(crate) unsafe fn new(raw: *mut CParser, source: &'a str, grammar: AnyGrammar) -> Self {
616        TypedParsedStatement {
617            any: AnyParsedStatement {
618                // SAFETY: caller guarantees raw is non-null.
619                raw: unsafe { NonNull::new_unchecked(raw) },
620                source,
621                grammar,
622            },
623            _marker: PhantomData,
624        }
625    }
626
627    /// Convert to the grammar-agnostic [`AnyParsedStatement`] view.
628    pub fn erase(self) -> AnyParsedStatement<'a> {
629        self.any
630    }
631
632    /// Typed AST root for this statement, if available.
633    ///
634    /// Borrows `self` for `'a` so that returned nodes can hold `&'a AnyParsedStatement<'a>`
635    /// without cloning. Drop the returned node to release the borrow.
636    pub fn root(&'a self) -> Option<G::Node<'a>> {
637        // SAFETY: self.any.raw is a valid, non-null parser pointer for lifetime 'a.
638        let id = AnyNodeId(unsafe { self.any.raw.as_ref().result_root() });
639        if id.is_null() {
640            return None;
641        }
642        G::Node::from_result(&self.any, id)
643    }
644
645    /// Dump the AST as indented text into `out`.
646    pub fn dump(&self, out: &mut String, indent: usize) {
647        self.any.dump_node(self.any.root_id(), out, indent);
648    }
649
650    /// Serialize the AST to a JSON string using the `serde-json` feature.
651    ///
652    /// The JSON structure mirrors the text dump format: nodes become
653    /// `{"type":"NodeName","field":value,...}` and lists become
654    /// `{"type":"ListName","count":N,"children":[...]}`.
655    ///
656    /// # Errors
657    /// Returns `Err` if JSON serialization fails.
658    #[cfg(feature = "serde-json")]
659    pub fn dump_json(&self) -> Result<String, serde_json::Error> {
660        serde_json::to_string(&self.any.root_node())
661    }
662
663    /// The source text bound to this result.
664    pub fn source(&self) -> &'a str {
665        self.any.source
666    }
667
668    /// Macro expansion call-site spans recorded during parsing.
669    ///
670    /// Each [`MacroRegion`] describes a byte range in the original source
671    /// that was identified as a macro invocation (e.g. `name!(args)`).
672    /// Populated automatically when the grammar's `macro_style` is set.
673    pub fn macro_regions(&self) -> impl Iterator<Item = MacroRegion> + use<'_, 'a, G> {
674        self.any.macro_regions()
675    }
676
677    /// Statement-local token stream for this parse result.
678    ///
679    /// Requires `collect_tokens: true` and skips unknown token ordinals for `G`.
680    pub fn tokens(&self) -> impl Iterator<Item = TypedParserToken<'a, G>> {
681        let source = self.any.source;
682        // SAFETY: self.any.raw is valid for 'a; the returned slice lives for 'a.
683        let raw: &'a [ffi::CParserToken] = unsafe { self.any.raw.as_ref().result_tokens() };
684        raw.iter().filter_map(move |t| {
685            let token_type = G::Token::from_token_type(AnyTokenType(t.type_))?;
686            let text = &source[t.offset as usize..(t.offset + t.length) as usize];
687            Some(TypedParserToken::new(
688                text,
689                token_type,
690                ParserTokenFlags::from_raw(t.flags),
691                t.offset,
692                t.length,
693            ))
694        })
695    }
696
697    /// Comments attached to this statement.
698    ///
699    /// Requires `collect_tokens: true` in [`ParserConfig`].
700    pub fn comments(&self) -> impl Iterator<Item = Comment<'a>> {
701        let source = self.any.source;
702        // SAFETY: self.any.raw is valid for 'a; the returned slice lives for 'a.
703        let raw: &'a [ffi::CComment] = unsafe { self.any.raw.as_ref().result_comments() };
704        raw.iter().map(move |c| {
705            let text = &source[c.offset as usize..(c.offset + c.length) as usize];
706            let kind = match c.kind {
707                ffi::CCommentKind::LineComment => CommentKind::Line,
708                ffi::CCommentKind::BlockComment => CommentKind::Block,
709            };
710            Comment::new(text, kind, c.offset, c.length)
711        })
712    }
713
714    // ── Result accessors (mirror syntaqlite_result_*) ──────────────────────
715
716    /// Human-readable error message, or `None`.
717    pub(crate) fn error_msg(&self) -> Option<&str> {
718        // SAFETY: self.any.raw is a valid, non-null parser pointer for lifetime 'a.
719        unsafe {
720            let ptr = self.any.raw.as_ref().result_error_msg();
721            if ptr.is_null() {
722                None
723            } else {
724                Some(CStr::from_ptr(ptr).to_str().unwrap_or("parse error"))
725            }
726        }
727    }
728
729    /// Byte offset of the error token, or `None` if unknown.
730    pub(crate) fn error_offset(&self) -> Option<usize> {
731        // SAFETY: self.any.raw is a valid, non-null parser pointer for lifetime 'a.
732        let v = unsafe { self.any.raw.as_ref().result_error_offset() };
733        if v == 0xFFFF_FFFF {
734            None
735        } else {
736            Some(v as usize)
737        }
738    }
739
740    /// Byte length of the error token, or `None` if unknown.
741    pub(crate) fn error_length(&self) -> Option<usize> {
742        // SAFETY: self.any.raw is a valid, non-null parser pointer for lifetime 'a.
743        let v = unsafe { self.any.raw.as_ref().result_error_length() };
744        if v == 0 { None } else { Some(v as usize) }
745    }
746
747    /// Error classification for the current result.
748    pub(crate) fn error_kind(&self) -> ParseErrorKind {
749        // SAFETY: self.any.raw is a valid, non-null parser pointer for lifetime 'a.
750        let recovery_root = AnyNodeId(unsafe { self.any.raw.as_ref().result_recovery_root() });
751        if recovery_root.is_null() {
752            ParseErrorKind::Fatal
753        } else {
754            ParseErrorKind::Recovered
755        }
756    }
757
758    /// Typed recovery AST root for this statement, if available.
759    pub(crate) fn recovery_root(&'a self) -> Option<G::Node<'a>> {
760        // SAFETY: self.any.raw is a valid, non-null parser pointer for lifetime 'a.
761        let id = AnyNodeId(unsafe { self.any.raw.as_ref().result_recovery_root() });
762        if id.is_null() {
763            return None;
764        }
765        G::Node::from_result(&self.any, id)
766    }
767}
768
769/// Extract a single [`crate::ast::FieldValue`] from a raw arena node pointer.
770///
771/// # Safety
772/// `ptr` must point to a valid arena node struct whose field at `meta.offset()`
773/// has the type indicated by `meta.kind()`, and must be valid for lifetime `'a`.
774#[expect(clippy::cast_ptr_alignment)]
775unsafe fn extract_field_value<'a>(
776    ptr: *const u8,
777    meta: &crate::grammar::FieldMeta<'_>,
778    source: &'a str,
779) -> crate::ast::FieldValue<'a> {
780    use crate::ast::{FieldValue, SourceSpan};
781    use crate::grammar::FieldKind;
782    // SAFETY: covered by function-level contract; ptr and meta are consistent.
783    unsafe {
784        let field_ptr = ptr.add(meta.offset() as usize);
785        match meta.kind() {
786            FieldKind::NodeId => FieldValue::NodeId(AnyNodeId(*(field_ptr.cast::<u32>()))),
787            FieldKind::Span => {
788                let span = &*(field_ptr.cast::<SourceSpan>());
789                if span.length == 0 {
790                    FieldValue::Span("")
791                } else {
792                    FieldValue::Span(span.as_str(source))
793                }
794            }
795            FieldKind::Bool => FieldValue::Bool(*(field_ptr.cast::<u32>()) != 0),
796            FieldKind::Flags => FieldValue::Flags(*field_ptr),
797            FieldKind::Enum => FieldValue::Enum(*(field_ptr.cast::<u32>())),
798        }
799    }
800}
801
802/// Parse failure for a single statement in grammar `G`.
803///
804/// Designed for diagnostics:
805///
806/// - Message text (`message()`).
807/// - Optional source location (`offset()`, `length()`).
808/// - Severity/recovery status (`kind()`).
809/// - Optional recovery tree (`recovery_root()`).
810///
811/// Recovery model:
812///
813/// - `Recovered`: this statement is invalid, but the parser skipped ahead
814///   (usually to the next `;`) so it can continue with later statements.
815/// - The returned `recovery_root()` can still be useful for diagnostics, but may
816///   contain error placeholders where input was skipped.
817/// - `Fatal`: the parser could not find a safe point to continue from.
818pub struct TypedParseError<'a, G: TypedGrammar>(TypedParsedStatement<'a, G>);
819
820impl<'a, G: TypedGrammar> TypedParseError<'a, G> {
821    pub(crate) fn new(result: TypedParsedStatement<'a, G>) -> Self {
822        TypedParseError(result)
823    }
824
825    /// Whether parsing recovered to a statement boundary.
826    pub fn kind(&self) -> ParseErrorKind {
827        self.0.error_kind()
828    }
829
830    /// True if this error was recovered and yielded a partial tree.
831    pub fn is_recovered(&self) -> bool {
832        self.kind() == ParseErrorKind::Recovered
833    }
834
835    /// True if this error is fatal (unrecoverable).
836    pub fn is_fatal(&self) -> bool {
837        self.kind() == ParseErrorKind::Fatal
838    }
839
840    /// Human-readable diagnostic text.
841    pub fn message(&self) -> &str {
842        self.0.error_msg().unwrap_or("parse error")
843    }
844    /// Returns the byte offset of the error token, or `None` if unknown.
845    pub fn offset(&self) -> Option<usize> {
846        self.0.error_offset()
847    }
848    /// Returns the byte length of the error token, or `None` if unknown.
849    pub fn length(&self) -> Option<usize> {
850        self.0.error_length()
851    }
852    /// The partial recovery tree, if error recovery produced one.
853    pub fn recovery_root(&'a self) -> Option<G::Node<'a>> {
854        self.0.recovery_root()
855    }
856
857    /// The source text bound to this result.
858    pub fn parse_source(&self) -> &'a str {
859        self.0.source()
860    }
861
862    /// Tokens collected during the (partial) parse, if `collect_tokens` was enabled.
863    pub fn tokens(&self) -> impl Iterator<Item = TypedParserToken<'a, G>> {
864        self.0.tokens()
865    }
866
867    /// Comments collected during the (partial) parse, if `collect_tokens` was enabled.
868    pub fn comments(&self) -> impl Iterator<Item = Comment<'a>> {
869        self.0.comments()
870    }
871}
872
873impl<G: TypedGrammar> std::fmt::Debug for TypedParseError<'_, G> {
874    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
875        f.debug_struct("TypedParseError")
876            .field("kind", &self.kind())
877            .field("message", &self.message())
878            .field("offset", &self.offset())
879            .field("length", &self.length())
880            .finish()
881    }
882}
883
884impl<G: TypedGrammar> std::fmt::Display for TypedParseError<'_, G> {
885    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
886        write!(f, "{}", self.message())
887    }
888}
889
890impl<G: TypedGrammar> std::error::Error for TypedParseError<'_, G> {}
891
/// Parse-error alias for grammar-independent pipelines.
///
/// This is [`TypedParseError`] instantiated with [`AnyGrammar`].
pub type AnyParseError<'a> = TypedParseError<'a, AnyGrammar>;
894
895// ── Crate-internal ───────────────────────────────────────────────────────────
896
/// Holds the C parser handle and mutable state. Checked out by sessions at
/// runtime and returned on [`Drop`].
pub(crate) struct ParserInner {
    /// Non-null handle to the C parser; handed to `CParser::destroy` when this
    /// value is dropped.
    pub(crate) raw: NonNull<CParser>,
    /// Byte buffer kept alongside the parser handle.
    // NOTE(review): presumably the buffer `reset_parser` fills with the
    // null-terminated source — confirm at the call sites.
    pub(crate) source_buf: Vec<u8>,
}
903
904impl Drop for ParserInner {
905    fn drop(&mut self) {
906        // SAFETY: self.raw was allocated by CParser::create and has not been
907        // freed (Drop runs exactly once).
908        unsafe { CParser::destroy(self.raw.as_ptr()) }
909    }
910}
911
912/// Copy source into `source_buf` (with null terminator) and reset the C parser.
913///
914/// # Safety
915/// `raw` must be a valid parser pointer owned by the caller.
916pub(crate) unsafe fn reset_parser(raw: *mut CParser, source_buf: &mut Vec<u8>, source: &str) {
917    source_buf.clear();
918    source_buf.reserve(source.len() + 1);
919    source_buf.extend_from_slice(source.as_bytes());
920    source_buf.push(0);
921
922    // source_buf has at least one byte (the null terminator just pushed).
923    let c_source_ptr = source_buf.as_ptr();
924    // SAFETY: raw is valid (caller owns it); c_source_ptr points to
925    // source_buf which is null-terminated.
926    #[expect(clippy::cast_possible_truncation)]
927    unsafe {
928        (*raw).reset(c_source_ptr.cast(), source.len() as u32);
929    }
930}
931
932pub(crate) use ffi::CParser;