// Source path: syntaqlite_syntax/parser/incremental.rs

1// Copyright 2025 The syntaqlite Authors. All rights reserved.
2// Licensed under the Apache License, Version 2.0.
3
4use std::cell::RefCell;
5use std::marker::PhantomData;
6use std::ops::Range;
7use std::ptr::NonNull;
8use std::rc::Rc;
9
10use crate::ast::GrammarTokenType;
11use crate::grammar::{AnyGrammar, TypedGrammar};
12
13use super::{
14    AnyParsedStatement, CParser, CompletionContext, ParserInner, TypedParseError,
15    TypedParsedStatement, ffi,
16};
17#[cfg(feature = "sqlite")]
18use super::{ParseError, ParsedStatement};
19
/// Incremental parser state machine for grammar `G`.
///
/// Use this for interactive/editor workflows where input arrives token by
/// token and you need expected-token or completion-context feedback.
///
/// Obtained from [`super::TypedParser::incremental_parse`].
pub struct TypedIncrementalParseSession<G: TypedGrammar> {
    /// Base pointer into the internal source buffer. `feed_token` uses this
    /// to compute the C-side token pointer from byte-offset spans.
    c_source_ptr: NonNull<u8>,
    /// Grammar handle, cloned into every statement result this session
    /// produces (see `typed_stmt_result`).
    grammar: AnyGrammar,
    /// Checked-out parser state. Returned to `slot` on drop. `Some` for the
    /// whole lifetime of the session; only `Drop` takes it.
    inner: Option<ParserInner>,
    /// Slot to return `inner` to when this session is dropped.
    slot: Rc<RefCell<Option<ParserInner>>>,
    /// Set by `finish()`; most methods assert on this and panic afterwards.
    finished: bool,
    /// Ties the session to grammar `G` without storing a `G` value.
    _marker: PhantomData<G>,
}
38
39impl<G: TypedGrammar> Drop for TypedIncrementalParseSession<G> {
40    fn drop(&mut self) {
41        if let Some(inner) = self.inner.take() {
42            *self.slot.borrow_mut() = Some(inner);
43        }
44    }
45}
46
47impl<G: TypedGrammar> TypedIncrementalParseSession<G> {
48    pub(crate) fn new(
49        c_source_ptr: NonNull<u8>,
50        grammar: AnyGrammar,
51        inner: ParserInner,
52        slot: Rc<RefCell<Option<ParserInner>>>,
53    ) -> Self {
54        TypedIncrementalParseSession {
55            c_source_ptr,
56            grammar,
57            inner: Some(inner),
58            slot,
59            finished: false,
60            _marker: PhantomData,
61        }
62    }
63
64    fn assert_not_finished(&self) {
65        assert!(
66            !self.finished,
67            "TypedIncrementalParseSession used after finish()"
68        );
69    }
70
71    fn raw_ptr(&self) -> *mut CParser {
72        self.inner
73            .as_ref()
74            .expect("inner taken after finish()")
75            .raw
76            .as_ptr()
77    }
78
    /// Build a typed statement view over the parser's current result.
    ///
    /// The returned statement borrows from `self` (it holds the raw parser
    /// pointer and a slice of `source_buf`), so it is only valid while this
    /// session is alive.
    fn typed_stmt_result(&self) -> TypedParsedStatement<'_, G> {
        let inner = self.inner.as_ref().expect("inner taken after finish()");
        // Drop one trailing byte — presumably a C-side terminator appended by
        // reset_parser (not visible here); TODO confirm against reset_parser.
        let source_len = inner.source_buf.len().saturating_sub(1);
        // SAFETY: source_buf was populated from valid UTF-8 (&str) in
        // reset_parser. The first source_len bytes are the original source.
        let source = unsafe { std::str::from_utf8_unchecked(&inner.source_buf[..source_len]) };
        // SAFETY: inner.raw is valid (owned via ParserInner, not yet destroyed).
        unsafe { TypedParsedStatement::new(inner.raw.as_ptr(), source, self.grammar.clone()) }
    }
88
89    fn result_from_rc(
90        &self,
91        rc: i32,
92    ) -> Option<Result<TypedParsedStatement<'_, G>, TypedParseError<'_, G>>> {
93        if rc == 0 {
94            return None;
95        }
96        let result = self.typed_stmt_result();
97        if rc == 1 {
98            Some(Ok(result))
99        } else {
100            Some(Err(TypedParseError::new(result)))
101        }
102    }
103
104    /// Feed one token from the bound source into the parser.
105    ///
106    /// Whitespace/comments are handled automatically; callers can focus on
107    /// meaningful tokens and source spans.
108    ///
109    /// Returns:
110    /// - `None` — keep going, statement not yet complete.
111    /// - `Some(Ok(result))` — statement parsed cleanly; use
112    ///   [`TypedParsedStatement::root`] to access the typed AST.
113    /// - `Some(Err(err))` — parse error; `err.recovery_root()` may contain a partial
114    ///   recovery tree.
115    ///
116    /// `span` is a byte range into the source text bound by this session.
117    /// `token_type` is the grammar's typed token enum.
118    ///
119    /// # Examples
120    ///
121    /// ```rust
122    /// use syntaqlite_syntax::typed::{grammar, TypedParser};
123    /// use syntaqlite_syntax::TokenType;
124    ///
125    /// let parser = TypedParser::new(grammar());
126    /// let mut session = parser.incremental_parse("SELECT 1");
127    ///
128    /// assert!(session.feed_token(TokenType::Select, 0..6).is_none());
129    /// assert!(session.feed_token(TokenType::Integer, 7..8).is_none());
130    /// assert!(session.finish().is_some());
131    /// ```
132    pub fn feed_token(
133        &mut self,
134        token_type: G::Token,
135        span: Range<usize>,
136    ) -> Option<Result<TypedParsedStatement<'_, G>, TypedParseError<'_, G>>> {
137        self.assert_not_finished();
138        // SAFETY: c_source_ptr is valid for the source length; raw is valid.
139        let rc = unsafe {
140            let c_text = self.c_source_ptr.as_ptr().add(span.start);
141            let raw_token_type: u32 = token_type.into();
142            #[expect(clippy::cast_possible_truncation)]
143            (*self.raw_ptr()).feed_token(raw_token_type, c_text as *const _, span.len() as u32)
144        };
145        self.result_from_rc(rc)
146    }
147
148    /// Finalize parsing for the current input and flush any pending statement.
149    ///
150    /// Returns:
151    /// - `None` — nothing was pending (empty input or bare semicolons only).
152    /// - `Some(Ok(result))` — final statement parsed cleanly.
153    /// - `Some(Err(err))` — parse error; `err.recovery_root()` may contain a partial
154    ///   recovery tree.
155    ///
156    /// No further methods may be called after `finish()`.
157    pub fn finish(
158        &mut self,
159    ) -> Option<Result<TypedParsedStatement<'_, G>, TypedParseError<'_, G>>> {
160        self.assert_not_finished();
161        self.finished = true;
162        // SAFETY: raw is valid.
163        let rc = unsafe { (*self.raw_ptr()).finish() };
164        self.result_from_rc(rc)
165    }
166
    /// Return token types that are currently valid next inputs.
    ///
    /// Useful for completion engines after feeding known prefix tokens.
    ///
    /// Tokens the C side reports that have no representation in `G`'s typed
    /// token enum are silently dropped (see the `filter_map` below).
    ///
    /// # Examples
    ///
    /// ```rust
    /// use syntaqlite_syntax::typed::{grammar, TypedParser};
    /// use syntaqlite_syntax::TokenType;
    ///
    /// let parser = TypedParser::new(grammar());
    /// let mut session = parser.incremental_parse("SELECT x FROM t");
    /// let _ = session.feed_token(TokenType::Select, 0..6);
    ///
    /// let expected: Vec<_> = session.expected_tokens().collect();
    /// assert!(!expected.is_empty());
    /// ```
    pub fn expected_tokens(&self) -> impl Iterator<Item = <G as TypedGrammar>::Token> {
        self.assert_not_finished();
        let raw = self.raw_ptr();
        // Fast path: most states expect few tokens, so try a stack buffer
        // first and only fall back to the heap when it overflows.
        let mut stack_buf = [0u32; 256];
        // SAFETY: raw is valid and exclusively borrowed via &self; stack_buf is
        // a valid output buffer.
        #[expect(clippy::cast_possible_truncation)]
        let total =
            unsafe { (*raw).expected_tokens(stack_buf.as_mut_ptr(), stack_buf.len() as u32) };
        let raw_tokens: Vec<u32> = if total == 0 {
            Vec::new()
        } else {
            // `total` is the full count the C side has available, which may
            // exceed what fit in stack_buf on the first call.
            let count = total as usize;
            if count <= stack_buf.len() {
                stack_buf[..count].to_vec()
            } else {
                // Slow path: re-query with a heap buffer sized to `total`.
                let mut heap_buf = vec![0u32; count];
                // SAFETY: raw is valid; heap_buf is sized to hold `total` entries.
                let written = unsafe { (*raw).expected_tokens(heap_buf.as_mut_ptr(), total) };
                // Clamp defensively in case the second call reports a
                // different count than the first — TODO confirm whether the
                // C side can ever do that between back-to-back calls.
                let len = written.clamp(0, total) as usize;
                heap_buf.truncate(len);
                heap_buf
            }
        };
        raw_tokens
            .into_iter()
            .map(crate::any::AnyTokenType)
            .filter_map(<G as TypedGrammar>::Token::from_token_type)
    }
213
214    /// Return the semantic completion context for the current parser state.
215    pub fn completion_context(&self) -> CompletionContext {
216        self.assert_not_finished();
217        // SAFETY: raw is valid and exclusively borrowed via &self.
218        unsafe { (*self.raw_ptr()).completion_context() }
219    }
220
    /// Return how many arena nodes have been built so far.
    ///
    /// NOTE(review): unlike most accessors this does not call
    /// `assert_not_finished()`. Since `inner` is only taken on drop, calling
    /// this after `finish()` still works — confirm whether that asymmetry is
    /// intentional.
    pub fn node_count(&self) -> u32 {
        // SAFETY: raw is valid and exclusively borrowed via &self.
        unsafe { (*self.raw_ptr()).node_count() }
    }
226
227    /// Mark subsequent fed tokens as originating from a macro expansion.
228    ///
229    /// `span` describes the macro call's byte range in the original source.
230    /// Calls may nest (for nested macro expansions).
231    ///
232    /// # Panics
233    ///
234    /// Panics if `span.start` or `span.len()` does not fit in `u32`.
235    pub fn begin_macro(&mut self, span: Range<usize>) {
236        self.assert_not_finished();
237        let call_offset = u32::try_from(span.start).expect("macro span start exceeds u32");
238        let call_length = u32::try_from(span.len()).expect("macro span length exceeds u32");
239        // SAFETY: raw is valid and exclusively borrowed via &mut self.
240        unsafe { (*self.raw_ptr()).begin_macro(call_offset, call_length) }
241    }
242
243    /// End the innermost macro expansion region.
244    pub fn end_macro(&mut self) {
245        self.assert_not_finished();
246        // SAFETY: raw is valid and exclusively borrowed via &mut self.
247        unsafe { (*self.raw_ptr()).end_macro() }
248    }
249
    /// Type-erased view of the current statement result (crate-internal).
    pub(crate) fn stmt_result(&self) -> AnyParsedStatement<'_> {
        self.typed_stmt_result().erase()
    }

    /// Comments recorded by the C parser for the current result.
    pub(crate) fn comments(&self) -> &[ffi::CComment] {
        // SAFETY: raw is valid (owned via ParserInner, valid for &self).
        unsafe { (*self.raw_ptr()).result_comments() }
    }

    /// Tokens recorded by the C parser for the current result.
    pub(crate) fn tokens(&self) -> &[ffi::CParserToken] {
        // SAFETY: raw is valid (owned via ParserInner, valid for &self).
        unsafe { (*self.raw_ptr()).result_tokens() }
    }

    /// Macro expansion regions recorded by the C parser (see `begin_macro`).
    pub(crate) fn macro_regions(&self) -> &[ffi::CMacroRegion] {
        // SAFETY: raw is valid (owned via ParserInner, valid for &self).
        unsafe { (*self.raw_ptr()).result_macros() }
    }
268}
269
/// Type-erased incremental parser for runtime-selected grammars.
///
/// Alias of [`TypedIncrementalParseSession`] instantiated with [`AnyGrammar`].
pub type AnyIncrementalParseSession = TypedIncrementalParseSession<AnyGrammar>;
272
/// Incremental parsing API for the built-in `SQLite` grammar.
///
/// Produced by [`super::Parser::incremental_parse`].
///
/// Feed tokens one at a time via [`feed_token`](Self::feed_token) and signal
/// end of input with [`finish`](Self::finish).
///
/// Ideal for editor-like flows that parse as the user types.
///
/// Newtype over the typed session so the public API exposes the concrete
/// SQLite statement/error types instead of the generic ones.
#[cfg(feature = "sqlite")]
pub struct IncrementalParseSession(TypedIncrementalParseSession<crate::sqlite::grammar::Grammar>);
283
284#[cfg(feature = "sqlite")]
285impl IncrementalParseSession {
286    /// Feed one source token into the parser.
287    ///
288    /// Returns:
289    /// - `None` — keep going, statement not yet complete.
290    /// - `Some(Ok(result))` — statement parsed cleanly.
291    /// - `Some(Err(e))` — parse error; `e.recovery_root()` may contain a partial
292    ///   recovery tree.
293    ///
294    /// - `span` is a byte range into the source text bound by this session.
295    ///
296    /// # Examples
297    ///
298    /// ```rust
299    /// use syntaqlite_syntax::{Parser, TokenType};
300    ///
301    /// let parser = Parser::new();
302    /// let mut session = parser.incremental_parse("SELECT 1");
303    ///
304    /// assert!(session.feed_token(TokenType::Select, 0..6).is_none());
305    /// assert!(session.feed_token(TokenType::Integer, 7..8).is_none());
306    /// ```
307    pub fn feed_token(
308        &mut self,
309        token_type: crate::sqlite::tokens::TokenType,
310        span: Range<usize>,
311    ) -> Option<Result<ParsedStatement<'_>, ParseError<'_>>> {
312        Some(match self.0.feed_token(token_type, span)? {
313            Ok(result) => Ok(ParsedStatement(result)),
314            Err(err) => Err(ParseError(err)),
315        })
316    }
317
318    /// Finalize parsing for the current input.
319    ///
320    /// Returns:
321    /// - `None` — nothing was pending.
322    /// - `Some(Ok(result))` — final statement parsed cleanly.
323    /// - `Some(Err(e))` — parse error; `e.recovery_root()` may contain a partial
324    ///   recovery tree.
325    ///
326    /// No further methods may be called after `finish()`.
327    ///
328    /// # Examples
329    ///
330    /// ```rust
331    /// use syntaqlite_syntax::{Parser, TokenType};
332    ///
333    /// let parser = Parser::new();
334    /// let mut session = parser.incremental_parse("SELECT 1");
335    /// let _ = session.feed_token(TokenType::Select, 0..6);
336    /// let _ = session.feed_token(TokenType::Integer, 7..8);
337    ///
338    /// let stmt = session.finish().and_then(Result::ok).unwrap();
339    /// let _ = stmt.root();
340    /// ```
341    pub fn finish(&mut self) -> Option<Result<ParsedStatement<'_>, ParseError<'_>>> {
342        Some(match self.0.finish()? {
343            Ok(result) => Ok(ParsedStatement(result)),
344            Err(err) => Err(ParseError(err)),
345        })
346    }
347
    /// Return token types that are currently valid lookaheads.
    ///
    /// Delegates to [`TypedIncrementalParseSession::expected_tokens`].
    pub fn expected_tokens(&self) -> impl Iterator<Item = crate::sqlite::tokens::TokenType> {
        self.0.expected_tokens()
    }

    /// Return the semantic completion context for the current parser state.
    pub fn completion_context(&self) -> CompletionContext {
        self.0.completion_context()
    }

    /// Return how many arena nodes have been built so far.
    pub fn node_count(&self) -> u32 {
        self.0.node_count()
    }

    /// Mark subsequent fed tokens as originating from a macro expansion.
    ///
    /// See [`TypedIncrementalParseSession::begin_macro`] for span rules and
    /// panic conditions.
    pub fn begin_macro(&mut self, span: Range<usize>) {
        self.0.begin_macro(span);
    }

    /// End the innermost macro expansion region.
    pub fn end_macro(&mut self) {
        self.0.end_macro();
    }
372
    // The following crate-internal delegations mirror the typed session's
    // accessors; `#[expect(dead_code)]` marks them as currently unused by
    // the crate while keeping them available.

    /// Type-erased view of the current statement result.
    #[expect(dead_code)]
    pub(crate) fn stmt_result(&self) -> AnyParsedStatement<'_> {
        self.0.stmt_result()
    }

    /// Comments recorded by the C parser for the current result.
    #[expect(dead_code)]
    pub(crate) fn comments(&self) -> &[ffi::CComment] {
        self.0.comments()
    }

    /// Tokens recorded by the C parser for the current result.
    #[expect(dead_code)]
    pub(crate) fn tokens(&self) -> &[ffi::CParserToken] {
        self.0.tokens()
    }

    /// Macro expansion regions recorded by the C parser.
    #[expect(dead_code)]
    pub(crate) fn macro_regions(&self) -> &[ffi::CMacroRegion] {
        self.0.macro_regions()
    }
392}
393
#[cfg(feature = "sqlite")]
impl From<TypedIncrementalParseSession<crate::sqlite::grammar::Grammar>>
    for IncrementalParseSession
{
    /// Wrap a typed SQLite session in the public newtype.
    fn from(inner: TypedIncrementalParseSession<crate::sqlite::grammar::Grammar>) -> Self {
        Self(inner)
    }
}