syntaqlite_syntax/parser/incremental.rs
1// Copyright 2025 The syntaqlite Authors. All rights reserved.
2// Licensed under the Apache License, Version 2.0.
3
4use std::cell::RefCell;
5use std::marker::PhantomData;
6use std::ops::Range;
7use std::ptr::NonNull;
8use std::rc::Rc;
9
10use crate::ast::GrammarTokenType;
11use crate::grammar::{AnyGrammar, TypedGrammar};
12
13use super::{
14 AnyParsedStatement, CParser, CompletionContext, ParserInner, TypedParseError,
15 TypedParsedStatement, ffi,
16};
17#[cfg(feature = "sqlite")]
18use super::{ParseError, ParsedStatement};
19
20/// Incremental parser state machine for grammar `G`.
21///
22/// Use this for interactive/editor workflows where input arrives token by
23/// token and you need expected-token or completion-context feedback.
24///
25/// Obtained from [`super::TypedParser::incremental_parse`].
26pub struct TypedIncrementalParseSession<G: TypedGrammar> {
27 /// Base pointer into the internal source buffer. `feed_token` uses this
28 /// to compute the C-side token pointer from byte-offset spans.
29 c_source_ptr: NonNull<u8>,
30 grammar: AnyGrammar,
31 /// Checked-out parser state. Returned to `slot` on drop.
32 inner: Option<ParserInner>,
33 /// Slot to return `inner` to when this session is dropped.
34 slot: Rc<RefCell<Option<ParserInner>>>,
35 finished: bool,
36 _marker: PhantomData<G>,
37}
38
39impl<G: TypedGrammar> Drop for TypedIncrementalParseSession<G> {
40 fn drop(&mut self) {
41 if let Some(inner) = self.inner.take() {
42 *self.slot.borrow_mut() = Some(inner);
43 }
44 }
45}
46
47impl<G: TypedGrammar> TypedIncrementalParseSession<G> {
48 pub(crate) fn new(
49 c_source_ptr: NonNull<u8>,
50 grammar: AnyGrammar,
51 inner: ParserInner,
52 slot: Rc<RefCell<Option<ParserInner>>>,
53 ) -> Self {
54 TypedIncrementalParseSession {
55 c_source_ptr,
56 grammar,
57 inner: Some(inner),
58 slot,
59 finished: false,
60 _marker: PhantomData,
61 }
62 }
63
64 fn assert_not_finished(&self) {
65 assert!(
66 !self.finished,
67 "TypedIncrementalParseSession used after finish()"
68 );
69 }
70
71 fn raw_ptr(&self) -> *mut CParser {
72 self.inner
73 .as_ref()
74 .expect("inner taken after finish()")
75 .raw
76 .as_ptr()
77 }
78
79 fn typed_stmt_result(&self) -> TypedParsedStatement<'_, G> {
80 let inner = self.inner.as_ref().expect("inner taken after finish()");
81 let source_len = inner.source_buf.len().saturating_sub(1);
82 // SAFETY: source_buf was populated from valid UTF-8 (&str) in
83 // reset_parser. The first source_len bytes are the original source.
84 let source = unsafe { std::str::from_utf8_unchecked(&inner.source_buf[..source_len]) };
85 // SAFETY: inner.raw is valid (owned via ParserInner, not yet destroyed).
86 unsafe { TypedParsedStatement::new(inner.raw.as_ptr(), source, self.grammar.clone()) }
87 }
88
89 fn result_from_rc(
90 &self,
91 rc: i32,
92 ) -> Option<Result<TypedParsedStatement<'_, G>, TypedParseError<'_, G>>> {
93 if rc == 0 {
94 return None;
95 }
96 let result = self.typed_stmt_result();
97 if rc == 1 {
98 Some(Ok(result))
99 } else {
100 Some(Err(TypedParseError::new(result)))
101 }
102 }
103
104 /// Feed one token from the bound source into the parser.
105 ///
106 /// Whitespace/comments are handled automatically; callers can focus on
107 /// meaningful tokens and source spans.
108 ///
109 /// Returns:
110 /// - `None` — keep going, statement not yet complete.
111 /// - `Some(Ok(result))` — statement parsed cleanly; use
112 /// [`TypedParsedStatement::root`] to access the typed AST.
113 /// - `Some(Err(err))` — parse error; `err.recovery_root()` may contain a partial
114 /// recovery tree.
115 ///
116 /// `span` is a byte range into the source text bound by this session.
117 /// `token_type` is the grammar's typed token enum.
118 ///
119 /// # Examples
120 ///
121 /// ```rust
122 /// use syntaqlite_syntax::typed::{grammar, TypedParser};
123 /// use syntaqlite_syntax::TokenType;
124 ///
125 /// let parser = TypedParser::new(grammar());
126 /// let mut session = parser.incremental_parse("SELECT 1");
127 ///
128 /// assert!(session.feed_token(TokenType::Select, 0..6).is_none());
129 /// assert!(session.feed_token(TokenType::Integer, 7..8).is_none());
130 /// assert!(session.finish().is_some());
131 /// ```
132 pub fn feed_token(
133 &mut self,
134 token_type: G::Token,
135 span: Range<usize>,
136 ) -> Option<Result<TypedParsedStatement<'_, G>, TypedParseError<'_, G>>> {
137 self.assert_not_finished();
138 // SAFETY: c_source_ptr is valid for the source length; raw is valid.
139 let rc = unsafe {
140 let c_text = self.c_source_ptr.as_ptr().add(span.start);
141 let raw_token_type: u32 = token_type.into();
142 #[expect(clippy::cast_possible_truncation)]
143 (*self.raw_ptr()).feed_token(raw_token_type, c_text as *const _, span.len() as u32)
144 };
145 self.result_from_rc(rc)
146 }
147
148 /// Finalize parsing for the current input and flush any pending statement.
149 ///
150 /// Returns:
151 /// - `None` — nothing was pending (empty input or bare semicolons only).
152 /// - `Some(Ok(result))` — final statement parsed cleanly.
153 /// - `Some(Err(err))` — parse error; `err.recovery_root()` may contain a partial
154 /// recovery tree.
155 ///
156 /// No further methods may be called after `finish()`.
157 pub fn finish(
158 &mut self,
159 ) -> Option<Result<TypedParsedStatement<'_, G>, TypedParseError<'_, G>>> {
160 self.assert_not_finished();
161 self.finished = true;
162 // SAFETY: raw is valid.
163 let rc = unsafe { (*self.raw_ptr()).finish() };
164 self.result_from_rc(rc)
165 }
166
167 /// Return token types that are currently valid next inputs.
168 ///
169 /// Useful for completion engines after feeding known prefix tokens.
170 ///
171 /// # Examples
172 ///
173 /// ```rust
174 /// use syntaqlite_syntax::typed::{grammar, TypedParser};
175 /// use syntaqlite_syntax::TokenType;
176 ///
177 /// let parser = TypedParser::new(grammar());
178 /// let mut session = parser.incremental_parse("SELECT x FROM t");
179 /// let _ = session.feed_token(TokenType::Select, 0..6);
180 ///
181 /// let expected: Vec<_> = session.expected_tokens().collect();
182 /// assert!(!expected.is_empty());
183 /// ```
184 pub fn expected_tokens(&self) -> impl Iterator<Item = <G as TypedGrammar>::Token> {
185 self.assert_not_finished();
186 let raw = self.raw_ptr();
187 let mut stack_buf = [0u32; 256];
188 // SAFETY: raw is valid and exclusively borrowed via &self; stack_buf is
189 // a valid output buffer.
190 #[expect(clippy::cast_possible_truncation)]
191 let total =
192 unsafe { (*raw).expected_tokens(stack_buf.as_mut_ptr(), stack_buf.len() as u32) };
193 let raw_tokens: Vec<u32> = if total == 0 {
194 Vec::new()
195 } else {
196 let count = total as usize;
197 if count <= stack_buf.len() {
198 stack_buf[..count].to_vec()
199 } else {
200 let mut heap_buf = vec![0u32; count];
201 // SAFETY: raw is valid; heap_buf is sized to hold `total` entries.
202 let written = unsafe { (*raw).expected_tokens(heap_buf.as_mut_ptr(), total) };
203 let len = written.clamp(0, total) as usize;
204 heap_buf.truncate(len);
205 heap_buf
206 }
207 };
208 raw_tokens
209 .into_iter()
210 .map(crate::any::AnyTokenType)
211 .filter_map(<G as TypedGrammar>::Token::from_token_type)
212 }
213
214 /// Return the semantic completion context for the current parser state.
215 pub fn completion_context(&self) -> CompletionContext {
216 self.assert_not_finished();
217 // SAFETY: raw is valid and exclusively borrowed via &self.
218 unsafe { (*self.raw_ptr()).completion_context() }
219 }
220
221 /// Return how many arena nodes have been built so far.
222 pub fn node_count(&self) -> u32 {
223 // SAFETY: raw is valid and exclusively borrowed via &self.
224 unsafe { (*self.raw_ptr()).node_count() }
225 }
226
227 /// Mark subsequent fed tokens as originating from a macro expansion.
228 ///
229 /// `span` describes the macro call's byte range in the original source.
230 /// Calls may nest (for nested macro expansions).
231 ///
232 /// # Panics
233 ///
234 /// Panics if `span.start` or `span.len()` does not fit in `u32`.
235 pub fn begin_macro(&mut self, span: Range<usize>) {
236 self.assert_not_finished();
237 let call_offset = u32::try_from(span.start).expect("macro span start exceeds u32");
238 let call_length = u32::try_from(span.len()).expect("macro span length exceeds u32");
239 // SAFETY: raw is valid and exclusively borrowed via &mut self.
240 unsafe { (*self.raw_ptr()).begin_macro(call_offset, call_length) }
241 }
242
243 /// End the innermost macro expansion region.
244 pub fn end_macro(&mut self) {
245 self.assert_not_finished();
246 // SAFETY: raw is valid and exclusively borrowed via &mut self.
247 unsafe { (*self.raw_ptr()).end_macro() }
248 }
249
250 pub(crate) fn stmt_result(&self) -> AnyParsedStatement<'_> {
251 self.typed_stmt_result().erase()
252 }
253
254 pub(crate) fn comments(&self) -> &[ffi::CComment] {
255 // SAFETY: raw is valid (owned via ParserInner, valid for &self).
256 unsafe { (*self.raw_ptr()).result_comments() }
257 }
258
259 pub(crate) fn tokens(&self) -> &[ffi::CParserToken] {
260 // SAFETY: raw is valid (owned via ParserInner, valid for &self).
261 unsafe { (*self.raw_ptr()).result_tokens() }
262 }
263
264 pub(crate) fn macro_regions(&self) -> &[ffi::CMacroRegion] {
265 // SAFETY: raw is valid (owned via ParserInner, valid for &self).
266 unsafe { (*self.raw_ptr()).result_macros() }
267 }
268}
269
270/// Type-erased incremental parser for runtime-selected grammars.
271pub type AnyIncrementalParseSession = TypedIncrementalParseSession<AnyGrammar>;
272
/// Incremental, token-at-a-time parser for the built-in `SQLite` grammar.
///
/// Produced by [`super::Parser::incremental_parse`].
///
/// Push tokens with [`feed_token`](Self::feed_token), then call
/// [`finish`](Self::finish) once the input is exhausted.
///
/// A good fit for editor-like flows that parse as the user types.
#[cfg(feature = "sqlite")]
pub struct IncrementalParseSession(
    /// Generic session specialised to the `SQLite` grammar; every method
    /// delegates to it.
    TypedIncrementalParseSession<crate::sqlite::grammar::Grammar>,
);
283
#[cfg(feature = "sqlite")]
impl IncrementalParseSession {
    /// Push a single source token into the parser.
    ///
    /// `span` is a byte range into the source text bound by this session.
    ///
    /// Returns:
    /// - `None` — the statement is not complete yet; keep feeding.
    /// - `Some(Ok(result))` — a statement parsed cleanly.
    /// - `Some(Err(e))` — parse error; `e.recovery_root()` may contain a partial
    ///   recovery tree.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use syntaqlite_syntax::{Parser, TokenType};
    ///
    /// let parser = Parser::new();
    /// let mut session = parser.incremental_parse("SELECT 1");
    ///
    /// assert!(session.feed_token(TokenType::Select, 0..6).is_none());
    /// assert!(session.feed_token(TokenType::Integer, 7..8).is_none());
    /// ```
    pub fn feed_token(
        &mut self,
        token_type: crate::sqlite::tokens::TokenType,
        span: Range<usize>,
    ) -> Option<Result<ParsedStatement<'_>, ParseError<'_>>> {
        // Re-wrap both arms of the typed result in the SQLite-specific newtypes.
        self.0
            .feed_token(token_type, span)
            .map(|outcome| outcome.map(ParsedStatement).map_err(ParseError))
    }

    /// Signal end of input and flush any pending statement.
    ///
    /// Returns:
    /// - `None` — nothing was pending.
    /// - `Some(Ok(result))` — the final statement parsed cleanly.
    /// - `Some(Err(e))` — parse error; `e.recovery_root()` may contain a partial
    ///   recovery tree.
    ///
    /// No further methods may be called after `finish()`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use syntaqlite_syntax::{Parser, TokenType};
    ///
    /// let parser = Parser::new();
    /// let mut session = parser.incremental_parse("SELECT 1");
    /// let _ = session.feed_token(TokenType::Select, 0..6);
    /// let _ = session.feed_token(TokenType::Integer, 7..8);
    ///
    /// let stmt = session.finish().and_then(Result::ok).unwrap();
    /// let _ = stmt.root();
    /// ```
    pub fn finish(&mut self) -> Option<Result<ParsedStatement<'_>, ParseError<'_>>> {
        // Re-wrap both arms of the typed result in the SQLite-specific newtypes.
        self.0
            .finish()
            .map(|outcome| outcome.map(ParsedStatement).map_err(ParseError))
    }

    /// Token types the parser would currently accept as the next input.
    pub fn expected_tokens(&self) -> impl Iterator<Item = crate::sqlite::tokens::TokenType> {
        self.0.expected_tokens()
    }

    /// Semantic completion context for the parser's current state.
    pub fn completion_context(&self) -> CompletionContext {
        self.0.completion_context()
    }

    /// Number of arena nodes built so far.
    pub fn node_count(&self) -> u32 {
        self.0.node_count()
    }

    /// Flag the tokens fed from now on as coming from a macro expansion.
    ///
    /// `span` is forwarded unchanged to the underlying typed session.
    pub fn begin_macro(&mut self, span: Range<usize>) {
        self.0.begin_macro(span);
    }

    /// Close the innermost macro expansion region.
    pub fn end_macro(&mut self) {
        self.0.end_macro();
    }

    /// Type-erased statement result of the underlying session.
    #[expect(dead_code)]
    pub(crate) fn stmt_result(&self) -> AnyParsedStatement<'_> {
        self.0.stmt_result()
    }

    /// Comments reported by the underlying session.
    #[expect(dead_code)]
    pub(crate) fn comments(&self) -> &[ffi::CComment] {
        self.0.comments()
    }

    /// Tokens reported by the underlying session.
    #[expect(dead_code)]
    pub(crate) fn tokens(&self) -> &[ffi::CParserToken] {
        self.0.tokens()
    }

    /// Macro regions reported by the underlying session.
    #[expect(dead_code)]
    pub(crate) fn macro_regions(&self) -> &[ffi::CMacroRegion] {
        self.0.macro_regions()
    }
}
393
#[cfg(feature = "sqlite")]
impl From<TypedIncrementalParseSession<crate::sqlite::grammar::Grammar>>
    for IncrementalParseSession
{
    /// Wrap a typed `SQLite` session in the public newtype.
    fn from(session: TypedIncrementalParseSession<crate::sqlite::grammar::Grammar>) -> Self {
        Self(session)
    }
}