Skip to main content

syntaqlite_syntax/parser/
session.rs

1// Copyright 2025 The syntaqlite Authors. All rights reserved.
2// Licensed under the Apache License, Version 2.0.
3
4#[cfg(feature = "sqlite")]
5use super::{
6    AnyParsedStatement, Comment, IncrementalParseSession, ParseErrorKind, ParseOutcome,
7    ParserConfig, ParserTokenFlags, TypedParseError, TypedParseSession, TypedParsedStatement,
8    TypedParser, TypedParserToken,
9};
10
/// High-level entry point for parsing `SQLite` SQL into typed AST statements.
///
/// Use this in most applications.
///
/// - Hides grammar setup and returns SQLite SQL-native result types.
/// - Reusable across many SQL inputs.
/// - Supports batch/script parsing via [`parse`](Self::parse).
/// - Supports editor-style token feeds via [`incremental_parse`](Self::incremental_parse).
///
/// This is a newtype over the generic `TypedParser`, fixed to the `SQLite`
/// grammar; every method forwards to that inner parser.
///
/// Only one session created from a given parser may be alive at a time:
/// [`parse`](Self::parse) and [`incremental_parse`](Self::incremental_parse)
/// panic if a previous session has not been dropped yet.
///
/// Advanced generic APIs exist in [`crate::typed`] and [`crate::any`].
#[cfg(feature = "sqlite")]
// NOTE(review): `#[doc(hidden)]` hides this item from rustdoc at this path;
// presumably it is re-exported and documented from elsewhere — confirm.
#[doc(hidden)]
pub struct Parser(pub(super) TypedParser<crate::sqlite::grammar::Grammar>);
24
#[cfg(feature = "sqlite")]
impl Parser {
    /// Build a parser bound to the `SQLite` grammar using the default configuration.
    pub fn new() -> Self {
        let grammar = crate::sqlite::grammar::grammar();
        Self(TypedParser::new(grammar))
    }

    /// Build a parser bound to the `SQLite` grammar using the supplied `config`.
    pub fn with_config(config: &ParserConfig) -> Self {
        let grammar = crate::sqlite::grammar::grammar();
        Self(TypedParser::with_config(grammar, config))
    }

    /// Add a template macro named `name` to this parser.
    ///
    /// `body` refers to its parameters with `$param` placeholders
    /// (e.g. `"$x + 1"`). All strings are copied, so the caller may free them
    /// once this call returns.
    pub fn register_macro(&mut self, name: &str, params: &[&str], body: &str) {
        let Self(inner) = self;
        inner.register_macro(name, params, body);
    }

    /// Remove the macro called `name`.
    ///
    /// Returns `true` if it was found and removed.
    pub fn deregister_macro(&mut self, name: &str) -> bool {
        let Self(inner) = self;
        inner.deregister_macro(name)
    }

    /// Begin parsing a SQL script, yielding a session that produces one
    /// statement at a time.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use syntaqlite_syntax::{ParseErrorKind, Parser};
    ///
    /// let parser = Parser::new();
    /// let mut session = parser.parse("SELECT 1; SELECT FROM;");
    /// let mut ok_count = 0;
    ///
    /// loop {
    ///     match session.next() {
    ///         syntaqlite_syntax::ParseOutcome::Ok(stmt) => {
    ///             ok_count += 1;
    ///             let _ = stmt.root();
    ///         }
    ///         syntaqlite_syntax::ParseOutcome::Err(err) => {
    ///             assert!(!err.message().is_empty());
    ///             if err.kind() == ParseErrorKind::Fatal {
    ///                 break;
    ///             }
    ///         }
    ///         syntaqlite_syntax::ParseOutcome::Done => break,
    ///     }
    /// }
    ///
    /// assert!(ok_count >= 1);
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if another session from this parser is still active.
    /// Drop the previous session before starting a new one.
    pub fn parse(&self, source: &str) -> ParseSession {
        let Self(inner) = self;
        ParseSession(inner.parse(source))
    }

    /// Begin an incremental session in which the caller feeds tokens one by one.
    ///
    /// Intended for IDEs, completion engines, and other workflows that
    /// consume SQL progressively.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use syntaqlite_syntax::{Parser, TokenType};
    ///
    /// let parser = Parser::new();
    /// let mut session = parser.incremental_parse("SELECT 1");
    ///
    /// assert!(session.feed_token(TokenType::Select, 0..6).is_none());
    /// assert!(session.feed_token(TokenType::Integer, 7..8).is_none());
    ///
    /// let stmt = session.finish().and_then(Result::ok).unwrap();
    /// let _ = stmt.root();
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if another session from this parser is still active.
    /// Drop the previous session before starting a new one.
    pub fn incremental_parse(&self, source: &str) -> IncrementalParseSession {
        let typed_session = self.0.incremental_parse(source);
        typed_session.into()
    }
}
119
#[cfg(feature = "sqlite")]
impl Default for Parser {
    /// Same as [`Parser::new`]: `SQLite` grammar with default configuration.
    fn default() -> Self {
        Parser::new()
    }
}
126
/// Cursor over statements parsed from one SQL source string.
///
/// Useful for SQL scripts containing multiple statements.
///
/// - Returns one statement at a time via [`next`](Self::next).
/// - Reports errors per statement instead of failing the whole script immediately.
/// - Can continue after recoverable errors.
///
/// This is a newtype over the generic `TypedParseSession`, fixed to the
/// `SQLite` grammar; every method forwards to that inner session.
#[cfg(feature = "sqlite")]
// NOTE(review): `#[doc(hidden)]` hides this item from rustdoc at this path;
// presumably it is re-exported and documented from elsewhere — confirm.
#[doc(hidden)]
pub struct ParseSession(pub(super) TypedParseSession<crate::sqlite::grammar::Grammar>);
137
#[cfg(feature = "sqlite")]
impl ParseSession {
    /// Advance to the next statement and report the result as one of three states.
    ///
    /// The states correspond one-to-one to the C parser return codes:
    /// - [`ParseOutcome::Done`]  -> `SYNTAQLITE_PARSE_DONE`
    /// - [`ParseOutcome::Ok`]    -> `SYNTAQLITE_PARSE_OK`
    /// - [`ParseOutcome::Err`]   -> `SYNTAQLITE_PARSE_ERROR`
    ///
    /// The grammar-generic statement / error produced by the inner session is
    /// wrapped in the `SQLite`-specific [`ParsedStatement`] / [`ParseError`].
    #[expect(clippy::should_implement_trait)]
    pub fn next(&mut self) -> ParseOutcome<ParsedStatement<'_>, ParseError<'_>> {
        match self.0.next() {
            ParseOutcome::Ok(typed_stmt) => ParseOutcome::Ok(ParsedStatement(typed_stmt)),
            ParseOutcome::Err(typed_err) => ParseOutcome::Err(ParseError(typed_err)),
            ParseOutcome::Done => ParseOutcome::Done,
        }
    }

    /// The SQL text this session was created from.
    pub fn source(&self) -> &str {
        let Self(inner) = self;
        inner.source()
    }

    /// Grammar-agnostic view of the parse arena in its current state.
    ///
    /// Handy for generic introspection once the session has been consumed.
    ///
    /// # Examples
    ///
    /// ```rust
    /// let parser = syntaqlite_syntax::Parser::new();
    /// let mut session = parser.parse("SELECT 1;");
    /// let stmt = match session.next().transpose() {
    ///     Ok(Some(stmt)) => stmt,
    ///     Ok(None) => panic!("expected statement"),
    ///     Err(err) => panic!("unexpected parse error: {err}"),
    /// };
    /// let _ = stmt.root();
    ///
    /// let any = session.arena_result();
    /// assert!(!any.root_id().is_null());
    /// ```
    pub fn arena_result(&self) -> AnyParsedStatement<'_> {
        let Self(inner) = self;
        inner.arena_result()
    }
}
179
/// One parser-observed token from a parsed statement.
///
/// Returned by [`ParsedStatement::tokens`]. This is useful when building
/// token-aware tooling such as:
///
/// - Semantic syntax highlighting.
/// - Identifier/function/type classification.
/// - Statement-level token diagnostics.
///
/// Requires `collect_tokens: true` in [`ParserConfig`] (the example below
/// enables it with `with_collect_tokens(true)`).
///
/// This is a newtype over the generic `TypedParserToken`, fixed to the
/// `SQLite` grammar.
///
/// # Examples
///
/// ```rust
/// use syntaqlite_syntax::{Parser, ParserConfig, TokenType};
///
/// let parser = Parser::with_config(&ParserConfig::default().with_collect_tokens(true));
/// let mut session = parser.parse("SELECT max(x) FROM t;");
/// let stmt = session.next().transpose().unwrap().unwrap();
///
/// let tokens: Vec<_> = stmt.tokens().collect();
/// assert!(!tokens.is_empty());
/// assert!(tokens.iter().any(|t| t.token_type() == TokenType::Select));
///
/// // Flags expose parser-inferred role information (identifier/function/type).
/// let _has_semantic_role = tokens.iter().any(|t| {
///     let f = t.flags();
///     f.used_as_identifier() || f.used_as_function() || f.used_as_type()
/// });
/// ```
#[cfg(feature = "sqlite")]
pub struct ParserToken<'a>(pub(super) TypedParserToken<'a, crate::sqlite::grammar::Grammar>);
212
#[cfg(feature = "sqlite")]
impl<'a> ParserToken<'a> {
    /// The token's text exactly as it appears in the source.
    ///
    /// Original casing and quoting from the input SQL are preserved.
    pub fn text(&self) -> &'a str {
        let Self(inner) = self;
        inner.text()
    }

    /// The token's kind in the `SQLite` SQL grammar.
    ///
    /// This is the lexical class (keyword, identifier, operator, etc.).
    pub fn token_type(&self) -> crate::sqlite::tokens::TokenType {
        let Self(inner) = self;
        inner.token_type()
    }

    /// Usage flags the parser inferred for this token.
    ///
    /// These reveal the contextual role of the token, for example:
    ///
    /// - Keyword text used as an identifier.
    /// - Function-call names.
    /// - Type names.
    pub fn flags(&self) -> ParserTokenFlags {
        let Self(inner) = self;
        inner.flags()
    }

    /// Byte offset at which the token starts within the statement source.
    pub fn offset(&self) -> u32 {
        let Self(inner) = self;
        inner.offset()
    }

    /// Length of the token text in bytes.
    pub fn length(&self) -> u32 {
        let Self(inner) = self;
        inner.length()
    }
}
250
#[cfg(feature = "sqlite")]
impl std::fmt::Debug for ParserToken<'_> {
    /// Renders the token as a `ParserToken { text, token_type, flags }` struct.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(inner) = self;
        let mut builder = f.debug_struct("ParserToken");
        builder.field("text", &inner.text());
        builder.field("token_type", &inner.token_type());
        builder.field("flags", &inner.flags());
        builder.finish()
    }
}
261
/// Parse result for one successfully recognized `SQLite` statement.
///
/// Contains statement-local data:
///
/// - Typed AST root (`root()`).
/// - Optional token stream (`tokens()`).
/// - Optional comments (`comments()`).
/// - Original source slice (`source()`).
///
/// This is a newtype over the generic `TypedParsedStatement`, fixed to the
/// `SQLite` grammar; every method forwards to that inner value.
#[cfg(feature = "sqlite")]
// NOTE(review): `#[doc(hidden)]` hides this item from rustdoc at this path;
// presumably it is re-exported and documented from elsewhere — confirm.
#[doc(hidden)]
pub struct ParsedStatement<'a>(
    pub(super) TypedParsedStatement<'a, crate::sqlite::grammar::Grammar>,
);
275
#[cfg(feature = "sqlite")]
impl<'a> ParsedStatement<'a> {
    /// Root of the typed AST for this statement.
    ///
    /// `None` is returned for comment-only input (valid SQL that contains no
    /// actual statement, e.g. `/* no-op */`).
    ///
    /// Mirrors C `syntaqlite_result_root` for `PARSE_OK`.
    pub fn root(&'a self) -> Option<crate::sqlite::ast::Stmt<'a>> {
        let Self(inner) = self;
        inner.root()
    }

    /// Source text this result is bound to.
    pub fn source(&self) -> &'a str {
        let Self(inner) = self;
        inner.source()
    }

    /// Tokens belonging to this statement, each carrying parser usage flags.
    ///
    /// Requires `collect_tokens: true` in [`ParserConfig`].
    pub fn tokens(&self) -> impl Iterator<Item = ParserToken<'a>> {
        let typed_tokens = self.0.tokens();
        typed_tokens.map(ParserToken)
    }

    /// Comments attached to this statement.
    ///
    /// Requires `collect_tokens: true` in [`ParserConfig`].
    pub fn comments(&self) -> impl Iterator<Item = Comment<'a>> {
        let Self(inner) = self;
        inner.comments()
    }

    /// Produce the grammar-agnostic [`AnyParsedStatement`] form of this result.
    ///
    /// Intended for handing statement data to grammar-independent tooling.
    /// The inner typed statement is cloned first, so `self` stays usable.
    pub fn erase(&self) -> AnyParsedStatement<'a> {
        let typed_copy = self.0.clone();
        typed_copy.erase()
    }

    /// Call-site spans of the macro expansions recorded while parsing.
    pub fn macro_regions(&self) -> impl Iterator<Item = super::MacroRegion> + use<'_, 'a> {
        let Self(inner) = self;
        inner.macro_regions()
    }

    /// Append an indented textual dump of the AST to `out`.
    pub fn dump(&self, out: &mut String, indent: usize) {
        let Self(inner) = self;
        inner.dump(out, indent);
    }
}
324
/// Parse error for one `SQLite` statement.
///
/// Includes diagnostics you can show directly to users:
///
/// - Error class (`kind()`: recovered vs fatal).
/// - Error message (`message()`).
/// - Optional location (`offset()` / `length()`).
/// - Optional partial recovery tree (`recovery_root()`).
///
/// This is a newtype over the generic `TypedParseError`, fixed to the
/// `SQLite` grammar; every method forwards to that inner value.
#[cfg(feature = "sqlite")]
// NOTE(review): `#[doc(hidden)]` hides this item from rustdoc at this path;
// presumably it is re-exported and documented from elsewhere — confirm.
#[doc(hidden)]
pub struct ParseError<'a>(pub(super) TypedParseError<'a, crate::sqlite::grammar::Grammar>);
336
#[cfg(feature = "sqlite")]
impl<'a> ParseError<'a> {
    /// Error class: the parser either recovered (`Recovered`) or failed outright (`Fatal`).
    pub fn kind(&self) -> ParseErrorKind {
        let Self(inner) = self;
        inner.kind()
    }

    /// `true` when the parser recovered from this error and yielded a partial tree.
    pub fn is_recovered(&self) -> bool {
        let Self(inner) = self;
        inner.is_recovered()
    }

    /// `true` when this error is fatal (unrecoverable).
    pub fn is_fatal(&self) -> bool {
        let Self(inner) = self;
        inner.is_fatal()
    }

    /// Diagnostic text suitable for showing to a human.
    pub fn message(&self) -> &str {
        let Self(inner) = self;
        inner.message()
    }

    /// Byte offset of the error in the original source, when known.
    pub fn offset(&self) -> Option<usize> {
        let Self(inner) = self;
        inner.offset()
    }

    /// Byte length of the offending range, when known.
    pub fn length(&self) -> Option<usize> {
        let Self(inner) = self;
        inner.length()
    }

    /// Partial AST salvaged from the invalid input, when available.
    ///
    /// Mirrors C `syntaqlite_result_recovery_root` for `PARSE_ERROR`.
    pub fn recovery_root(&'a self) -> Option<crate::sqlite::ast::Stmt<'a>> {
        let Self(inner) = self;
        inner.recovery_root()
    }

    /// Source text this result is bound to.
    pub fn parse_source(&self) -> &'a str {
        let Self(inner) = self;
        inner.0.source()
    }

    /// Tokens gathered during the (partial) parse, if `collect_tokens` was enabled.
    pub fn tokens(&self) -> impl Iterator<Item = ParserToken<'a>> {
        let typed_tokens = self.0.tokens();
        typed_tokens.map(ParserToken)
    }

    /// Comments gathered during the (partial) parse, if `collect_tokens` was enabled.
    pub fn comments(&self) -> impl Iterator<Item = Comment<'a>> {
        let Self(inner) = self;
        inner.comments()
    }
}
391
#[cfg(feature = "sqlite")]
impl std::fmt::Debug for ParseError<'_> {
    // Forwards formatting to the wrapped `TypedParseError`.
    //
    // NOTE(review): this uses plain method-call syntax, and the `Display` impl
    // for this type has the exact same body. Which `fmt` is selected depends
    // on the inner type and the traits in scope, neither of which is visible
    // here — confirm the debug rendering is the intended one.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.fmt(f)
    }
}
398
#[cfg(feature = "sqlite")]
impl std::fmt::Display for ParseError<'_> {
    // Forwards formatting to the wrapped `TypedParseError`; this is what
    // `{err}` in `panic!`/`format!` strings ends up calling.
    //
    // NOTE(review): this uses plain method-call syntax, and the `Debug` impl
    // for this type has the exact same body. Which `fmt` is selected depends
    // on the inner type and the traits in scope, neither of which is visible
    // here — confirm the user-facing rendering is the intended one.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.fmt(f)
    }
}
405
// Empty impl: every `std::error::Error` method keeps its default. The trait's
// `Debug` + `Display` supertrait bounds are met by the impls on this type.
#[cfg(feature = "sqlite")]
impl std::error::Error for ParseError<'_> {}
408
#[cfg(all(test, feature = "sqlite"))]
mod tests {
    use std::panic::{self, AssertUnwindSafe};

    use super::{ParseErrorKind, ParseOutcome, Parser, ParserConfig};
    use crate::{CommentKind, TokenType};

    /// A broken statement in the middle of a script yields an error for that
    /// statement only; the statements on either side still parse.
    #[test]
    fn parser_continues_after_statement_error() {
        let parser = Parser::new();
        let mut session = parser.parse("SELECT 1; SELECT ; SELECT 2;");

        let first = match session.next() {
            ParseOutcome::Done => panic!("first statement missing"),
            ParseOutcome::Err(err) => panic!("first statement should parse: {err}"),
            ParseOutcome::Ok(stmt) => stmt,
        };
        let _ = first.root();

        let error = match session.next() {
            ParseOutcome::Done => panic!("second statement missing"),
            ParseOutcome::Ok(_) => panic!("second statement should fail"),
            ParseOutcome::Err(err) => err,
        };
        assert!(!error.message().is_empty());
        // Exactly one of the two classifications must hold.
        assert_ne!(error.is_fatal(), error.is_recovered());
        let kind = error.kind();
        assert!(matches!(
            kind,
            ParseErrorKind::Recovered | ParseErrorKind::Fatal
        ));

        let third = match session.next() {
            ParseOutcome::Done => panic!("third statement missing"),
            ParseOutcome::Err(err) => panic!("third statement should parse: {err}"),
            ParseOutcome::Ok(stmt) => stmt,
        };
        let _ = third.root();
        assert!(matches!(session.next(), ParseOutcome::Done));
    }

    /// With token collection enabled, both tokens and block/line comments are
    /// available on the parsed statement.
    #[test]
    fn parser_collect_tokens_and_comments() {
        let config = ParserConfig::default().with_collect_tokens(true);
        let parser = Parser::with_config(&config);
        let mut session = parser.parse("/* lead */ SELECT 1 -- tail\n;");

        let statement = match session.next() {
            ParseOutcome::Done => panic!("statement is missing"),
            ParseOutcome::Err(err) => panic!("statement should parse: {err}"),
            ParseOutcome::Ok(stmt) => stmt,
        };

        let token_types: Vec<_> = statement.tokens().map(|tok| tok.token_type()).collect();
        assert!(token_types.contains(&TokenType::Select));
        assert!(token_types.contains(&TokenType::Integer));

        let comments: Vec<_> = statement.comments().collect();
        let has_lead_block = comments
            .iter()
            .any(|c| c.kind() == CommentKind::Block && c.text().contains("lead"));
        assert!(has_lead_block);
        let has_tail_line = comments
            .iter()
            .any(|c| c.kind() == CommentKind::Line && c.text().contains("tail"));
        assert!(has_tail_line);
    }

    /// The terminating `;` is part of the collected token stream.
    #[test]
    fn parser_collect_tokens_includes_semi() {
        let config = ParserConfig::default().with_collect_tokens(true);
        let parser = Parser::with_config(&config);
        let mut session = parser.parse("SELECT 1;");

        let statement = match session.next() {
            ParseOutcome::Done => panic!("statement is missing"),
            ParseOutcome::Err(err) => panic!("statement should parse: {err}"),
            ParseOutcome::Ok(stmt) => stmt,
        };

        let token_types: Vec<_> = statement.tokens().map(|tok| tok.token_type()).collect();
        let has_semi = token_types.contains(&TokenType::Semi);
        assert!(
            has_semi,
            "Semi token should be in collected tokens, got: {token_types:?}"
        );
    }

    /// Starting a second session while one is alive panics; after the first
    /// session is dropped the parser can be used again.
    #[test]
    fn parser_allows_only_one_live_session() {
        let parser = Parser::new();
        let live_session = parser.parse("SELECT 1;");

        let reentrant_attempt = panic::catch_unwind(AssertUnwindSafe(|| {
            let _session = parser.parse("SELECT 2;");
        }));
        assert!(reentrant_attempt.is_err());

        drop(live_session);

        let mut second = parser.parse("SELECT 2;");
        let result = match second.next() {
            ParseOutcome::Done => panic!("statement is missing"),
            ParseOutcome::Err(err) => panic!("statement should parse: {err}"),
            ParseOutcome::Ok(stmt) => stmt,
        };
        let _ = result.root();
    }

    /// `next` surfaces each of the three outcomes: `Ok`, `Done`, and `Err`.
    #[test]
    fn parser_next_exposes_done_ok_err_states() {
        let parser = Parser::new();

        let mut ok_session = parser.parse("SELECT 1;");
        match ok_session.next() {
            ParseOutcome::Done => panic!("expected statement"),
            ParseOutcome::Err(err) => panic!("unexpected error: {}", err.message()),
            ParseOutcome::Ok(stmt) => {
                let _ = stmt.root();
            }
        }
        assert!(matches!(ok_session.next(), ParseOutcome::Done));
        // The parser permits only one live session, so release this one first.
        drop(ok_session);

        let mut err_session = parser.parse("abc");
        match err_session.next() {
            ParseOutcome::Done => panic!("expected fatal error"),
            ParseOutcome::Ok(_) => panic!("expected parse error"),
            ParseOutcome::Err(err) => assert!(err.is_fatal()),
        }
    }

    /// `transpose` turns the tri-state outcome into `Result<Option<_>, _>`:
    /// statements become `Ok(Some(_))`, end of input `Ok(None)`, errors `Err(_)`.
    #[test]
    fn parser_next_transposes_parse_outcome() {
        let parser = Parser::new();

        let mut ok_session = parser.parse("SELECT 1; SELECT 2;");
        let first = ok_session
            .next()
            .transpose()
            .expect("first should not error")
            .expect("first statement should exist");
        let _ = first.root();
        let second = ok_session
            .next()
            .transpose()
            .expect("second should not error")
            .expect("second statement should exist");
        let _ = second.root();
        assert!(
            ok_session
                .next()
                .transpose()
                .expect("done should not error")
                .is_none()
        );
        // The parser permits only one live session, so release this one first.
        drop(ok_session);

        let mut err_session = parser.parse("abc");
        match err_session.next().transpose() {
            Ok(_) => panic!("fatal error expected"),
            Err(err) => assert!(err.is_fatal()),
        }
    }

    /// A single-parameter template macro is expanded at its call site.
    #[test]
    fn macro_expansion_simple_template() {
        let mut parser = Parser::new();
        parser.register_macro("double", &["x"], "($x + $x)");

        let mut session = parser.parse("SELECT double!(1);");
        let stmt = match session.next() {
            ParseOutcome::Done => panic!("expected statement"),
            ParseOutcome::Err(err) => panic!("unexpected error: {}", err.message()),
            ParseOutcome::Ok(stmt) => stmt,
        };

        // `double!(1)` should have been expanded to `(1 + 1)`, so the dumped
        // AST must mention a PLUS operation.
        let mut dump = String::new();
        stmt.dump(&mut dump, 0);
        let has_plus = dump.contains("PLUS");
        assert!(
            has_plus,
            "expanded AST should contain a PLUS op, got:\n{dump}"
        );
    }

    /// An expanded macro call records a region whose span covers the call text.
    #[test]
    fn macro_expansion_records_macro_region() {
        let mut parser = Parser::new();
        parser.register_macro("id", &["x"], "$x");

        let source = "SELECT id!(42);";
        let mut session = parser.parse(source);
        let stmt = match session.next() {
            ParseOutcome::Done => panic!("expected statement"),
            ParseOutcome::Err(err) => panic!("unexpected error: {}", err.message()),
            ParseOutcome::Ok(stmt) => stmt,
        };

        let regions: Vec<_> = stmt.macro_regions().collect();
        assert_eq!(regions.len(), 1, "expected exactly one macro region");
        let region = &regions[0];
        let start = region.call_offset as usize;
        let end = (region.call_offset + region.call_length) as usize;
        let call_text = &source[start..end];
        assert_eq!(call_text, "id!(42)");
    }

    /// A two-parameter template macro is expanded at its call site.
    #[test]
    fn macro_expansion_multi_param() {
        let mut parser = Parser::new();
        parser.register_macro("sum2", &["a", "b"], "($a + $b)");

        let mut session = parser.parse("SELECT sum2!(1, 2);");
        let stmt = match session.next() {
            ParseOutcome::Done => panic!("expected statement"),
            ParseOutcome::Err(err) => panic!("unexpected error: {}", err.message()),
            ParseOutcome::Ok(stmt) => stmt,
        };

        let mut dump = String::new();
        stmt.dump(&mut dump, 0);
        let has_plus = dump.contains("PLUS");
        assert!(
            has_plus,
            "expanded AST should contain PLUS, got:\n{dump}"
        );
    }

    /// Parsing a call to a deregistered macro must not crash.
    #[test]
    fn macro_deregister_falls_back_to_legacy() {
        let mut parser = Parser::new();
        parser.register_macro("foo", &["x"], "$x");
        let removed = parser.deregister_macro("foo");
        assert!(removed);

        // Once deregistered, the macro call should no longer expand.
        // Legacy behavior: `foo` is parsed as a plain identifier.
        let mut session = parser.parse("SELECT foo!(1);");
        // No specific outcome is asserted — the only requirement is no crash.
        let _outcome = session.next();
    }

    /// Deregistering a name that was never registered reports `false`.
    #[test]
    fn macro_deregister_nonexistent_returns_false() {
        let mut parser = Parser::new();
        let removed = parser.deregister_macro("nonexistent");
        assert!(!removed);
    }
}