Skip to main content

eventql_parser/
parser.rs

1//! Syntactic analysis (parsing) for EventQL.
2//!
3//! This module provides the parser that converts a sequence of tokens into
4//! an abstract syntax tree (AST). The parser implements a recursive descent
5//! parser with operator precedence for expressions.
6//!
7//! # Main Function
8//!
9//! - [`parse`] - Convert a slice of tokens into a Query AST
10
11use crate::arena::Arena;
12use crate::ast::{
13    Access, App, Attrs, Binary, Field, Limit, Order, OrderBy, Query, Source, SourceKind, Unary,
14    Value,
15};
16use crate::error::ParserError;
17use crate::token::{Operator, Sym, Symbol, Token};
18use crate::{Binding, ExprRef, GroupBy, Raw, StrRef};
19
20/// Result type for parser operations.
21///
22/// This is a convenience alias for `Result<T, ParserError>`.
23pub type ParseResult<A> = Result<A, ParserError>;
24
25/// A parser for EventQL expressions and queries.
26///
27/// The parser takes a stream of tokens and builds an abstract syntax tree (AST)
28/// representing the structure of the EventQL query or expression.
29pub struct Parser<'a> {
30    input: &'a [Token<'a>],
31    arena: &'a mut Arena,
32    offset: usize,
33}
34
35impl<'a> Parser<'a> {
36    /// Creates a new parser from a slice of tokens.
37    ///
38    /// # Example
39    ///
40    /// ```rust
41    /// use eventql_parser::Session;
42    ///
43    /// let session = Session::builder().build();
44    /// let tokens = session.tokenize("1 + 2").unwrap();
45    /// ```
46    pub fn new(arena: &'a mut Arena, input: &'a [Token<'a>]) -> Self {
47        Self {
48            arena,
49            input,
50            offset: 0,
51        }
52    }
53
54    fn peek<'b>(&'b self) -> Token<'a> {
55        self.input[self.offset]
56    }
57
58    fn shift<'b>(&'b mut self) -> Token<'a> {
59        let res = self.input[self.offset];
60
61        if self.offset + 1 < self.input.len() {
62            self.offset += 1;
63        }
64
65        res
66    }
67
68    fn parse_ident(&mut self) -> ParseResult<StrRef> {
69        let token = self.shift();
70
71        if let Sym::Id(id) = token.sym {
72            return Ok(self.arena.strings.alloc(id));
73        }
74
75        Err(ParserError::ExpectedIdent(
76            token.line,
77            token.col,
78            token.sym.to_string(),
79        ))
80    }
81
82    fn parse_source_kind(&mut self) -> ParseResult<SourceKind<Raw>> {
83        let token = self.shift();
84        match token.sym {
85            Sym::Id(id) => Ok(SourceKind::Name(self.arena.strings.alloc_no_case(id))),
86            Sym::String(sub) => Ok(SourceKind::Subject(self.arena.strings.alloc(sub))),
87            Sym::Symbol(Symbol::OpenParen) => {
88                let query = self.parse_query()?;
89                expect_symbol(self.shift(), Symbol::CloseParen)?;
90
91                Ok(SourceKind::Subquery(Box::new(query)))
92            }
93            _ => Err(ParserError::UnexpectedToken(
94                token.line,
95                token.col,
96                token.sym.to_string(),
97            )),
98        }
99    }
100
101    fn parse_source(&mut self) -> ParseResult<Source<Raw>> {
102        expect_keyword(self.shift(), "from")?;
103
104        let token = self.shift();
105        let binding = if let Sym::Id(name) = token.sym {
106            let name = self.arena.strings.alloc_no_case(name);
107
108            Binding {
109                name,
110                pos: token.into(),
111            }
112        } else {
113            return Err(ParserError::ExpectedIdent(
114                token.line,
115                token.col,
116                token.sym.to_string(),
117            ));
118        };
119
120        expect_keyword(self.shift(), "in")?;
121        let kind = self.parse_source_kind()?;
122
123        Ok(Source { binding, kind })
124    }
125
126    fn parse_where_clause(&mut self) -> ParseResult<ExprRef> {
127        expect_keyword(self.shift(), "where")?;
128        self.parse_expr()
129    }
130
131    fn parse_group_by(&mut self) -> ParseResult<GroupBy> {
132        expect_keyword(self.shift(), "group")?;
133        expect_keyword(self.shift(), "by")?;
134
135        let expr = self.parse_expr()?;
136
137        let predicate = if let Sym::Id(name) = self.peek().sym
138            && name.eq_ignore_ascii_case("having")
139        {
140            self.shift();
141
142            Some(self.parse_expr()?)
143        } else {
144            None
145        };
146
147        Ok(GroupBy { expr, predicate })
148    }
149
150    fn parse_order_by(&mut self) -> ParseResult<OrderBy> {
151        expect_keyword(self.shift(), "order")?;
152        expect_keyword(self.shift(), "by")?;
153
154        let expr = self.parse_expr()?;
155        let token = self.peek();
156
157        if let Sym::Id(name) = token.sym {
158            let order = if name.eq_ignore_ascii_case("asc") {
159                self.shift();
160                Order::Asc
161            } else if name.eq_ignore_ascii_case("desc") {
162                self.shift();
163                Order::Desc
164            } else {
165                Order::Asc
166            };
167
168            return Ok(OrderBy { expr, order });
169        }
170
171        Err(ParserError::UnexpectedToken(
172            token.line,
173            token.col,
174            token.sym.to_string(),
175        ))
176    }
177
178    fn parse_limit(&mut self) -> ParseResult<Limit> {
179        let token = self.shift();
180        let limit = expect_keyword(token, "top")
181            .map(|_| "top")
182            .or_else(|_| expect_keyword(token, "skip").map(|_| "skip"))
183            .map_err(|_| {
184                ParserError::UnexpectedToken(token.line, token.col, token.sym.to_string())
185            })?;
186
187        let token = self.shift();
188        if let Sym::Number(value) = token.sym
189            && value.fract() == 0.0
190        {
191            return match limit {
192                "top" => Ok(Limit::Top(value as u64)),
193                "skip" => Ok(Limit::Skip(value as u64)),
194                _ => unreachable!(),
195            };
196        }
197
198        Err(ParserError::UnexpectedToken(
199            token.line,
200            token.col,
201            token.sym.to_string(),
202        ))
203    }
204
205    /// Parses a single expression from the token stream.
206    ///
207    /// This method can be used to parse individual expressions rather than complete queries.
208    /// It's useful for testing or for parsing expression fragments.
209    ///
210    /// # Returns
211    ///
212    /// Returns the parsed expression, or a parse error if the tokens don't form a valid expression.
213    ///
214    /// # Example
215    ///
216    /// ```rust
217    /// use eventql_parser::Session;
218    ///
219    /// let session = Session::builder().build();
220    /// let tokens = session.tokenize("NOW()").unwrap();
221    /// ```
222    pub fn parse_expr(&mut self) -> ParseResult<ExprRef> {
223        let token = self.peek();
224
225        match token.sym {
226            Sym::Eof => Err(ParserError::UnexpectedEof),
227
228            Sym::Id(_)
229            | Sym::String(_)
230            | Sym::Number(_)
231            | Sym::Symbol(Symbol::OpenParen | Symbol::OpenBracket | Symbol::OpenBrace)
232            | Sym::Operator(Operator::Add | Operator::Sub | Operator::Not) => self.parse_binary(0),
233
234            _ => Err(ParserError::UnexpectedToken(
235                token.line,
236                token.col,
237                token.sym.to_string(),
238            )),
239        }
240    }
241
242    fn parse_primary(&mut self) -> ParseResult<ExprRef> {
243        let token = self.shift();
244
245        let value = match token.sym {
246            Sym::Id(name) => {
247                if name.eq_ignore_ascii_case("true") {
248                    Value::Bool(true)
249                } else if name.eq_ignore_ascii_case("false") {
250                    Value::Bool(false)
251                } else if matches!(self.peek().sym, Sym::Symbol(Symbol::OpenParen)) {
252                    self.shift();
253
254                    let mut args = vec![];
255
256                    if !matches!(self.peek().sym, Sym::Symbol(Symbol::CloseParen)) {
257                        args.push(self.parse_expr()?);
258
259                        while matches!(self.peek().sym, Sym::Symbol(Symbol::Comma)) {
260                            self.shift();
261                            args.push(self.parse_expr()?);
262                        }
263                    }
264
265                    expect_symbol(self.shift(), Symbol::CloseParen)?;
266                    let args = self.arena.exprs.alloc_vec(args);
267
268                    Value::App(App {
269                        func: self.arena.strings.alloc_no_case(name),
270                        args,
271                    })
272                } else if matches!(self.peek().sym, Sym::Symbol(Symbol::Dot)) {
273                    self.shift();
274                    let attrs = token.into();
275                    let name = self.arena.strings.alloc_no_case(name);
276                    let mut access = Access {
277                        target: self.arena.exprs.alloc(attrs, Value::Id(name)),
278                        field: self.parse_ident()?,
279                    };
280
281                    while matches!(self.peek().sym, Sym::Symbol(Symbol::Dot)) {
282                        self.shift();
283                        access = Access {
284                            target: self.arena.exprs.alloc(attrs, Value::Access(access)),
285                            field: self.parse_ident()?,
286                        };
287                    }
288
289                    Value::Access(access)
290                } else {
291                    Value::Id(self.arena.strings.alloc_no_case(name))
292                }
293            }
294
295            Sym::String(s) => Value::String(self.arena.strings.alloc(s)),
296            Sym::Number(n) => Value::Number(n.into()),
297
298            Sym::Symbol(Symbol::OpenParen) => {
299                let expr = self.parse_expr()?;
300                expect_symbol(self.shift(), Symbol::CloseParen)?;
301
302                Value::Group(expr)
303            }
304
305            Sym::Symbol(Symbol::OpenBracket) => {
306                let mut elems = vec![];
307
308                if !matches!(self.peek().sym, Sym::Symbol(Symbol::CloseBracket)) {
309                    elems.push(self.parse_expr()?);
310
311                    while matches!(self.peek().sym, Sym::Symbol(Symbol::Comma)) {
312                        self.shift();
313                        elems.push(self.parse_expr()?);
314                    }
315                }
316
317                expect_symbol(self.shift(), Symbol::CloseBracket)?;
318
319                Value::Array(self.arena.exprs.alloc_vec(elems))
320            }
321
322            Sym::Symbol(Symbol::OpenBrace) => {
323                let mut fields = vec![];
324
325                if !matches!(self.peek().sym, Sym::Symbol(Symbol::CloseBrace)) {
326                    let attrs: Attrs = self.peek().into();
327                    let name = self.parse_ident()?;
328                    expect_symbol(self.shift(), Symbol::Colon)?;
329                    let value = self.parse_expr()?;
330
331                    fields.push(Field {
332                        attrs,
333                        name,
334                        expr: value,
335                    });
336
337                    while matches!(self.peek().sym, Sym::Symbol(Symbol::Comma)) {
338                        self.shift();
339
340                        let attrs: Attrs = self.peek().into();
341                        let name = self.parse_ident()?;
342                        expect_symbol(self.shift(), Symbol::Colon)?;
343                        let value = self.parse_expr()?;
344
345                        fields.push(Field {
346                            attrs,
347                            name,
348                            expr: value,
349                        });
350                    }
351                }
352
353                expect_symbol(self.shift(), Symbol::CloseBrace)?;
354
355                Value::Record(self.arena.exprs.alloc_rec(fields))
356            }
357
358            Sym::Operator(op) if matches!(op, Operator::Add | Operator::Sub | Operator::Not) => {
359                Value::Unary(Unary {
360                    operator: op,
361                    expr: self.parse_expr()?,
362                })
363            }
364
365            _ => {
366                return Err(ParserError::UnexpectedToken(
367                    token.line,
368                    token.col,
369                    token.sym.to_string(),
370                ));
371            }
372        };
373
374        let attrs = token.into();
375
376        Ok(self.arena.exprs.alloc(attrs, value))
377    }
378
379    fn parse_binary(&mut self, min_bind: u64) -> ParseResult<ExprRef> {
380        let mut lhs = self.parse_primary()?;
381        let lhs_attrs = self.arena.exprs.get(lhs).attrs;
382
383        loop {
384            let token = self.peek();
385            let operator = if let Sym::Operator(op) = token.sym {
386                op
387            } else {
388                break;
389            };
390
391            let (lhs_bind, rhs_bind) = binding_pow(operator);
392
393            if lhs_bind < min_bind {
394                break;
395            }
396
397            self.shift();
398            let rhs = self.parse_binary(rhs_bind)?;
399            let node = self.arena.exprs.get(rhs);
400
401            if matches!(operator, Operator::As) && !matches!(node.value, Value::Id(_)) {
402                return Err(ParserError::ExpectedType(
403                    node.attrs.pos.line,
404                    node.attrs.pos.col,
405                ));
406            }
407
408            lhs = self
409                .arena
410                .exprs
411                .alloc(lhs_attrs, Value::Binary(Binary { lhs, operator, rhs }));
412        }
413
414        Ok(lhs)
415    }
416
417    fn parse_query(&mut self) -> ParseResult<Query<Raw>> {
418        let mut sources = vec![];
419        let pos = self.peek().into();
420
421        while let Sym::Id(name) = self.peek().sym
422            && name.eq_ignore_ascii_case("from")
423        {
424            sources.push(self.parse_source()?);
425        }
426
427        if sources.is_empty() {
428            let token = self.peek();
429            return Err(ParserError::MissingFromStatement(token.line, token.col));
430        }
431
432        let predicate = if let Sym::Id(name) = self.peek().sym
433            && name.eq_ignore_ascii_case("where")
434        {
435            Some(self.parse_where_clause()?)
436        } else {
437            None
438        };
439
440        let group_by = if let Sym::Id(name) = self.peek().sym
441            && name.eq_ignore_ascii_case("group")
442        {
443            Some(self.parse_group_by()?)
444        } else {
445            None
446        };
447
448        let order_by = if let Sym::Id(name) = self.peek().sym
449            && name.eq_ignore_ascii_case("order")
450        {
451            Some(self.parse_order_by()?)
452        } else {
453            None
454        };
455
456        let limit = if let Sym::Id(name) = self.peek().sym
457            && (name.eq_ignore_ascii_case("skip") || name.eq_ignore_ascii_case("top"))
458        {
459            Some(self.parse_limit()?)
460        } else {
461            None
462        };
463
464        expect_keyword(self.shift(), "project")?;
465        expect_keyword(self.shift(), "into")?;
466
467        let distinct = if let Sym::Id(name) = self.peek().sym
468            && name.eq_ignore_ascii_case("distinct")
469        {
470            self.shift();
471            true
472        } else {
473            false
474        };
475
476        let projection = self.parse_expr()?;
477
478        Ok(Query {
479            attrs: Attrs::new(pos),
480            sources,
481            predicate,
482            group_by,
483            order_by,
484            limit,
485            projection,
486            distinct,
487            meta: Raw,
488        })
489    }
490}
491
492fn expect_keyword(token: Token, keyword: &'static str) -> ParseResult<()> {
493    if let Sym::Id(id) = token.sym
494        && id.eq_ignore_ascii_case(keyword)
495    {
496        return Ok(());
497    }
498
499    Err(ParserError::ExpectedKeyword(
500        token.line,
501        token.col,
502        keyword,
503        token.sym.to_string(),
504    ))
505}
506
507fn expect_symbol(token: Token, expect: Symbol) -> ParseResult<()> {
508    if let Sym::Symbol(sym) = token.sym
509        && sym == expect
510    {
511        return Ok(());
512    }
513
514    Err(ParserError::ExpectedSymbol(
515        token.line,
516        token.col,
517        expect,
518        token.sym.to_string(),
519    ))
520}
521
522// Pratt parser operator binding power (precedence)
523fn binding_pow(op: Operator) -> (u64, u64) {
524    match op {
525        Operator::Add | Operator::Sub => (20, 21),
526        Operator::Mul | Operator::Div => (30, 31),
527        Operator::Contains => (40, 39),
528        Operator::As => (50, 49),
529        Operator::Eq
530        | Operator::Neq
531        | Operator::Gt
532        | Operator::Lt
533        | Operator::Gte
534        | Operator::Lte => (10, 11),
535        Operator::And | Operator::Or | Operator::Xor | Operator::Not => (1, 2),
536    }
537}
538
539/// Parse a sequence of tokens into a Query AST.
540///
541/// This function performs syntactic analysis on the token stream, constructing
542/// an abstract syntax tree that represents the structure of the EventQL query.
543///
544/// # Grammar
545///
546/// The parser recognizes the following EventQL grammar:
547///
548/// ```text
549/// Query     := FROM+ WHERE? GROUP_BY? ORDER_BY? LIMIT? PROJECT
550/// FROM      := "FROM" Id "IN" SourceKind
551/// SourceKind := Id | String | "(" Query ")"
552/// WHERE     := "WHERE" Expr
553/// GROUP_BY  := "GROUP" "BY" Expr
554/// ORDER_BY  := "ORDER" "BY" Expr ("ASC" | "DESC")
555/// LIMIT     := ("TOP" | "SKIP") Number
556/// PROJECT   := "PROJECT" "INTO" Expr
557/// Expr      := Binary | Unary | Primary
558/// Primary   := Number | String | Bool | Id | Array | Record | Access | App | "(" Expr ")"
559/// ```
560///
561/// # Expression Precedence
562///
563/// The parser uses a Pratt parser for expressions with the following precedence
564/// (from highest to lowest):
565///
566/// 1. Unary operators (`+`, `-`, `NOT`)
567/// 2. Multiplicative (`*`, `/`)
568/// 3. Additive (`+`, `-`)
569/// 4. Comparison (`<`, `<=`, `>`, `>=`, `==`, `!=`)
570/// 5. Logical (`AND`, `OR`, `XOR`)
571pub(crate) fn parse<'a>(arena: &'a mut Arena, input: &'a [Token<'a>]) -> ParseResult<Query<Raw>> {
572    let mut parser = Parser::new(arena, input);
573
574    parser.parse_query()
575}