Skip to main content

eventql_parser/
parser.rs

1//! Syntactic analysis (parsing) for EventQL.
2//!
3//! This module provides the parser that converts a sequence of tokens into
4//! an abstract syntax tree (AST). The parser implements a recursive descent
5//! parser with operator precedence for expressions.
6//!
7//! # Main Function
8//!
9//! - [`parse`] - Convert a slice of tokens into a Query AST
10
11use crate::ast::{
12    Access, App, Attrs, Binary, Expr, Field, Limit, Order, OrderBy, Query, Source, SourceKind,
13    Unary, Value,
14};
15use crate::error::ParserError;
16use crate::token::{Operator, Sym, Symbol, Token};
17use crate::{Binding, GroupBy, Raw};
18
19/// Result type for parser operations.
20///
21/// This is a convenience alias for `Result<T, ParserError>`.
22pub type ParseResult<A> = Result<A, ParserError>;
23
24/// A parser for EventQL expressions and queries.
25///
26/// The parser takes a stream of tokens and builds an abstract syntax tree (AST)
27/// representing the structure of the EventQL query or expression.
28pub struct Parser<'a> {
29    input: &'a [Token<'a>],
30    offset: usize,
31}
32
33impl<'a> Parser<'a> {
34    /// Creates a new parser from a slice of tokens.
35    ///
36    /// # Example
37    ///
38    /// ```rust
39    /// use eventql_parser::prelude::{tokenize, Parser};
40    ///
41    /// let tokens = tokenize("1 + 2").unwrap();
42    /// let parser = Parser::new(tokens.as_slice());
43    /// ```
44    pub fn new(input: &'a [Token<'a>]) -> Self {
45        Self { input, offset: 0 }
46    }
47
48    fn peek<'b>(&'b self) -> Token<'a> {
49        self.input[self.offset]
50    }
51
52    fn shift<'b>(&'b mut self) -> Token<'a> {
53        let res = self.input[self.offset];
54
55        if self.offset + 1 < self.input.len() {
56            self.offset += 1;
57        }
58
59        res
60    }
61
62    fn parse_ident(&mut self) -> ParseResult<String> {
63        let token = self.shift();
64
65        if let Sym::Id(id) = token.sym {
66            return Ok(id.to_owned());
67        }
68
69        Err(ParserError::ExpectedIdent(
70            token.line,
71            token.col,
72            token.sym.to_string(),
73        ))
74    }
75
76    fn parse_source_kind(&mut self) -> ParseResult<SourceKind<Raw>> {
77        let token = self.shift();
78        match token.sym {
79            Sym::Id(id) => Ok(SourceKind::Name(id.to_owned())),
80            Sym::String(sub) => Ok(SourceKind::Subject(sub.to_owned())),
81            Sym::Symbol(Symbol::OpenParen) => {
82                let query = self.parse_query()?;
83                expect_symbol(self.shift(), Symbol::CloseParen)?;
84
85                Ok(SourceKind::Subquery(Box::new(query)))
86            }
87            _ => Err(ParserError::UnexpectedToken(
88                token.line,
89                token.col,
90                token.sym.to_string(),
91            )),
92        }
93    }
94
95    fn parse_source(&mut self) -> ParseResult<Source<Raw>> {
96        expect_keyword(self.shift(), "from")?;
97
98        let token = self.shift();
99        let binding = if let Sym::Id(name) = token.sym {
100            Binding {
101                name: name.to_owned(),
102                pos: token.into(),
103            }
104        } else {
105            return Err(ParserError::ExpectedIdent(
106                token.line,
107                token.col,
108                token.sym.to_string(),
109            ));
110        };
111
112        expect_keyword(self.shift(), "in")?;
113        let kind = self.parse_source_kind()?;
114
115        Ok(Source { binding, kind })
116    }
117
118    fn parse_where_clause(&mut self) -> ParseResult<Expr> {
119        expect_keyword(self.shift(), "where")?;
120        self.parse_expr()
121    }
122
123    fn parse_group_by(&mut self) -> ParseResult<GroupBy> {
124        expect_keyword(self.shift(), "group")?;
125        expect_keyword(self.shift(), "by")?;
126
127        let expr = self.parse_expr()?;
128
129        let predicate = if let Sym::Id(name) = self.peek().sym
130            && name.eq_ignore_ascii_case("having")
131        {
132            self.shift();
133
134            Some(self.parse_expr()?)
135        } else {
136            None
137        };
138
139        Ok(GroupBy { expr, predicate })
140    }
141
142    fn parse_order_by(&mut self) -> ParseResult<OrderBy> {
143        expect_keyword(self.shift(), "order")?;
144        expect_keyword(self.shift(), "by")?;
145
146        let expr = self.parse_expr()?;
147        let token = self.peek();
148
149        if let Sym::Id(name) = token.sym {
150            let order = if name.eq_ignore_ascii_case("asc") {
151                self.shift();
152                Order::Asc
153            } else if name.eq_ignore_ascii_case("desc") {
154                self.shift();
155                Order::Desc
156            } else {
157                Order::Asc
158            };
159
160            return Ok(OrderBy { expr, order });
161        }
162
163        Err(ParserError::UnexpectedToken(
164            token.line,
165            token.col,
166            token.sym.to_string(),
167        ))
168    }
169
170    fn parse_limit(&mut self) -> ParseResult<Limit> {
171        let token = self.shift();
172        let limit = expect_keyword(token, "top")
173            .map(|_| "top")
174            .or_else(|_| expect_keyword(token, "skip").map(|_| "skip"))
175            .map_err(|_| {
176                ParserError::UnexpectedToken(token.line, token.col, token.sym.to_string())
177            })?;
178
179        let token = self.shift();
180        if let Sym::Number(value) = token.sym
181            && value.fract() == 0.0
182        {
183            return match limit {
184                "top" => Ok(Limit::Top(value as u64)),
185                "skip" => Ok(Limit::Skip(value as u64)),
186                _ => unreachable!(),
187            };
188        }
189
190        Err(ParserError::UnexpectedToken(
191            token.line,
192            token.col,
193            token.sym.to_string(),
194        ))
195    }
196
197    /// Parses a single expression from the token stream.
198    ///
199    /// This method can be used to parse individual expressions rather than complete queries.
200    /// It's useful for testing or for parsing expression fragments.
201    ///
202    /// # Returns
203    ///
204    /// Returns the parsed expression, or a parse error if the tokens don't form a valid expression.
205    ///
206    /// # Example
207    ///
208    /// ```rust
209    /// use eventql_parser::prelude::{tokenize, Parser};
210    ///
211    /// let tokens = tokenize("NOW()").unwrap();
212    /// let expr = Parser::new(tokens.as_slice()).parse_expr().unwrap();
213    /// ```
214    pub fn parse_expr(&mut self) -> ParseResult<Expr> {
215        let token = self.peek();
216
217        match token.sym {
218            Sym::Eof => Err(ParserError::UnexpectedEof),
219
220            Sym::Id(_)
221            | Sym::String(_)
222            | Sym::Number(_)
223            | Sym::Symbol(Symbol::OpenParen | Symbol::OpenBracket | Symbol::OpenBrace)
224            | Sym::Operator(Operator::Add | Operator::Sub | Operator::Not) => self.parse_binary(0),
225
226            _ => Err(ParserError::UnexpectedToken(
227                token.line,
228                token.col,
229                token.sym.to_string(),
230            )),
231        }
232    }
233
234    fn parse_primary(&mut self) -> ParseResult<Expr> {
235        let token = self.shift();
236
237        let value = match token.sym {
238            Sym::Id(name) => {
239                if name.eq_ignore_ascii_case("true") {
240                    Value::Bool(true)
241                } else if name.eq_ignore_ascii_case("false") {
242                    Value::Bool(false)
243                } else if matches!(self.peek().sym, Sym::Symbol(Symbol::OpenParen)) {
244                    self.shift();
245
246                    let mut args = vec![];
247
248                    if !matches!(self.peek().sym, Sym::Symbol(Symbol::CloseParen)) {
249                        args.push(self.parse_expr()?);
250
251                        while matches!(self.peek().sym, Sym::Symbol(Symbol::Comma)) {
252                            self.shift();
253                            args.push(self.parse_expr()?);
254                        }
255                    }
256
257                    expect_symbol(self.shift(), Symbol::CloseParen)?;
258
259                    Value::App(App {
260                        func: name.to_owned(),
261                        args,
262                    })
263                } else if matches!(self.peek().sym, Sym::Symbol(Symbol::Dot)) {
264                    self.shift();
265                    let mut access = Access {
266                        target: Box::new(Expr {
267                            attrs: Attrs::new(token.into()),
268                            value: Value::Id(name.to_owned()),
269                        }),
270                        field: self.parse_ident()?,
271                    };
272
273                    while matches!(self.peek().sym, Sym::Symbol(Symbol::Dot)) {
274                        self.shift();
275                        access = Access {
276                            target: Box::new(Expr {
277                                attrs: access.target.attrs,
278                                value: Value::Access(access),
279                            }),
280                            field: self.parse_ident()?,
281                        };
282                    }
283
284                    Value::Access(access)
285                } else {
286                    Value::Id(name.to_owned())
287                }
288            }
289
290            Sym::String(s) => Value::String(s.to_owned()),
291            Sym::Number(n) => Value::Number(n),
292
293            Sym::Symbol(Symbol::OpenParen) => {
294                let expr = self.parse_expr()?;
295                expect_symbol(self.shift(), Symbol::CloseParen)?;
296
297                Value::Group(Box::new(expr))
298            }
299
300            Sym::Symbol(Symbol::OpenBracket) => {
301                let mut elems = vec![];
302
303                if !matches!(self.peek().sym, Sym::Symbol(Symbol::CloseBracket)) {
304                    elems.push(self.parse_expr()?);
305
306                    while matches!(self.peek().sym, Sym::Symbol(Symbol::Comma)) {
307                        self.shift();
308                        elems.push(self.parse_expr()?);
309                    }
310                }
311
312                expect_symbol(self.shift(), Symbol::CloseBracket)?;
313
314                Value::Array(elems)
315            }
316
317            Sym::Symbol(Symbol::OpenBrace) => {
318                let mut fields = vec![];
319
320                if !matches!(self.peek().sym, Sym::Symbol(Symbol::CloseBrace)) {
321                    let name = self.parse_ident()?;
322                    expect_symbol(self.shift(), Symbol::Colon)?;
323                    let value = self.parse_expr()?;
324
325                    fields.push(Field { name, value });
326
327                    while matches!(self.peek().sym, Sym::Symbol(Symbol::Comma)) {
328                        self.shift();
329
330                        let name = self.parse_ident()?;
331                        expect_symbol(self.shift(), Symbol::Colon)?;
332                        let value = self.parse_expr()?;
333
334                        fields.push(Field { name, value });
335                    }
336                }
337
338                expect_symbol(self.shift(), Symbol::CloseBrace)?;
339
340                Value::Record(fields)
341            }
342
343            Sym::Operator(op) if matches!(op, Operator::Add | Operator::Sub | Operator::Not) => {
344                Value::Unary(Unary {
345                    operator: op,
346                    expr: Box::new(self.parse_expr()?),
347                })
348            }
349
350            _ => {
351                return Err(ParserError::UnexpectedToken(
352                    token.line,
353                    token.col,
354                    token.sym.to_string(),
355                ));
356            }
357        };
358
359        Ok(Expr {
360            attrs: Attrs::new(token.into()),
361            value,
362        })
363    }
364
365    fn parse_binary(&mut self, min_bind: u64) -> ParseResult<Expr> {
366        let mut lhs = self.parse_primary()?;
367
368        loop {
369            let token = self.peek();
370            let operator = if let Sym::Operator(op) = token.sym {
371                op
372            } else {
373                break;
374            };
375
376            let (lhs_bind, rhs_bind) = binding_pow(operator);
377
378            if lhs_bind < min_bind {
379                break;
380            }
381
382            self.shift();
383            let rhs = self.parse_binary(rhs_bind)?;
384
385            if matches!(operator, Operator::As) && !matches!(rhs.value, Value::Id(_)) {
386                return Err(ParserError::ExpectedType(
387                    rhs.attrs.pos.line,
388                    rhs.attrs.pos.col,
389                ));
390            }
391
392            lhs = Expr {
393                attrs: lhs.attrs,
394                value: Value::Binary(Binary {
395                    lhs: Box::new(lhs),
396                    operator,
397                    rhs: Box::new(rhs),
398                }),
399            };
400        }
401
402        Ok(lhs)
403    }
404
405    fn parse_query(&mut self) -> ParseResult<Query<Raw>> {
406        let mut sources = vec![];
407        let pos = self.peek().into();
408
409        while let Sym::Id(name) = self.peek().sym
410            && name.eq_ignore_ascii_case("from")
411        {
412            sources.push(self.parse_source()?);
413        }
414
415        if sources.is_empty() {
416            let token = self.peek();
417            return Err(ParserError::MissingFromStatement(token.line, token.col));
418        }
419
420        let predicate = if let Sym::Id(name) = self.peek().sym
421            && name.eq_ignore_ascii_case("where")
422        {
423            Some(self.parse_where_clause()?)
424        } else {
425            None
426        };
427
428        let group_by = if let Sym::Id(name) = self.peek().sym
429            && name.eq_ignore_ascii_case("group")
430        {
431            Some(self.parse_group_by()?)
432        } else {
433            None
434        };
435
436        let order_by = if let Sym::Id(name) = self.peek().sym
437            && name.eq_ignore_ascii_case("order")
438        {
439            Some(self.parse_order_by()?)
440        } else {
441            None
442        };
443
444        let limit = if let Sym::Id(name) = self.peek().sym
445            && (name.eq_ignore_ascii_case("skip") || name.eq_ignore_ascii_case("top"))
446        {
447            Some(self.parse_limit()?)
448        } else {
449            None
450        };
451
452        expect_keyword(self.shift(), "project")?;
453        expect_keyword(self.shift(), "into")?;
454
455        let distinct = if let Sym::Id(name) = self.peek().sym
456            && name.eq_ignore_ascii_case("distinct")
457        {
458            self.shift();
459            true
460        } else {
461            false
462        };
463
464        let projection = self.parse_expr()?;
465
466        Ok(Query {
467            attrs: Attrs::new(pos),
468            sources,
469            predicate,
470            group_by,
471            order_by,
472            limit,
473            projection,
474            distinct,
475            meta: Raw,
476        })
477    }
478}
479
480fn expect_keyword(token: Token, keyword: &'static str) -> ParseResult<()> {
481    if let Sym::Id(id) = token.sym
482        && id.eq_ignore_ascii_case(keyword)
483    {
484        return Ok(());
485    }
486
487    Err(ParserError::ExpectedKeyword(
488        token.line,
489        token.col,
490        keyword,
491        token.sym.to_string(),
492    ))
493}
494
495fn expect_symbol(token: Token, expect: Symbol) -> ParseResult<()> {
496    if let Sym::Symbol(sym) = token.sym
497        && sym == expect
498    {
499        return Ok(());
500    }
501
502    Err(ParserError::ExpectedSymbol(
503        token.line,
504        token.col,
505        expect,
506        token.sym.to_string(),
507    ))
508}
509
510// Pratt parser operator binding power (precedence)
511fn binding_pow(op: Operator) -> (u64, u64) {
512    match op {
513        Operator::Add | Operator::Sub => (20, 21),
514        Operator::Mul | Operator::Div => (30, 31),
515        Operator::Contains => (40, 39),
516        Operator::As => (50, 49),
517        Operator::Eq
518        | Operator::Neq
519        | Operator::Gt
520        | Operator::Lt
521        | Operator::Gte
522        | Operator::Lte => (10, 11),
523        Operator::And | Operator::Or | Operator::Xor | Operator::Not => (1, 2),
524    }
525}
526
527/// Parse a sequence of tokens into a Query AST.
528///
529/// This function performs syntactic analysis on the token stream, constructing
530/// an abstract syntax tree that represents the structure of the EventQL query.
531///
532/// # Grammar
533///
534/// The parser recognizes the following EventQL grammar:
535///
536/// ```text
537/// Query     := FROM+ WHERE? GROUP_BY? ORDER_BY? LIMIT? PROJECT
538/// FROM      := "FROM" Id "IN" SourceKind
539/// SourceKind := Id | String | "(" Query ")"
540/// WHERE     := "WHERE" Expr
541/// GROUP_BY  := "GROUP" "BY" Expr
542/// ORDER_BY  := "ORDER" "BY" Expr ("ASC" | "DESC")
543/// LIMIT     := ("TOP" | "SKIP") Number
544/// PROJECT   := "PROJECT" "INTO" Expr
545/// Expr      := Binary | Unary | Primary
546/// Primary   := Number | String | Bool | Id | Array | Record | Access | App | "(" Expr ")"
547/// ```
548///
549/// # Expression Precedence
550///
551/// The parser uses a Pratt parser for expressions with the following precedence
552/// (from highest to lowest):
553///
554/// 1. Unary operators (`+`, `-`, `NOT`)
555/// 2. Multiplicative (`*`, `/`)
556/// 3. Additive (`+`, `-`)
557/// 4. Comparison (`<`, `<=`, `>`, `>=`, `==`, `!=`)
558/// 5. Logical (`AND`, `OR`, `XOR`)
559pub fn parse<'a>(input: &'a [Token<'a>]) -> ParseResult<Query<Raw>> {
560    let mut parser = Parser::new(input);
561
562    parser.parse_query()
563}