eventql_parser/
parser.rs

1//! Syntactic analysis (parsing) for EventQL.
2//!
3//! This module provides the parser that converts a sequence of tokens into
4//! an abstract syntax tree (AST). The parser implements a recursive descent
5//! parser with operator precedence for expressions.
6//!
7//! # Main Function
8//!
9//! - [`parse`] - Convert a slice of tokens into a Query AST
10
11use crate::ast::{
12    Access, App, Attrs, Binary, Expr, Field, Limit, Order, OrderBy, Query, Source, SourceKind,
13    Unary, Value,
14};
15use crate::error::ParserError;
16use crate::token::{Operator, Sym, Symbol, Token};
17use crate::{Binding, GroupBy, Raw};
18
19/// Result type for parser operations.
20///
21/// This is a convenience alias for `Result<T, ParserError>`.
22pub type ParseResult<A> = Result<A, ParserError>;
23
24struct Parser<'a> {
25    input: &'a [Token<'a>],
26    offset: usize,
27}
28
29impl<'a> Parser<'a> {
30    fn new(input: &'a [Token<'a>]) -> Self {
31        Self { input, offset: 0 }
32    }
33
34    fn peek<'b>(&'b self) -> Token<'a> {
35        self.input[self.offset]
36    }
37
38    fn shift<'b>(&'b mut self) -> Token<'a> {
39        let res = self.input[self.offset];
40
41        if self.offset + 1 < self.input.len() {
42            self.offset += 1;
43        }
44
45        res
46    }
47
48    fn parse_ident(&mut self) -> ParseResult<String> {
49        let token = self.shift();
50
51        if let Sym::Id(id) = token.sym {
52            return Ok(id.to_owned());
53        }
54
55        Err(ParserError::ExpectedIdent(
56            token.line,
57            token.col,
58            token.sym.to_string(),
59        ))
60    }
61
62    fn parse_source_kind(&mut self) -> ParseResult<SourceKind<Raw>> {
63        let token = self.shift();
64        match token.sym {
65            Sym::Id(id) => Ok(SourceKind::Name(id.to_owned())),
66            Sym::String(sub) => Ok(SourceKind::Subject(sub.to_owned())),
67            Sym::Symbol(Symbol::OpenParen) => {
68                let query = self.parse_query()?;
69                expect_symbol(self.shift(), Symbol::CloseParen)?;
70
71                Ok(SourceKind::Subquery(Box::new(query)))
72            }
73            _ => Err(ParserError::UnexpectedToken(
74                token.line,
75                token.col,
76                token.sym.to_string(),
77            )),
78        }
79    }
80
81    fn parse_source(&mut self) -> ParseResult<Source<Raw>> {
82        expect_keyword(self.shift(), "from")?;
83
84        let token = self.shift();
85        let binding = if let Sym::Id(name) = token.sym {
86            Binding {
87                name: name.to_owned(),
88                pos: token.into(),
89            }
90        } else {
91            return Err(ParserError::ExpectedIdent(
92                token.line,
93                token.col,
94                token.sym.to_string(),
95            ));
96        };
97
98        expect_keyword(self.shift(), "in")?;
99        let kind = self.parse_source_kind()?;
100
101        Ok(Source { binding, kind })
102    }
103
104    fn parse_where_clause(&mut self) -> ParseResult<Expr> {
105        expect_keyword(self.shift(), "where")?;
106        self.parse_expr()
107    }
108
109    fn parse_group_by(&mut self) -> ParseResult<GroupBy> {
110        expect_keyword(self.shift(), "group")?;
111        expect_keyword(self.shift(), "by")?;
112
113        let expr = self.parse_expr()?;
114
115        let predicate = if let Sym::Id(name) = self.peek().sym
116            && name.eq_ignore_ascii_case("having")
117        {
118            self.shift();
119
120            Some(self.parse_expr()?)
121        } else {
122            None
123        };
124
125        Ok(GroupBy { expr, predicate })
126    }
127
128    fn parse_order_by(&mut self) -> ParseResult<OrderBy> {
129        expect_keyword(self.shift(), "order")?;
130        expect_keyword(self.shift(), "by")?;
131
132        let expr = self.parse_expr()?;
133        let token = self.shift();
134
135        if let Sym::Id(name) = token.sym {
136            let order = if name.eq_ignore_ascii_case("asc") {
137                Order::Asc
138            } else if name.eq_ignore_ascii_case("desc") {
139                Order::Desc
140            } else {
141                return Err(ParserError::UnexpectedToken(
142                    token.line,
143                    token.col,
144                    name.to_owned(),
145                ));
146            };
147
148            return Ok(OrderBy { expr, order });
149        }
150
151        Err(ParserError::UnexpectedToken(
152            token.line,
153            token.col,
154            token.sym.to_string(),
155        ))
156    }
157
158    fn parse_limit(&mut self) -> ParseResult<Limit> {
159        let token = self.shift();
160        let limit = expect_keyword(token, "top")
161            .map(|_| "top")
162            .or_else(|_| expect_keyword(token, "skip").map(|_| "skip"))
163            .map_err(|_| {
164                ParserError::UnexpectedToken(token.line, token.col, token.sym.to_string())
165            })?;
166
167        let token = self.shift();
168        if let Sym::Number(value) = token.sym
169            && value.fract() == 0.0
170        {
171            return match limit {
172                "top" => Ok(Limit::Top(value as u64)),
173                "skip" => Ok(Limit::Skip(value as u64)),
174                _ => unreachable!(),
175            };
176        }
177
178        Err(ParserError::UnexpectedToken(
179            token.line,
180            token.col,
181            token.sym.to_string(),
182        ))
183    }
184
185    fn parse_expr(&mut self) -> ParseResult<Expr> {
186        let token = self.peek();
187
188        match token.sym {
189            Sym::Eof => Err(ParserError::UnexpectedEof),
190
191            Sym::Id(_)
192            | Sym::String(_)
193            | Sym::Number(_)
194            | Sym::Symbol(Symbol::OpenParen | Symbol::OpenBracket | Symbol::OpenBrace)
195            | Sym::Operator(Operator::Add | Operator::Sub | Operator::Not) => self.parse_binary(0),
196
197            _ => Err(ParserError::UnexpectedToken(
198                token.line,
199                token.col,
200                token.sym.to_string(),
201            )),
202        }
203    }
204
205    fn parse_primary(&mut self) -> ParseResult<Expr> {
206        let token = self.shift();
207
208        let value = match token.sym {
209            Sym::Id(name) => {
210                if name.eq_ignore_ascii_case("true") {
211                    Value::Bool(true)
212                } else if name.eq_ignore_ascii_case("false") {
213                    Value::Bool(false)
214                } else if matches!(self.peek().sym, Sym::Symbol(Symbol::OpenParen)) {
215                    self.shift();
216
217                    let mut args = vec![];
218
219                    if !matches!(self.peek().sym, Sym::Symbol(Symbol::CloseParen)) {
220                        args.push(self.parse_expr()?);
221
222                        while matches!(self.peek().sym, Sym::Symbol(Symbol::Comma)) {
223                            self.shift();
224                            args.push(self.parse_expr()?);
225                        }
226                    }
227
228                    expect_symbol(self.shift(), Symbol::CloseParen)?;
229
230                    Value::App(App {
231                        func: name.to_owned(),
232                        args,
233                    })
234                } else if matches!(self.peek().sym, Sym::Symbol(Symbol::Dot)) {
235                    self.shift();
236                    let mut access = Access {
237                        target: Box::new(Expr {
238                            attrs: Attrs::new(token.into()),
239                            value: Value::Id(name.to_owned()),
240                        }),
241                        field: self.parse_ident()?,
242                    };
243
244                    while matches!(self.peek().sym, Sym::Symbol(Symbol::Dot)) {
245                        self.shift();
246                        access = Access {
247                            target: Box::new(Expr {
248                                attrs: access.target.attrs,
249                                value: Value::Access(access),
250                            }),
251                            field: self.parse_ident()?,
252                        };
253                    }
254
255                    Value::Access(access)
256                } else {
257                    Value::Id(name.to_owned())
258                }
259            }
260
261            Sym::String(s) => Value::String(s.to_owned()),
262            Sym::Number(n) => Value::Number(n),
263
264            Sym::Symbol(Symbol::OpenParen) => {
265                let expr = self.parse_expr()?;
266                expect_symbol(self.shift(), Symbol::CloseParen)?;
267
268                Value::Group(Box::new(expr))
269            }
270
271            Sym::Symbol(Symbol::OpenBracket) => {
272                let mut elems = vec![];
273
274                if !matches!(self.peek().sym, Sym::Symbol(Symbol::CloseBracket)) {
275                    elems.push(self.parse_expr()?);
276
277                    while matches!(self.peek().sym, Sym::Symbol(Symbol::Comma)) {
278                        self.shift();
279                        elems.push(self.parse_expr()?);
280                    }
281                }
282
283                expect_symbol(self.shift(), Symbol::CloseBracket)?;
284
285                Value::Array(elems)
286            }
287
288            Sym::Symbol(Symbol::OpenBrace) => {
289                let mut fields = vec![];
290
291                if !matches!(self.peek().sym, Sym::Symbol(Symbol::CloseBrace)) {
292                    let name = self.parse_ident()?;
293                    expect_symbol(self.shift(), Symbol::Colon)?;
294                    let value = self.parse_expr()?;
295
296                    fields.push(Field { name, value });
297
298                    while matches!(self.peek().sym, Sym::Symbol(Symbol::Comma)) {
299                        self.shift();
300
301                        let name = self.parse_ident()?;
302                        expect_symbol(self.shift(), Symbol::Colon)?;
303                        let value = self.parse_expr()?;
304
305                        fields.push(Field { name, value });
306                    }
307                }
308
309                expect_symbol(self.shift(), Symbol::CloseBrace)?;
310
311                Value::Record(fields)
312            }
313
314            Sym::Operator(op) if matches!(op, Operator::Add | Operator::Sub | Operator::Not) => {
315                Value::Unary(Unary {
316                    operator: op,
317                    expr: Box::new(self.parse_expr()?),
318                })
319            }
320
321            _ => {
322                return Err(ParserError::UnexpectedToken(
323                    token.line,
324                    token.col,
325                    token.sym.to_string(),
326                ));
327            }
328        };
329
330        Ok(Expr {
331            attrs: Attrs::new(token.into()),
332            value,
333        })
334    }
335
336    fn parse_binary(&mut self, min_bind: u64) -> ParseResult<Expr> {
337        let mut lhs = self.parse_primary()?;
338
339        loop {
340            let token = self.peek();
341            let operator = if let Sym::Operator(op) = token.sym {
342                op
343            } else {
344                break;
345            };
346
347            let (lhs_bind, rhs_bind) = binding_pow(operator);
348
349            if lhs_bind < min_bind {
350                break;
351            }
352
353            self.shift();
354            let rhs = self.parse_binary(rhs_bind)?;
355
356            lhs = Expr {
357                attrs: lhs.attrs,
358                value: Value::Binary(Binary {
359                    lhs: Box::new(lhs),
360                    operator,
361                    rhs: Box::new(rhs),
362                }),
363            };
364        }
365
366        Ok(lhs)
367    }
368
369    fn parse_query(&mut self) -> ParseResult<Query<Raw>> {
370        let mut sources = vec![];
371        let pos = self.peek().into();
372
373        while let Sym::Id(name) = self.peek().sym
374            && name.eq_ignore_ascii_case("from")
375        {
376            sources.push(self.parse_source()?);
377        }
378
379        if sources.is_empty() {
380            let token = self.peek();
381            return Err(ParserError::MissingFromStatement(token.line, token.col));
382        }
383
384        let predicate = if let Sym::Id(name) = self.peek().sym
385            && name.eq_ignore_ascii_case("where")
386        {
387            Some(self.parse_where_clause()?)
388        } else {
389            None
390        };
391
392        let group_by = if let Sym::Id(name) = self.peek().sym
393            && name.eq_ignore_ascii_case("group")
394        {
395            Some(self.parse_group_by()?)
396        } else {
397            None
398        };
399
400        let order_by = if let Sym::Id(name) = self.peek().sym
401            && name.eq_ignore_ascii_case("order")
402        {
403            Some(self.parse_order_by()?)
404        } else {
405            None
406        };
407
408        let limit = if let Sym::Id(name) = self.peek().sym
409            && (name.eq_ignore_ascii_case("skip") || name.eq_ignore_ascii_case("top"))
410        {
411            Some(self.parse_limit()?)
412        } else {
413            None
414        };
415
416        expect_keyword(self.shift(), "project")?;
417        expect_keyword(self.shift(), "into")?;
418
419        let distinct = if let Sym::Id(name) = self.peek().sym
420            && name.eq_ignore_ascii_case("distinct")
421        {
422            self.shift();
423            true
424        } else {
425            false
426        };
427
428        let projection = self.parse_expr()?;
429
430        Ok(Query {
431            attrs: Attrs::new(pos),
432            sources,
433            predicate,
434            group_by,
435            order_by,
436            limit,
437            projection,
438            distinct,
439            meta: Raw,
440        })
441    }
442}
443
444fn expect_keyword(token: Token, keyword: &'static str) -> ParseResult<()> {
445    if let Sym::Id(id) = token.sym
446        && id.eq_ignore_ascii_case(keyword)
447    {
448        return Ok(());
449    }
450
451    Err(ParserError::ExpectedKeyword(
452        token.line,
453        token.col,
454        keyword,
455        token.sym.to_string(),
456    ))
457}
458
459fn expect_symbol(token: Token, expect: Symbol) -> ParseResult<()> {
460    if let Sym::Symbol(sym) = token.sym
461        && sym == expect
462    {
463        return Ok(());
464    }
465
466    Err(ParserError::ExpectedSymbol(
467        token.line,
468        token.col,
469        expect,
470        token.sym.to_string(),
471    ))
472}
473
474// Pratt parser operator binding power (precedence)
475fn binding_pow(op: Operator) -> (u64, u64) {
476    match op {
477        Operator::Add | Operator::Sub => (20, 21),
478        Operator::Mul | Operator::Div => (30, 31),
479        Operator::Eq
480        | Operator::Neq
481        | Operator::Gt
482        | Operator::Lt
483        | Operator::Gte
484        | Operator::Lte => (10, 11),
485        Operator::And | Operator::Or | Operator::Xor | Operator::Not => (1, 2),
486    }
487}
488
489/// Parse a sequence of tokens into a Query AST.
490///
491/// This function performs syntactic analysis on the token stream, constructing
492/// an abstract syntax tree that represents the structure of the EventQL query.
493///
494/// # Grammar
495///
496/// The parser recognizes the following EventQL grammar:
497///
498/// ```text
499/// Query     := FROM+ WHERE? GROUP_BY? ORDER_BY? LIMIT? PROJECT
500/// FROM      := "FROM" Id "IN" SourceKind
501/// SourceKind := Id | String | "(" Query ")"
502/// WHERE     := "WHERE" Expr
503/// GROUP_BY  := "GROUP" "BY" Expr
504/// ORDER_BY  := "ORDER" "BY" Expr ("ASC" | "DESC")
505/// LIMIT     := ("TOP" | "SKIP") Number
506/// PROJECT   := "PROJECT" "INTO" Expr
507/// Expr      := Binary | Unary | Primary
508/// Primary   := Number | String | Bool | Id | Array | Record | Access | App | "(" Expr ")"
509/// ```
510///
511/// # Expression Precedence
512///
513/// The parser uses a Pratt parser for expressions with the following precedence
514/// (from highest to lowest):
515///
516/// 1. Unary operators (`+`, `-`, `NOT`)
517/// 2. Multiplicative (`*`, `/`)
518/// 3. Additive (`+`, `-`)
519/// 4. Comparison (`<`, `<=`, `>`, `>=`, `==`, `!=`)
520/// 5. Logical (`AND`, `OR`, `XOR`)
521pub fn parse<'a>(input: &'a [Token<'a>]) -> ParseResult<Query<Raw>> {
522    let mut parser = Parser::new(input);
523
524    parser.parse_query()
525}