eventql_parser/
parser.rs

1//! Syntactic analysis (parsing) for EventQL.
2//!
3//! This module provides the parser that converts a sequence of tokens into
4//! an abstract syntax tree (AST). The parser implements a recursive descent
5//! parser with operator precedence for expressions.
6//!
7//! # Main Function
8//!
9//! - [`parse`] - Convert a slice of tokens into a Query AST
10
11use crate::ast::{
12    Access, App, Attrs, Binary, Expr, Field, Limit, Order, OrderBy, Query, Source, SourceKind,
13    Unary, Value,
14};
15use crate::error::ParserError;
16use crate::token::{Operator, Sym, Symbol, Token};
17use crate::{Binding, GroupBy, Raw};
18
19/// Result type for parser operations.
20///
21/// This is a convenience alias for `Result<T, ParserError>`.
22pub type ParseResult<A> = Result<A, ParserError>;
23
24struct Parser<'a> {
25    input: &'a [Token<'a>],
26    offset: usize,
27}
28
29impl<'a> Parser<'a> {
30    fn new(input: &'a [Token<'a>]) -> Self {
31        Self { input, offset: 0 }
32    }
33
34    fn peek<'b>(&'b self) -> Token<'a> {
35        self.input[self.offset]
36    }
37
38    fn shift<'b>(&'b mut self) -> Token<'a> {
39        let res = self.input[self.offset];
40
41        if self.offset + 1 < self.input.len() {
42            self.offset += 1;
43        }
44
45        res
46    }
47
48    fn parse_ident(&mut self) -> ParseResult<String> {
49        let token = self.shift();
50
51        if let Sym::Id(id) = token.sym {
52            return Ok(id.to_owned());
53        }
54
55        Err(ParserError::ExpectedIdent(
56            token.line,
57            token.col,
58            token.sym.to_string(),
59        ))
60    }
61
62    fn parse_source_kind(&mut self) -> ParseResult<SourceKind<Raw>> {
63        let token = self.shift();
64        match token.sym {
65            Sym::Id(id) => Ok(SourceKind::Name(id.to_owned())),
66            Sym::String(sub) => Ok(SourceKind::Subject(sub.to_owned())),
67            Sym::Symbol(Symbol::OpenParen) => {
68                let query = self.parse_query()?;
69                expect_symbol(self.shift(), Symbol::CloseParen)?;
70
71                Ok(SourceKind::Subquery(Box::new(query)))
72            }
73            _ => Err(ParserError::UnexpectedToken(
74                token.line,
75                token.col,
76                token.sym.to_string(),
77            )),
78        }
79    }
80
81    fn parse_source(&mut self) -> ParseResult<Source<Raw>> {
82        expect_keyword(self.shift(), "from")?;
83
84        let token = self.shift();
85        let binding = if let Sym::Id(name) = token.sym {
86            Binding {
87                name: name.to_owned(),
88                pos: token.into(),
89            }
90        } else {
91            return Err(ParserError::ExpectedIdent(
92                token.line,
93                token.col,
94                token.sym.to_string(),
95            ));
96        };
97
98        expect_keyword(self.shift(), "in")?;
99        let kind = self.parse_source_kind()?;
100
101        Ok(Source { binding, kind })
102    }
103
104    fn parse_where_clause(&mut self) -> ParseResult<Expr> {
105        expect_keyword(self.shift(), "where")?;
106        self.parse_expr()
107    }
108
109    fn parse_group_by(&mut self) -> ParseResult<GroupBy> {
110        expect_keyword(self.shift(), "group")?;
111        expect_keyword(self.shift(), "by")?;
112
113        let expr = self.parse_expr()?;
114
115        let predicate = if let Sym::Id(name) = self.peek().sym
116            && name.eq_ignore_ascii_case("having")
117        {
118            self.shift();
119
120            Some(self.parse_expr()?)
121        } else {
122            None
123        };
124
125        Ok(GroupBy { expr, predicate })
126    }
127
128    fn parse_order_by(&mut self) -> ParseResult<OrderBy> {
129        expect_keyword(self.shift(), "order")?;
130        expect_keyword(self.shift(), "by")?;
131
132        let expr = self.parse_expr()?;
133        let token = self.shift();
134
135        if let Sym::Id(name) = token.sym {
136            let order = if name.eq_ignore_ascii_case("asc") {
137                Order::Asc
138            } else if name.eq_ignore_ascii_case("desc") {
139                Order::Desc
140            } else {
141                return Err(ParserError::UnexpectedToken(
142                    token.line,
143                    token.col,
144                    name.to_owned(),
145                ));
146            };
147
148            return Ok(OrderBy { expr, order });
149        }
150
151        Err(ParserError::UnexpectedToken(
152            token.line,
153            token.col,
154            token.sym.to_string(),
155        ))
156    }
157
158    fn parse_limit(&mut self) -> ParseResult<Limit> {
159        let token = self.shift();
160        let limit = expect_keyword(token, "top")
161            .map(|_| "top")
162            .or_else(|_| expect_keyword(token, "skip").map(|_| "skip"))
163            .map_err(|_| {
164                ParserError::UnexpectedToken(token.line, token.col, token.sym.to_string())
165            })?;
166
167        let token = self.shift();
168        if let Sym::Number(value) = token.sym
169            && value.fract() == 0.0
170        {
171            return match limit {
172                "top" => Ok(Limit::Top(value as u64)),
173                "skip" => Ok(Limit::Skip(value as u64)),
174                _ => unreachable!(),
175            };
176        }
177
178        Err(ParserError::UnexpectedToken(
179            token.line,
180            token.col,
181            token.sym.to_string(),
182        ))
183    }
184
185    fn parse_expr(&mut self) -> ParseResult<Expr> {
186        let token = self.peek();
187
188        match token.sym {
189            Sym::Eof => Err(ParserError::UnexpectedEof),
190
191            Sym::Id(_)
192            | Sym::String(_)
193            | Sym::Number(_)
194            | Sym::Symbol(Symbol::OpenParen | Symbol::OpenBracket | Symbol::OpenBrace)
195            | Sym::Operator(Operator::Add | Operator::Sub | Operator::Not) => self.parse_binary(0),
196
197            _ => Err(ParserError::UnexpectedToken(
198                token.line,
199                token.col,
200                token.sym.to_string(),
201            )),
202        }
203    }
204
205    fn parse_primary(&mut self) -> ParseResult<Expr> {
206        let token = self.shift();
207
208        let value = match token.sym {
209            Sym::Id(name) => {
210                if name.eq_ignore_ascii_case("true") {
211                    Value::Bool(true)
212                } else if name.eq_ignore_ascii_case("false") {
213                    Value::Bool(false)
214                } else if matches!(self.peek().sym, Sym::Symbol(Symbol::OpenParen)) {
215                    self.shift();
216
217                    let mut args = vec![];
218
219                    if !matches!(self.peek().sym, Sym::Symbol(Symbol::CloseParen)) {
220                        args.push(self.parse_expr()?);
221
222                        while matches!(self.peek().sym, Sym::Symbol(Symbol::Comma)) {
223                            self.shift();
224                            args.push(self.parse_expr()?);
225                        }
226                    }
227
228                    expect_symbol(self.shift(), Symbol::CloseParen)?;
229
230                    Value::App(App {
231                        func: name.to_owned(),
232                        args,
233                    })
234                } else if matches!(self.peek().sym, Sym::Symbol(Symbol::Dot)) {
235                    self.shift();
236                    let mut access = Access {
237                        target: Box::new(Expr {
238                            attrs: Attrs::new(token.into()),
239                            value: Value::Id(name.to_owned()),
240                        }),
241                        field: self.parse_ident()?,
242                    };
243
244                    while matches!(self.peek().sym, Sym::Symbol(Symbol::Dot)) {
245                        self.shift();
246                        access = Access {
247                            target: Box::new(Expr {
248                                attrs: access.target.attrs,
249                                value: Value::Access(access),
250                            }),
251                            field: self.parse_ident()?,
252                        };
253                    }
254
255                    Value::Access(access)
256                } else {
257                    Value::Id(name.to_owned())
258                }
259            }
260
261            Sym::String(s) => Value::String(s.to_owned()),
262            Sym::Number(n) => Value::Number(n),
263
264            Sym::Symbol(Symbol::OpenParen) => {
265                let expr = self.parse_expr()?;
266                expect_symbol(self.shift(), Symbol::CloseParen)?;
267
268                Value::Group(Box::new(expr))
269            }
270
271            Sym::Symbol(Symbol::OpenBracket) => {
272                let mut elems = vec![];
273
274                if !matches!(self.peek().sym, Sym::Symbol(Symbol::CloseBracket)) {
275                    elems.push(self.parse_expr()?);
276
277                    while matches!(self.peek().sym, Sym::Symbol(Symbol::Comma)) {
278                        self.shift();
279                        elems.push(self.parse_expr()?);
280                    }
281                }
282
283                expect_symbol(self.shift(), Symbol::CloseBracket)?;
284
285                Value::Array(elems)
286            }
287
288            Sym::Symbol(Symbol::OpenBrace) => {
289                let mut fields = vec![];
290
291                if !matches!(self.peek().sym, Sym::Symbol(Symbol::CloseBrace)) {
292                    let name = self.parse_ident()?;
293                    expect_symbol(self.shift(), Symbol::Colon)?;
294                    let value = self.parse_expr()?;
295
296                    fields.push(Field { name, value });
297
298                    while matches!(self.peek().sym, Sym::Symbol(Symbol::Comma)) {
299                        self.shift();
300
301                        let name = self.parse_ident()?;
302                        expect_symbol(self.shift(), Symbol::Colon)?;
303                        let value = self.parse_expr()?;
304
305                        fields.push(Field { name, value });
306                    }
307                }
308
309                expect_symbol(self.shift(), Symbol::CloseBrace)?;
310
311                Value::Record(fields)
312            }
313
314            Sym::Operator(op) if matches!(op, Operator::Add | Operator::Sub | Operator::Not) => {
315                Value::Unary(Unary {
316                    operator: op,
317                    expr: Box::new(self.parse_expr()?),
318                })
319            }
320
321            _ => {
322                return Err(ParserError::UnexpectedToken(
323                    token.line,
324                    token.col,
325                    token.sym.to_string(),
326                ));
327            }
328        };
329
330        Ok(Expr {
331            attrs: Attrs::new(token.into()),
332            value,
333        })
334    }
335
336    fn parse_binary(&mut self, min_bind: u64) -> ParseResult<Expr> {
337        let mut lhs = self.parse_primary()?;
338
339        loop {
340            let token = self.peek();
341            let operator = if let Sym::Operator(op) = token.sym {
342                op
343            } else {
344                break;
345            };
346
347            let (lhs_bind, rhs_bind) = binding_pow(operator);
348
349            if lhs_bind < min_bind {
350                break;
351            }
352
353            self.shift();
354            let rhs = self.parse_binary(rhs_bind)?;
355
356            if matches!(operator, Operator::As) && !matches!(rhs.value, Value::Id(_)) {
357                return Err(ParserError::ExpectedType(
358                    rhs.attrs.pos.line,
359                    rhs.attrs.pos.col,
360                ));
361            }
362
363            lhs = Expr {
364                attrs: lhs.attrs,
365                value: Value::Binary(Binary {
366                    lhs: Box::new(lhs),
367                    operator,
368                    rhs: Box::new(rhs),
369                }),
370            };
371        }
372
373        Ok(lhs)
374    }
375
376    fn parse_query(&mut self) -> ParseResult<Query<Raw>> {
377        let mut sources = vec![];
378        let pos = self.peek().into();
379
380        while let Sym::Id(name) = self.peek().sym
381            && name.eq_ignore_ascii_case("from")
382        {
383            sources.push(self.parse_source()?);
384        }
385
386        if sources.is_empty() {
387            let token = self.peek();
388            return Err(ParserError::MissingFromStatement(token.line, token.col));
389        }
390
391        let predicate = if let Sym::Id(name) = self.peek().sym
392            && name.eq_ignore_ascii_case("where")
393        {
394            Some(self.parse_where_clause()?)
395        } else {
396            None
397        };
398
399        let group_by = if let Sym::Id(name) = self.peek().sym
400            && name.eq_ignore_ascii_case("group")
401        {
402            Some(self.parse_group_by()?)
403        } else {
404            None
405        };
406
407        let order_by = if let Sym::Id(name) = self.peek().sym
408            && name.eq_ignore_ascii_case("order")
409        {
410            Some(self.parse_order_by()?)
411        } else {
412            None
413        };
414
415        let limit = if let Sym::Id(name) = self.peek().sym
416            && (name.eq_ignore_ascii_case("skip") || name.eq_ignore_ascii_case("top"))
417        {
418            Some(self.parse_limit()?)
419        } else {
420            None
421        };
422
423        expect_keyword(self.shift(), "project")?;
424        expect_keyword(self.shift(), "into")?;
425
426        let distinct = if let Sym::Id(name) = self.peek().sym
427            && name.eq_ignore_ascii_case("distinct")
428        {
429            self.shift();
430            true
431        } else {
432            false
433        };
434
435        let projection = self.parse_expr()?;
436
437        Ok(Query {
438            attrs: Attrs::new(pos),
439            sources,
440            predicate,
441            group_by,
442            order_by,
443            limit,
444            projection,
445            distinct,
446            meta: Raw,
447        })
448    }
449}
450
451fn expect_keyword(token: Token, keyword: &'static str) -> ParseResult<()> {
452    if let Sym::Id(id) = token.sym
453        && id.eq_ignore_ascii_case(keyword)
454    {
455        return Ok(());
456    }
457
458    Err(ParserError::ExpectedKeyword(
459        token.line,
460        token.col,
461        keyword,
462        token.sym.to_string(),
463    ))
464}
465
466fn expect_symbol(token: Token, expect: Symbol) -> ParseResult<()> {
467    if let Sym::Symbol(sym) = token.sym
468        && sym == expect
469    {
470        return Ok(());
471    }
472
473    Err(ParserError::ExpectedSymbol(
474        token.line,
475        token.col,
476        expect,
477        token.sym.to_string(),
478    ))
479}
480
481// Pratt parser operator binding power (precedence)
482fn binding_pow(op: Operator) -> (u64, u64) {
483    match op {
484        Operator::Add | Operator::Sub => (20, 21),
485        Operator::Mul | Operator::Div => (30, 31),
486        Operator::Contains => (40, 39),
487        Operator::As => (50, 49),
488        Operator::Eq
489        | Operator::Neq
490        | Operator::Gt
491        | Operator::Lt
492        | Operator::Gte
493        | Operator::Lte => (10, 11),
494        Operator::And | Operator::Or | Operator::Xor | Operator::Not => (1, 2),
495    }
496}
497
498/// Parse a sequence of tokens into a Query AST.
499///
500/// This function performs syntactic analysis on the token stream, constructing
501/// an abstract syntax tree that represents the structure of the EventQL query.
502///
503/// # Grammar
504///
505/// The parser recognizes the following EventQL grammar:
506///
507/// ```text
508/// Query     := FROM+ WHERE? GROUP_BY? ORDER_BY? LIMIT? PROJECT
509/// FROM      := "FROM" Id "IN" SourceKind
510/// SourceKind := Id | String | "(" Query ")"
511/// WHERE     := "WHERE" Expr
512/// GROUP_BY  := "GROUP" "BY" Expr
513/// ORDER_BY  := "ORDER" "BY" Expr ("ASC" | "DESC")
514/// LIMIT     := ("TOP" | "SKIP") Number
515/// PROJECT   := "PROJECT" "INTO" Expr
516/// Expr      := Binary | Unary | Primary
517/// Primary   := Number | String | Bool | Id | Array | Record | Access | App | "(" Expr ")"
518/// ```
519///
520/// # Expression Precedence
521///
522/// The parser uses a Pratt parser for expressions with the following precedence
523/// (from highest to lowest):
524///
525/// 1. Unary operators (`+`, `-`, `NOT`)
526/// 2. Multiplicative (`*`, `/`)
527/// 3. Additive (`+`, `-`)
528/// 4. Comparison (`<`, `<=`, `>`, `>=`, `==`, `!=`)
529/// 5. Logical (`AND`, `OR`, `XOR`)
530pub fn parse<'a>(input: &'a [Token<'a>]) -> ParseResult<Query<Raw>> {
531    let mut parser = Parser::new(input);
532
533    parser.parse_query()
534}