eventql_parser/
ast.rs

1//! Abstract syntax tree (AST) types for EventQL.
2//!
3//! This module defines the structure of parsed EventQL queries as an abstract
4//! syntax tree. The AST represents the semantic structure of a query, making it
5//! easy to analyze, transform, or execute queries.
6//!
7//! # Core Types
8//!
9//! - [`Query`] - The root of the AST, representing a complete query
10//! - [`Expr`] - Expressions with position and type information
11//! - [`Value`] - The various kinds of expression values (literals, operators, etc.)
12//! - [`Source`] - Data sources in FROM clauses
13//!
14use std::{collections::BTreeMap, mem};
15
16use crate::{
17    analysis::{AnalysisOptions, Typed, static_analysis},
18    error::{AnalysisError, Error},
19    token::{Operator, Token},
20};
21use serde::Serialize;
22
/// A location (line and column) in the query source text.
///
/// Tokens and AST nodes carry a `Pos` so that errors and diagnostics can
/// point at the exact place in the query they refer to.
///
/// The derived ordering compares `line` first and `col` second, because the
/// fields are declared in that order.
///
/// # Examples
///
/// ```
/// use eventql_parser::Pos;
///
/// let pos = Pos { line: 1, col: 10 };
/// assert_eq!(pos.line, 1);
/// assert_eq!(pos.col, 10);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
pub struct Pos {
    /// Line number (1-indexed)
    pub line: u32,
    /// Column number (1-indexed)
    pub col: u32,
}
44
45impl From<Token<'_>> for Pos {
46    fn from(value: Token<'_>) -> Self {
47        Self {
48            line: value.line,
49            col: value.col,
50        }
51    }
52}
53
/// Type information for expressions.
///
/// This enum represents the type of an expression in the EventQL language.
/// The default value is [`Type::Unspecified`], which stands for a type that
/// has not been determined yet.
#[derive(Clone, PartialEq, Eq, Debug, Default, Serialize)]
pub enum Type {
    /// Type has not been determined yet
    #[default]
    Unspecified,
    /// Numeric type (f64)
    Number,
    /// String type
    String,
    /// Boolean type
    Bool,
    /// Array type, carrying a list of types that [`Type::check`] compares
    /// position by position
    Array(Vec<Type>),
    /// Record (object) type, mapping each field name to the type of that field
    Record(BTreeMap<String, Type>),
    /// Subject pattern type
    Subject,
    /// Function type: the types of its arguments (`args`) and the type of the
    /// value it produces (`result`)
    App { args: Vec<Type>, result: Box<Type> },
}
78
79impl Type {
80    pub fn as_record_or_panic_mut(&mut self) -> &mut BTreeMap<String, Type> {
81        if let Self::Record(r) = self {
82            return r;
83        }
84
85        panic!("expected record type, got {:?}", self);
86    }
87
88    /// Checks if two types are the same.
89    ///
90    /// * If `self` is `Type::Unspecified` then `self` is updated to the more specific `Type`.
91    /// * If `self` is `Type::Subject` and is checked against a `Type::String` then `self` is updated to `Type::String`
92    pub fn check(self, attrs: &Attrs, other: Type) -> Result<Type, AnalysisError> {
93        match (self, other) {
94            (Self::Unspecified, other) => Ok(other),
95            (this, Self::Unspecified) => Ok(this),
96            (Self::Subject, Self::Subject) => Ok(Self::Subject),
97
98            // Subjects are strings so there is no reason to reject a type
99            // when compared to a string. However, when it happens, we demote
100            // a subject to a string.
101            (Self::Subject, Self::String) => Ok(Self::String),
102            (Self::String, Self::Subject) => Ok(Self::String),
103
104            (Self::Number, Self::Number) => Ok(Self::Number),
105            (Self::String, Self::String) => Ok(Self::String),
106            (Self::Bool, Self::Bool) => Ok(Self::Bool),
107
108            (Self::Array(mut a), Self::Array(b)) if a.len() == b.len() => {
109                if a.is_empty() {
110                    return Ok(Self::Array(a));
111                }
112
113                for (a, b) in a.iter_mut().zip(b.into_iter()) {
114                    let tmp = mem::take(a);
115                    *a = tmp.check(attrs, b)?;
116                }
117
118                Ok(Self::Array(a))
119            }
120
121            (Self::Record(mut a), Self::Record(b)) if a.len() == b.len() => {
122                if a.is_empty() {
123                    return Ok(Self::Record(a));
124                }
125
126                for (ak, bk) in a.keys().zip(b.keys()) {
127                    if ak != bk {
128                        return Err(AnalysisError::TypeMismatch(
129                            attrs.pos.line,
130                            attrs.pos.col,
131                            Self::Record(a),
132                            Self::Record(b),
133                        ));
134                    }
135                }
136
137                for (av, bv) in a.values_mut().zip(b.into_values()) {
138                    let a = mem::take(av);
139                    *av = a.check(attrs, bv)?;
140                }
141
142                Ok(Self::Record(a))
143            }
144
145            (
146                Self::App {
147                    args: mut a_args,
148                    result: mut a_res,
149                },
150                Self::App {
151                    args: b_args,
152                    result: b_res,
153                },
154            ) if a_args.len() == b_args.len() => {
155                if a_args.is_empty() {
156                    let tmp = mem::take(a_res.as_mut());
157                    *a_res = tmp.check(attrs, *b_res)?;
158                    return Ok(Self::App {
159                        args: a_args,
160                        result: a_res,
161                    });
162                }
163
164                for (a, b) in a_args.iter_mut().zip(b_args.into_iter()) {
165                    let tmp = mem::take(a);
166                    *a = tmp.check(attrs, b)?;
167                }
168
169                let tmp = mem::take(a_res.as_mut());
170                *a_res = tmp.check(attrs, *b_res)?;
171
172                Ok(Self::App {
173                    args: a_args,
174                    result: a_res,
175                })
176            }
177
178            (this, other) => Err(AnalysisError::TypeMismatch(
179                attrs.pos.line,
180                attrs.pos.col,
181                this,
182                other,
183            )),
184        }
185    }
186}
187
/// Attributes attached to each expression node.
///
/// These attributes provide metadata about an expression. At present the
/// only attribute stored is the position of the expression in the source
/// code.
#[derive(Debug, Clone, Copy, Serialize)]
pub struct Attrs {
    /// Source position of this expression
    pub pos: Pos,
}
197
198impl Attrs {
199    /// Create new attributes with unspecified type.
200    pub fn new(pos: Pos) -> Self {
201        Self { pos }
202    }
203}
204
/// An expression with metadata.
///
/// This is the fundamental building block of the AST. Every expression
/// carries its attributes (see [`Attrs`]) and a value that determines
/// what kind of expression it is.
#[derive(Debug, Clone, Serialize)]
pub struct Expr {
    /// Metadata about this expression
    pub attrs: Attrs,
    /// The value/kind of this expression
    pub value: Value,
}
217
/// Field access expression (e.g., `e.data.price`).
///
/// Represents accessing a field of a record or object using dot notation.
/// Because `target` is itself an expression, accesses can be chained for
/// nested field access.
///
/// # Examples
///
/// In the query `WHERE e.data.user.id == 1`, the expression `e.data.user.id`
/// is parsed as nested `Access` nodes.
#[derive(Debug, Clone, Serialize)]
pub struct Access {
    /// The target expression being accessed (what appears before the dot)
    pub target: Box<Expr>,
    /// The name of the field being accessed (what appears after the dot)
    pub field: String,
}
234
/// Function application (e.g., `sum(e.price)`, `count()`).
///
/// Represents a function call with zero or more arguments. The function is
/// referred to by name only.
///
/// # Examples
///
/// In the query `WHERE count(e.items) > 5`, the `count(e.items)` is an `App` node.
#[derive(Debug, Clone, Serialize)]
pub struct App {
    /// Name of the function being called
    pub func: String,
    /// Arguments passed to the function; empty for a call such as `count()`
    pub args: Vec<Expr>,
}
249
/// A field in a record literal (e.g., `name: "Alice"` in `{name: "Alice", age: 30}`).
///
/// Represents a single key-value pair in a record construction.
#[derive(Debug, Clone, Serialize)]
pub struct Field {
    /// Field name (the key)
    pub name: String,
    /// Field value expression
    pub value: Expr,
}
260
/// Binary operation (e.g., `a + b`, `x == y`, `p AND q`).
///
/// Represents operations that take two operands, including arithmetic,
/// comparison, and logical operators.
///
/// # Examples
///
/// In `WHERE e.price > 100 AND e.active == true`, there are multiple
/// binary operations: `>`, `==`, and `AND`.
#[derive(Debug, Clone, Serialize)]
pub struct Binary {
    /// Left-hand side operand
    pub lhs: Box<Expr>,
    /// The operator applied to both operands
    pub operator: Operator,
    /// Right-hand side operand
    pub rhs: Box<Expr>,
}
279
/// Unary operation (e.g., `-x`, `NOT active`).
///
/// Represents operations that take a single operand.
///
/// # Examples
///
/// In `WHERE NOT e.deleted`, the `NOT e.deleted` is a unary operation.
#[derive(Debug, Clone, Serialize)]
pub struct Unary {
    /// The operator (Add for +, Sub for -, Not for NOT)
    pub operator: Operator,
    /// The operand expression the operator is applied to
    pub expr: Box<Expr>,
}
294
/// The kind of value an expression represents.
///
/// This enum contains all the different kinds of expressions that can appear
/// in an EventQL query, from simple literals to complex operations. It is the
/// payload stored in [`Expr::value`].
#[derive(Debug, Clone, Serialize)]
pub enum Value {
    /// Numeric literal (e.g., `42`, `3.14`)
    Number(f64),
    /// String literal (e.g., `"hello"`)
    String(String),
    /// Boolean literal (`true` or `false`)
    Bool(bool),
    /// Identifier (e.g., variable name `e`, `x`)
    Id(String),
    /// Array literal (e.g., `[1, 2, 3]`)
    Array(Vec<Expr>),
    /// Record literal (e.g., `{name: "Alice", age: 30}`)
    Record(Vec<Field>),
    /// Field access (e.g., `e.data.price`)
    Access(Access),
    /// Function application (e.g., `sum(e.price)`)
    App(App),
    /// Binary operation (e.g., `a + b`, `x == y`)
    Binary(Binary),
    /// Unary operation (e.g., `-x`, `NOT active`)
    Unary(Unary),
    /// Grouped/parenthesized expression (e.g., `(a + b)`)
    Group(Box<Expr>),
}
324
/// A source binding: a name attached to a source of events.
///
/// # Examples
///
/// In `FROM e IN events`, `e` is the binding.
#[derive(Debug, Clone, Serialize)]
pub struct Binding {
    /// Name attached to a source of events
    pub name: String,
    /// Position in the source code where that binding was introduced
    pub pos: Pos,
}
336
/// A data source in a FROM clause.
///
/// Sources specify where data comes from in a query. Each source has a binding
/// (the variable name) and a kind (what it binds to).
///
/// The type parameter `A` is passed through to [`SourceKind`], where it is
/// the metadata type of a nested subquery.
///
/// # Examples
///
/// In `FROM e IN events`, the source has:
/// - `binding`: `"e"`
/// - `kind`: `SourceKind::Name("events")`
#[derive(Debug, Clone, Serialize)]
pub struct Source<A> {
    /// Variable name bound to this source
    pub binding: Binding,
    /// What this source represents
    pub kind: SourceKind<A>,
}
354
/// The kind of data source.
///
/// EventQL supports three types of sources:
/// - Named sources (e.g., `FROM e IN events`)
/// - Subject patterns (e.g., `FROM e IN "users/john"`)
/// - Subqueries (e.g., `FROM e IN (FROM x IN events PROJECT INTO x)`)
///
/// The type parameter `A` is the metadata type of the nested [`Query`] held
/// by the `Subquery` variant.
#[derive(Debug, Clone, Serialize)]
pub enum SourceKind<A> {
    /// Named source (identifier)
    Name(String),
    /// Subject pattern (string literal used as event subject pattern)
    Subject(String),
    /// Nested subquery
    Subquery(Box<Query<A>>),
}
370
/// ORDER BY clause specification.
///
/// Defines how query results should be sorted: which expression provides the
/// sort key and in which direction.
///
/// # Examples
///
/// In `ORDER BY e.timestamp DESC`, this would be represented as:
/// - `expr`: expression for `e.timestamp`
/// - `order`: `Order::Desc`
#[derive(Debug, Clone, Serialize)]
pub struct OrderBy {
    /// Expression to sort by
    pub expr: Expr,
    /// Sort direction (ascending or descending)
    pub order: Order,
}
387
/// Sort order direction.
///
/// Specifies whether sorting is ascending or descending. Used by
/// [`OrderBy::order`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum Order {
    /// Ascending order (smallest to largest)
    Asc,
    /// Descending order (largest to smallest)
    Desc,
}
398
/// GROUP BY clause specification.
///
/// Defines how query results should be grouped, and optionally which groups
/// to keep.
///
/// # Examples
///
/// In `GROUP BY e.age HAVING age > 123`, this would be represented as:
/// - `expr`: expression for `e.age`
/// - `predicate`: `age > 123`
#[derive(Debug, Clone, Serialize)]
pub struct GroupBy {
    /// Expression to group by
    pub expr: Expr,

    /// Predicate to filter groups after aggregation (the `HAVING` part);
    /// `None` when the clause has no `HAVING`
    pub predicate: Option<Expr>,
}
415
/// Result set limit specification.
///
/// EventQL supports two types of limits:
/// - `TOP n` - Take the first n results
/// - `SKIP n` - Skip the first n results
///
/// [`Query::limit`] holds a single `Option<Limit>`, so a query carries
/// either a `TOP` or a `SKIP`, not both.
///
/// # Examples
///
/// - `TOP 10` limits to first 10 results
/// - `SKIP 20` skips first 20 results
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum Limit {
    /// Skip the first n results
    Skip(u64),
    /// Take only the first n results
    Top(u64),
}
433
/// Marker for a query that is only known to be syntactically valid.
///
/// There is no guarantee that all the variables exist or that the query is
/// sound. For example, the user could be asking for an event that has a field
/// that should be a string and a number at the same time.
#[derive(Debug, Clone, Copy, Serialize)]
pub struct Raw;
439
/// A complete EventQL query.
///
/// This is the root node of the AST, representing a full query with all its clauses.
/// A query must have at least one source and a projection; other clauses are optional.
///
/// # Structure
///
/// ```text
/// FROM <alias> IN <source>
/// [FROM <alias> IN <source>] ...
/// [WHERE <condition>]
/// [GROUP BY <field> [HAVING <condition>]]
/// [ORDER BY <field> ASC|DESC]
/// [TOP|SKIP <n>]
/// PROJECT INTO [DISTINCT] <projection>
/// ```
///
/// # Examples
///
/// ```
/// use eventql_parser::parse_query;
///
/// let query = parse_query(
///     "FROM e IN events \
///      WHERE e.price > 100 \
///      ORDER BY e.timestamp DESC \
///      TOP 10 \
///      PROJECT INTO {id: e.id, price: e.price}"
/// ).unwrap();
///
/// assert_eq!(query.sources.len(), 1);
/// assert!(query.predicate.is_some());
/// assert!(query.order_by.is_some());
/// assert!(query.limit.is_some());
/// ```
#[derive(Debug, Clone, Serialize)]
pub struct Query<A> {
    /// Metadata about this query
    pub attrs: Attrs,
    /// FROM clause sources (must have at least one)
    pub sources: Vec<Source<A>>,
    /// Optional WHERE clause filter predicate
    pub predicate: Option<Expr>,
    /// Optional GROUP BY clause, including its HAVING predicate if any
    pub group_by: Option<GroupBy>,
    /// Optional ORDER BY clause
    pub order_by: Option<OrderBy>,
    /// Optional limit clause (TOP or SKIP)
    pub limit: Option<Limit>,
    /// PROJECT INTO clause expression (required)
    pub projection: Expr,
    /// Remove duplicate rows from the query's results
    pub distinct: bool,
    /// Type-level metadata about the query's analysis state.
    ///
    /// This field uses a generic type parameter to track whether the query
    /// is in a raw (parsed but untyped) state or has been statically analyzed:
    /// - `Query<Raw>`: Query parsed but not yet type-checked
    /// - `Query<Typed>`: Query that has passed static analysis with validated
    ///   types and variable scopes
    ///
    /// This provides compile-time guarantees about the query's type safety.
    pub meta: A,
}
504
505impl Query<Raw> {
506    /// Performs static analysis on this raw query.
507    ///
508    /// This is a convenience method that runs type checking and variable scoping
509    /// analysis on the query, converting it from a raw (untyped) query to a
510    /// typed query.
511    ///
512    /// # Arguments
513    ///
514    /// * `options` - Configuration containing type information and default scope
515    ///
516    /// # Returns
517    ///
518    /// Returns a typed query on success, or an error if type checking fails.
519    pub fn run_static_analysis(self, options: &AnalysisOptions) -> crate::Result<Query<Typed>> {
520        static_analysis(options, self).map_err(Error::Analysis)
521    }
522}