eventql_parser/ast.rs
1//! Abstract syntax tree (AST) types for EventQL.
2//!
3//! This module defines the structure of parsed EventQL queries as an abstract
4//! syntax tree. The AST represents the semantic structure of a query, making it
5//! easy to analyze, transform, or execute queries.
6//!
7//! # Core Types
8//!
9//! - [`Query`] - The root of the AST, representing a complete query
10//! - [`Expr`] - Expressions with position and type information
11//! - [`Value`] - The various kinds of expression values (literals, operators, etc.)
12//! - [`Source`] - Data sources in FROM clauses
13//!
14use std::{collections::BTreeMap, mem};
15
16use crate::{
17 analysis::{AnalysisOptions, Typed, static_analysis},
18 error::{AnalysisError, Error},
19 token::{Operator, Token},
20};
21use serde::Serialize;
22
23/// Position information for source code locations.
24///
25/// This struct tracks the line and column number of tokens and AST nodes,
26/// which is useful for error reporting and debugging.
27///
28/// # Examples
29///
30/// ```
31/// use eventql_parser::Pos;
32///
33/// let pos = Pos { line: 1, col: 10 };
34/// assert_eq!(pos.line, 1);
35/// assert_eq!(pos.col, 10);
36/// ```
37#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
38pub struct Pos {
39 /// Line number (1-indexed)
40 pub line: u32,
41 /// Column number (1-indexed)
42 pub col: u32,
43}
44
45impl From<Token<'_>> for Pos {
46 fn from(value: Token<'_>) -> Self {
47 Self {
48 line: value.line,
49 col: value.col,
50 }
51 }
52}
53
54/// Type information for expressions.
55///
56/// This enum represents the type of an expression in the E
57
58#[derive(Clone, PartialEq, Eq, Debug, Default, Serialize)]
59pub enum Type {
60 /// Type has not been determined yet
61 #[default]
62 Unspecified,
63 /// Numeric type (f64)
64 Number,
65 /// String type
66 String,
67 /// Boolean type
68 Bool,
69 /// Array type
70 Array(Vec<Type>),
71 /// Record (object) type
72 Record(BTreeMap<String, Type>),
73 /// Subject pattern type
74 Subject,
75 /// Function type
76 App { args: Vec<Type>, result: Box<Type> },
77}
78
79impl Type {
80 pub fn as_record_or_panic_mut(&mut self) -> &mut BTreeMap<String, Type> {
81 if let Self::Record(r) = self {
82 return r;
83 }
84
85 panic!("expected record type, got {:?}", self);
86 }
87
88 /// Checks if two types are the same.
89 ///
90 /// * If `self` is `Type::Unspecified` then `self` is updated to the more specific `Type`.
91 /// * If `self` is `Type::Subject` and is checked against a `Type::String` then `self` is updated to `Type::String`
92 pub fn check(self, attrs: &Attrs, other: Type) -> Result<Type, AnalysisError> {
93 match (self, other) {
94 (Self::Unspecified, other) => Ok(other),
95 (this, Self::Unspecified) => Ok(this),
96 (Self::Subject, Self::Subject) => Ok(Self::Subject),
97
98 // Subjects are strings so there is no reason to reject a type
99 // when compared to a string. However, when it happens, we demote
100 // a subject to a string.
101 (Self::Subject, Self::String) => Ok(Self::String),
102 (Self::String, Self::Subject) => Ok(Self::String),
103
104 (Self::Number, Self::Number) => Ok(Self::Number),
105 (Self::String, Self::String) => Ok(Self::String),
106 (Self::Bool, Self::Bool) => Ok(Self::Bool),
107
108 (Self::Array(mut a), Self::Array(b)) if a.len() == b.len() => {
109 if a.is_empty() {
110 return Ok(Self::Array(a));
111 }
112
113 for (a, b) in a.iter_mut().zip(b.into_iter()) {
114 let tmp = mem::take(a);
115 *a = tmp.check(attrs, b)?;
116 }
117
118 Ok(Self::Array(a))
119 }
120
121 (Self::Record(mut a), Self::Record(b)) if a.len() == b.len() => {
122 if a.is_empty() {
123 return Ok(Self::Record(a));
124 }
125
126 for (ak, bk) in a.keys().zip(b.keys()) {
127 if ak != bk {
128 return Err(AnalysisError::TypeMismatch(
129 attrs.pos.line,
130 attrs.pos.col,
131 Self::Record(a),
132 Self::Record(b),
133 ));
134 }
135 }
136
137 for (av, bv) in a.values_mut().zip(b.into_values()) {
138 let a = mem::take(av);
139 *av = a.check(attrs, bv)?;
140 }
141
142 Ok(Self::Record(a))
143 }
144
145 (
146 Self::App {
147 args: mut a_args,
148 result: mut a_res,
149 },
150 Self::App {
151 args: b_args,
152 result: b_res,
153 },
154 ) if a_args.len() == b_args.len() => {
155 if a_args.is_empty() {
156 let tmp = mem::take(a_res.as_mut());
157 *a_res = tmp.check(attrs, *b_res)?;
158 return Ok(Self::App {
159 args: a_args,
160 result: a_res,
161 });
162 }
163
164 for (a, b) in a_args.iter_mut().zip(b_args.into_iter()) {
165 let tmp = mem::take(a);
166 *a = tmp.check(attrs, b)?;
167 }
168
169 let tmp = mem::take(a_res.as_mut());
170 *a_res = tmp.check(attrs, *b_res)?;
171
172 Ok(Self::App {
173 args: a_args,
174 result: a_res,
175 })
176 }
177
178 (this, other) => Err(AnalysisError::TypeMismatch(
179 attrs.pos.line,
180 attrs.pos.col,
181 this,
182 other,
183 )),
184 }
185 }
186}
187
188/// Attributes attached to each expression node.
189///
190/// These attributes provide metadata about an expression, including its
191/// position in the source code, scope information, and type information.
192#[derive(Debug, Clone, Copy, Serialize)]
193pub struct Attrs {
194 /// Source position of this expression
195 pub pos: Pos,
196}
197
198impl Attrs {
199 /// Create new attributes with unspecified type.
200 pub fn new(pos: Pos) -> Self {
201 Self { pos }
202 }
203}
204
205/// An expression with metadata.
206///
207/// This is the fundamental building block of the AST. Every expression
208/// carries attributes (position, scope, type) and a value that determines
209/// what kind of expression it is.
210#[derive(Debug, Clone, Serialize)]
211pub struct Expr {
212 /// Metadata about this expression
213 pub attrs: Attrs,
214 /// The value/kind of this expression
215 pub value: Value,
216}
217
218/// Field access expression (e.g., `e.data.price`).
219///
220/// Represents accessing a field of a record or object using dot notation.
221/// Can be chained for nested field access.
222///
223/// # Examples
224///
225/// In the query `WHERE e.data.user.id == 1`, the expression `e.data.user.id`
226/// is parsed as nested `Access` nodes.
227#[derive(Debug, Clone, Serialize)]
228pub struct Access {
229 /// The target expression being accessed
230 pub target: Box<Expr>,
231 /// The name of the field being accessed
232 pub field: String,
233}
234
235/// Function application (e.g., `sum(e.price)`, `count()`).
236///
237/// Represents a function call with zero or more arguments.
238///
239/// # Examples
240///
241/// In the query `WHERE count(e.items) > 5`, the `count(e.items)` is an `App` node.
242#[derive(Debug, Clone, Serialize)]
243pub struct App {
244 /// Name of the function being called
245 pub func: String,
246 /// Arguments passed to the function
247 pub args: Vec<Expr>,
248}
249
250/// A field in a record literal (e.g., `{name: "Alice", age: 30}`).
251///
252/// Represents a key-value pair in a record construction.
253#[derive(Debug, Clone, Serialize)]
254pub struct Field {
255 /// Field name
256 pub name: String,
257 /// Field value expression
258 pub value: Expr,
259}
260
261/// Binary operation (e.g., `a + b`, `x == y`, `p AND q`).
262///
263/// Represents operations that take two operands, including arithmetic,
264/// comparison, and logical operators.
265///
266/// # Examples
267///
268/// In `WHERE e.price > 100 AND e.active == true`, there are multiple
269/// binary operations: `>`, `==`, and `AND`.
270#[derive(Debug, Clone, Serialize)]
271pub struct Binary {
272 /// Left-hand side operand
273 pub lhs: Box<Expr>,
274 /// The operator
275 pub operator: Operator,
276 /// Right-hand side operand
277 pub rhs: Box<Expr>,
278}
279
280/// Unary operation (e.g., `-x`, `NOT active`).
281///
282/// Represents operations that take a single operand.
283///
284/// # Examples
285///
286/// In `WHERE NOT e.deleted`, the `NOT e.deleted` is a unary operation.
287#[derive(Debug, Clone, Serialize)]
288pub struct Unary {
289 /// The operator (Add for +, Sub for -, Not for NOT)
290 pub operator: Operator,
291 /// The operand expression
292 pub expr: Box<Expr>,
293}
294
295/// The kind of value an expression represents.
296///
297/// This enum contains all the different types of expressions that can appear
298/// in an EventQL query, from simple literals to complex operations.
299#[derive(Debug, Clone, Serialize)]
300pub enum Value {
301 /// Numeric literal (e.g., `42`, `3.14`)
302 Number(f64),
303 /// String literal (e.g., `"hello"`)
304 String(String),
305 /// Boolean literal (`true` or `false`)
306 Bool(bool),
307 /// Identifier (e.g., variable name `e`, `x`)
308 Id(String),
309 /// Array literal (e.g., `[1, 2, 3]`)
310 Array(Vec<Expr>),
311 /// Record literal (e.g., `{name: "Alice", age: 30}`)
312 Record(Vec<Field>),
313 /// Field access (e.g., `e.data.price`)
314 Access(Access),
315 /// Function application (e.g., `sum(e.price)`)
316 App(App),
317 /// Binary operation (e.g., `a + b`, `x == y`)
318 Binary(Binary),
319 /// Unary operation (e.g., `-x`, `NOT active`)
320 Unary(Unary),
321 /// Grouped/parenthesized expression (e.g., `(a + b)`)
322 Group(Box<Expr>),
323}
324
325/// A source binding. A name attached to a source of events.
326///
327/// # Examples
328/// in `FROM e IN events`, `e` is the binding.
329#[derive(Debug, Clone, Serialize)]
330pub struct Binding {
331 /// Name attached to a source of events
332 pub name: String,
333 /// Position in the source code where that binding was introduced
334 pub pos: Pos,
335}
336
337/// A data source in a FROM clause.
338///
339/// Sources specify where data comes from in a query. Each source has a binding
340/// (the variable name) and a kind (what it binds to).
341///
342/// # Examples
343///
344/// In `FROM e IN events`, the source has:
345/// - `binding`: `"e"`
346/// - `kind`: `SourceKind::Name("events")`
347#[derive(Debug, Clone, Serialize)]
348pub struct Source<A> {
349 /// Variable name bound to this source
350 pub binding: Binding,
351 /// What this source represents
352 pub kind: SourceKind<A>,
353}
354
355/// The kind of data source.
356///
357/// EventQL supports three types of sources:
358/// - Named sources (e.g., `FROM e IN events`)
359/// - Subject patterns (e.g., `FROM e IN "users/john"`)
360/// - Subqueries (e.g., `FROM e IN (SELECT ...)`)
361#[derive(Debug, Clone, Serialize)]
362pub enum SourceKind<A> {
363 /// Named source (identifier)
364 Name(String),
365 /// Subject pattern (string literal used as event subject pattern)
366 Subject(String),
367 /// Nested subquery
368 Subquery(Box<Query<A>>),
369}
370
371/// ORDER BY clause specification.
372///
373/// Defines how query results should be sorted.
374///
375/// # Examples
376///
377/// In `ORDER BY e.timestamp DESC`, this would be represented as:
378/// - `expr`: expression for `e.timestamp`
379/// - `order`: `Order::Desc`
380#[derive(Debug, Clone, Serialize)]
381pub struct OrderBy {
382 /// Expression to sort by
383 pub expr: Expr,
384 /// Sort direction (ascending or descending)
385 pub order: Order,
386}
387
388/// Sort order direction.
389///
390/// Specifies whether sorting is ascending or descending.
391#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
392pub enum Order {
393 /// Ascending order (smallest to largest)
394 Asc,
395 /// Descending order (largest to smallest)
396 Desc,
397}
398
399/// GROUP BY clause specification
400///
401/// Defines how query results should be order by.
402/// # Examples
403///
404/// In `GROUP BY e.age HAVING age > 123`, this would be represented as:
405/// - `expr`: expression for `e.age`
406/// - `predicate`: `age > 123`
407#[derive(Debug, Clone, Serialize)]
408pub struct GroupBy {
409 /// Expression to group by
410 pub expr: Expr,
411
412 /// Predicate to filter groups after aggregation
413 pub predicate: Option<Expr>,
414}
415
416/// Result set limit specification.
417///
418/// EventQL supports two types of limits:
419/// - `TOP n` - Take the first n results
420/// - `SKIP n` - Skip the first n results
421///
422/// # Examples
423///
424/// - `TOP 10` limits to first 10 results
425/// - `SKIP 20` skips first 20 results
426#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
427pub enum Limit {
428 /// Skip the first n results
429 Skip(u64),
430 /// Take only the first n results
431 Top(u64),
432}
433
434/// Represents the state of a query that only has a valid syntax. There are no guarantee that all
435/// the variables exists or that the query is sound. For example, if the user is asking for an event
436/// that has field that should be a string or a number at the same time.
437#[derive(Debug, Clone, Copy, Serialize)]
438pub struct Raw;
439
440/// A complete EventQL query.
441///
442/// This is the root node of the AST, representing a full query with all its clauses.
443/// A query must have at least one source and a projection; other clauses are optional.
444///
445/// # Structure
446///
447/// ```text
448/// FROM <alias> <source>
449/// [FROM <alias> <source>] ...
450/// [WHERE <condition>]
451/// [GROUP BY <field> [HAVING <condition>]]
452/// [ORDER BY <field> ASC|DESC]
453/// [TOP|SKIP <n>]
454/// PROJECT INTO [DISTINCT] <projection>
455/// ```
456///
457/// # Examples
458///
459/// ```
460/// use eventql_parser::parse_query;
461///
462/// let query = parse_query(
463/// "FROM e IN events \
464/// WHERE e.price > 100 \
465/// ORDER BY e.timestamp DESC \
466/// TOP 10 \
467/// PROJECT INTO {id: e.id, price: e.price}"
468/// ).unwrap();
469///
470/// assert_eq!(query.sources.len(), 1);
471/// assert!(query.predicate.is_some());
472/// assert!(query.order_by.is_some());
473/// assert!(query.limit.is_some());
474/// ```
475#[derive(Debug, Clone, Serialize)]
476pub struct Query<A> {
477 /// Metadata about this query
478 pub attrs: Attrs,
479 /// FROM clause sources (must have at least one)
480 pub sources: Vec<Source<A>>,
481 /// Optional WHERE clause filter predicate
482 pub predicate: Option<Expr>,
483 /// Optional GROUP BY clause expression
484 pub group_by: Option<GroupBy>,
485 /// Optional ORDER BY clause
486 pub order_by: Option<OrderBy>,
487 /// Optional LIMIT clause (TOP or SKIP)
488 pub limit: Option<Limit>,
489 /// PROJECT INTO clause expression (required)
490 pub projection: Expr,
491 /// Remove duplicate rows from the query's results
492 pub distinct: bool,
493 /// Type-level metadata about the query's analysis state.
494 ///
495 /// This field uses a generic type parameter to track whether the query
496 /// is in a raw (unparsed/untyped) state or has been statically analyzed:
497 /// - `Query<Raw>`: Query parsed but not yet type-checked
498 /// - `Query<Typed>`: Query that has passed static analysis with validated
499 /// types and variable scopes
500 ///
501 /// This provides compile-time guarantees about the query's type safety.
502 pub meta: A,
503}
504
505impl Query<Raw> {
506 /// Performs static analysis on this raw query.
507 ///
508 /// This is a convenience method that runs type checking and variable scoping
509 /// analysis on the query, converting it from a raw (untyped) query to a
510 /// typed query.
511 ///
512 /// # Arguments
513 ///
514 /// * `options` - Configuration containing type information and default scope
515 ///
516 /// # Returns
517 ///
518 /// Returns a typed query on success, or an error if type checking fails.
519 pub fn run_static_analysis(self, options: &AnalysisOptions) -> crate::Result<Query<Typed>> {
520 static_analysis(options, self).map_err(Error::Analysis)
521 }
522}