Skip to main content

eventql_parser/
lib.rs

1//! EventQL parser library for parsing event sourcing query language.
2//!
3//! This library provides a complete lexer and parser for EventQL (EQL), a query language
4//! designed for event sourcing systems. It allows you to parse EQL query strings into
5//! an abstract syntax tree (AST) that can be analyzed or executed.
6pub mod arena;
7mod ast;
8mod error;
9mod lexer;
10mod parser;
11#[cfg(test)]
12mod tests;
13mod token;
14mod typing;
15
16use crate::arena::Arena;
17use crate::lexer::tokenize;
18use crate::prelude::{
19    Analysis, AnalysisOptions, FunArgs, Typed, display_type, name_to_type, parse,
20};
21use crate::token::Token;
22pub use ast::*;
23use rustc_hash::FxHashMap;
24pub use typing::Type;
25use unicase::Ascii;
26
/// Convenience module that re-exports the library's public API.
///
/// Importing `eventql_parser::prelude::*` brings into scope everything exported by the
/// `arena`, `ast`, `error`, `parser`, `token`, and `typing` modules, as well as the
/// contents of `typing::analysis`. The `lexer` module is not re-exported here.
pub mod prelude {
    pub use super::arena::*;
    pub use super::ast::*;
    pub use super::error::*;
    pub use super::parser::*;
    pub use super::token::*;
    pub use super::typing::analysis::*;
    pub use super::typing::*;
}
40
/// Builder for function argument specifications.
///
/// Allows defining function signatures with both required and optional parameters.
/// When `required` equals the length of `args`, all parameters are required.
pub struct FunArgsBuilder<'a> {
    /// Parameter types of the function being described.
    args: &'a [Type],
    /// Number of parameters a caller must supply; copied into `FunArgs::needed` when the
    /// function is declared on a [`SessionBuilder`].
    required: usize,
}
49
50impl<'a> FunArgsBuilder<'a> {
51    /// Creates a new `FunArgsBuilder` with the given argument types and required count.
52    pub fn new(args: &'a [Type], required: usize) -> Self {
53        Self { args, required }
54    }
55}
56
57impl<'a> From<&'a [Type]> for FunArgsBuilder<'a> {
58    fn from(args: &'a [Type]) -> Self {
59        Self {
60            args,
61            required: args.len(),
62        }
63    }
64}
65
66impl<'a, const N: usize> From<&'a [Type; N]> for FunArgsBuilder<'a> {
67    fn from(value: &'a [Type; N]) -> Self {
68        Self {
69            args: value.as_slice(),
70            required: value.len(),
71        }
72    }
73}
74
/// Builder for configuring event type information on a [`SessionBuilder`].
///
/// Obtained by calling [`SessionBuilder::declare_event_type`]. Use [`record`](EventTypeBuilder::record)
/// to define a record-shaped event type or [`custom`](EventTypeBuilder::custom) for a named custom type.
pub struct EventTypeBuilder {
    /// The session builder this event type is being declared on; it is handed back
    /// to the caller once the event type has been defined.
    parent: SessionBuilder,
}
82
83impl EventTypeBuilder {
84    /// Starts building a record-shaped event type with named fields.
85    pub fn record(self) -> EventTypeRecordBuilder {
86        EventTypeRecordBuilder {
87            inner: self,
88            props: Default::default(),
89        }
90    }
91
92    /// Declares a custom (non-record) event type by name.
93    pub fn custom(self, _name: &str) -> SessionBuilder {
94        todo!("deal with custom type later")
95    }
96}
97
/// Builder for defining the fields of a record-shaped event type.
///
/// Obtained by calling [`EventTypeBuilder::record`]. Add fields with [`prop`](EventTypeRecordBuilder::prop)
/// and finalize with [`build`](EventTypeRecordBuilder::build) to return to the [`SessionBuilder`].
pub struct EventTypeRecordBuilder {
    /// The event type builder this record definition belongs to; gives access to the
    /// parent session builder and its arena.
    inner: EventTypeBuilder,
    /// Fields declared so far, keyed by their arena-allocated name.
    props: FxHashMap<StrRef, Type>,
}
106
107impl EventTypeRecordBuilder {
108    /// Conditionally adds a field to the event record type.
109    pub fn prop_when(mut self, test: bool, name: &str, tpe: Type) -> Self {
110        if test {
111            self.props
112                .insert(self.inner.parent.arena.strings.alloc(name), tpe);
113        }
114
115        self
116    }
117
118    /// Adds a field with the given name and type to the event record type.
119    pub fn prop(mut self, name: &str, tpe: Type) -> Self {
120        self.props
121            .insert(self.inner.parent.arena.strings.alloc(name), tpe);
122        self
123    }
124
125    /// Conditionally adds a field with a custom type to the event record type.
126    pub fn prop_with_custom_when(mut self, test: bool, name: &str, tpe: &str) -> Self {
127        if test {
128            let tpe = self.inner.parent.arena.strings.alloc(tpe);
129            self.props.insert(
130                self.inner.parent.arena.strings.alloc(name),
131                Type::Custom(tpe),
132            );
133        }
134
135        self
136    }
137
138    /// Finalizes the event record type and returns the [`SessionBuilder`].
139    pub fn build(mut self) -> SessionBuilder {
140        let ptr = self.inner.parent.arena.types.alloc_record(self.props);
141        self.inner.parent.options.event_type_info = Type::Record(ptr);
142        self.inner.parent
143    }
144}
145
/// A specialized `Result` type for EventQL parser operations.
///
/// The error type is fixed to this crate's `error::Error`.
pub type Result<A> = std::result::Result<A, error::Error>;
148
/// `SessionBuilder` is a builder for `Session` objects.
///
/// It allows for the configuration of analysis options, such as declaring
/// functions (both regular and aggregate), event types, and custom types,
/// before building an `EventQL` parsing session.
pub struct SessionBuilder {
    /// Arena holding the strings and types allocated while configuring the session;
    /// it is moved into the built [`Session`].
    arena: Arena,
    /// Analysis options (default scope, event type, custom types) being configured.
    options: AnalysisOptions,
}
158
159impl SessionBuilder {
    /// Declares a new function with the given name, arguments, and return type.
    ///
    /// This function adds a new entry to the session's default scope, allowing
    /// the parser to recognize and type-check calls to this function. It is
    /// equivalent to calling `declare_func_when` with `test` set to `true`.
    ///
    /// # Arguments
    ///
    /// * `name` - The name of the function.
    /// * `args` - The arguments the function accepts, which can be converted into a `FunArgsBuilder`.
    /// * `result` - The return type of the function.
    pub fn declare_func<'a>(
        self,
        name: &'a str,
        args: impl Into<FunArgsBuilder<'a>>,
        result: Type,
    ) -> Self {
        self.declare_func_when(true, name, args, result)
    }
178
179    /// Conditionally declares a new function with the given name, arguments, and return type.
180    ///
181    /// This function behaves like `declare_func` but only declares the function
182    /// if the `test` argument is `true`. This is useful for conditionally
183    /// including functions based on configuration or features.
184    ///
185    /// # Arguments
186    ///
187    /// * `test` - A boolean indicating whether to declare the function.
188    /// * `name` - The name of the function.
189    /// * `args` - The arguments the function accepts, which can be converted into `FunArgs`.
190    /// * `result` - The return type of the function.
191    pub fn declare_func_when<'a>(
192        mut self,
193        test: bool,
194        name: &'a str,
195        args: impl Into<FunArgsBuilder<'a>>,
196        result: Type,
197    ) -> Self {
198        if test {
199            let builder = args.into();
200            let name = self.arena.strings.alloc_no_case(name);
201            let args = self.arena.types.alloc_args(builder.args);
202
203            self.options.default_scope.declare(
204                name,
205                Type::App {
206                    args: FunArgs {
207                        values: args,
208                        needed: builder.required,
209                    },
210                    result: self.arena.types.register_type(result),
211                    aggregate: false,
212                },
213            );
214        }
215
216        self
217    }
218
    /// Declares a new aggregate function with the given name, arguments, and return type.
    ///
    /// Similar to `declare_func`, but marks the function as an aggregate function.
    /// Aggregate functions have specific rules for where they can be used in an EQL query.
    /// It is equivalent to calling `declare_agg_func_when` with `test` set to `true`.
    ///
    /// # Arguments
    ///
    /// * `name` - The name of the aggregate function.
    /// * `args` - The arguments the aggregate function accepts, which can be converted into a `FunArgsBuilder`.
    /// * `result` - The return type of the aggregate function.
    pub fn declare_agg_func<'a>(
        self,
        name: &'a str,
        args: impl Into<FunArgsBuilder<'a>>,
        result: Type,
    ) -> Self {
        self.declare_agg_func_when(true, name, args, result)
    }
237
238    /// Conditionally declares a new aggregate function.
239    ///
240    /// Behaves like `declare_agg_func` but only declares the function
241    /// if the `test` argument is `true`.
242    ///
243    /// # Arguments
244    ///
245    /// * `test` - A boolean indicating whether to declare the aggregate function.
246    /// * `name` - The name of the aggregate function.
247    /// * `args` - The arguments the aggregate function accepts.
248    /// * `result` - The return type of the aggregate function.
249    pub fn declare_agg_func_when<'a>(
250        mut self,
251        test: bool,
252        name: &'a str,
253        args: impl Into<FunArgsBuilder<'a>>,
254        result: Type,
255    ) -> Self {
256        if test {
257            let builder = args.into();
258            let name = self.arena.strings.alloc_no_case(name);
259            let args = self.arena.types.alloc_args(builder.args);
260
261            self.options.default_scope.declare(
262                name,
263                Type::App {
264                    args: FunArgs {
265                        values: args,
266                        needed: builder.required,
267                    },
268                    result: self.arena.types.register_type(result),
269                    aggregate: true,
270                },
271            );
272        }
273
274        self
275    }
276
277    /// Conditionally declares the expected type of event records.
278    ///
279    /// This type information is crucial for type-checking event properties
280    /// accessed in EQL queries (e.g., `e.id`, `e.data.value`).
281    /// The declaration only happens if `test` is `true`.
282    ///
283    /// # Arguments
284    ///
285    /// * `test` - A boolean indicating whether to declare the event type.
286    /// * `tpe` - The `Type` representing the structure of event records.
287    pub fn declare_event_type_when(mut self, test: bool, tpe: Type) -> Self {
288        if test {
289            self.options.event_type_info = tpe;
290        }
291
292        self
293    }
294
    /// Starts declaring the expected type of event records.
    ///
    /// This type information is crucial for type-checking event properties
    /// accessed in EQL queries (e.g., `e.id`, `e.data.value`).
    ///
    /// This method takes no type argument. It consumes the `SessionBuilder` and
    /// returns an [`EventTypeBuilder`] that is used to describe the event type and
    /// that hands the `SessionBuilder` back once the description is complete.
    pub fn declare_event_type(self) -> EventTypeBuilder {
        EventTypeBuilder { parent: self }
    }
306
307    /// Conditionally declares a custom type that can be used in EQL queries.
308    ///
309    /// This allows the type-checker to recognize and validate custom types
310    /// that might be used in type conversions or record definitions.
311    /// The declaration only happens if `test` is `true`.
312    ///
313    /// # Arguments
314    ///
315    /// * `test` - A boolean indicating whether to declare the custom type.
316    /// * `name` - The name of the custom type.
317    pub fn declare_custom_type_when(mut self, test: bool, name: &str) -> Self {
318        if test {
319            self.options
320                .custom_types
321                .insert(Ascii::new(name.to_owned()));
322        }
323
324        self
325    }
326
327    /// Declares a custom type that can be used in EQL queries.
328    ///
329    /// This allows the type-checker to recognize and validate custom types
330    /// that might be used in type conversions or record definitions.
331    ///
332    /// # Arguments
333    ///
334    /// * `name` - The name of the custom type.
335    pub fn declare_custom_type(mut self, name: &str) -> Self {
336        self.options
337            .custom_types
338            .insert(Ascii::new(name.to_owned()));
339        self
340    }
341
    /// Includes the standard library of functions and event types in the session.
    ///
    /// This method pre-configures the `SessionBuilder` with a set of commonly
    /// used functions (e.g., mathematical, string, date/time) and a default
    /// event type definition. Calling this method is equivalent to calling
    /// `declare_func` and `declare_agg_func` for all standard library functions,
    /// and `declare_event_type` for the default event structure.
    ///
    /// Because the event type is set last, any event type declared before this
    /// call is replaced by the default record.
    pub fn use_stdlib(self) -> Self {
        // Numeric functions.
        self.declare_func("abs", &[Type::Number], Type::Number)
            .declare_func("ceil", &[Type::Number], Type::Number)
            .declare_func("floor", &[Type::Number], Type::Number)
            .declare_func("round", &[Type::Number], Type::Number)
            .declare_func("cos", &[Type::Number], Type::Number)
            .declare_func("exp", &[Type::Number], Type::Number)
            .declare_func("pow", &[Type::Number, Type::Number], Type::Number)
            .declare_func("sqrt", &[Type::Number], Type::Number)
            .declare_func("rand", &[], Type::Number)
            // NOTE(review): `pi` is declared with one `Number` parameter while `rand`
            // takes none — confirm this arity is intended.
            .declare_func("pi", &[Type::Number], Type::Number)
            // String functions.
            .declare_func("lower", &[Type::String], Type::String)
            .declare_func("upper", &[Type::String], Type::String)
            .declare_func("trim", &[Type::String], Type::String)
            .declare_func("ltrim", &[Type::String], Type::String)
            .declare_func("rtrim", &[Type::String], Type::String)
            .declare_func("len", &[Type::String], Type::Number)
            // NOTE(review): `instr` is declared with a single `String` parameter —
            // confirm whether a second (search string) parameter is missing.
            .declare_func("instr", &[Type::String], Type::Number)
            .declare_func(
                "substring",
                &[Type::String, Type::Number, Type::Number],
                Type::String,
            )
            .declare_func(
                "replace",
                &[Type::String, Type::String, Type::String],
                Type::String,
            )
            .declare_func("startswith", &[Type::String, Type::String], Type::Bool)
            .declare_func("endswith", &[Type::String, Type::String], Type::Bool)
            // Date and time functions.
            .declare_func("now", &[], Type::DateTime)
            .declare_func("year", &[Type::Date], Type::Number)
            .declare_func("month", &[Type::Date], Type::Number)
            .declare_func("day", &[Type::Date], Type::Number)
            .declare_func("hour", &[Type::Time], Type::Number)
            .declare_func("minute", &[Type::Time], Type::Number)
            .declare_func("second", &[Type::Time], Type::Number)
            .declare_func("weekday", &[Type::Date], Type::Number)
            // Conditional.
            .declare_func(
                "IF",
                &[Type::Bool, Type::Unspecified, Type::Unspecified],
                Type::Unspecified,
            )
            // Aggregate functions. `count` declares one `Bool` parameter, but
            // `required: 0` makes that parameter optional.
            .declare_agg_func(
                "count",
                FunArgsBuilder {
                    args: &[Type::Bool],
                    required: 0,
                },
                Type::Number,
            )
            .declare_agg_func("sum", &[Type::Number], Type::Number)
            .declare_agg_func("avg", &[Type::Number], Type::Number)
            .declare_agg_func("min", &[Type::Number], Type::Number)
            .declare_agg_func("max", &[Type::Number], Type::Number)
            .declare_agg_func("median", &[Type::Number], Type::Number)
            .declare_agg_func("stddev", &[Type::Number], Type::Number)
            .declare_agg_func("variance", &[Type::Number], Type::Number)
            .declare_agg_func("unique", &[Type::Unspecified], Type::Unspecified)
            // Default event record shape.
            .declare_event_type()
            .record()
            .prop("specversion", Type::String)
            .prop("id", Type::String)
            .prop("time", Type::DateTime)
            .prop("source", Type::String)
            .prop("subject", Type::Subject)
            .prop("type", Type::String)
            .prop("datacontenttype", Type::String)
            .prop("data", Type::Unspecified)
            .prop("predecessorhash", Type::String)
            .prop("hash", Type::String)
            .prop("traceparent", Type::String)
            .prop("tracestate", Type::String)
            .prop("signature", Type::String)
            .build()
    }
425
426    /// Builds the `Session` object with the configured analysis options.
427    ///
428    /// This consumes the `SessionBuilder` and returns a `Session` instance
429    /// ready for tokenizing, parsing, and analyzing EventQL queries.
430    pub fn build(mut self) -> Session {
431        self.arena.types.freeze();
432
433        Session {
434            arena: self.arena,
435            options: self.options,
436        }
437    }
438}
439
440impl Default for SessionBuilder {
441    fn default() -> Self {
442        Self {
443            arena: Default::default(),
444            options: AnalysisOptions::empty(),
445        }
446    }
447}
448
/// `Session` is the main entry point for parsing and analyzing EventQL queries.
///
/// It holds the necessary context, such as the expression arena and analysis options,
/// to perform lexical analysis, parsing, and static analysis of EQL query strings.
pub struct Session {
    /// Arena used by parsing and analysis; also holds what was allocated while the
    /// session was being configured.
    arena: Arena,
    /// Analysis options configured through the [`SessionBuilder`].
    options: AnalysisOptions,
}
457
458impl Session {
459    /// Creates a new `SessionBuilder` for configuring and building a `Session`.
460    ///
461    /// This is the recommended way to create a `Session` instance, allowing
462    /// for customization of functions, event types, and custom types.
463    ///
464    /// # Returns
465    ///
466    /// A new `SessionBuilder` instance.
467    pub fn builder() -> SessionBuilder {
468        SessionBuilder::default()
469    }
470
471    /// Tokenize an EventQL query string.
472    ///
473    /// This function performs lexical analysis on the input string, converting it
474    /// into a sequence of tokens. Each token includes position information (line
475    /// and column numbers) for error reporting.
476    /// # Recognized Tokens
477    ///
478    /// - **Identifiers**: Alphanumeric names starting with a letter (e.g., `events`, `e`)
479    /// - **Keywords**: Case-insensitive SQL-like keywords detected by the parser
480    /// - **Numbers**: Floating-point literals (e.g., `42`, `3.14`)
481    /// - **Strings**: Double-quoted string literals (e.g., `"hello"`)
482    /// - **Operators**: Arithmetic (`+`, `-`, `*`, `/`), comparison (`==`, `!=`, `<`, `<=`, `>`, `>=`), logical (`AND`, `OR`, `XOR`, `NOT`)
483    /// - **Symbols**: Structural characters (`(`, `)`, `[`, `]`, `{`, `}`, `.`, `,`, `:`)
484    pub fn tokenize<'a>(&self, input: &'a str) -> Result<Vec<Token<'a>>> {
485        let tokens = tokenize(input)?;
486        Ok(tokens)
487    }
488
489    /// Parse an EventQL query string into an abstract syntax tree.
490    ///
491    /// This is the main entry point for parsing EventQL queries. It performs both
492    /// lexical analysis (tokenization) and syntactic analysis (parsing) in a single call.
493    /// # Examples
494    ///
495    /// ```
496    /// use eventql_parser::Session;
497    ///
498    /// // Parse a simple query
499    /// let mut session = Session::builder().use_stdlib().build();
500    /// let query = session.parse("FROM e IN events WHERE e.id == \"1\" PROJECT INTO e").unwrap();
501    /// assert!(query.predicate.is_some());
502    /// ```
503    pub fn parse(&mut self, input: &str) -> Result<Query<Raw>> {
504        let tokens = self.tokenize(input)?;
505        Ok(parse(&mut self.arena, tokens.as_slice())?)
506    }
507
508    /// Performs static analysis on an EventQL query.
509    ///
510    /// This function takes a raw (untyped) query and performs type checking and
511    /// variable scoping analysis. It validates that:
512    /// - All variables are properly declared
513    /// - Types match expected types in expressions and operations
514    /// - Field accesses are valid for their record types
515    /// - Function calls have the correct argument types
516    /// - Aggregate functions are only used in PROJECT INTO clauses
517    /// - Aggregate functions are not mixed with source-bound fields in projections
518    /// - Aggregate function arguments are source-bound fields (not constants or function results)
519    /// - Record literals are non-empty in projection contexts
520    ///
521    /// # Arguments
522    ///
523    /// * `options` - Configuration containing type information and default scope
524    /// * `query` - The raw query to analyze
525    ///
526    /// # Returns
527    ///
528    /// Returns a typed query on success, or an `AnalysisError` if type checking fails.
529    pub fn run_static_analysis(&mut self, query: Query<Raw>) -> Result<Query<Typed>> {
530        let mut analysis = self.analysis();
531        Ok(analysis.analyze_query(query)?)
532    }
533
534    /// Converts a type name string to its corresponding [`Type`] variant.
535    ///
536    /// This function performs case-insensitive matching for built-in type names and checks
537    /// against custom types defined in the analysis options.
538    ///
539    /// # Returns
540    ///
541    /// * `Some(Type)` - If the name matches a built-in type or custom type
542    /// * `None` - If the name doesn't match any known type
543    ///
544    /// # Built-in Type Mappings
545    ///
546    /// The following type names are recognized (case-insensitive):
547    /// - `"string"` → [`Type::String`]
548    /// - `"int"` or `"float64"` → [`Type::Number`]
549    /// - `"boolean"` → [`Type::Bool`]
550    /// - `"date"` → [`Type::Date`]
551    /// - `"time"` → [`Type::Time`]
552    /// - `"datetime"` → [`Type::DateTime`]
553    pub fn get_type_from_name(&mut self, name: &str) -> Option<Type> {
554        let str_ref = self.arena.strings.alloc(name);
555        name_to_type(&self.arena, &self.options, str_ref)
556    }
557
558    /// Provides human-readable string formatting for types.
559    ///
560    /// Function types display optional parameters with a `?` suffix. For example,
561    /// a function with signature `(boolean, number?) -> string` accepts 1 or 2 arguments.
562    /// Aggregate functions use `=>` instead of `->` in their signature.
563    pub fn display_type(&self, tpe: &Type) -> String {
564        display_type(&self.arena, *tpe)
565    }
566
    /// Creates an [`Analysis`] instance for fine-grained control over static analysis.
    ///
    /// Use this when you need to analyze individual expressions or manage scopes manually,
    /// rather than using [`run_static_analysis`](Session::run_static_analysis) for whole queries.
    ///
    /// The returned value mutably borrows the session's arena and reads its analysis
    /// options, so the session cannot be used otherwise while it is alive.
    pub fn analysis(&mut self) -> Analysis<'_> {
        Analysis::new(&mut self.arena, &self.options)
    }
574
    /// Returns a shared reference to the underlying [`Arena`] owned by this session.
    pub fn arena(&self) -> &Arena {
        &self.arena
    }
579
    /// Returns a mutable reference to the underlying [`Arena`] owned by this session.
    pub fn arena_mut(&mut self) -> &mut Arena {
        &mut self.arena
    }
584}