Skip to main content

eventql_parser/
arena.rs

1use crate::typing::{ArgsRef, Record, Type, TypeRef};
2use crate::{Attrs, ExprKey, ExprPtr, ExprRef, Field, RecRef, StrRef, Value, VecRef};
3use rustc_hash::{FxBuildHasher, FxHashMap};
4use serde::Serialize;
5use std::collections::hash_map::Entry;
6use std::hash::BuildHasher;
7use std::ops::Range;
8use unicase::Ascii;
9
10/// An arena-based allocator for interning strings.
11///
12/// Deduplicates strings by hash and returns lightweight [`StrRef`] handles for O(1) lookups.
13#[derive(Default, Serialize)]
14pub(crate) struct StringArena {
15    #[serde(skip_serializing)]
16    hasher: FxBuildHasher,
17
18    cache: FxHashMap<u64, StrRef>,
19    slots: Vec<String>,
20}
21
22impl StringArena {
23    /// Interns a string and returns its [`StrRef`]. Returns the existing reference if already interned.
24    pub fn alloc(&mut self, value: &str) -> StrRef {
25        match self.cache.entry(self.hasher.hash_one(value)) {
26            Entry::Occupied(entry) => *entry.get(),
27            Entry::Vacant(entry) => {
28                let key = StrRef(self.slots.len());
29                entry.insert(key);
30                self.slots.push(value.to_owned());
31
32                key
33            }
34        }
35    }
36
37    /// If that value has already been interned, it returns the string pointer in the arena.
38    pub fn str_ref(&self, value: &str) -> Option<StrRef> {
39        self.cache.get(&self.hasher.hash_one(value)).copied()
40    }
41
42    /// Like ['str_ref'] but case-insensitive
43    pub fn str_ref_no_case(&self, value: &str) -> Option<StrRef> {
44        self.cache
45            .get(&self.hasher.hash_one(Ascii::new(value)))
46            .copied()
47    }
48
49    /// Interns a string using case-insensitive hashing.
50    ///
51    /// Two strings that differ only in ASCII case will resolve to the same [`StrRef`].
52    /// The original casing of the first insertion is preserved.
53    pub fn alloc_no_case(&mut self, value: &str) -> StrRef {
54        match self.cache.entry(self.hasher.hash_one(Ascii::new(value))) {
55            Entry::Occupied(entry) => *entry.get(),
56            Entry::Vacant(entry) => {
57                let key = StrRef(self.slots.len());
58                entry.insert(key);
59                self.slots.push(value.to_owned());
60
61                key
62            }
63        }
64    }
65
66    /// Retrieves the string associated with the given [`StrRef`].
67    pub fn get(&self, key: StrRef) -> &str {
68        &self.slots[key.0]
69    }
70
71    /// Compares two interned strings for case-insensitive ASCII equality.
72    pub fn eq_ignore_ascii_case(&self, ka: StrRef, kb: StrRef) -> bool {
73        self.get(ka).eq_ignore_ascii_case(self.get(kb))
74    }
75}
76
77/// An expression node stored in the [`ExprArena`].
78///
79/// Combines the expression's metadata ([`Attrs`]) with its actual content ([`Value`]).
80#[derive(Debug, Clone, Copy, Serialize)]
81pub struct Expr {
82    /// Metadata including source position.
83    pub attrs: Attrs,
84    /// The kind and content of this expression.
85    pub value: Value,
86}
87
88/// An arena-based allocator for EventQL expressions.
89///
90/// The `ExprArena` provides a memory-efficient way to store and manage AST nodes
91/// by using a flat vector and returning lightweight [`ExprRef`] handles.
92#[derive(Default, Serialize)]
93pub(crate) struct ExprArena {
94    #[serde(skip_serializing)]
95    hasher: FxBuildHasher,
96    exprs: Vec<Expr>,
97    vecs: Vec<Vec<ExprRef>>,
98    recs: Vec<Vec<Field>>,
99}
100
101impl ExprArena {
102    /// Allocates a new expression in the arena.
103    ///
104    /// This method takes an expression's attributes and value, hashes the value
105    /// to create a stable [`ExprKey`], and stores it in the arena. It returns
106    /// an [`ExprRef`] which can be used to retrieve the expression later.
107    pub fn alloc(&mut self, attrs: Attrs, value: Value) -> ExprRef {
108        let key = ExprKey(self.hasher.hash_one(value));
109
110        let ptr = ExprPtr(self.exprs.len());
111        self.exprs.push(Expr { attrs, value });
112
113        ExprRef { key, ptr }
114    }
115
116    /// Retrieves a node from the arena using an [`ExprRef`].
117    ///
118    /// # Panics
119    ///
120    /// Panics if the [`ExprRef`] contains an invalid pointer that is out of bounds
121    /// of the arena's internal storage.
122    pub fn get(&self, node_ref: ExprRef) -> Expr {
123        self.exprs[node_ref.ptr.0]
124    }
125
126    /// Allocates a vector of expression references and returns a [`VecRef`] handle.
127    pub fn alloc_vec(&mut self, values: Vec<ExprRef>) -> VecRef {
128        let key = VecRef(self.vecs.len());
129        self.vecs.push(values);
130
131        key
132    }
133
134    /// Allocates a vector of record fields and returns a [`RecRef`] handle.
135    pub fn alloc_rec(&mut self, values: Vec<Field>) -> RecRef {
136        let key = RecRef(self.recs.len());
137        self.recs.push(values);
138
139        key
140    }
141
142    /// Returns the slice of expression references for the given [`VecRef`].
143    pub fn vec(&self, ptr: VecRef) -> &[ExprRef] {
144        &self.vecs[ptr.0]
145    }
146
147    /// Returns the expression reference at index `idx` within the given [`VecRef`].
148    pub fn vec_get(&self, ptr: VecRef, idx: usize) -> ExprRef {
149        self.vecs[ptr.0][idx]
150    }
151
152    /// Returns an iterator over valid indices for the given [`VecRef`].
153    pub fn vec_idxes(&self, ptr: VecRef) -> Range<usize> {
154        0..self.vec(ptr).len()
155    }
156
157    /// Returns the vector of fields for the given [`RecRef`].
158    pub fn rec(&self, ptr: RecRef) -> &[Field] {
159        self.recs[ptr.0].as_slice()
160    }
161
162    /// Returns the field at index `idx` within the given [`RecRef`].
163    pub fn rec_get(&self, ptr: RecRef, idx: usize) -> Field {
164        self.recs[ptr.0][idx]
165    }
166
167    /// Returns an iterator over valid indices for the given [`RecRef`].
168    pub fn rec_idxes(&self, ptr: RecRef) -> Range<usize> {
169        0..self.rec(ptr).len()
170    }
171}
172
173/// An arena-based allocator for type information.
174///
175/// Stores and deduplicates types, record definitions, and function argument lists.
176/// Supports freezing to mark a baseline and freeing types allocated after the baseline.
177#[derive(Default, Serialize)]
178pub(crate) struct TypeArena {
179    #[serde(skip_serializing)]
180    args_hasher: FxBuildHasher,
181
182    type_offset: usize,
183    rec_offset: usize,
184
185    dedup_types: FxHashMap<Type, TypeRef>,
186    dedup_args: FxHashMap<u64, ArgsRef>,
187    types: Vec<Type>,
188    pub(crate) records: Vec<FxHashMap<StrRef, Type>>,
189    pub(crate) args: Vec<Vec<Type>>,
190}
191
192impl TypeArena {
193    /// Marks the current allocation state as the baseline.
194    ///
195    /// Subsequent calls to [`free_space`](TypeArena::free_space) will deallocate
196    /// only types and records allocated after this point.
197    pub fn freeze(&mut self) {
198        self.rec_offset = self.records.len();
199        self.type_offset = self.types.len();
200    }
201
202    /// Frees types and records allocated after the last [`freeze`](TypeArena::freeze) call.
203    pub fn free_space(&mut self) {
204        for tpe in self.types.drain(self.type_offset..) {
205            self.dedup_types.remove(&tpe);
206        }
207
208        for _ in self.records.drain(self.rec_offset..) {}
209    }
210
211    /// Registers a type and returns a deduplicated [`TypeRef`]. Returns the existing reference if already registered.
212    pub fn register_type(&mut self, tpe: Type) -> TypeRef {
213        match self.dedup_types.entry(tpe) {
214            Entry::Occupied(entry) => *entry.get(),
215            Entry::Vacant(entry) => {
216                let key = TypeRef(self.types.len());
217                self.types.push(tpe);
218                entry.insert(key);
219
220                key
221            }
222        }
223    }
224
225    /// Allocates a fresh copy of a type. For records, this clones the record definition.
226    pub fn alloc_type(&mut self, tpe: Type) -> Type {
227        if let Type::Record(rec) = tpe {
228            let key = Record(self.records.len());
229            // TODO: technically, a deep-clone is needed here, where properties that point to
230            // records should also be allocated as well.
231            self.records.push(self.records[rec.0].clone());
232
233            return Type::Record(key);
234        }
235
236        tpe
237    }
238
239    /// Creates an array type containing elements of the given type.
240    pub fn alloc_array_of(&mut self, tpe: Type) -> Type {
241        Type::Array(self.register_type(tpe))
242    }
243
244    /// Allocates a new record type from a map of field names to types.
245    pub fn alloc_record(&mut self, record: FxHashMap<StrRef, Type>) -> Record {
246        let key = Record(self.records.len());
247        self.records.push(record);
248        key
249    }
250
251    /// Allocates a deduplicated list of function argument types and returns an [`ArgsRef`].
252    pub fn alloc_args(&mut self, args: &[Type]) -> ArgsRef {
253        let hash = self.args_hasher.hash_one(args);
254
255        match self.dedup_args.entry(hash) {
256            Entry::Occupied(entry) => *entry.get(),
257            Entry::Vacant(entry) => {
258                let key = ArgsRef(self.args.len());
259                entry.insert(key);
260                self.args.push(args.to_vec());
261
262                key
263            }
264        }
265    }
266
267    /// Retrieves the type for the given [`TypeRef`].
268    pub fn get_type(&self, key: TypeRef) -> Type {
269        self.types[key.0]
270    }
271
272    /// Returns the field map for the given record.
273    pub fn get_record(&self, key: Record) -> &FxHashMap<StrRef, Type> {
274        &self.records[key.0]
275    }
276
277    /// Returns the argument type slice for the given [`ArgsRef`].
278    pub fn get_args(&self, key: ArgsRef) -> &[Type] {
279        self.args[key.0].as_slice()
280    }
281
282    /// Returns an iterator over valid indices for the given [`ArgsRef`].
283    pub fn args_idxes(&self, key: ArgsRef) -> impl Iterator<Item = usize> + use<> {
284        0..self.get_args(key).len()
285    }
286
287    /// Returns the argument type at index `idx` for the given [`ArgsRef`].
288    pub fn args_get(&self, key: ArgsRef, idx: usize) -> Type {
289        self.get_args(key)[idx]
290    }
291
292    /// Returns the type of a field in the given record, or `None` if the field doesn't exist.
293    pub fn record_get(&self, record: Record, field: StrRef) -> Option<Type> {
294        self.records[record.0].get(&field).copied()
295    }
296
297    /// Checks whether two records have the exact same set of field names.
298    pub fn records_have_same_keys(&self, rec_a: Record, rec_b: Record) -> bool {
299        let rec_a = self.get_record(rec_a);
300        let rec_b = self.get_record(rec_b);
301
302        if rec_a.is_empty() && rec_b.is_empty() {
303            return true;
304        }
305
306        if rec_a.len() != rec_b.len() {
307            return false;
308        }
309
310        for bk in rec_b.keys() {
311            if !rec_a.contains_key(bk) {
312                return false;
313            }
314        }
315
316        true
317    }
318
319    /// Creates an empty record type.
320    pub fn instantiate_record(&mut self) -> Record {
321        self.alloc_record(FxHashMap::default())
322    }
323
324    /// Sets the type of a field in the given record, inserting or updating as needed.
325    pub fn record_set(&mut self, record: Record, field: StrRef, value: Type) {
326        self.records[record.0].insert(field, value);
327    }
328
329    /// Returns the number of fields in the given record.
330    pub fn record_len(&self, record: Record) -> usize {
331        self.records[record.0].len()
332    }
333}
334
335/// Top-level arena that holds all memory pools for expressions, strings, and types.
336#[derive(Default, Serialize)]
337pub struct Arena {
338    pub(crate) exprs: ExprArena,
339    pub(crate) strings: StringArena,
340    pub(crate) types: TypeArena,
341}
342
343impl Arena {
344    /// Freezes the type arena to mark the current state as baseline.
345    pub fn freeze(&mut self) {
346        self.types.freeze();
347    }
348
349    /// Frees types allocated after the last freeze, reclaiming memory for reuse.
350    pub fn free_space(&mut self) {
351        self.types.free_space();
352    }
353
354    /// Retrieves the interned string associated with the given [`StrRef`].
355    pub fn get_str(&self, key: StrRef) -> &str {
356        self.strings.get(key)
357    }
358
359    /// If that value has already been interned, it returns the string pointer in the arena.
360    pub fn str_ref(&self, key: &str) -> Option<StrRef> {
361        self.strings.str_ref(key)
362    }
363
364    /// Retrieves the expression node associated with the given [`ExprRef`].
365    pub fn get_expr(&self, key: ExprRef) -> Expr {
366        self.exprs.get(key)
367    }
368
369    /// Retrieves the type associated with the given [`TypeRef`].
370    pub fn get_type(&self, key: TypeRef) -> Type {
371        self.types.get_type(key)
372    }
373
374    /// Returns the slice of expression references for the given [`VecRef`].
375    pub fn get_vec(&self, key: VecRef) -> &[ExprRef] {
376        self.exprs.vec(key)
377    }
378
379    /// Returns the slice of record fields for the given [`RecRef`].
380    pub fn get_rec(&self, key: RecRef) -> &[Field] {
381        self.exprs.rec(key)
382    }
383
384    /// Returns the map of type record fields for the given type [`Record`]
385    pub fn get_type_rec(&self, key: Record) -> &FxHashMap<StrRef, Type> {
386        self.types.get_record(key)
387    }
388
389    /// Returns the function argument types for the given [`ArgsRef`].
390    pub fn get_args(&self, key: ArgsRef) -> &[Type] {
391        self.types.get_args(key)
392    }
393}