Skip to main content

reifydb_engine/expression/
compile.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4use std::{mem::discriminant, slice::from_ref};
5
6use reifydb_core::value::column::{ColumnWithName, buffer::ColumnBuffer, columns::Columns};
7use reifydb_rql::expression::{Expression, name::display_label};
8use reifydb_type::{
9	error::{BinaryOp, Error, IntoDiagnostic, LogicalOp, RuntimeErrorKind, TypeError},
10	fragment::Fragment,
11	value::{Value, r#type::Type},
12};
13
14use super::{
15	context::CompileContext,
16	option::{binary_op_unwrap_option, unary_op_unwrap_option},
17};
18use crate::{
19	Result,
20	error::CastError,
21	expression::{
22		access::access_lookup,
23		arith::{add::add_columns, div::div_columns, mul::mul_columns, rem::rem_columns, sub::sub_columns},
24		call::call_builtin,
25		cast::cast_column_data,
26		compare::{Equal, GreaterThan, GreaterThanEqual, LessThan, LessThanEqual, NotEqual, compare_columns},
27		constant::{constant_value, constant_value_of},
28		context::EvalContext,
29		logic::{execute_logical_op, try_short_circuit_and, try_short_circuit_or},
30		lookup::column_lookup,
31		parameter::parameter_lookup,
32		prefix::prefix_apply,
33	},
34	vm::stack::Variable,
35};
36
37type SingleExprFn = Box<dyn Fn(&EvalContext) -> Result<ColumnWithName> + Send + Sync>;
38type MultiExprFn = Box<dyn Fn(&EvalContext) -> Result<Vec<ColumnWithName>> + Send + Sync>;
39
40pub struct CompiledExpr {
41	inner: CompiledExprInner,
42	access_column_name: Option<String>,
43}
44
45enum CompiledExprInner {
46	Single(SingleExprFn),
47	Multi(MultiExprFn),
48}
49
50impl CompiledExpr {
51	pub fn new(f: impl Fn(&EvalContext) -> Result<ColumnWithName> + Send + Sync + 'static) -> Self {
52		Self {
53			inner: CompiledExprInner::Single(Box::new(f)),
54			access_column_name: None,
55		}
56	}
57
58	pub fn new_multi(f: impl Fn(&EvalContext) -> Result<Vec<ColumnWithName>> + Send + Sync + 'static) -> Self {
59		Self {
60			inner: CompiledExprInner::Multi(Box::new(f)),
61			access_column_name: None,
62		}
63	}
64
65	pub fn new_access(
66		name: String,
67		f: impl Fn(&EvalContext) -> Result<ColumnWithName> + Send + Sync + 'static,
68	) -> Self {
69		Self {
70			inner: CompiledExprInner::Single(Box::new(f)),
71			access_column_name: Some(name),
72		}
73	}
74
75	pub fn access_column_name(&self) -> Option<&str> {
76		self.access_column_name.as_deref()
77	}
78
79	pub fn execute(&self, ctx: &EvalContext) -> Result<ColumnWithName> {
80		match &self.inner {
81			CompiledExprInner::Single(f) => f(ctx),
82			CompiledExprInner::Multi(f) => {
83				let columns = f(ctx)?;
84				Ok(columns.into_iter().next().unwrap_or_else(|| ColumnWithName {
85					name: Fragment::internal("none"),
86					data: ColumnBuffer::with_capacity(Type::Option(Box::new(Type::Boolean)), 0),
87				}))
88			}
89		}
90	}
91
92	pub fn execute_multi(&self, ctx: &EvalContext) -> Result<Vec<ColumnWithName>> {
93		match &self.inner {
94			CompiledExprInner::Single(f) => Ok(vec![f(ctx)?]),
95			CompiledExprInner::Multi(f) => f(ctx),
96		}
97	}
98}
99
// Builds the `CompiledExpr` for a binary arithmetic node.
//
// Arguments: the compile context, the parent expression (used only for the output label),
// the binary node exposing `left` / `right` / `full_fragment_owned()`, and the column kernel
// to apply (`add_columns`, `sub_columns`, ...). Both operands are compiled up front; at
// evaluation time they are executed left-to-right, passed to the kernel, and the resulting
// column is renamed to the parent's display label.
macro_rules! compile_arith {
	($compile_ctx:expr, $node:expr, $binary:expr, $kernel:path) => {{
		let lhs = compile_expression($compile_ctx, &$binary.left)?;
		let rhs = compile_expression($compile_ctx, &$binary.right)?;
		let span = $binary.full_fragment_owned();
		let output_name = display_label($node);
		CompiledExpr::new(move |eval_ctx| {
			let lhs_col = lhs.execute(eval_ctx)?;
			let rhs_col = rhs.execute(eval_ctx)?;
			let mut output = $kernel(eval_ctx, &lhs_col, &rhs_col, || span.clone())?;
			output.name = output_name.clone();
			Ok(output)
		})
	}};
}
115
// Builds the `CompiledExpr` for a binary comparison node.
//
// Arguments: the compile context, the parent expression (used only for the output label),
// the binary node exposing `left` / `right` / `full_fragment_owned()`, the comparison marker
// type handed to `compare_columns`, and the `BinaryOp` reported when the operand types
// cannot be compared. The resulting column is renamed to the parent's display label.
macro_rules! compile_compare {
	($compile_ctx:expr, $node:expr, $binary:expr, $comparison:ty, $operator:expr) => {{
		let lhs = compile_expression($compile_ctx, &$binary.left)?;
		let rhs = compile_expression($compile_ctx, &$binary.right)?;
		let span = $binary.full_fragment_owned();
		let output_name = display_label($node);
		CompiledExpr::new(move |eval_ctx| {
			let lhs_col = lhs.execute(eval_ctx)?;
			let rhs_col = rhs.execute(eval_ctx)?;
			let mut output = compare_columns::<$comparison>(
				&lhs_col,
				&rhs_col,
				span.clone(),
				|at, left_type, right_type| {
					TypeError::BinaryOperatorNotApplicable {
						operator: $operator,
						left: left_type,
						right: right_type,
						fragment: at,
					}
					.into_diagnostic()
				},
			)?;
			output.name = output_name.clone();
			Ok(output)
		})
	}};
}
139
140pub fn compile_expression(_ctx: &CompileContext, expr: &Expression) -> Result<CompiledExpr> {
141	Ok(match expr {
142		Expression::Constant(e) => {
143			let constant = e.clone();
144			let label = display_label(expr);
145			CompiledExpr::new(move |ctx| {
146				let row_count = ctx.take.unwrap_or(ctx.row_count);
147				Ok(ColumnWithName {
148					name: label.clone(),
149					data: constant_value(&constant, row_count)?,
150				})
151			})
152		}
153
154		Expression::Column(e) => {
155			let expr = e.clone();
156			CompiledExpr::new(move |ctx| column_lookup(ctx, &expr))
157		}
158
159		Expression::Variable(e) => {
160			let expr = e.clone();
161			CompiledExpr::new(move |ctx| {
162				let variable_name = expr.name();
163
164				if variable_name == "env" {
165					return Err(TypeError::Runtime {
166						kind: RuntimeErrorKind::VariableIsDataframe {
167							name: variable_name.to_string(),
168						},
169						message: format!(
170							"Variable '{}' contains a dataframe and cannot be used directly in scalar expressions",
171							variable_name
172						),
173					}
174					.into());
175				}
176
177				match ctx.symbols.get(variable_name) {
178					Some(Variable::Columns {
179						columns,
180					}) if columns.is_scalar() => {
181						let value = columns.scalar_value();
182						let mut data =
183							ColumnBuffer::with_capacity(value.get_type(), ctx.row_count);
184						for _ in 0..ctx.row_count {
185							data.push_value(value.clone());
186						}
187						Ok(ColumnWithName {
188							name: Fragment::internal(variable_name),
189							data,
190						})
191					}
192					Some(Variable::Columns {
193						..
194					})
195					| Some(Variable::ForIterator {
196						..
197					})
198					| Some(Variable::Closure(_)) => Err(TypeError::Runtime {
199						kind: RuntimeErrorKind::VariableIsDataframe {
200							name: variable_name.to_string(),
201						},
202						message: format!(
203							"Variable '{}' contains a dataframe and cannot be used directly in scalar expressions",
204							variable_name
205						),
206					}
207					.into()),
208					None => {
209						if let Some(value) = ctx.params.get_named(variable_name) {
210							let mut data = ColumnBuffer::with_capacity(
211								value.get_type(),
212								ctx.row_count,
213							);
214							for _ in 0..ctx.row_count {
215								data.push_value(value.clone());
216							}
217							return Ok(ColumnWithName {
218								name: Fragment::internal(variable_name),
219								data,
220							});
221						}
222						Err(TypeError::Runtime {
223							kind: RuntimeErrorKind::VariableNotFound {
224								name: variable_name.to_string(),
225							},
226							message: format!("Variable '{}' is not defined", variable_name),
227						}
228						.into())
229					}
230				}
231			})
232		}
233
234		Expression::Parameter(e) => {
235			let expr = e.clone();
236			CompiledExpr::new(move |ctx| parameter_lookup(ctx, &expr))
237		}
238
239		Expression::Alias(e) => {
240			let inner = compile_expression(_ctx, &e.expression)?;
241			let alias = e.alias.0.clone();
242			CompiledExpr::new(move |ctx| {
243				let mut column = inner.execute(ctx)?;
244				column.name = alias.clone();
245				Ok(column)
246			})
247		}
248
249		Expression::Add(e) => compile_arith!(_ctx, expr, e, add_columns),
250		Expression::Sub(e) => compile_arith!(_ctx, expr, e, sub_columns),
251		Expression::Mul(e) => compile_arith!(_ctx, expr, e, mul_columns),
252		Expression::Div(e) => compile_arith!(_ctx, expr, e, div_columns),
253		Expression::Rem(e) => compile_arith!(_ctx, expr, e, rem_columns),
254
255		Expression::Equal(e) => compile_compare!(_ctx, expr, e, Equal, BinaryOp::Equal),
256		Expression::NotEqual(e) => compile_compare!(_ctx, expr, e, NotEqual, BinaryOp::NotEqual),
257		Expression::GreaterThan(e) => compile_compare!(_ctx, expr, e, GreaterThan, BinaryOp::GreaterThan),
258		Expression::GreaterThanEqual(e) => {
259			compile_compare!(_ctx, expr, e, GreaterThanEqual, BinaryOp::GreaterThanEqual)
260		}
261		Expression::LessThan(e) => compile_compare!(_ctx, expr, e, LessThan, BinaryOp::LessThan),
262		Expression::LessThanEqual(e) => compile_compare!(_ctx, expr, e, LessThanEqual, BinaryOp::LessThanEqual),
263
264		Expression::And(e) => {
265			let left = compile_expression(_ctx, &e.left)?;
266			let right = compile_expression(_ctx, &e.right)?;
267			let fragment = e.full_fragment_owned();
268			let label = display_label(expr);
269			CompiledExpr::new(move |ctx| {
270				let l = left.execute(ctx)?;
271				if let Some(mut short) = try_short_circuit_and(&l, &fragment, l.data().len()) {
272					short.name = label.clone();
273					return Ok(short);
274				}
275				let r = right.execute(ctx)?;
276				let mut col = execute_logical_op(&l, &r, &fragment, LogicalOp::And, |a, b| a && b)?;
277				col.name = label.clone();
278				Ok(col)
279			})
280		}
281
282		Expression::Or(e) => {
283			let left = compile_expression(_ctx, &e.left)?;
284			let right = compile_expression(_ctx, &e.right)?;
285			let fragment = e.full_fragment_owned();
286			let label = display_label(expr);
287			CompiledExpr::new(move |ctx| {
288				let l = left.execute(ctx)?;
289				if let Some(mut short) = try_short_circuit_or(&l, &fragment, l.data().len()) {
290					short.name = label.clone();
291					return Ok(short);
292				}
293				let r = right.execute(ctx)?;
294				let mut col = execute_logical_op(&l, &r, &fragment, LogicalOp::Or, |a, b| a || b)?;
295				col.name = label.clone();
296				Ok(col)
297			})
298		}
299
300		Expression::Xor(e) => {
301			let left = compile_expression(_ctx, &e.left)?;
302			let right = compile_expression(_ctx, &e.right)?;
303			let fragment = e.full_fragment_owned();
304			let label = display_label(expr);
305			CompiledExpr::new(move |ctx| {
306				let l = left.execute(ctx)?;
307				let r = right.execute(ctx)?;
308				let mut col = execute_logical_op(&l, &r, &fragment, LogicalOp::Xor, |a, b| a != b)?;
309				col.name = label.clone();
310				Ok(col)
311			})
312		}
313
314		Expression::Prefix(e) => {
315			let inner = compile_expression(_ctx, &e.expression)?;
316			let operator = e.operator.clone();
317			let fragment = e.full_fragment_owned();
318			let label = display_label(expr);
319			CompiledExpr::new(move |ctx| {
320				let column = inner.execute(ctx)?;
321				let mut col = prefix_apply(&column, &operator, &fragment)?;
322				col.name = label.clone();
323				Ok(col)
324			})
325		}
326
327		Expression::Type(e) => {
328			let ty = e.ty.clone();
329			let fragment = e.fragment.clone();
330			CompiledExpr::new(move |ctx| {
331				let row_count = ctx.take.unwrap_or(ctx.row_count);
332				let values: Vec<Box<Value>> =
333					(0..row_count).map(|_| Box::new(Value::Type(ty.clone()))).collect();
334				Ok(ColumnWithName::new(fragment.text(), ColumnBuffer::any(values)))
335			})
336		}
337
338		Expression::AccessSource(e) => {
339			let col_name = e.column.name.text().to_string();
340			let expr = e.clone();
341			CompiledExpr::new_access(col_name, move |ctx| access_lookup(ctx, &expr))
342		}
343
344		Expression::Tuple(e) => {
345			if e.expressions.len() == 1 {
346				let inner = compile_expression(_ctx, &e.expressions[0])?;
347				CompiledExpr::new(move |ctx| inner.execute(ctx))
348			} else {
349				let compiled: Vec<CompiledExpr> = e
350					.expressions
351					.iter()
352					.map(|expr| compile_expression(_ctx, expr))
353					.collect::<Result<Vec<_>>>()?;
354				let fragment = e.fragment.clone();
355				CompiledExpr::new(move |ctx| {
356					let columns: Vec<ColumnWithName> = compiled
357						.iter()
358						.map(|expr| expr.execute(ctx))
359						.collect::<Result<Vec<_>>>()?;
360
361					let len = columns.first().map_or(1, |c| c.data().len());
362					let mut data: Vec<Box<Value>> = Vec::with_capacity(len);
363
364					for i in 0..len {
365						let items: Vec<Value> =
366							columns.iter().map(|col| col.data().get_value(i)).collect();
367						data.push(Box::new(Value::Tuple(items)));
368					}
369
370					Ok(ColumnWithName::new(fragment.clone(), ColumnBuffer::any(data)))
371				})
372			}
373		}
374
375		Expression::List(e) => {
376			let compiled: Vec<CompiledExpr> = e
377				.expressions
378				.iter()
379				.map(|expr| compile_expression(_ctx, expr))
380				.collect::<Result<Vec<_>>>()?;
381			let fragment = e.fragment.clone();
382			CompiledExpr::new(move |ctx| {
383				let columns: Vec<ColumnWithName> =
384					compiled.iter().map(|expr| expr.execute(ctx)).collect::<Result<Vec<_>>>()?;
385
386				let len = columns.first().map_or(1, |c| c.data().len());
387				let mut data: Vec<Box<Value>> = Vec::with_capacity(len);
388
389				for i in 0..len {
390					let items: Vec<Value> =
391						columns.iter().map(|col| col.data().get_value(i)).collect();
392					data.push(Box::new(Value::List(items)));
393				}
394
395				Ok(ColumnWithName::new(fragment.clone(), ColumnBuffer::any(data)))
396			})
397		}
398
399		Expression::Between(e) => {
400			let value = compile_expression(_ctx, &e.value)?;
401			let lower = compile_expression(_ctx, &e.lower)?;
402			let upper = compile_expression(_ctx, &e.upper)?;
403			let fragment = e.fragment.clone();
404			CompiledExpr::new(move |ctx| {
405				let value_col = value.execute(ctx)?;
406				let lower_col = lower.execute(ctx)?;
407				let upper_col = upper.execute(ctx)?;
408
409				let ge_result = compare_columns::<GreaterThanEqual>(
410					&value_col,
411					&lower_col,
412					fragment.clone(),
413					|f, l, r| {
414						TypeError::BinaryOperatorNotApplicable {
415							operator: BinaryOp::Between,
416							left: l,
417							right: r,
418							fragment: f,
419						}
420						.into_diagnostic()
421					},
422				)?;
423				let le_result = compare_columns::<LessThanEqual>(
424					&value_col,
425					&upper_col,
426					fragment.clone(),
427					|f, l, r| {
428						TypeError::BinaryOperatorNotApplicable {
429							operator: BinaryOp::Between,
430							left: l,
431							right: r,
432							fragment: f,
433						}
434						.into_diagnostic()
435					},
436				)?;
437
438				if !matches!(ge_result.data(), ColumnBuffer::Bool(_))
439					|| !matches!(le_result.data(), ColumnBuffer::Bool(_))
440				{
441					return Err(TypeError::BinaryOperatorNotApplicable {
442						operator: BinaryOp::Between,
443						left: value_col.get_type(),
444						right: lower_col.get_type(),
445						fragment: fragment.clone(),
446					}
447					.into());
448				}
449
450				match (ge_result.data(), le_result.data()) {
451					(ColumnBuffer::Bool(ge_container), ColumnBuffer::Bool(le_container)) => {
452						let mut data = Vec::with_capacity(ge_container.len());
453						let mut bitvec = Vec::with_capacity(ge_container.len());
454
455						for i in 0..ge_container.len() {
456							if ge_container.is_defined(i) && le_container.is_defined(i) {
457								data.push(ge_container.data().get(i)
458									&& le_container.data().get(i));
459								bitvec.push(true);
460							} else {
461								data.push(false);
462								bitvec.push(false);
463							}
464						}
465
466						Ok(ColumnWithName {
467							name: fragment.clone(),
468							data: ColumnBuffer::bool_with_bitvec(data, bitvec),
469						})
470					}
471					_ => unreachable!(
472						"Both comparison results should be boolean after the check above"
473					),
474				}
475			})
476		}
477
478		Expression::In(e) => {
479			let list_expressions = match e.list.as_ref() {
480				Expression::Tuple(tuple) => &tuple.expressions,
481				Expression::List(list) => &list.expressions,
482				_ => from_ref(e.list.as_ref()),
483			};
484			let value = compile_expression(_ctx, &e.value)?;
485			let list: Vec<CompiledExpr> = list_expressions
486				.iter()
487				.map(|expr| compile_expression(_ctx, expr))
488				.collect::<Result<Vec<_>>>()?;
489			let negated = e.negated;
490			let fragment = e.fragment.clone();
491			CompiledExpr::new(move |ctx| {
492				if list.is_empty() {
493					let value_col = value.execute(ctx)?;
494					let len = value_col.data().len();
495					let result = vec![negated; len];
496					return Ok(ColumnWithName::new(fragment.clone(), ColumnBuffer::bool(result)));
497				}
498
499				let value_col = value.execute(ctx)?;
500
501				let first_col = list[0].execute(ctx)?;
502				let mut result = compare_columns::<Equal>(
503					&value_col,
504					&first_col,
505					fragment.clone(),
506					|f, l, r| {
507						TypeError::BinaryOperatorNotApplicable {
508							operator: BinaryOp::Equal,
509							left: l,
510							right: r,
511							fragment: f,
512						}
513						.into_diagnostic()
514					},
515				)?;
516
517				for list_expr in list.iter().skip(1) {
518					let list_col = list_expr.execute(ctx)?;
519					let eq_result = compare_columns::<Equal>(
520						&value_col,
521						&list_col,
522						fragment.clone(),
523						|f, l, r| {
524							TypeError::BinaryOperatorNotApplicable {
525								operator: BinaryOp::Equal,
526								left: l,
527								right: r,
528								fragment: f,
529							}
530							.into_diagnostic()
531						},
532					)?;
533					result = combine_bool_columns(result, eq_result, fragment.clone(), |l, r| {
534						l || r
535					})?;
536				}
537
538				if negated {
539					result = negate_column(result, fragment.clone());
540				}
541
542				Ok(result)
543			})
544		}
545
546		Expression::Contains(e) => {
547			let list_expressions = match e.list.as_ref() {
548				Expression::Tuple(tuple) => &tuple.expressions,
549				Expression::List(list) => &list.expressions,
550				_ => from_ref(e.list.as_ref()),
551			};
552			let value = compile_expression(_ctx, &e.value)?;
553			let list: Vec<CompiledExpr> = list_expressions
554				.iter()
555				.map(|expr| compile_expression(_ctx, expr))
556				.collect::<Result<Vec<_>>>()?;
557			let fragment = e.fragment.clone();
558			CompiledExpr::new(move |ctx| {
559				let value_col = value.execute(ctx)?;
560
561				if list.is_empty() {
562					let len = value_col.data().len();
563					let result = vec![true; len];
564					return Ok(ColumnWithName::new(fragment.clone(), ColumnBuffer::bool(result)));
565				}
566
567				let first_col = list[0].execute(ctx)?;
568				let mut result = list_contains_element(&value_col, &first_col, &fragment)?;
569
570				for list_expr in list.iter().skip(1) {
571					let list_col = list_expr.execute(ctx)?;
572					let element_result = list_contains_element(&value_col, &list_col, &fragment)?;
573					result = combine_bool_columns(
574						result,
575						element_result,
576						fragment.clone(),
577						|l, r| l && r,
578					)?;
579				}
580
581				Ok(result)
582			})
583		}
584
585		Expression::Cast(e) => {
586			let label = display_label(expr);
587			if let Expression::Constant(const_expr) = e.expression.as_ref() {
588				let const_expr = const_expr.clone();
589				let target_type = e.to.ty.clone();
590				CompiledExpr::new(move |ctx| {
591					let row_count = ctx.take.unwrap_or(ctx.row_count);
592					let data = constant_value(&const_expr, row_count)?;
593					let casted = if data.get_type() == target_type {
594						data
595					} else {
596						constant_value_of(&const_expr, target_type.clone(), row_count)?
597					};
598					Ok(ColumnWithName::new(label.clone(), casted))
599				})
600			} else {
601				let inner = compile_expression(_ctx, &e.expression)?;
602				let target_type = e.to.ty.clone();
603				let inner_fragment = e.expression.full_fragment_owned();
604				CompiledExpr::new(move |ctx| {
605					let column = inner.execute(ctx)?;
606					let frag = inner_fragment.clone();
607					let casted = cast_column_data(ctx, column.data(), target_type.clone(), &|| {
608						inner_fragment.clone()
609					})
610					.map_err(|e| {
611						Error::from(CastError::InvalidNumber {
612							fragment: frag,
613							target: target_type.clone(),
614							cause: e.diagnostic(),
615						})
616					})?;
617					Ok(ColumnWithName::new(label.clone(), casted))
618				})
619			}
620		}
621
622		Expression::If(e) => {
623			let condition = compile_expression(_ctx, &e.condition)?;
624			let then_expr = compile_expressions(_ctx, from_ref(e.then_expr.as_ref()))?;
625			let else_ifs: Vec<(CompiledExpr, Vec<CompiledExpr>)> = e
626				.else_ifs
627				.iter()
628				.map(|ei| {
629					Ok((
630						compile_expression(_ctx, &ei.condition)?,
631						compile_expressions(_ctx, from_ref(ei.then_expr.as_ref()))?,
632					))
633				})
634				.collect::<Result<Vec<_>>>()?;
635			let else_branch: Option<Vec<CompiledExpr>> = match &e.else_expr {
636				Some(expr) => Some(compile_expressions(_ctx, from_ref(expr.as_ref()))?),
637				None => None,
638			};
639			let fragment = e.fragment.clone();
640			CompiledExpr::new_multi(move |ctx| {
641				execute_if_multi(ctx, &condition, &then_expr, &else_ifs, &else_branch, &fragment)
642			})
643		}
644
645		Expression::Map(e) => {
646			let expressions = compile_expressions(_ctx, &e.expressions)?;
647			CompiledExpr::new_multi(move |ctx| execute_projection_multi(ctx, &expressions))
648		}
649
650		Expression::Extend(e) => {
651			let expressions = compile_expressions(_ctx, &e.expressions)?;
652			CompiledExpr::new_multi(move |ctx| execute_projection_multi(ctx, &expressions))
653		}
654
655		Expression::Call(e) => {
656			let compiled_args: Vec<CompiledExpr> =
657				e.args.iter().map(|arg| compile_expression(_ctx, arg)).collect::<Result<Vec<_>>>()?;
658			let expr = e.clone();
659			CompiledExpr::new(move |ctx| {
660				let mut arg_columns = Vec::with_capacity(compiled_args.len());
661				for compiled_arg in &compiled_args {
662					arg_columns.push(compiled_arg.execute(ctx)?);
663				}
664				let arguments = Columns::new(arg_columns);
665				call_builtin(ctx, &expr, arguments)
666			})
667		}
668
669		Expression::SumTypeConstructor(_) => {
670			panic!(
671				"SumTypeConstructor in expression context — constructors should be expanded by InlineDataNode before expression compilation"
672			);
673		}
674
675		Expression::IsVariant(e) => {
676			let col_name = match e.expression.as_ref() {
677				Expression::Column(c) => c.0.name.text().to_string(),
678				other => display_label(other).text().to_string(),
679			};
680			let tag_col_name = format!("{}_tag", col_name);
681			let tag = e.tag.expect("IS variant tag must be resolved before compilation");
682			let fragment = e.fragment.clone();
683			CompiledExpr::new(move |ctx| {
684				if let Some(tag_col) =
685					ctx.columns.iter().find(|c| c.name().text() == tag_col_name.as_str())
686				{
687					match tag_col.data() {
688						ColumnBuffer::Uint1(container) => {
689							let results: Vec<bool> = container
690								.iter()
691								.take(ctx.row_count)
692								.map(|v| v == Some(tag))
693								.collect();
694							Ok(ColumnWithName::new(
695								fragment.clone(),
696								ColumnBuffer::bool(results),
697							))
698						}
699						_ => Ok(ColumnWithName {
700							name: fragment.clone(),
701							data: ColumnBuffer::none_typed(Type::Boolean, ctx.row_count),
702						}),
703					}
704				} else {
705					Ok(ColumnWithName {
706						name: fragment.clone(),
707						data: ColumnBuffer::none_typed(Type::Boolean, ctx.row_count),
708					})
709				}
710			})
711		}
712
713		Expression::FieldAccess(e) => {
714			let field_name = e.field.text().to_string();
715
716			let var_name = match e.object.as_ref() {
717				Expression::Variable(var_expr) => Some(var_expr.name().to_string()),
718				_ => None,
719			};
720			let object = compile_expression(_ctx, &e.object)?;
721			CompiledExpr::new(move |ctx| {
722				if let Some(ref variable_name) = var_name {
723					match ctx.symbols.get(variable_name) {
724						Some(Variable::Columns {
725							columns,
726						}) if !columns.is_scalar() => {
727							let col_pos = columns
728								.names
729								.iter()
730								.position(|n| n.text() == field_name);
731							match col_pos {
732								Some(pos) => {
733									let value = columns.columns[pos].get_value(0);
734									let row_count =
735										ctx.take.unwrap_or(ctx.row_count);
736									let mut data = ColumnBuffer::with_capacity(
737										value.get_type(),
738										row_count,
739									);
740									for _ in 0..row_count {
741										data.push_value(value.clone());
742									}
743									Ok(ColumnWithName {
744										name: Fragment::internal(&field_name),
745										data,
746									})
747								}
748								None => {
749									let available: Vec<String> = columns
750										.names
751										.iter()
752										.map(|n| n.text().to_string())
753										.collect();
754									Err(TypeError::Runtime {
755										kind: RuntimeErrorKind::FieldNotFound {
756											variable: variable_name
757												.to_string(),
758											field: field_name.to_string(),
759											available,
760										},
761										message: format!(
762											"Field '{}' not found on variable '{}'",
763											field_name, variable_name
764										),
765									}
766									.into())
767								}
768							}
769						}
770						Some(Variable::Columns {
771							..
772						})
773						| Some(Variable::Closure(_)) => Err(TypeError::Runtime {
774							kind: RuntimeErrorKind::FieldNotFound {
775								variable: variable_name.to_string(),
776								field: field_name.to_string(),
777								available: vec![],
778							},
779							message: format!(
780								"Field '{}' not found on variable '{}'",
781								field_name, variable_name
782							),
783						}
784						.into()),
785						Some(Variable::ForIterator {
786							..
787						}) => Err(TypeError::Runtime {
788							kind: RuntimeErrorKind::VariableIsDataframe {
789								name: variable_name.to_string(),
790							},
791							message: format!(
792								"Variable '{}' contains a dataframe and cannot be used directly in scalar expressions",
793								variable_name
794							),
795						}
796						.into()),
797						None => Err(TypeError::Runtime {
798							kind: RuntimeErrorKind::VariableNotFound {
799								name: variable_name.to_string(),
800							},
801							message: format!("Variable '{}' is not defined", variable_name),
802						}
803						.into()),
804					}
805				} else {
806					let _obj_col = object.execute(ctx)?;
807					Err(TypeError::Runtime {
808						kind: RuntimeErrorKind::FieldNotFound {
809							variable: "<expression>".to_string(),
810							field: field_name.to_string(),
811							available: vec![],
812						},
813						message: format!(
814							"Field '{}' not found on variable '<expression>'",
815							field_name
816						),
817					}
818					.into())
819				}
820			})
821		}
822	})
823}
824
825fn compile_expressions(ctx: &CompileContext, exprs: &[Expression]) -> Result<Vec<CompiledExpr>> {
826	exprs.iter().map(|e| compile_expression(ctx, e)).collect()
827}
828
829fn combine_bool_columns(
830	left: ColumnWithName,
831	right: ColumnWithName,
832	fragment: Fragment,
833	combine_fn: fn(bool, bool) -> bool,
834) -> Result<ColumnWithName> {
835	binary_op_unwrap_option(&left, &right, fragment.clone(), |left, right| match (left.data(), right.data()) {
836		(ColumnBuffer::Bool(l), ColumnBuffer::Bool(r)) => {
837			let len = l.len();
838			let mut data = Vec::with_capacity(len);
839			let mut bitvec = Vec::with_capacity(len);
840
841			for i in 0..len {
842				let l_defined = l.is_defined(i);
843				let r_defined = r.is_defined(i);
844				let l_val = l.data().get(i);
845				let r_val = r.data().get(i);
846
847				if l_defined && r_defined {
848					data.push(combine_fn(l_val, r_val));
849					bitvec.push(true);
850				} else {
851					data.push(false);
852					bitvec.push(false);
853				}
854			}
855
856			Ok(ColumnWithName {
857				name: fragment.clone(),
858				data: ColumnBuffer::bool_with_bitvec(data, bitvec),
859			})
860		}
861		_ => {
862			unreachable!("combine_bool_columns should only be called with boolean columns")
863		}
864	})
865}
866
867fn list_items_contain(items: &[Value], element: &Value, fragment: &Fragment) -> bool {
868	if items.iter().any(|item| item == element) {
869		return true;
870	}
871	if items.is_empty() {
872		return false;
873	}
874
875	if let Some(items_buf) = build_homogeneous_buffer(items) {
876		let elems_buf = ColumnBuffer::from_many(element.clone(), items.len());
877		let items_col = ColumnWithName::new(fragment.clone(), items_buf);
878		let elems_col = ColumnWithName::new(fragment.clone(), elems_buf);
879		return compare_columns::<Equal>(&items_col, &elems_col, fragment.clone(), |f, l, r| {
880			TypeError::BinaryOperatorNotApplicable {
881				operator: BinaryOp::Equal,
882				left: l,
883				right: r,
884				fragment: f,
885			}
886			.into_diagnostic()
887		})
888		.map(|c| bool_column_has_true(&c))
889		.unwrap_or(false);
890	}
891
892	list_items_contain_per_item(items, element, fragment)
893}
894
895fn list_items_contain_per_item(items: &[Value], element: &Value, fragment: &Fragment) -> bool {
896	items.iter().any(|item| {
897		let item_col = ColumnWithName::new(fragment.clone(), ColumnBuffer::from(item.clone()));
898		let elem_col = ColumnWithName::new(fragment.clone(), ColumnBuffer::from(element.clone()));
899		compare_columns::<Equal>(&item_col, &elem_col, fragment.clone(), |f, l, r| {
900			TypeError::BinaryOperatorNotApplicable {
901				operator: BinaryOp::Equal,
902				left: l,
903				right: r,
904				fragment: f,
905			}
906			.into_diagnostic()
907		})
908		.ok()
909		.and_then(|c| match c.data() {
910			ColumnBuffer::Bool(b) => Some(b.data().get(0)),
911			_ => None,
912		})
913		.unwrap_or(false)
914	})
915}
916
917fn bool_column_has_true(col: &ColumnWithName) -> bool {
918	match col.data() {
919		ColumnBuffer::Bool(b) => b.data().any(),
920		ColumnBuffer::Option {
921			inner,
922			bitvec,
923		} => match inner.as_ref() {
924			ColumnBuffer::Bool(b) => {
925				let n = bitvec.len().min(b.len());
926				(0..n).any(|i| bitvec.get(i) && b.data().get(i))
927			}
928			_ => false,
929		},
930		_ => false,
931	}
932}
933
934fn build_homogeneous_buffer(items: &[Value]) -> Option<ColumnBuffer> {
935	let first = items.first()?;
936	let first_disc = discriminant(first);
937	if !items.iter().all(|v| discriminant(v) == first_disc) {
938		return None;
939	}
940
941	macro_rules! collect {
942		($variant:ident, $constructor:ident, |$x:ident| $convert:expr) => {{
943			let data: Vec<_> = items
944				.iter()
945				.map(|v| match v {
946					Value::$variant($x) => $convert,
947					_ => unreachable!("homogeneous check guarantees variant"),
948				})
949				.collect();
950			Some(ColumnBuffer::$constructor(data))
951		}};
952	}
953
954	match first {
955		Value::Boolean(_) => collect!(Boolean, bool, |x| *x),
956		Value::Float4(_) => collect!(Float4, float4, |x| x.value()),
957		Value::Float8(_) => collect!(Float8, float8, |x| x.value()),
958		Value::Int1(_) => collect!(Int1, int1, |x| *x),
959		Value::Int2(_) => collect!(Int2, int2, |x| *x),
960		Value::Int4(_) => collect!(Int4, int4, |x| *x),
961		Value::Int8(_) => collect!(Int8, int8, |x| *x),
962		Value::Int16(_) => collect!(Int16, int16, |x| *x),
963		Value::Uint1(_) => collect!(Uint1, uint1, |x| *x),
964		Value::Uint2(_) => collect!(Uint2, uint2, |x| *x),
965		Value::Uint4(_) => collect!(Uint4, uint4, |x| *x),
966		Value::Uint8(_) => collect!(Uint8, uint8, |x| *x),
967		Value::Uint16(_) => collect!(Uint16, uint16, |x| *x),
968		Value::Utf8(_) => collect!(Utf8, utf8, |x| x.clone()),
969		Value::Date(_) => collect!(Date, date, |x| *x),
970		Value::DateTime(_) => collect!(DateTime, datetime, |x| *x),
971		Value::Time(_) => collect!(Time, time, |x| *x),
972		Value::Duration(_) => collect!(Duration, duration, |x| *x),
973		Value::Uuid4(_) => collect!(Uuid4, uuid4, |x| *x),
974		Value::Uuid7(_) => collect!(Uuid7, uuid7, |x| *x),
975		Value::IdentityId(_) => collect!(IdentityId, identity_id, |x| *x),
976		Value::Blob(_) => collect!(Blob, blob, |x| x.clone()),
977		Value::Int(_) => collect!(Int, int, |x| x.clone()),
978		Value::Uint(_) => collect!(Uint, uint, |x| x.clone()),
979		Value::Decimal(_) => collect!(Decimal, decimal, |x| x.clone()),
980		Value::DictionaryId(_) => collect!(DictionaryId, dictionary_id, |x| *x),
981
982		_ => None,
983	}
984}
985
986fn list_contains_element(
987	list_col: &ColumnWithName,
988	element_col: &ColumnWithName,
989	fragment: &Fragment,
990) -> Result<ColumnWithName> {
991	let len = list_col.data().len();
992	let mut data = Vec::with_capacity(len);
993
994	for i in 0..len {
995		let list_value = list_col.data().get_value(i);
996		let element_value = element_col.data().get_value(i);
997
998		let contained = match &list_value {
999			Value::List(items) => list_items_contain(items, &element_value, fragment),
1000			Value::Tuple(items) => list_items_contain(items, &element_value, fragment),
1001			Value::Any(boxed) => match boxed.as_ref() {
1002				Value::List(items) => list_items_contain(items, &element_value, fragment),
1003				Value::Tuple(items) => list_items_contain(items, &element_value, fragment),
1004				_ => false,
1005			},
1006			_ => false,
1007		};
1008		data.push(contained);
1009	}
1010
1011	Ok(ColumnWithName::new(fragment.clone(), ColumnBuffer::bool(data)))
1012}
1013
1014fn negate_column(col: ColumnWithName, fragment: Fragment) -> ColumnWithName {
1015	unary_op_unwrap_option(&col, |col| match col.data() {
1016		ColumnBuffer::Bool(container) => {
1017			let len = container.len();
1018			let mut data = Vec::with_capacity(len);
1019			let mut bitvec = Vec::with_capacity(len);
1020
1021			for i in 0..len {
1022				if container.is_defined(i) {
1023					data.push(!container.data().get(i));
1024					bitvec.push(true);
1025				} else {
1026					data.push(false);
1027					bitvec.push(false);
1028				}
1029			}
1030
1031			Ok(ColumnWithName {
1032				name: fragment.clone(),
1033				data: ColumnBuffer::bool_with_bitvec(data, bitvec),
1034			})
1035		}
1036		_ => unreachable!("negate_column should only be called with boolean columns"),
1037	})
1038	.unwrap()
1039}
1040
1041fn is_truthy(value: &Value) -> bool {
1042	match value {
1043		Value::Boolean(true) => true,
1044		Value::Boolean(false) => false,
1045		Value::None {
1046			..
1047		} => false,
1048		Value::Int1(0) | Value::Int2(0) | Value::Int4(0) | Value::Int8(0) | Value::Int16(0) => false,
1049		Value::Uint1(0) | Value::Uint2(0) | Value::Uint4(0) | Value::Uint8(0) | Value::Uint16(0) => false,
1050		Value::Int1(_) | Value::Int2(_) | Value::Int4(_) | Value::Int8(_) | Value::Int16(_) => true,
1051		Value::Uint1(_) | Value::Uint2(_) | Value::Uint4(_) | Value::Uint8(_) | Value::Uint16(_) => true,
1052		Value::Utf8(s) => !s.is_empty(),
1053		_ => true,
1054	}
1055}
1056
1057fn execute_if_multi(
1058	ctx: &EvalContext,
1059	condition: &CompiledExpr,
1060	then_expr: &[CompiledExpr],
1061	else_ifs: &[(CompiledExpr, Vec<CompiledExpr>)],
1062	else_branch: &Option<Vec<CompiledExpr>>,
1063	_fragment: &Fragment,
1064) -> Result<Vec<ColumnWithName>> {
1065	let condition_column = condition.execute(ctx)?;
1066
1067	let mut result_data: Option<Vec<ColumnBuffer>> = None;
1068	let mut result_names: Vec<Fragment> = Vec::new();
1069
1070	for row_idx in 0..ctx.row_count {
1071		let condition_value = condition_column.data().get_value(row_idx);
1072
1073		let branch_results = if is_truthy(&condition_value) {
1074			execute_multi_exprs(ctx, then_expr)?
1075		} else {
1076			let mut found_branch = false;
1077			let mut branch_columns = None;
1078
1079			for (else_if_condition, else_if_then) in else_ifs {
1080				let else_if_col = else_if_condition.execute(ctx)?;
1081				let else_if_value = else_if_col.data().get_value(row_idx);
1082
1083				if is_truthy(&else_if_value) {
1084					branch_columns = Some(execute_multi_exprs(ctx, else_if_then)?);
1085					found_branch = true;
1086					break;
1087				}
1088			}
1089
1090			if found_branch {
1091				branch_columns.unwrap()
1092			} else if let Some(else_exprs) = else_branch {
1093				execute_multi_exprs(ctx, else_exprs)?
1094			} else {
1095				vec![]
1096			}
1097		};
1098
1099		let is_empty_result = branch_results.is_empty();
1100		if is_empty_result {
1101			if let Some(data) = result_data.as_mut() {
1102				for col_data in data.iter_mut() {
1103					col_data.push_value(Value::none());
1104				}
1105			}
1106			continue;
1107		}
1108
1109		if result_data.is_none() {
1110			let mut data: Vec<ColumnBuffer> = branch_results
1111				.iter()
1112				.map(|col| ColumnBuffer::with_capacity(col.data().get_type(), ctx.row_count))
1113				.collect();
1114			for _ in 0..row_idx {
1115				for col_data in data.iter_mut() {
1116					col_data.push_value(Value::none());
1117				}
1118			}
1119			result_data = Some(data);
1120			result_names = branch_results.iter().map(|col| col.name.clone()).collect();
1121		}
1122
1123		let data = result_data.as_mut().unwrap();
1124		for (i, branch_col) in branch_results.iter().enumerate() {
1125			if i < data.len() {
1126				let branch_value = branch_col.data().get_value(row_idx);
1127				data[i].push_value(branch_value);
1128			}
1129		}
1130	}
1131
1132	let result_data = result_data.unwrap_or_default();
1133	let result: Vec<ColumnWithName> = result_data
1134		.into_iter()
1135		.enumerate()
1136		.map(|(i, data)| ColumnWithName {
1137			name: result_names.get(i).cloned().unwrap_or_else(|| Fragment::internal("column")),
1138			data,
1139		})
1140		.collect();
1141
1142	if result.is_empty() {
1143		Ok(vec![ColumnWithName {
1144			name: Fragment::internal("none"),
1145			data: ColumnBuffer::none_typed(Type::Boolean, ctx.row_count),
1146		}])
1147	} else {
1148		Ok(result)
1149	}
1150}
1151
1152fn execute_multi_exprs(ctx: &EvalContext, exprs: &[CompiledExpr]) -> Result<Vec<ColumnWithName>> {
1153	let mut result = Vec::new();
1154	for expr in exprs {
1155		result.extend(expr.execute_multi(ctx)?);
1156	}
1157	Ok(result)
1158}
1159
1160fn execute_projection_multi(ctx: &EvalContext, expressions: &[CompiledExpr]) -> Result<Vec<ColumnWithName>> {
1161	let mut result = Vec::with_capacity(expressions.len());
1162
1163	for expr in expressions {
1164		let column = expr.execute(ctx)?;
1165		let name = column.name.text().to_string();
1166		result.push(ColumnWithName::new(Fragment::internal(name), column.data));
1167	}
1168
1169	Ok(result)
1170}