vortex_array/expr/exprs.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Factory functions for creating [`Expression`]s from scalar function vtables.
5
6use std::sync::Arc;
7
8use vortex_error::VortexExpect;
9
10use crate::dtype::DType;
11use crate::dtype::FieldName;
12use crate::dtype::FieldNames;
13use crate::dtype::Nullability;
14use crate::expr::Expression;
15use crate::scalar::Scalar;
16use crate::scalar::ScalarValue;
17use crate::scalar_fn::EmptyOptions;
18use crate::scalar_fn::ScalarFnVTableExt;
19use crate::scalar_fn::fns::between::Between;
20use crate::scalar_fn::fns::between::BetweenOptions;
21use crate::scalar_fn::fns::binary::Binary;
22use crate::scalar_fn::fns::cast::Cast;
23use crate::scalar_fn::fns::dynamic::DynamicComparison;
24use crate::scalar_fn::fns::dynamic::DynamicComparisonExpr;
25use crate::scalar_fn::fns::dynamic::Rhs;
26use crate::scalar_fn::fns::fill_null::FillNull;
27use crate::scalar_fn::fns::get_item::GetItem;
28use crate::scalar_fn::fns::is_null::IsNull;
29use crate::scalar_fn::fns::like::Like;
30use crate::scalar_fn::fns::like::LikeOptions;
31use crate::scalar_fn::fns::list_contains::ListContains;
32use crate::scalar_fn::fns::literal::Literal;
33use crate::scalar_fn::fns::mask::Mask;
34use crate::scalar_fn::fns::merge::DuplicateHandling;
35use crate::scalar_fn::fns::merge::Merge;
36use crate::scalar_fn::fns::not::Not;
37use crate::scalar_fn::fns::operators::CompareOperator;
38use crate::scalar_fn::fns::operators::Operator;
39use crate::scalar_fn::fns::pack::Pack;
40use crate::scalar_fn::fns::pack::PackOptions;
41use crate::scalar_fn::fns::root::Root;
42use crate::scalar_fn::fns::select::FieldSelection;
43use crate::scalar_fn::fns::select::Select;
44use crate::scalar_fn::fns::zip::Zip;
45
46// ---- Root ----
47
48/// Creates an expression that references the root scope.
49///
50/// Returns the entire input array as passed to the expression evaluator.
51/// This is commonly used as the starting point for field access and other operations.
52pub fn root() -> Expression {
53 Root.try_new_expr(EmptyOptions, vec![])
54 .vortex_expect("Failed to create Root expression")
55}
56
57/// Return whether the expression is a root expression.
58pub fn is_root(expr: &Expression) -> bool {
59 expr.is::<Root>()
60}
61
62// ---- Literal ----
63
64/// Create a new `Literal` expression from a type that coerces to `Scalar`.
65///
66///
67/// ## Example usage
68///
69/// ```
70/// use vortex_array::arrays::PrimitiveArray;
71/// use vortex_array::dtype::Nullability;
72/// use vortex_array::expr::lit;
73/// use vortex_array::scalar_fn::fns::literal::Literal;
74/// use vortex_array::scalar::Scalar;
75///
76/// let number = lit(34i32);
77///
78/// let scalar = number.as_::<Literal>();
79/// assert_eq!(scalar, &Scalar::primitive(34i32, Nullability::NonNullable));
80/// ```
81pub fn lit(value: impl Into<Scalar>) -> Expression {
82 Literal.new_expr(value.into(), [])
83}
84
85// ---- GetItem / Col ----
86
87/// Creates an expression that accesses a field from the root array.
88///
89/// Equivalent to `get_item(field, root())` - extracts a named field from the input array.
90///
91/// ```rust
92/// # use vortex_array::expr::col;
93/// let expr = col("name");
94/// ```
95pub fn col(field: impl Into<FieldName>) -> Expression {
96 GetItem.new_expr(field.into(), vec![root()])
97}
98
99/// Creates an expression that extracts a named field from a struct expression.
100///
101/// Accesses the specified field from the result of the child expression.
102///
103/// ```rust
104/// # use vortex_array::expr::{get_item, root};
105/// let expr = get_item("user_id", root());
106/// ```
107pub fn get_item(field: impl Into<FieldName>, child: Expression) -> Expression {
108 GetItem.new_expr(field.into(), vec![child])
109}
110
111// ---- Binary operators ----
112
113/// Create a new [`Binary`] using the [`Eq`](Operator::Eq) operator.
114///
115/// ## Example usage
116///
117/// ```
118/// # use vortex_array::arrays::{BoolArray, PrimitiveArray};
119/// # use vortex_array::{Array, IntoArray, ToCanonical};
120/// # use vortex_array::validity::Validity;
121/// # use vortex_buffer::buffer;
122/// # use vortex_array::expr::{eq, root, lit};
123/// let xs = PrimitiveArray::new(buffer![1i32, 2i32, 3i32], Validity::NonNullable);
124/// let result = xs.to_array().apply(&eq(root(), lit(3))).unwrap();
125///
126/// assert_eq!(
127/// result.to_bool().to_bit_buffer(),
128/// BoolArray::from_iter(vec![false, false, true]).to_bit_buffer(),
129/// );
130/// ```
131pub fn eq(lhs: Expression, rhs: Expression) -> Expression {
132 Binary
133 .try_new_expr(Operator::Eq, [lhs, rhs])
134 .vortex_expect("Failed to create Eq binary expression")
135}
136
137/// Create a new [`Binary`] using the [`NotEq`](Operator::NotEq) operator.
138///
139/// ## Example usage
140///
141/// ```
142/// # use vortex_array::arrays::{BoolArray, PrimitiveArray};
143/// # use vortex_array::{Array, IntoArray, ToCanonical};
144/// # use vortex_array::validity::Validity;
145/// # use vortex_buffer::buffer;
146/// # use vortex_array::expr::{root, lit, not_eq};
147/// let xs = PrimitiveArray::new(buffer![1i32, 2i32, 3i32], Validity::NonNullable);
148/// let result = xs.to_array().apply(¬_eq(root(), lit(3))).unwrap();
149///
150/// assert_eq!(
151/// result.to_bool().to_bit_buffer(),
152/// BoolArray::from_iter(vec![true, true, false]).to_bit_buffer(),
153/// );
154/// ```
155pub fn not_eq(lhs: Expression, rhs: Expression) -> Expression {
156 Binary
157 .try_new_expr(Operator::NotEq, [lhs, rhs])
158 .vortex_expect("Failed to create NotEq binary expression")
159}
160
161/// Create a new [`Binary`] using the [`Gte`](Operator::Gte) operator.
162///
163/// ## Example usage
164///
165/// ```
166/// # use vortex_array::arrays::{BoolArray, PrimitiveArray };
167/// # use vortex_array::{Array, IntoArray, ToCanonical};
168/// # use vortex_array::validity::Validity;
169/// # use vortex_buffer::buffer;
170/// # use vortex_array::expr::{gt_eq, root, lit};
171/// let xs = PrimitiveArray::new(buffer![1i32, 2i32, 3i32], Validity::NonNullable);
172/// let result = xs.to_array().apply(>_eq(root(), lit(3))).unwrap();
173///
174/// assert_eq!(
175/// result.to_bool().to_bit_buffer(),
176/// BoolArray::from_iter(vec![false, false, true]).to_bit_buffer(),
177/// );
178/// ```
179pub fn gt_eq(lhs: Expression, rhs: Expression) -> Expression {
180 Binary
181 .try_new_expr(Operator::Gte, [lhs, rhs])
182 .vortex_expect("Failed to create Gte binary expression")
183}
184
185/// Create a new [`Binary`] using the [`Gt`](Operator::Gt) operator.
186///
187/// ## Example usage
188///
189/// ```
190/// # use vortex_array::arrays::{BoolArray, PrimitiveArray };
191/// # use vortex_array::{Array, IntoArray, ToCanonical};
192/// # use vortex_array::validity::Validity;
193/// # use vortex_buffer::buffer;
194/// # use vortex_array::expr::{gt, root, lit};
195/// let xs = PrimitiveArray::new(buffer![1i32, 2i32, 3i32], Validity::NonNullable);
196/// let result = xs.to_array().apply(>(root(), lit(2))).unwrap();
197///
198/// assert_eq!(
199/// result.to_bool().to_bit_buffer(),
200/// BoolArray::from_iter(vec![false, false, true]).to_bit_buffer(),
201/// );
202/// ```
203pub fn gt(lhs: Expression, rhs: Expression) -> Expression {
204 Binary
205 .try_new_expr(Operator::Gt, [lhs, rhs])
206 .vortex_expect("Failed to create Gt binary expression")
207}
208
209/// Create a new [`Binary`] using the [`Lte`](Operator::Lte) operator.
210///
211/// ## Example usage
212///
213/// ```
214/// # use vortex_array::arrays::{BoolArray, PrimitiveArray };
215/// # use vortex_array::{Array, IntoArray, ToCanonical};
216/// # use vortex_array::validity::Validity;
217/// # use vortex_buffer::buffer;
218/// # use vortex_array::expr::{root, lit, lt_eq};
219/// let xs = PrimitiveArray::new(buffer![1i32, 2i32, 3i32], Validity::NonNullable);
220/// let result = xs.to_array().apply(<_eq(root(), lit(2))).unwrap();
221///
222/// assert_eq!(
223/// result.to_bool().to_bit_buffer(),
224/// BoolArray::from_iter(vec![true, true, false]).to_bit_buffer(),
225/// );
226/// ```
227pub fn lt_eq(lhs: Expression, rhs: Expression) -> Expression {
228 Binary
229 .try_new_expr(Operator::Lte, [lhs, rhs])
230 .vortex_expect("Failed to create Lte binary expression")
231}
232
233/// Create a new [`Binary`] using the [`Lt`](Operator::Lt) operator.
234///
235/// ## Example usage
236///
237/// ```
238/// # use vortex_array::arrays::{BoolArray, PrimitiveArray };
239/// # use vortex_array::{Array, IntoArray, ToCanonical};
240/// # use vortex_array::validity::Validity;
241/// # use vortex_buffer::buffer;
242/// # use vortex_array::expr::{root, lit, lt};
243/// let xs = PrimitiveArray::new(buffer![1i32, 2i32, 3i32], Validity::NonNullable);
244/// let result = xs.to_array().apply(<(root(), lit(3))).unwrap();
245///
246/// assert_eq!(
247/// result.to_bool().to_bit_buffer(),
248/// BoolArray::from_iter(vec![true, true, false]).to_bit_buffer(),
249/// );
250/// ```
251pub fn lt(lhs: Expression, rhs: Expression) -> Expression {
252 Binary
253 .try_new_expr(Operator::Lt, [lhs, rhs])
254 .vortex_expect("Failed to create Lt binary expression")
255}
256
257/// Create a new [`Binary`] using the [`Or`](Operator::Or) operator.
258///
259/// ## Example usage
260///
261/// ```
262/// # use vortex_array::arrays::BoolArray;
263/// # use vortex_array::{Array, IntoArray, ToCanonical};
264/// # use vortex_array::expr::{root, lit, or};
265/// let xs = BoolArray::from_iter(vec![true, false, true]);
266/// let result = xs.to_array().apply(&or(root(), lit(false))).unwrap();
267///
268/// assert_eq!(
269/// result.to_bool().to_bit_buffer(),
270/// BoolArray::from_iter(vec![true, false, true]).to_bit_buffer(),
271/// );
272/// ```
273pub fn or(lhs: Expression, rhs: Expression) -> Expression {
274 Binary
275 .try_new_expr(Operator::Or, [lhs, rhs])
276 .vortex_expect("Failed to create Or binary expression")
277}
278
279/// Collects a list of `or`ed values into a single expression using a balanced tree.
280///
281/// This creates a balanced binary tree to avoid deep nesting that could cause
282/// stack overflow during drop or evaluation.
283///
284/// [a, b, c, d] => or(or(a, b), or(c, d))
285pub fn or_collect<I>(iter: I) -> Option<Expression>
286where
287 I: IntoIterator<Item = Expression>,
288{
289 let exprs: Vec<_> = iter.into_iter().collect();
290 balanced_reduce(exprs, or)
291}
292
293/// Create a new [`Binary`] using the [`And`](Operator::And) operator.
294///
295/// ## Example usage
296///
297/// ```
298/// # use vortex_array::arrays::BoolArray;
299/// # use vortex_array::{Array, IntoArray, ToCanonical};
300/// # use vortex_array::expr::{and, root, lit};
301/// let xs = BoolArray::from_iter(vec![true, false, true]);
302/// let result = xs.to_array().apply(&and(root(), lit(true))).unwrap();
303///
304/// assert_eq!(
305/// result.to_bool().to_bit_buffer(),
306/// BoolArray::from_iter(vec![true, false, true]).to_bit_buffer(),
307/// );
308/// ```
309pub fn and(lhs: Expression, rhs: Expression) -> Expression {
310 Binary
311 .try_new_expr(Operator::And, [lhs, rhs])
312 .vortex_expect("Failed to create And binary expression")
313}
314
315/// Collects a list of `and`ed values into a single expression using a balanced tree.
316///
317/// This creates a balanced binary tree to avoid deep nesting that could cause
318/// stack overflow during drop or evaluation.
319///
320/// [a, b, c, d] => and(and(a, b), and(c, d))
321pub fn and_collect<I>(iter: I) -> Option<Expression>
322where
323 I: IntoIterator<Item = Expression>,
324{
325 let exprs: Vec<_> = iter.into_iter().collect();
326 balanced_reduce(exprs, and)
327}
328
329/// Helper function to reduce a list of expressions into a balanced binary tree.
330fn balanced_reduce<F>(mut exprs: Vec<Expression>, combine: F) -> Option<Expression>
331where
332 F: Fn(Expression, Expression) -> Expression + Copy,
333{
334 if exprs.is_empty() {
335 return None;
336 }
337 if exprs.len() == 1 {
338 return exprs.pop();
339 }
340
341 while exprs.len() > 1 {
342 let exprs_len = exprs.len();
343
344 for target_idx in 0..(exprs.len() / 2) {
345 let item_idx = target_idx * 2;
346 let new = combine(exprs[item_idx].clone(), exprs[item_idx + 1].clone());
347 exprs[target_idx] = new;
348 }
349
350 if !exprs.len().is_multiple_of(2) {
351 // We want the odd nodes to be inside the tree and not at root
352 let lhs = exprs[(exprs.len() / 2) - 1].clone();
353 let rhs = exprs[exprs.len() - 1].clone();
354 exprs[exprs_len / 2 - 1] = combine(lhs, rhs);
355 }
356
357 exprs.truncate(exprs_len / 2);
358 }
359
360 exprs.pop()
361}
362
363/// Create a new [`Binary`] using the [`Add`](Operator::Add) operator.
364///
365/// ## Example usage
366///
367/// ```
368/// # use vortex_array::{Array, IntoArray};
369/// # use vortex_array::arrow::IntoArrowArray as _;
370/// # use vortex_buffer::buffer;
371/// # use vortex_array::expr::{checked_add, lit, root};
372/// let xs = buffer![1, 2, 3].into_array();
373/// let result = xs.apply(&checked_add(root(), lit(5))).unwrap();
374///
375/// assert_eq!(
376/// &result.into_arrow_preferred().unwrap(),
377/// &buffer![6, 7, 8]
378/// .into_array()
379/// .into_arrow_preferred()
380/// .unwrap()
381/// );
382/// ```
383pub fn checked_add(lhs: Expression, rhs: Expression) -> Expression {
384 Binary
385 .try_new_expr(Operator::Add, [lhs, rhs])
386 .vortex_expect("Failed to create Add binary expression")
387}
388
389// ---- Not ----
390
391/// Creates an expression that logically inverts boolean values.
392///
393/// Returns the logical negation of the input boolean expression.
394///
395/// ```rust
396/// # use vortex_array::expr::{not, root};
397/// let expr = not(root());
398/// ```
399pub fn not(operand: Expression) -> Expression {
400 Not.new_expr(EmptyOptions, vec![operand])
401}
402
403// ---- Between ----
404
405/// Creates an expression that checks if values are between two bounds.
406///
407/// Returns a boolean array indicating which values fall within the specified range.
408/// The comparison strictness is controlled by the options parameter.
409///
410/// ```rust
411/// # use vortex_array::scalar_fn::fns::between::BetweenOptions;
412/// # use vortex_array::scalar_fn::fns::between::StrictComparison;
413/// # use vortex_array::expr::{between, lit, root};
414/// let opts = BetweenOptions {
415/// lower_strict: StrictComparison::NonStrict,
416/// upper_strict: StrictComparison::NonStrict,
417/// };
418/// let expr = between(root(), lit(10), lit(20), opts);
419/// ```
420pub fn between(
421 arr: Expression,
422 lower: Expression,
423 upper: Expression,
424 options: BetweenOptions,
425) -> Expression {
426 Between
427 .try_new_expr(options, [arr, lower, upper])
428 .vortex_expect("Failed to create Between expression")
429}
430
431// ---- Select ----
432
433/// Creates an expression that selects (includes) specific fields from an array.
434///
435/// Projects only the specified fields from the child expression, which must be of DType struct.
436/// ```rust
437/// # use vortex_array::expr::{select, root};
438/// let expr = select(["name", "age"], root());
439/// ```
440pub fn select(field_names: impl Into<FieldNames>, child: Expression) -> Expression {
441 Select
442 .try_new_expr(FieldSelection::Include(field_names.into()), [child])
443 .vortex_expect("Failed to create Select expression")
444}
445
446/// Creates an expression that excludes specific fields from an array.
447///
448/// Projects all fields except the specified ones from the input struct expression.
449///
450/// ```rust
451/// # use vortex_array::expr::{select_exclude, root};
452/// let expr = select_exclude(["internal_id", "metadata"], root());
453/// ```
454pub fn select_exclude(fields: impl Into<FieldNames>, child: Expression) -> Expression {
455 Select
456 .try_new_expr(FieldSelection::Exclude(fields.into()), [child])
457 .vortex_expect("Failed to create Select expression")
458}
459
460// ---- Pack ----
461
462/// Creates an expression that packs values into a struct with named fields.
463///
464/// ```rust
465/// # use vortex_array::dtype::Nullability;
466/// # use vortex_array::expr::{pack, col, lit};
467/// let expr = pack([("id", col("user_id")), ("constant", lit(42))], Nullability::NonNullable);
468/// ```
469pub fn pack(
470 elements: impl IntoIterator<Item = (impl Into<FieldName>, Expression)>,
471 nullability: Nullability,
472) -> Expression {
473 let (names, values): (Vec<_>, Vec<_>) = elements
474 .into_iter()
475 .map(|(name, value)| (name.into(), value))
476 .unzip();
477 Pack.new_expr(
478 PackOptions {
479 names: names.into(),
480 nullability,
481 },
482 values,
483 )
484}
485
486// ---- Cast ----
487
488/// Creates an expression that casts values to a target data type.
489///
490/// Converts the input expression's values to the specified target type.
491///
492/// ```rust
493/// # use vortex_array::dtype::{DType, Nullability, PType};
494/// # use vortex_array::expr::{cast, root};
495/// let expr = cast(root(), DType::Primitive(PType::I64, Nullability::NonNullable));
496/// ```
497pub fn cast(child: Expression, target: DType) -> Expression {
498 Cast.try_new_expr(target, [child])
499 .vortex_expect("Failed to create Cast expression")
500}
501
502// ---- FillNull ----
503
504/// Creates an expression that replaces null values with a fill value.
505///
506/// ```rust
507/// # use vortex_array::expr::{fill_null, root, lit};
508/// let expr = fill_null(root(), lit(0i32));
509/// ```
510pub fn fill_null(child: Expression, fill_value: Expression) -> Expression {
511 FillNull.new_expr(EmptyOptions, [child, fill_value])
512}
513
514// ---- IsNull ----
515
516/// Creates an expression that checks for null values.
517///
518/// Returns a boolean array indicating which positions contain null values.
519///
520/// ```rust
521/// # use vortex_array::expr::{is_null, root};
522/// let expr = is_null(root());
523/// ```
524pub fn is_null(child: Expression) -> Expression {
525 IsNull.new_expr(EmptyOptions, vec![child])
526}
527
528// ---- Like ----
529
530/// Creates a SQL LIKE expression.
531pub fn like(child: Expression, pattern: Expression) -> Expression {
532 Like.new_expr(
533 LikeOptions {
534 negated: false,
535 case_insensitive: false,
536 },
537 [child, pattern],
538 )
539}
540
541/// Creates a case-insensitive SQL ILIKE expression.
542pub fn ilike(child: Expression, pattern: Expression) -> Expression {
543 Like.new_expr(
544 LikeOptions {
545 negated: false,
546 case_insensitive: true,
547 },
548 [child, pattern],
549 )
550}
551
552/// Creates a negated SQL NOT LIKE expression.
553pub fn not_like(child: Expression, pattern: Expression) -> Expression {
554 Like.new_expr(
555 LikeOptions {
556 negated: true,
557 case_insensitive: false,
558 },
559 [child, pattern],
560 )
561}
562
563/// Creates a negated case-insensitive SQL NOT ILIKE expression.
564pub fn not_ilike(child: Expression, pattern: Expression) -> Expression {
565 Like.new_expr(
566 LikeOptions {
567 negated: true,
568 case_insensitive: true,
569 },
570 [child, pattern],
571 )
572}
573
574// ---- Mask ----
575
576/// Creates a mask expression that applies the given boolean mask to the input array.
577pub fn mask(array: Expression, mask: Expression) -> Expression {
578 Mask.new_expr(EmptyOptions, [array, mask])
579}
580
581// ---- Merge ----
582
583/// Creates an expression that merges struct expressions into a single struct.
584///
585/// Combines fields from all input expressions. If field names are duplicated,
586/// later expressions win. Fields are not recursively merged.
587///
588/// ```rust
589/// # use vortex_array::dtype::Nullability;
590/// # use vortex_array::expr::{merge, get_item, root};
591/// let expr = merge([get_item("a", root()), get_item("b", root())]);
592/// ```
593pub fn merge(elements: impl IntoIterator<Item = impl Into<Expression>>) -> Expression {
594 use itertools::Itertools as _;
595 let values = elements.into_iter().map(|value| value.into()).collect_vec();
596 Merge.new_expr(DuplicateHandling::default(), values)
597}
598
599/// Creates a merge expression with explicit duplicate handling.
600pub fn merge_opts(
601 elements: impl IntoIterator<Item = impl Into<Expression>>,
602 duplicate_handling: DuplicateHandling,
603) -> Expression {
604 use itertools::Itertools as _;
605 let values = elements.into_iter().map(|value| value.into()).collect_vec();
606 Merge.new_expr(duplicate_handling, values)
607}
608
609// ---- Zip ----
610
611/// Creates a zip expression that conditionally selects between two arrays.
612///
613/// ```rust
614/// # use vortex_array::expr::{zip_expr, root, lit};
615/// let expr = zip_expr(root(), lit(0i32), lit(true));
616/// ```
617pub fn zip_expr(if_true: Expression, if_false: Expression, mask: Expression) -> Expression {
618 Zip.new_expr(EmptyOptions, [if_true, if_false, mask])
619}
620
621// ---- Dynamic ----
622
623/// Creates a dynamic comparison expression.
624pub fn dynamic(
625 operator: CompareOperator,
626 rhs_value: impl Fn() -> Option<ScalarValue> + Send + Sync + 'static,
627 rhs_dtype: DType,
628 default: bool,
629 lhs: Expression,
630) -> Expression {
631 DynamicComparison.new_expr(
632 DynamicComparisonExpr {
633 operator,
634 rhs: Arc::new(Rhs {
635 value: Arc::new(rhs_value),
636 dtype: rhs_dtype,
637 }),
638 default,
639 },
640 [lhs],
641 )
642}
643
644// ---- ListContains ----
645
646/// Creates an expression that checks if a value is contained in a list.
647///
648/// Returns a boolean array indicating whether the value appears in each list.
649///
650/// ```rust
651/// # use vortex_array::expr::{list_contains, lit, root};
652/// let expr = list_contains(root(), lit(42));
653/// ```
654pub fn list_contains(list: Expression, value: Expression) -> Expression {
655 ListContains.new_expr(EmptyOptions, [list, value])
656}