Skip to main content

vortex_array/expr/
exprs.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Factory functions for creating [`Expression`]s from scalar function vtables.
5
6use std::sync::Arc;
7
8use vortex_error::VortexExpect;
9
10use crate::dtype::DType;
11use crate::dtype::FieldName;
12use crate::dtype::FieldNames;
13use crate::dtype::Nullability;
14use crate::expr::Expression;
15use crate::scalar::Scalar;
16use crate::scalar::ScalarValue;
17use crate::scalar_fn::EmptyOptions;
18use crate::scalar_fn::ScalarFnVTableExt;
19use crate::scalar_fn::fns::between::Between;
20use crate::scalar_fn::fns::between::BetweenOptions;
21use crate::scalar_fn::fns::binary::Binary;
22use crate::scalar_fn::fns::cast::Cast;
23use crate::scalar_fn::fns::dynamic::DynamicComparison;
24use crate::scalar_fn::fns::dynamic::DynamicComparisonExpr;
25use crate::scalar_fn::fns::dynamic::Rhs;
26use crate::scalar_fn::fns::fill_null::FillNull;
27use crate::scalar_fn::fns::get_item::GetItem;
28use crate::scalar_fn::fns::is_null::IsNull;
29use crate::scalar_fn::fns::like::Like;
30use crate::scalar_fn::fns::like::LikeOptions;
31use crate::scalar_fn::fns::list_contains::ListContains;
32use crate::scalar_fn::fns::literal::Literal;
33use crate::scalar_fn::fns::mask::Mask;
34use crate::scalar_fn::fns::merge::DuplicateHandling;
35use crate::scalar_fn::fns::merge::Merge;
36use crate::scalar_fn::fns::not::Not;
37use crate::scalar_fn::fns::operators::CompareOperator;
38use crate::scalar_fn::fns::operators::Operator;
39use crate::scalar_fn::fns::pack::Pack;
40use crate::scalar_fn::fns::pack::PackOptions;
41use crate::scalar_fn::fns::root::Root;
42use crate::scalar_fn::fns::select::FieldSelection;
43use crate::scalar_fn::fns::select::Select;
44use crate::scalar_fn::fns::zip::Zip;
45
46// ---- Root ----
47
48/// Creates an expression that references the root scope.
49///
50/// Returns the entire input array as passed to the expression evaluator.
51/// This is commonly used as the starting point for field access and other operations.
52pub fn root() -> Expression {
53    Root.try_new_expr(EmptyOptions, vec![])
54        .vortex_expect("Failed to create Root expression")
55}
56
/// Reports whether `expr` is a [`Root`] expression.
///
/// This is a thin wrapper around `expr.is::<Root>()`; it inspects only `expr` itself.
pub fn is_root(expr: &Expression) -> bool {
    expr.is::<Root>()
}
61
62// ---- Literal ----
63
64/// Create a new `Literal` expression from a type that coerces to `Scalar`.
65///
66///
67/// ## Example usage
68///
69/// ```
70/// use vortex_array::arrays::PrimitiveArray;
71/// use vortex_array::dtype::Nullability;
72/// use vortex_array::expr::lit;
73/// use vortex_array::scalar_fn::fns::literal::Literal;
74/// use vortex_array::scalar::Scalar;
75///
76/// let number = lit(34i32);
77///
78/// let scalar = number.as_::<Literal>();
79/// assert_eq!(scalar, &Scalar::primitive(34i32, Nullability::NonNullable));
80/// ```
81pub fn lit(value: impl Into<Scalar>) -> Expression {
82    Literal.new_expr(value.into(), [])
83}
84
85// ---- GetItem / Col ----
86
87/// Creates an expression that accesses a field from the root array.
88///
89/// Equivalent to `get_item(field, root())` - extracts a named field from the input array.
90///
91/// ```rust
92/// # use vortex_array::expr::col;
93/// let expr = col("name");
94/// ```
95pub fn col(field: impl Into<FieldName>) -> Expression {
96    GetItem.new_expr(field.into(), vec![root()])
97}
98
99/// Creates an expression that extracts a named field from a struct expression.
100///
101/// Accesses the specified field from the result of the child expression.
102///
103/// ```rust
104/// # use vortex_array::expr::{get_item, root};
105/// let expr = get_item("user_id", root());
106/// ```
107pub fn get_item(field: impl Into<FieldName>, child: Expression) -> Expression {
108    GetItem.new_expr(field.into(), vec![child])
109}
110
111// ---- Binary operators ----
112
113/// Create a new [`Binary`] using the [`Eq`](Operator::Eq) operator.
114///
115/// ## Example usage
116///
117/// ```
118/// # use vortex_array::arrays::{BoolArray, PrimitiveArray};
119/// # use vortex_array::{Array, IntoArray, ToCanonical};
120/// # use vortex_array::validity::Validity;
121/// # use vortex_buffer::buffer;
122/// # use vortex_array::expr::{eq, root, lit};
123/// let xs = PrimitiveArray::new(buffer![1i32, 2i32, 3i32], Validity::NonNullable);
124/// let result = xs.to_array().apply(&eq(root(), lit(3))).unwrap();
125///
126/// assert_eq!(
127///     result.to_bool().to_bit_buffer(),
128///     BoolArray::from_iter(vec![false, false, true]).to_bit_buffer(),
129/// );
130/// ```
131pub fn eq(lhs: Expression, rhs: Expression) -> Expression {
132    Binary
133        .try_new_expr(Operator::Eq, [lhs, rhs])
134        .vortex_expect("Failed to create Eq binary expression")
135}
136
137/// Create a new [`Binary`] using the [`NotEq`](Operator::NotEq) operator.
138///
139/// ## Example usage
140///
141/// ```
142/// # use vortex_array::arrays::{BoolArray, PrimitiveArray};
143/// # use vortex_array::{Array, IntoArray, ToCanonical};
144/// # use vortex_array::validity::Validity;
145/// # use vortex_buffer::buffer;
146/// # use vortex_array::expr::{root, lit, not_eq};
147/// let xs = PrimitiveArray::new(buffer![1i32, 2i32, 3i32], Validity::NonNullable);
148/// let result = xs.to_array().apply(&not_eq(root(), lit(3))).unwrap();
149///
150/// assert_eq!(
151///     result.to_bool().to_bit_buffer(),
152///     BoolArray::from_iter(vec![true, true, false]).to_bit_buffer(),
153/// );
154/// ```
155pub fn not_eq(lhs: Expression, rhs: Expression) -> Expression {
156    Binary
157        .try_new_expr(Operator::NotEq, [lhs, rhs])
158        .vortex_expect("Failed to create NotEq binary expression")
159}
160
161/// Create a new [`Binary`] using the [`Gte`](Operator::Gte) operator.
162///
163/// ## Example usage
164///
165/// ```
166/// # use vortex_array::arrays::{BoolArray, PrimitiveArray };
167/// # use vortex_array::{Array, IntoArray, ToCanonical};
168/// # use vortex_array::validity::Validity;
169/// # use vortex_buffer::buffer;
170/// # use vortex_array::expr::{gt_eq, root, lit};
171/// let xs = PrimitiveArray::new(buffer![1i32, 2i32, 3i32], Validity::NonNullable);
172/// let result = xs.to_array().apply(&gt_eq(root(), lit(3))).unwrap();
173///
174/// assert_eq!(
175///     result.to_bool().to_bit_buffer(),
176///     BoolArray::from_iter(vec![false, false, true]).to_bit_buffer(),
177/// );
178/// ```
179pub fn gt_eq(lhs: Expression, rhs: Expression) -> Expression {
180    Binary
181        .try_new_expr(Operator::Gte, [lhs, rhs])
182        .vortex_expect("Failed to create Gte binary expression")
183}
184
185/// Create a new [`Binary`] using the [`Gt`](Operator::Gt) operator.
186///
187/// ## Example usage
188///
189/// ```
190/// # use vortex_array::arrays::{BoolArray, PrimitiveArray };
191/// # use vortex_array::{Array, IntoArray, ToCanonical};
192/// # use vortex_array::validity::Validity;
193/// # use vortex_buffer::buffer;
194/// # use vortex_array::expr::{gt, root, lit};
195/// let xs = PrimitiveArray::new(buffer![1i32, 2i32, 3i32], Validity::NonNullable);
196/// let result = xs.to_array().apply(&gt(root(), lit(2))).unwrap();
197///
198/// assert_eq!(
199///     result.to_bool().to_bit_buffer(),
200///     BoolArray::from_iter(vec![false, false, true]).to_bit_buffer(),
201/// );
202/// ```
203pub fn gt(lhs: Expression, rhs: Expression) -> Expression {
204    Binary
205        .try_new_expr(Operator::Gt, [lhs, rhs])
206        .vortex_expect("Failed to create Gt binary expression")
207}
208
209/// Create a new [`Binary`] using the [`Lte`](Operator::Lte) operator.
210///
211/// ## Example usage
212///
213/// ```
214/// # use vortex_array::arrays::{BoolArray, PrimitiveArray };
215/// # use vortex_array::{Array, IntoArray, ToCanonical};
216/// # use vortex_array::validity::Validity;
217/// # use vortex_buffer::buffer;
218/// # use vortex_array::expr::{root, lit, lt_eq};
219/// let xs = PrimitiveArray::new(buffer![1i32, 2i32, 3i32], Validity::NonNullable);
220/// let result = xs.to_array().apply(&lt_eq(root(), lit(2))).unwrap();
221///
222/// assert_eq!(
223///     result.to_bool().to_bit_buffer(),
224///     BoolArray::from_iter(vec![true, true, false]).to_bit_buffer(),
225/// );
226/// ```
227pub fn lt_eq(lhs: Expression, rhs: Expression) -> Expression {
228    Binary
229        .try_new_expr(Operator::Lte, [lhs, rhs])
230        .vortex_expect("Failed to create Lte binary expression")
231}
232
233/// Create a new [`Binary`] using the [`Lt`](Operator::Lt) operator.
234///
235/// ## Example usage
236///
237/// ```
238/// # use vortex_array::arrays::{BoolArray, PrimitiveArray };
239/// # use vortex_array::{Array, IntoArray, ToCanonical};
240/// # use vortex_array::validity::Validity;
241/// # use vortex_buffer::buffer;
242/// # use vortex_array::expr::{root, lit, lt};
243/// let xs = PrimitiveArray::new(buffer![1i32, 2i32, 3i32], Validity::NonNullable);
244/// let result = xs.to_array().apply(&lt(root(), lit(3))).unwrap();
245///
246/// assert_eq!(
247///     result.to_bool().to_bit_buffer(),
248///     BoolArray::from_iter(vec![true, true, false]).to_bit_buffer(),
249/// );
250/// ```
251pub fn lt(lhs: Expression, rhs: Expression) -> Expression {
252    Binary
253        .try_new_expr(Operator::Lt, [lhs, rhs])
254        .vortex_expect("Failed to create Lt binary expression")
255}
256
257/// Create a new [`Binary`] using the [`Or`](Operator::Or) operator.
258///
259/// ## Example usage
260///
261/// ```
262/// # use vortex_array::arrays::BoolArray;
263/// # use vortex_array::{Array, IntoArray, ToCanonical};
264/// # use vortex_array::expr::{root, lit, or};
265/// let xs = BoolArray::from_iter(vec![true, false, true]);
266/// let result = xs.to_array().apply(&or(root(), lit(false))).unwrap();
267///
268/// assert_eq!(
269///     result.to_bool().to_bit_buffer(),
270///     BoolArray::from_iter(vec![true, false, true]).to_bit_buffer(),
271/// );
272/// ```
273pub fn or(lhs: Expression, rhs: Expression) -> Expression {
274    Binary
275        .try_new_expr(Operator::Or, [lhs, rhs])
276        .vortex_expect("Failed to create Or binary expression")
277}
278
279/// Collects a list of `or`ed values into a single expression using a balanced tree.
280///
281/// This creates a balanced binary tree to avoid deep nesting that could cause
282/// stack overflow during drop or evaluation.
283///
284/// [a, b, c, d] => or(or(a, b), or(c, d))
285pub fn or_collect<I>(iter: I) -> Option<Expression>
286where
287    I: IntoIterator<Item = Expression>,
288{
289    let exprs: Vec<_> = iter.into_iter().collect();
290    balanced_reduce(exprs, or)
291}
292
293/// Create a new [`Binary`] using the [`And`](Operator::And) operator.
294///
295/// ## Example usage
296///
297/// ```
298/// # use vortex_array::arrays::BoolArray;
299/// # use vortex_array::{Array, IntoArray, ToCanonical};
300/// # use vortex_array::expr::{and, root, lit};
301/// let xs = BoolArray::from_iter(vec![true, false, true]);
302/// let result = xs.to_array().apply(&and(root(), lit(true))).unwrap();
303///
304/// assert_eq!(
305///     result.to_bool().to_bit_buffer(),
306///     BoolArray::from_iter(vec![true, false, true]).to_bit_buffer(),
307/// );
308/// ```
309pub fn and(lhs: Expression, rhs: Expression) -> Expression {
310    Binary
311        .try_new_expr(Operator::And, [lhs, rhs])
312        .vortex_expect("Failed to create And binary expression")
313}
314
315/// Collects a list of `and`ed values into a single expression using a balanced tree.
316///
317/// This creates a balanced binary tree to avoid deep nesting that could cause
318/// stack overflow during drop or evaluation.
319///
320/// [a, b, c, d] => and(and(a, b), and(c, d))
321pub fn and_collect<I>(iter: I) -> Option<Expression>
322where
323    I: IntoIterator<Item = Expression>,
324{
325    let exprs: Vec<_> = iter.into_iter().collect();
326    balanced_reduce(exprs, and)
327}
328
329/// Helper function to reduce a list of expressions into a balanced binary tree.
330fn balanced_reduce<F>(mut exprs: Vec<Expression>, combine: F) -> Option<Expression>
331where
332    F: Fn(Expression, Expression) -> Expression + Copy,
333{
334    if exprs.is_empty() {
335        return None;
336    }
337    if exprs.len() == 1 {
338        return exprs.pop();
339    }
340
341    while exprs.len() > 1 {
342        let exprs_len = exprs.len();
343
344        for target_idx in 0..(exprs.len() / 2) {
345            let item_idx = target_idx * 2;
346            let new = combine(exprs[item_idx].clone(), exprs[item_idx + 1].clone());
347            exprs[target_idx] = new;
348        }
349
350        if !exprs.len().is_multiple_of(2) {
351            // We want the odd nodes to be inside the tree and not at root
352            let lhs = exprs[(exprs.len() / 2) - 1].clone();
353            let rhs = exprs[exprs.len() - 1].clone();
354            exprs[exprs_len / 2 - 1] = combine(lhs, rhs);
355        }
356
357        exprs.truncate(exprs_len / 2);
358    }
359
360    exprs.pop()
361}
362
363/// Create a new [`Binary`] using the [`Add`](Operator::Add) operator.
364///
365/// ## Example usage
366///
367/// ```
368/// # use vortex_array::{Array, IntoArray};
369/// # use vortex_array::arrow::IntoArrowArray as _;
370/// # use vortex_buffer::buffer;
371/// # use vortex_array::expr::{checked_add, lit, root};
372/// let xs = buffer![1, 2, 3].into_array();
373/// let result = xs.apply(&checked_add(root(), lit(5))).unwrap();
374///
375/// assert_eq!(
376///     &result.into_arrow_preferred().unwrap(),
377///     &buffer![6, 7, 8]
378///         .into_array()
379///         .into_arrow_preferred()
380///         .unwrap()
381/// );
382/// ```
383pub fn checked_add(lhs: Expression, rhs: Expression) -> Expression {
384    Binary
385        .try_new_expr(Operator::Add, [lhs, rhs])
386        .vortex_expect("Failed to create Add binary expression")
387}
388
389// ---- Not ----
390
391/// Creates an expression that logically inverts boolean values.
392///
393/// Returns the logical negation of the input boolean expression.
394///
395/// ```rust
396/// # use vortex_array::expr::{not, root};
397/// let expr = not(root());
398/// ```
399pub fn not(operand: Expression) -> Expression {
400    Not.new_expr(EmptyOptions, vec![operand])
401}
402
403// ---- Between ----
404
405/// Creates an expression that checks if values are between two bounds.
406///
407/// Returns a boolean array indicating which values fall within the specified range.
408/// The comparison strictness is controlled by the options parameter.
409///
410/// ```rust
411/// # use vortex_array::scalar_fn::fns::between::BetweenOptions;
412/// # use vortex_array::scalar_fn::fns::between::StrictComparison;
413/// # use vortex_array::expr::{between, lit, root};
414/// let opts = BetweenOptions {
415///     lower_strict: StrictComparison::NonStrict,
416///     upper_strict: StrictComparison::NonStrict,
417/// };
418/// let expr = between(root(), lit(10), lit(20), opts);
419/// ```
420pub fn between(
421    arr: Expression,
422    lower: Expression,
423    upper: Expression,
424    options: BetweenOptions,
425) -> Expression {
426    Between
427        .try_new_expr(options, [arr, lower, upper])
428        .vortex_expect("Failed to create Between expression")
429}
430
431// ---- Select ----
432
433/// Creates an expression that selects (includes) specific fields from an array.
434///
435/// Projects only the specified fields from the child expression, which must be of DType struct.
436/// ```rust
437/// # use vortex_array::expr::{select, root};
438/// let expr = select(["name", "age"], root());
439/// ```
440pub fn select(field_names: impl Into<FieldNames>, child: Expression) -> Expression {
441    Select
442        .try_new_expr(FieldSelection::Include(field_names.into()), [child])
443        .vortex_expect("Failed to create Select expression")
444}
445
446/// Creates an expression that excludes specific fields from an array.
447///
448/// Projects all fields except the specified ones from the input struct expression.
449///
450/// ```rust
451/// # use vortex_array::expr::{select_exclude, root};
452/// let expr = select_exclude(["internal_id", "metadata"], root());
453/// ```
454pub fn select_exclude(fields: impl Into<FieldNames>, child: Expression) -> Expression {
455    Select
456        .try_new_expr(FieldSelection::Exclude(fields.into()), [child])
457        .vortex_expect("Failed to create Select expression")
458}
459
460// ---- Pack ----
461
462/// Creates an expression that packs values into a struct with named fields.
463///
464/// ```rust
465/// # use vortex_array::dtype::Nullability;
466/// # use vortex_array::expr::{pack, col, lit};
467/// let expr = pack([("id", col("user_id")), ("constant", lit(42))], Nullability::NonNullable);
468/// ```
469pub fn pack(
470    elements: impl IntoIterator<Item = (impl Into<FieldName>, Expression)>,
471    nullability: Nullability,
472) -> Expression {
473    let (names, values): (Vec<_>, Vec<_>) = elements
474        .into_iter()
475        .map(|(name, value)| (name.into(), value))
476        .unzip();
477    Pack.new_expr(
478        PackOptions {
479            names: names.into(),
480            nullability,
481        },
482        values,
483    )
484}
485
486// ---- Cast ----
487
488/// Creates an expression that casts values to a target data type.
489///
490/// Converts the input expression's values to the specified target type.
491///
492/// ```rust
493/// # use vortex_array::dtype::{DType, Nullability, PType};
494/// # use vortex_array::expr::{cast, root};
495/// let expr = cast(root(), DType::Primitive(PType::I64, Nullability::NonNullable));
496/// ```
497pub fn cast(child: Expression, target: DType) -> Expression {
498    Cast.try_new_expr(target, [child])
499        .vortex_expect("Failed to create Cast expression")
500}
501
502// ---- FillNull ----
503
504/// Creates an expression that replaces null values with a fill value.
505///
506/// ```rust
507/// # use vortex_array::expr::{fill_null, root, lit};
508/// let expr = fill_null(root(), lit(0i32));
509/// ```
510pub fn fill_null(child: Expression, fill_value: Expression) -> Expression {
511    FillNull.new_expr(EmptyOptions, [child, fill_value])
512}
513
514// ---- IsNull ----
515
516/// Creates an expression that checks for null values.
517///
518/// Returns a boolean array indicating which positions contain null values.
519///
520/// ```rust
521/// # use vortex_array::expr::{is_null, root};
522/// let expr = is_null(root());
523/// ```
524pub fn is_null(child: Expression) -> Expression {
525    IsNull.new_expr(EmptyOptions, vec![child])
526}
527
528// ---- Like ----
529
530/// Creates a SQL LIKE expression.
531pub fn like(child: Expression, pattern: Expression) -> Expression {
532    Like.new_expr(
533        LikeOptions {
534            negated: false,
535            case_insensitive: false,
536        },
537        [child, pattern],
538    )
539}
540
541/// Creates a case-insensitive SQL ILIKE expression.
542pub fn ilike(child: Expression, pattern: Expression) -> Expression {
543    Like.new_expr(
544        LikeOptions {
545            negated: false,
546            case_insensitive: true,
547        },
548        [child, pattern],
549    )
550}
551
552/// Creates a negated SQL NOT LIKE expression.
553pub fn not_like(child: Expression, pattern: Expression) -> Expression {
554    Like.new_expr(
555        LikeOptions {
556            negated: true,
557            case_insensitive: false,
558        },
559        [child, pattern],
560    )
561}
562
563/// Creates a negated case-insensitive SQL NOT ILIKE expression.
564pub fn not_ilike(child: Expression, pattern: Expression) -> Expression {
565    Like.new_expr(
566        LikeOptions {
567            negated: true,
568            case_insensitive: true,
569        },
570        [child, pattern],
571    )
572}
573
574// ---- Mask ----
575
576/// Creates a mask expression that applies the given boolean mask to the input array.
577pub fn mask(array: Expression, mask: Expression) -> Expression {
578    Mask.new_expr(EmptyOptions, [array, mask])
579}
580
581// ---- Merge ----
582
583/// Creates an expression that merges struct expressions into a single struct.
584///
585/// Combines fields from all input expressions. If field names are duplicated,
586/// later expressions win. Fields are not recursively merged.
587///
588/// ```rust
589/// # use vortex_array::dtype::Nullability;
590/// # use vortex_array::expr::{merge, get_item, root};
591/// let expr = merge([get_item("a", root()), get_item("b", root())]);
592/// ```
593pub fn merge(elements: impl IntoIterator<Item = impl Into<Expression>>) -> Expression {
594    use itertools::Itertools as _;
595    let values = elements.into_iter().map(|value| value.into()).collect_vec();
596    Merge.new_expr(DuplicateHandling::default(), values)
597}
598
599/// Creates a merge expression with explicit duplicate handling.
600pub fn merge_opts(
601    elements: impl IntoIterator<Item = impl Into<Expression>>,
602    duplicate_handling: DuplicateHandling,
603) -> Expression {
604    use itertools::Itertools as _;
605    let values = elements.into_iter().map(|value| value.into()).collect_vec();
606    Merge.new_expr(duplicate_handling, values)
607}
608
609// ---- Zip ----
610
611/// Creates a zip expression that conditionally selects between two arrays.
612///
613/// ```rust
614/// # use vortex_array::expr::{zip_expr, root, lit};
615/// let expr = zip_expr(root(), lit(0i32), lit(true));
616/// ```
617pub fn zip_expr(if_true: Expression, if_false: Expression, mask: Expression) -> Expression {
618    Zip.new_expr(EmptyOptions, [if_true, if_false, mask])
619}
620
621// ---- Dynamic ----
622
623/// Creates a dynamic comparison expression.
624pub fn dynamic(
625    operator: CompareOperator,
626    rhs_value: impl Fn() -> Option<ScalarValue> + Send + Sync + 'static,
627    rhs_dtype: DType,
628    default: bool,
629    lhs: Expression,
630) -> Expression {
631    DynamicComparison.new_expr(
632        DynamicComparisonExpr {
633            operator,
634            rhs: Arc::new(Rhs {
635                value: Arc::new(rhs_value),
636                dtype: rhs_dtype,
637            }),
638            default,
639        },
640        [lhs],
641    )
642}
643
644// ---- ListContains ----
645
646/// Creates an expression that checks if a value is contained in a list.
647///
648/// Returns a boolean array indicating whether the value appears in each list.
649///
650/// ```rust
651/// # use vortex_array::expr::{list_contains, lit, root};
652/// let expr = list_contains(root(), lit(42));
653/// ```
654pub fn list_contains(list: Expression, value: Expression) -> Expression {
655    ListContains.new_expr(EmptyOptions, [list, value])
656}