Skip to main content

vortex_array/
builtins.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! A collection of built-in common scalar functions.
5//!
6//! It is expected that each Vortex integration may provide its own set of scalar functions with
7//! semantics that exactly match the underlying system (e.g. SQL engine, DataFrame library, etc).
8//!
9//! This set of functions should cover the basics, and in general leans towards the semantics of
10//! the equivalent Arrow compute function.
11
12use vortex_error::VortexResult;
13use vortex_session::VortexSession;
14
15use crate::Array;
16use crate::ArrayRef;
17use crate::ExecutionCtx;
18use crate::IntoArray;
19use crate::arrays::ConstantArray;
20use crate::arrays::ScalarFnArrayExt;
21use crate::dtype::DType;
22use crate::dtype::FieldName;
23use crate::expr::Expression;
24use crate::optimizer::ArrayOptimizer;
25use crate::scalar::Scalar;
26use crate::scalar_fn::EmptyOptions;
27use crate::scalar_fn::ScalarFnVTableExt;
28use crate::scalar_fn::fns::between::Between;
29use crate::scalar_fn::fns::between::BetweenOptions;
30use crate::scalar_fn::fns::binary::Binary;
31use crate::scalar_fn::fns::cast::Cast;
32use crate::scalar_fn::fns::fill_null::FillNull;
33use crate::scalar_fn::fns::get_item::GetItem;
34use crate::scalar_fn::fns::is_null::IsNull;
35use crate::scalar_fn::fns::list_contains::ListContains;
36use crate::scalar_fn::fns::mask::Mask;
37use crate::scalar_fn::fns::not::Not;
38use crate::scalar_fn::fns::operators::Operator;
39use crate::scalar_fn::fns::zip::Zip;
40
/// A collection of built-in scalar functions that can be applied to expressions or arrays.
///
/// Every method borrows `self` and returns a new [`Expression`] wrapped in a [`VortexResult`];
/// the receiver itself is left untouched.
pub trait ExprBuiltins: Sized {
    /// Cast to the given data type.
    fn cast(&self, dtype: DType) -> VortexResult<Expression>;

    /// Replace null values with the given fill value.
    fn fill_null(&self, fill_value: Expression) -> VortexResult<Expression>;

    /// Get item by field name (for struct types).
    ///
    /// `field_name` accepts anything convertible into a [`FieldName`].
    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<Expression>;

    /// Is null check.
    fn is_null(&self) -> VortexResult<Expression>;

    /// Mask the expression using the given boolean mask.
    /// The resulting expression's validity is the intersection of the original expression's
    /// validity and the mask's validity.
    fn mask(&self, mask: Expression) -> VortexResult<Expression>;

    /// Boolean negation.
    fn not(&self) -> VortexResult<Expression>;

    /// Check if a list contains a value.
    fn list_contains(&self, value: Expression) -> VortexResult<Expression>;

    /// Conditional selection: `result[i] = if mask[i] then self[i] else if_false[i]`.
    ///
    /// Note the argument order: the fallback (`if_false`) comes before `mask`.
    fn zip(&self, if_false: Expression, mask: Expression) -> VortexResult<Expression>;

    /// Apply a binary operator to this expression and another.
    ///
    /// `self` is the left-hand operand and `rhs` the right-hand operand of `op`.
    fn binary(&self, rhs: Expression, op: Operator) -> VortexResult<Expression>;
}
72
73impl ExprBuiltins for Expression {
74    fn cast(&self, dtype: DType) -> VortexResult<Expression> {
75        Cast.try_new_expr(dtype, [self.clone()])
76    }
77
78    fn fill_null(&self, fill_value: Expression) -> VortexResult<Expression> {
79        FillNull.try_new_expr(EmptyOptions, [self.clone(), fill_value])
80    }
81
82    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<Expression> {
83        GetItem.try_new_expr(field_name.into(), [self.clone()])
84    }
85
86    fn is_null(&self) -> VortexResult<Expression> {
87        IsNull.try_new_expr(EmptyOptions, [self.clone()])
88    }
89
90    fn mask(&self, mask: Expression) -> VortexResult<Expression> {
91        Mask.try_new_expr(EmptyOptions, [self.clone(), mask])
92    }
93
94    fn not(&self) -> VortexResult<Expression> {
95        Not.try_new_expr(EmptyOptions, [self.clone()])
96    }
97
98    fn list_contains(&self, value: Expression) -> VortexResult<Expression> {
99        ListContains.try_new_expr(EmptyOptions, [self.clone(), value])
100    }
101
102    fn zip(&self, if_false: Expression, mask: Expression) -> VortexResult<Expression> {
103        Zip.try_new_expr(EmptyOptions, [self.clone(), if_false, mask])
104    }
105
106    fn binary(&self, rhs: Expression, op: Operator) -> VortexResult<Expression> {
107        Binary.try_new_expr(op, [self.clone(), rhs])
108    }
109}
110
/// A collection of built-in scalar functions that can be applied directly to arrays.
///
/// Every method returns the resulting [`ArrayRef`] wrapped in a [`VortexResult`]. Most methods
/// borrow `self`; [`ArrayBuiltins::mask`] and [`ArrayBuiltins::between`] take `self` by value.
pub trait ArrayBuiltins: Sized {
    /// Cast to the given data type.
    fn cast(&self, dtype: DType) -> VortexResult<ArrayRef>;

    /// Replace null values with the given fill value.
    ///
    /// `fill_value` accepts anything convertible into a [`Scalar`].
    fn fill_null(&self, fill_value: impl Into<Scalar>) -> VortexResult<ArrayRef>;

    /// Get item by field name (for struct types).
    ///
    /// `field_name` accepts anything convertible into a [`FieldName`].
    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<ArrayRef>;

    /// Is null check.
    fn is_null(&self) -> VortexResult<ArrayRef>;

    /// Mask the array using the given boolean mask.
    /// The resulting array's validity is the intersection of the original array's validity
    /// and the mask's validity.
    ///
    /// Consumes `self`.
    fn mask(self, mask: ArrayRef) -> VortexResult<ArrayRef>;

    /// Boolean negation.
    fn not(&self) -> VortexResult<ArrayRef>;

    /// Conditional selection: `result[i] = if mask[i] then self[i] else if_false[i]`.
    ///
    /// Note the argument order: the fallback (`if_false`) comes before `mask`.
    fn zip(&self, if_false: ArrayRef, mask: ArrayRef) -> VortexResult<ArrayRef>;

    /// Check if a list contains a value.
    fn list_contains(&self, value: ArrayRef) -> VortexResult<ArrayRef>;

    /// Apply a binary operator to this array and another.
    ///
    /// `self` is the left-hand operand and `rhs` the right-hand operand of `op`.
    fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult<ArrayRef>;

    /// Check whether values lie between two bounds: `lower </<= value </<= upper`.
    ///
    /// Consumes `self`. Whether each comparison is strict (`<`) or inclusive (`<=`) is
    /// presumably selected by `options` — NOTE(review): confirm against [`BetweenOptions`].
    fn between(
        self,
        lower: ArrayRef,
        upper: ArrayRef,
        options: BetweenOptions,
    ) -> VortexResult<ArrayRef>;
}
149
150impl ArrayBuiltins for ArrayRef {
151    fn cast(&self, dtype: DType) -> VortexResult<ArrayRef> {
152        if self.dtype() == &dtype {
153            return Ok(self.clone());
154        }
155        Cast.try_new_array(self.len(), dtype, [self.clone()])?
156            .optimize()
157    }
158
159    fn fill_null(&self, fill_value: impl Into<Scalar>) -> VortexResult<ArrayRef> {
160        FillNull
161            .try_new_array(
162                self.len(),
163                EmptyOptions,
164                [
165                    self.clone(),
166                    ConstantArray::new(fill_value.into(), self.len()).into_array(),
167                ],
168            )?
169            .optimize()
170    }
171
172    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<ArrayRef> {
173        GetItem
174            .try_new_array(self.len(), field_name.into(), [self.clone()])?
175            .optimize()
176    }
177
178    fn is_null(&self) -> VortexResult<ArrayRef> {
179        IsNull
180            .try_new_array(self.len(), EmptyOptions, [self.clone()])?
181            .optimize()
182    }
183
184    fn mask(self, mask: ArrayRef) -> VortexResult<ArrayRef> {
185        Mask.try_new_array(self.len(), EmptyOptions, [self, mask])?
186            .optimize()
187    }
188
189    fn not(&self) -> VortexResult<ArrayRef> {
190        Not.try_new_array(self.len(), EmptyOptions, [self.clone()])?
191            .optimize()
192    }
193
194    fn zip(&self, if_false: ArrayRef, mask: ArrayRef) -> VortexResult<ArrayRef> {
195        let scalar_fn =
196            Zip.try_new_array(self.len(), EmptyOptions, [self.clone(), if_false, mask])?;
197        let mut ctx = ExecutionCtx::new(VortexSession::empty());
198        scalar_fn.execute::<ArrayRef>(&mut ctx)
199    }
200
201    fn list_contains(&self, value: ArrayRef) -> VortexResult<ArrayRef> {
202        ListContains
203            .try_new_array(self.len(), EmptyOptions, [self.clone(), value])?
204            .optimize()
205    }
206
207    fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult<ArrayRef> {
208        Binary
209            .try_new_array(self.len(), op, [self.clone(), rhs])?
210            .optimize()
211    }
212
213    fn between(
214        self,
215        lower: ArrayRef,
216        upper: ArrayRef,
217        options: BetweenOptions,
218    ) -> VortexResult<ArrayRef> {
219        Between
220            .try_new_array(self.len(), options, [self, lower, upper])?
221            .optimize()
222    }
223}