Skip to main content

vortex_array/
builtins.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! A collection of built-in common scalar functions.
5//!
6//! It is expected that each Vortex integration may provide its own set of scalar functions with
7//! semantics that exactly match the underlying system (e.g. SQL engine, DataFrame library, etc).
8//!
9//! This set of functions should cover the basics, and in general leans towards the semantics of
10//! the equivalent Arrow compute function.
11
12use vortex_error::VortexResult;
13
14use crate::ArrayRef;
15use crate::IntoArray;
16use crate::arrays::ConstantArray;
17use crate::arrays::scalar_fn::ScalarFnFactoryExt;
18use crate::dtype::DType;
19use crate::dtype::FieldName;
20use crate::expr::Expression;
21use crate::optimizer::ArrayOptimizer;
22use crate::scalar::Scalar;
23use crate::scalar_fn::EmptyOptions;
24use crate::scalar_fn::ScalarFnVTableExt;
25use crate::scalar_fn::fns::between::Between;
26use crate::scalar_fn::fns::between::BetweenOptions;
27use crate::scalar_fn::fns::binary::Binary;
28use crate::scalar_fn::fns::cast::Cast;
29use crate::scalar_fn::fns::fill_null::FillNull;
30use crate::scalar_fn::fns::get_item::GetItem;
31use crate::scalar_fn::fns::is_not_null::IsNotNull;
32use crate::scalar_fn::fns::is_null::IsNull;
33use crate::scalar_fn::fns::list_contains::ListContains;
34use crate::scalar_fn::fns::mask::Mask;
35use crate::scalar_fn::fns::not::Not;
36use crate::scalar_fn::fns::operators::Operator;
37use crate::scalar_fn::fns::zip::Zip;
38
39/// A collection of built-in scalar functions that can be applied to expressions or arrays.
40pub trait ExprBuiltins: Sized {
41    /// Cast to the given data type.
42    fn cast(&self, dtype: DType) -> VortexResult<Expression>;
43
44    /// Replace null values with the given fill value.
45    fn fill_null(&self, fill_value: Expression) -> VortexResult<Expression>;
46
47    /// Get item by field name (for struct types).
48    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<Expression>;
49
50    /// Is null check.
51    fn is_null(&self) -> VortexResult<Expression>;
52
53    /// Is not null check.
54    fn is_not_null(&self) -> VortexResult<Expression>;
55
56    /// Mask the expression using the given boolean mask.
57    /// The resulting expression's validity is the intersection of the original expression's
58    /// validity.
59    fn mask(&self, mask: Expression) -> VortexResult<Expression>;
60
61    /// Boolean negation.
62    fn not(&self) -> VortexResult<Expression>;
63
64    /// Check if a list contains a value.
65    fn list_contains(&self, value: Expression) -> VortexResult<Expression>;
66
67    /// Conditional selection: `result[i] = if mask[i] then if_true[i] else if_false[i]`.
68    fn zip(&self, if_true: Expression, if_false: Expression) -> VortexResult<Expression>;
69
70    /// Apply a binary operator to this expression and another.
71    fn binary(&self, rhs: Expression, op: Operator) -> VortexResult<Expression>;
72}
73
74impl ExprBuiltins for Expression {
75    fn cast(&self, dtype: DType) -> VortexResult<Expression> {
76        Cast.try_new_expr(dtype, [self.clone()])
77    }
78
79    fn fill_null(&self, fill_value: Expression) -> VortexResult<Expression> {
80        FillNull.try_new_expr(EmptyOptions, [self.clone(), fill_value])
81    }
82
83    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<Expression> {
84        GetItem.try_new_expr(field_name.into(), [self.clone()])
85    }
86
87    fn is_null(&self) -> VortexResult<Expression> {
88        IsNull.try_new_expr(EmptyOptions, [self.clone()])
89    }
90
91    fn is_not_null(&self) -> VortexResult<Expression> {
92        IsNotNull.try_new_expr(EmptyOptions, [self.clone()])
93    }
94
95    fn mask(&self, mask: Expression) -> VortexResult<Expression> {
96        Mask.try_new_expr(EmptyOptions, [self.clone(), mask])
97    }
98
99    fn not(&self) -> VortexResult<Expression> {
100        Not.try_new_expr(EmptyOptions, [self.clone()])
101    }
102
103    fn list_contains(&self, value: Expression) -> VortexResult<Expression> {
104        ListContains.try_new_expr(EmptyOptions, [self.clone(), value])
105    }
106
107    fn zip(&self, if_true: Expression, if_false: Expression) -> VortexResult<Expression> {
108        Zip.try_new_expr(EmptyOptions, [if_true, if_false, self.clone()])
109    }
110
111    fn binary(&self, rhs: Expression, op: Operator) -> VortexResult<Expression> {
112        Binary.try_new_expr(op, [self.clone(), rhs])
113    }
114}
115
116pub trait ArrayBuiltins: Sized {
117    /// Cast to the given data type.
118    fn cast(&self, dtype: DType) -> VortexResult<ArrayRef>;
119
120    /// Replace null values with the given fill value.
121    fn fill_null(&self, fill_value: impl Into<Scalar>) -> VortexResult<ArrayRef>;
122
123    /// Get item by field name (for struct types).
124    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<ArrayRef>;
125
126    /// Is null check.
127    fn is_null(&self) -> VortexResult<ArrayRef>;
128
129    /// Is not null check.
130    fn is_not_null(&self) -> VortexResult<ArrayRef>;
131
132    /// Mask the array using the given boolean mask.
133    /// The resulting array's validity is the intersection of the original array's validity
134    /// and the mask's validity.
135    fn mask(self, mask: ArrayRef) -> VortexResult<ArrayRef>;
136
137    /// Boolean negation.
138    fn not(&self) -> VortexResult<ArrayRef>;
139
140    /// Conditional selection: `result[i] = if mask[i] then if_true[i] else if_false[i]`.
141    fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult<ArrayRef>;
142
143    /// Check if a list contains a value.
144    fn list_contains(&self, value: ArrayRef) -> VortexResult<ArrayRef>;
145
146    /// Apply a binary operator to this array and another.
147    fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult<ArrayRef>;
148
149    /// Compare a values between lower </<= value </<= upper
150    fn between(
151        self,
152        lower: ArrayRef,
153        upper: ArrayRef,
154        options: BetweenOptions,
155    ) -> VortexResult<ArrayRef>;
156}
157
158impl ArrayBuiltins for ArrayRef {
159    fn cast(&self, dtype: DType) -> VortexResult<ArrayRef> {
160        if self.dtype() == &dtype {
161            return Ok(self.clone());
162        }
163        Cast.try_new_array(self.len(), dtype, [self.clone()])?
164            .optimize()
165    }
166
167    fn fill_null(&self, fill_value: impl Into<Scalar>) -> VortexResult<ArrayRef> {
168        let fill_value = fill_value.into();
169        if !self.dtype().is_nullable() {
170            return self.cast(fill_value.dtype().clone());
171        }
172        FillNull
173            .try_new_array(
174                self.len(),
175                EmptyOptions,
176                [
177                    self.clone(),
178                    ConstantArray::new(fill_value, self.len()).into_array(),
179                ],
180            )?
181            .optimize()
182    }
183
184    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<ArrayRef> {
185        GetItem
186            .try_new_array(self.len(), field_name.into(), [self.clone()])?
187            .optimize()
188    }
189
190    fn is_null(&self) -> VortexResult<ArrayRef> {
191        IsNull
192            .try_new_array(self.len(), EmptyOptions, [self.clone()])?
193            .optimize()
194    }
195
196    fn is_not_null(&self) -> VortexResult<ArrayRef> {
197        IsNotNull
198            .try_new_array(self.len(), EmptyOptions, [self.clone()])?
199            .optimize()
200    }
201
202    fn mask(self, mask: ArrayRef) -> VortexResult<ArrayRef> {
203        Mask.try_new_array(self.len(), EmptyOptions, [self, mask])?
204            .optimize()
205    }
206
207    fn not(&self) -> VortexResult<ArrayRef> {
208        Not.try_new_array(self.len(), EmptyOptions, [self.clone()])?
209            .optimize()
210    }
211
212    fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult<ArrayRef> {
213        Zip.try_new_array(self.len(), EmptyOptions, [if_true, if_false, self.clone()])
214    }
215
216    fn list_contains(&self, value: ArrayRef) -> VortexResult<ArrayRef> {
217        ListContains
218            .try_new_array(self.len(), EmptyOptions, [self.clone(), value])?
219            .optimize()
220    }
221
222    fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult<ArrayRef> {
223        Binary
224            .try_new_array(self.len(), op, [self.clone(), rhs])?
225            .optimize()
226    }
227
228    fn between(
229        self,
230        lower: ArrayRef,
231        upper: ArrayRef,
232        options: BetweenOptions,
233    ) -> VortexResult<ArrayRef> {
234        Between
235            .try_new_array(self.len(), options, [self, lower, upper])?
236            .optimize()
237    }
238}