Skip to main content

vortex_array/
builtins.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! A collection of built-in common scalar functions.
5//!
6//! It is expected that each Vortex integration may provide its own set of scalar functions with
7//! semantics that exactly match the underlying system (e.g. SQL engine, DataFrame library, etc).
8//!
9//! This set of functions should cover the basics, and in general leans towards the semantics of
10//! the equivalent Arrow compute function.
11
12use vortex_error::VortexResult;
13
14use crate::ArrayRef;
15use crate::IntoArray;
16use crate::arrays::ConstantArray;
17use crate::arrays::InterleaveArray;
18use crate::arrays::scalar_fn::ScalarFnFactoryExt;
19use crate::dtype::DType;
20use crate::dtype::FieldName;
21use crate::expr::Expression;
22use crate::optimizer::ArrayOptimizer;
23use crate::scalar::Scalar;
24use crate::scalar_fn::EmptyOptions;
25use crate::scalar_fn::ScalarFnVTableExt;
26use crate::scalar_fn::fns::between::Between;
27use crate::scalar_fn::fns::between::BetweenOptions;
28use crate::scalar_fn::fns::binary::Binary;
29use crate::scalar_fn::fns::cast::Cast;
30use crate::scalar_fn::fns::fill_null::FillNull;
31use crate::scalar_fn::fns::get_item::GetItem;
32use crate::scalar_fn::fns::is_not_null::IsNotNull;
33use crate::scalar_fn::fns::is_null::IsNull;
34use crate::scalar_fn::fns::list_contains::ListContains;
35use crate::scalar_fn::fns::mask::Mask;
36use crate::scalar_fn::fns::not::Not;
37use crate::scalar_fn::fns::operators::Operator;
38use crate::scalar_fn::fns::zip::Zip;
39
40/// A collection of built-in scalar functions that can be applied to expressions or arrays.
41pub trait ExprBuiltins: Sized {
42    /// Cast to the given data type.
43    fn cast(&self, dtype: DType) -> VortexResult<Expression>;
44
45    /// Replace null values with the given fill value.
46    fn fill_null(&self, fill_value: Expression) -> VortexResult<Expression>;
47
48    /// Get item by field name (for struct types).
49    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<Expression>;
50
51    /// Is null check.
52    fn is_null(&self) -> VortexResult<Expression>;
53
54    /// Is not null check.
55    fn is_not_null(&self) -> VortexResult<Expression>;
56
57    /// Mask the expression using the given boolean mask.
58    /// The resulting expression's validity is the intersection of the original expression's
59    /// validity.
60    fn mask(&self, mask: Expression) -> VortexResult<Expression>;
61
62    /// Boolean negation.
63    fn not(&self) -> VortexResult<Expression>;
64
65    /// Check if a list contains a value.
66    fn list_contains(&self, value: Expression) -> VortexResult<Expression>;
67
68    /// Conditional selection: `result[i] = if mask[i] then if_true[i] else if_false[i]`.
69    fn zip(&self, if_true: Expression, if_false: Expression) -> VortexResult<Expression>;
70
71    // TODO(joe): add an `interleave` expression builtin mirroring `ArrayBuiltins::interleave`.
72
73    /// Apply a binary operator to this expression and another.
74    fn binary(&self, rhs: Expression, op: Operator) -> VortexResult<Expression>;
75}
76
77impl ExprBuiltins for Expression {
78    fn cast(&self, dtype: DType) -> VortexResult<Expression> {
79        Cast.try_new_expr(dtype, [self.clone()])
80    }
81
82    fn fill_null(&self, fill_value: Expression) -> VortexResult<Expression> {
83        FillNull.try_new_expr(EmptyOptions, [self.clone(), fill_value])
84    }
85
86    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<Expression> {
87        GetItem.try_new_expr(field_name.into(), [self.clone()])
88    }
89
90    fn is_null(&self) -> VortexResult<Expression> {
91        IsNull.try_new_expr(EmptyOptions, [self.clone()])
92    }
93
94    fn is_not_null(&self) -> VortexResult<Expression> {
95        IsNotNull.try_new_expr(EmptyOptions, [self.clone()])
96    }
97
98    fn mask(&self, mask: Expression) -> VortexResult<Expression> {
99        Mask.try_new_expr(EmptyOptions, [self.clone(), mask])
100    }
101
102    fn not(&self) -> VortexResult<Expression> {
103        Not.try_new_expr(EmptyOptions, [self.clone()])
104    }
105
106    fn list_contains(&self, value: Expression) -> VortexResult<Expression> {
107        ListContains.try_new_expr(EmptyOptions, [self.clone(), value])
108    }
109
110    fn zip(&self, if_true: Expression, if_false: Expression) -> VortexResult<Expression> {
111        Zip.try_new_expr(EmptyOptions, [if_true, if_false, self.clone()])
112    }
113
114    fn binary(&self, rhs: Expression, op: Operator) -> VortexResult<Expression> {
115        Binary.try_new_expr(op, [self.clone(), rhs])
116    }
117}
118
119pub trait ArrayBuiltins: Sized {
120    /// Cast to the given data type.
121    fn cast(&self, dtype: DType) -> VortexResult<ArrayRef>;
122
123    /// Replace null values with the given fill value.
124    fn fill_null(&self, fill_value: impl Into<Scalar>) -> VortexResult<ArrayRef>;
125
126    /// Get item by field name (for struct types).
127    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<ArrayRef>;
128
129    /// Is null check.
130    fn is_null(&self) -> VortexResult<ArrayRef>;
131
132    /// Is not null check.
133    fn is_not_null(&self) -> VortexResult<ArrayRef>;
134
135    /// Mask the array using the given boolean mask.
136    /// The resulting array's validity is the intersection of the original array's validity
137    /// and the mask's validity.
138    fn mask(self, mask: ArrayRef) -> VortexResult<ArrayRef>;
139
140    /// Boolean negation.
141    fn not(&self) -> VortexResult<ArrayRef>;
142
143    /// Conditional selection: `result[i] = if mask[i] then if_true[i] else if_false[i]`.
144    fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult<ArrayRef>;
145
146    /// Random-access gather by `(array_index, row_index)`: output row `i` is taken from
147    /// `values[array_indices[i]][row_indices[i]]`, where `self` is the (non-nullable)
148    /// `array_indices` selector and `row_indices` names the position within the selected value.
149    /// See [`InterleaveArray`].
150    fn interleave(
151        &self,
152        values: impl IntoIterator<Item = ArrayRef>,
153        row_indices: ArrayRef,
154    ) -> VortexResult<ArrayRef>;
155
156    /// Check if a list contains a value.
157    fn list_contains(&self, value: ArrayRef) -> VortexResult<ArrayRef>;
158
159    /// Apply a binary operator to this array and another.
160    fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult<ArrayRef>;
161
162    /// Compare a values between lower </<= value </<= upper
163    fn between(
164        self,
165        lower: ArrayRef,
166        upper: ArrayRef,
167        options: BetweenOptions,
168    ) -> VortexResult<ArrayRef>;
169}
170
171impl ArrayBuiltins for ArrayRef {
172    fn cast(&self, dtype: DType) -> VortexResult<ArrayRef> {
173        if self.dtype() == &dtype {
174            return Ok(self.clone());
175        }
176        Cast.try_new_array(self.len(), dtype, [self.clone()])?
177            .optimize()
178    }
179
180    fn fill_null(&self, fill_value: impl Into<Scalar>) -> VortexResult<ArrayRef> {
181        let fill_value = fill_value.into();
182        if !self.dtype().is_nullable() {
183            return self.cast(fill_value.dtype().clone());
184        }
185        FillNull
186            .try_new_array(
187                self.len(),
188                EmptyOptions,
189                [
190                    self.clone(),
191                    ConstantArray::new(fill_value, self.len()).into_array(),
192                ],
193            )?
194            .optimize()
195    }
196
197    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<ArrayRef> {
198        GetItem
199            .try_new_array(self.len(), field_name.into(), [self.clone()])?
200            .optimize()
201    }
202
203    fn is_null(&self) -> VortexResult<ArrayRef> {
204        IsNull
205            .try_new_array(self.len(), EmptyOptions, [self.clone()])?
206            .optimize()
207    }
208
209    fn is_not_null(&self) -> VortexResult<ArrayRef> {
210        IsNotNull
211            .try_new_array(self.len(), EmptyOptions, [self.clone()])?
212            .optimize()
213    }
214
215    fn mask(self, mask: ArrayRef) -> VortexResult<ArrayRef> {
216        Mask.try_new_array(self.len(), EmptyOptions, [self, mask])?
217            .optimize()
218    }
219
220    fn not(&self) -> VortexResult<ArrayRef> {
221        Not.try_new_array(self.len(), EmptyOptions, [self.clone()])?
222            .optimize()
223    }
224
225    fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult<ArrayRef> {
226        Zip.try_new_array(self.len(), EmptyOptions, [if_true, if_false, self.clone()])
227    }
228
229    fn interleave(
230        &self,
231        values: impl IntoIterator<Item = ArrayRef>,
232        row_indices: ArrayRef,
233    ) -> VortexResult<ArrayRef> {
234        Ok(
235            InterleaveArray::try_new(values.into_iter().collect(), self.clone(), row_indices)?
236                .into_array(),
237        )
238    }
239
240    fn list_contains(&self, value: ArrayRef) -> VortexResult<ArrayRef> {
241        ListContains
242            .try_new_array(self.len(), EmptyOptions, [self.clone(), value])?
243            .optimize()
244    }
245
246    fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult<ArrayRef> {
247        Binary
248            .try_new_array(self.len(), op, [self.clone(), rhs])?
249            .optimize()
250    }
251
252    fn between(
253        self,
254        lower: ArrayRef,
255        upper: ArrayRef,
256        options: BetweenOptions,
257    ) -> VortexResult<ArrayRef> {
258        Between
259            .try_new_array(self.len(), options, [self, lower, upper])?
260            .optimize()
261    }
262}