Skip to main content

vortex_array/
builtins.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! A collection of built-in common scalar functions.
//!
//! It is expected that each Vortex integration may provide its own set of scalar functions with
//! semantics that exactly match the underlying system (e.g. SQL engine, DataFrame library, etc).
//!
//! This set of functions should cover the basics, and in general leans towards the semantics of
//! the equivalent Arrow compute function.
11
12use vortex_error::VortexResult;
13
14use crate::Array;
15use crate::ArrayRef;
16use crate::IntoArray;
17use crate::arrays::ConstantArray;
18use crate::arrays::ScalarFnArrayExt;
19use crate::dtype::DType;
20use crate::dtype::FieldName;
21use crate::expr::Expression;
22use crate::optimizer::ArrayOptimizer;
23use crate::scalar::Scalar;
24use crate::scalar_fn::EmptyOptions;
25use crate::scalar_fn::ScalarFnVTableExt;
26use crate::scalar_fn::fns::between::Between;
27use crate::scalar_fn::fns::between::BetweenOptions;
28use crate::scalar_fn::fns::binary::Binary;
29use crate::scalar_fn::fns::cast::Cast;
30use crate::scalar_fn::fns::fill_null::FillNull;
31use crate::scalar_fn::fns::get_item::GetItem;
32use crate::scalar_fn::fns::is_null::IsNull;
33use crate::scalar_fn::fns::list_contains::ListContains;
34use crate::scalar_fn::fns::mask::Mask;
35use crate::scalar_fn::fns::not::Not;
36use crate::scalar_fn::fns::operators::Operator;
37use crate::scalar_fn::fns::zip::Zip;
38
39/// A collection of built-in scalar functions that can be applied to expressions or arrays.
40pub trait ExprBuiltins: Sized {
41    /// Cast to the given data type.
42    fn cast(&self, dtype: DType) -> VortexResult<Expression>;
43
44    /// Replace null values with the given fill value.
45    fn fill_null(&self, fill_value: Expression) -> VortexResult<Expression>;
46
47    /// Get item by field name (for struct types).
48    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<Expression>;
49
50    /// Is null check.
51    fn is_null(&self) -> VortexResult<Expression>;
52
53    /// Mask the expression using the given boolean mask.
54    /// The resulting expression's validity is the intersection of the original expression's
55    /// validity.
56    fn mask(&self, mask: Expression) -> VortexResult<Expression>;
57
58    /// Boolean negation.
59    fn not(&self) -> VortexResult<Expression>;
60
61    /// Check if a list contains a value.
62    fn list_contains(&self, value: Expression) -> VortexResult<Expression>;
63
64    /// Conditional selection: `result[i] = if mask[i] then if_true[i] else if_false[i]`.
65    fn zip(&self, if_true: Expression, if_false: Expression) -> VortexResult<Expression>;
66
67    /// Apply a binary operator to this expression and another.
68    fn binary(&self, rhs: Expression, op: Operator) -> VortexResult<Expression>;
69}
70
71impl ExprBuiltins for Expression {
72    fn cast(&self, dtype: DType) -> VortexResult<Expression> {
73        Cast.try_new_expr(dtype, [self.clone()])
74    }
75
76    fn fill_null(&self, fill_value: Expression) -> VortexResult<Expression> {
77        FillNull.try_new_expr(EmptyOptions, [self.clone(), fill_value])
78    }
79
80    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<Expression> {
81        GetItem.try_new_expr(field_name.into(), [self.clone()])
82    }
83
84    fn is_null(&self) -> VortexResult<Expression> {
85        IsNull.try_new_expr(EmptyOptions, [self.clone()])
86    }
87
88    fn mask(&self, mask: Expression) -> VortexResult<Expression> {
89        Mask.try_new_expr(EmptyOptions, [self.clone(), mask])
90    }
91
92    fn not(&self) -> VortexResult<Expression> {
93        Not.try_new_expr(EmptyOptions, [self.clone()])
94    }
95
96    fn list_contains(&self, value: Expression) -> VortexResult<Expression> {
97        ListContains.try_new_expr(EmptyOptions, [self.clone(), value])
98    }
99
100    fn zip(&self, if_true: Expression, if_false: Expression) -> VortexResult<Expression> {
101        Zip.try_new_expr(EmptyOptions, [if_true, if_false, self.clone()])
102    }
103
104    fn binary(&self, rhs: Expression, op: Operator) -> VortexResult<Expression> {
105        Binary.try_new_expr(op, [self.clone(), rhs])
106    }
107}
108
109pub trait ArrayBuiltins: Sized {
110    /// Cast to the given data type.
111    fn cast(&self, dtype: DType) -> VortexResult<ArrayRef>;
112
113    /// Replace null values with the given fill value.
114    fn fill_null(&self, fill_value: impl Into<Scalar>) -> VortexResult<ArrayRef>;
115
116    /// Get item by field name (for struct types).
117    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<ArrayRef>;
118
119    /// Is null check.
120    fn is_null(&self) -> VortexResult<ArrayRef>;
121
122    /// Mask the array using the given boolean mask.
123    /// The resulting array's validity is the intersection of the original array's validity
124    /// and the mask's validity.
125    fn mask(self, mask: ArrayRef) -> VortexResult<ArrayRef>;
126
127    /// Boolean negation.
128    fn not(&self) -> VortexResult<ArrayRef>;
129
130    /// Conditional selection: `result[i] = if mask[i] then if_true[i] else if_false[i]`.
131    fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult<ArrayRef>;
132
133    /// Check if a list contains a value.
134    fn list_contains(&self, value: ArrayRef) -> VortexResult<ArrayRef>;
135
136    /// Apply a binary operator to this array and another.
137    fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult<ArrayRef>;
138
139    /// Compare a values between lower </<= value </<= upper
140    fn between(
141        self,
142        lower: ArrayRef,
143        upper: ArrayRef,
144        options: BetweenOptions,
145    ) -> VortexResult<ArrayRef>;
146}
147
148impl ArrayBuiltins for ArrayRef {
149    fn cast(&self, dtype: DType) -> VortexResult<ArrayRef> {
150        if self.dtype() == &dtype {
151            return Ok(self.clone());
152        }
153        Cast.try_new_array(self.len(), dtype, [self.clone()])?
154            .optimize()
155    }
156
157    fn fill_null(&self, fill_value: impl Into<Scalar>) -> VortexResult<ArrayRef> {
158        let fill_value = fill_value.into();
159        if !self.dtype().is_nullable() {
160            return self.cast(fill_value.dtype().clone());
161        }
162        FillNull
163            .try_new_array(
164                self.len(),
165                EmptyOptions,
166                [
167                    self.clone(),
168                    ConstantArray::new(fill_value, self.len()).into_array(),
169                ],
170            )?
171            .optimize()
172    }
173
174    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<ArrayRef> {
175        GetItem
176            .try_new_array(self.len(), field_name.into(), [self.clone()])?
177            .optimize()
178    }
179
180    fn is_null(&self) -> VortexResult<ArrayRef> {
181        IsNull
182            .try_new_array(self.len(), EmptyOptions, [self.clone()])?
183            .optimize()
184    }
185
186    fn mask(self, mask: ArrayRef) -> VortexResult<ArrayRef> {
187        Mask.try_new_array(self.len(), EmptyOptions, [self, mask])?
188            .optimize()
189    }
190
191    fn not(&self) -> VortexResult<ArrayRef> {
192        Not.try_new_array(self.len(), EmptyOptions, [self.clone()])?
193            .optimize()
194    }
195
196    fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult<ArrayRef> {
197        Zip.try_new_array(self.len(), EmptyOptions, [if_true, if_false, self.clone()])
198    }
199
200    fn list_contains(&self, value: ArrayRef) -> VortexResult<ArrayRef> {
201        ListContains
202            .try_new_array(self.len(), EmptyOptions, [self.clone(), value])?
203            .optimize()
204    }
205
206    fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult<ArrayRef> {
207        Binary
208            .try_new_array(self.len(), op, [self.clone(), rhs])?
209            .optimize()
210    }
211
212    fn between(
213        self,
214        lower: ArrayRef,
215        upper: ArrayRef,
216        options: BetweenOptions,
217    ) -> VortexResult<ArrayRef> {
218        Between
219            .try_new_array(self.len(), options, [self, lower, upper])?
220            .optimize()
221    }
222}