Skip to main content

vortex_array/
builtins.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! A collection of built-in common scalar functions.
5//!
6//! It is expected that each Vortex integration may provide its own set of scalar functions with
7//! semantics that exactly match the underlying system (e.g. SQL engine, DataFrame library, etc).
8//!
9//! This set of functions should cover the basics, and in general leans towards the semantics of
10//! the equivalent Arrow compute function.
11
12use vortex_error::VortexResult;
13
14use crate::ArrayRef;
15use crate::IntoArray;
16use crate::arrays::ConstantArray;
17use crate::arrays::scalar_fn::ScalarFnArrayExt;
18use crate::dtype::DType;
19use crate::dtype::FieldName;
20use crate::expr::Expression;
21use crate::optimizer::ArrayOptimizer;
22use crate::scalar::Scalar;
23use crate::scalar_fn::EmptyOptions;
24use crate::scalar_fn::ScalarFnVTableExt;
25use crate::scalar_fn::fns::between::Between;
26use crate::scalar_fn::fns::between::BetweenOptions;
27use crate::scalar_fn::fns::binary::Binary;
28use crate::scalar_fn::fns::cast::Cast;
29use crate::scalar_fn::fns::fill_null::FillNull;
30use crate::scalar_fn::fns::get_item::GetItem;
31use crate::scalar_fn::fns::is_null::IsNull;
32use crate::scalar_fn::fns::list_contains::ListContains;
33use crate::scalar_fn::fns::mask::Mask;
34use crate::scalar_fn::fns::not::Not;
35use crate::scalar_fn::fns::operators::Operator;
36use crate::scalar_fn::fns::zip::Zip;
37
38/// A collection of built-in scalar functions that can be applied to expressions or arrays.
39pub trait ExprBuiltins: Sized {
40    /// Cast to the given data type.
41    fn cast(&self, dtype: DType) -> VortexResult<Expression>;
42
43    /// Replace null values with the given fill value.
44    fn fill_null(&self, fill_value: Expression) -> VortexResult<Expression>;
45
46    /// Get item by field name (for struct types).
47    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<Expression>;
48
49    /// Is null check.
50    fn is_null(&self) -> VortexResult<Expression>;
51
52    /// Mask the expression using the given boolean mask.
53    /// The resulting expression's validity is the intersection of the original expression's
54    /// validity.
55    fn mask(&self, mask: Expression) -> VortexResult<Expression>;
56
57    /// Boolean negation.
58    fn not(&self) -> VortexResult<Expression>;
59
60    /// Check if a list contains a value.
61    fn list_contains(&self, value: Expression) -> VortexResult<Expression>;
62
63    /// Conditional selection: `result[i] = if mask[i] then if_true[i] else if_false[i]`.
64    fn zip(&self, if_true: Expression, if_false: Expression) -> VortexResult<Expression>;
65
66    /// Apply a binary operator to this expression and another.
67    fn binary(&self, rhs: Expression, op: Operator) -> VortexResult<Expression>;
68}
69
70impl ExprBuiltins for Expression {
71    fn cast(&self, dtype: DType) -> VortexResult<Expression> {
72        Cast.try_new_expr(dtype, [self.clone()])
73    }
74
75    fn fill_null(&self, fill_value: Expression) -> VortexResult<Expression> {
76        FillNull.try_new_expr(EmptyOptions, [self.clone(), fill_value])
77    }
78
79    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<Expression> {
80        GetItem.try_new_expr(field_name.into(), [self.clone()])
81    }
82
83    fn is_null(&self) -> VortexResult<Expression> {
84        IsNull.try_new_expr(EmptyOptions, [self.clone()])
85    }
86
87    fn mask(&self, mask: Expression) -> VortexResult<Expression> {
88        Mask.try_new_expr(EmptyOptions, [self.clone(), mask])
89    }
90
91    fn not(&self) -> VortexResult<Expression> {
92        Not.try_new_expr(EmptyOptions, [self.clone()])
93    }
94
95    fn list_contains(&self, value: Expression) -> VortexResult<Expression> {
96        ListContains.try_new_expr(EmptyOptions, [self.clone(), value])
97    }
98
99    fn zip(&self, if_true: Expression, if_false: Expression) -> VortexResult<Expression> {
100        Zip.try_new_expr(EmptyOptions, [if_true, if_false, self.clone()])
101    }
102
103    fn binary(&self, rhs: Expression, op: Operator) -> VortexResult<Expression> {
104        Binary.try_new_expr(op, [self.clone(), rhs])
105    }
106}
107
108pub trait ArrayBuiltins: Sized {
109    /// Cast to the given data type.
110    fn cast(&self, dtype: DType) -> VortexResult<ArrayRef>;
111
112    /// Replace null values with the given fill value.
113    fn fill_null(&self, fill_value: impl Into<Scalar>) -> VortexResult<ArrayRef>;
114
115    /// Get item by field name (for struct types).
116    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<ArrayRef>;
117
118    /// Is null check.
119    fn is_null(&self) -> VortexResult<ArrayRef>;
120
121    /// Mask the array using the given boolean mask.
122    /// The resulting array's validity is the intersection of the original array's validity
123    /// and the mask's validity.
124    fn mask(self, mask: ArrayRef) -> VortexResult<ArrayRef>;
125
126    /// Boolean negation.
127    fn not(&self) -> VortexResult<ArrayRef>;
128
129    /// Conditional selection: `result[i] = if mask[i] then if_true[i] else if_false[i]`.
130    fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult<ArrayRef>;
131
132    /// Check if a list contains a value.
133    fn list_contains(&self, value: ArrayRef) -> VortexResult<ArrayRef>;
134
135    /// Apply a binary operator to this array and another.
136    fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult<ArrayRef>;
137
138    /// Compare a values between lower </<= value </<= upper
139    fn between(
140        self,
141        lower: ArrayRef,
142        upper: ArrayRef,
143        options: BetweenOptions,
144    ) -> VortexResult<ArrayRef>;
145}
146
147impl ArrayBuiltins for ArrayRef {
148    fn cast(&self, dtype: DType) -> VortexResult<ArrayRef> {
149        if self.dtype() == &dtype {
150            return Ok(self.clone());
151        }
152        Cast.try_new_array(self.len(), dtype, [self.clone()])?
153            .optimize()
154    }
155
156    fn fill_null(&self, fill_value: impl Into<Scalar>) -> VortexResult<ArrayRef> {
157        let fill_value = fill_value.into();
158        if !self.dtype().is_nullable() {
159            return self.cast(fill_value.dtype().clone());
160        }
161        FillNull
162            .try_new_array(
163                self.len(),
164                EmptyOptions,
165                [
166                    self.clone(),
167                    ConstantArray::new(fill_value, self.len()).into_array(),
168                ],
169            )?
170            .optimize()
171    }
172
173    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<ArrayRef> {
174        GetItem
175            .try_new_array(self.len(), field_name.into(), [self.clone()])?
176            .optimize()
177    }
178
179    fn is_null(&self) -> VortexResult<ArrayRef> {
180        IsNull
181            .try_new_array(self.len(), EmptyOptions, [self.clone()])?
182            .optimize()
183    }
184
185    fn mask(self, mask: ArrayRef) -> VortexResult<ArrayRef> {
186        Mask.try_new_array(self.len(), EmptyOptions, [self, mask])?
187            .optimize()
188    }
189
190    fn not(&self) -> VortexResult<ArrayRef> {
191        Not.try_new_array(self.len(), EmptyOptions, [self.clone()])?
192            .optimize()
193    }
194
195    fn zip(&self, if_true: ArrayRef, if_false: ArrayRef) -> VortexResult<ArrayRef> {
196        Zip.try_new_array(self.len(), EmptyOptions, [if_true, if_false, self.clone()])
197    }
198
199    fn list_contains(&self, value: ArrayRef) -> VortexResult<ArrayRef> {
200        ListContains
201            .try_new_array(self.len(), EmptyOptions, [self.clone(), value])?
202            .optimize()
203    }
204
205    fn binary(&self, rhs: ArrayRef, op: Operator) -> VortexResult<ArrayRef> {
206        Binary
207            .try_new_array(self.len(), op, [self.clone(), rhs])?
208            .optimize()
209    }
210
211    fn between(
212        self,
213        lower: ArrayRef,
214        upper: ArrayRef,
215        options: BetweenOptions,
216    ) -> VortexResult<ArrayRef> {
217        Between
218            .try_new_array(self.len(), options, [self, lower, upper])?
219            .optimize()
220    }
221}