Skip to main content

vortex_array/
builtins.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! A collection of built-in common scalar functions.
5//!
6//! It is expected that each Vortex integration may provide its own set of scalar functions with
7//! semantics that exactly match the underlying system (e.g. SQL engine, DataFrame library, etc).
8//!
9//! This set of functions should cover the basics, and in general leans towards the semantics of
10//! the equivalent Arrow compute function.
11
12use vortex_dtype::DType;
13use vortex_dtype::FieldName;
14use vortex_error::VortexResult;
15use vortex_session::VortexSession;
16
17use crate::Array;
18use crate::ArrayRef;
19use crate::ExecutionCtx;
20use crate::IntoArray;
21use crate::arrays::ConstantArray;
22use crate::arrays::ScalarFnArrayExt;
23use crate::expr::Between;
24use crate::expr::BetweenOptions;
25use crate::expr::Cast;
26use crate::expr::EmptyOptions;
27use crate::expr::Expression;
28use crate::expr::FillNull;
29use crate::expr::GetItem;
30use crate::expr::IsNull;
31use crate::expr::Mask;
32use crate::expr::Not;
33use crate::expr::VTableExt;
34use crate::expr::Zip;
35use crate::optimizer::ArrayOptimizer;
36use crate::scalar::Scalar;
37
38/// A collection of built-in scalar functions that can be applied to expressions or arrays.
39pub trait ExprBuiltins: Sized {
40    /// Cast to the given data type.
41    fn cast(&self, dtype: DType) -> VortexResult<Expression>;
42
43    /// Replace null values with the given fill value.
44    fn fill_null(&self, fill_value: Expression) -> VortexResult<Expression>;
45
46    /// Get item by field name (for struct types).
47    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<Expression>;
48
49    /// Is null check.
50    fn is_null(&self) -> VortexResult<Expression>;
51
52    /// Mask the expression using the given boolean mask.
53    /// The resulting expression's validity is the intersection of the original expression's
54    /// validity.
55    fn mask(&self, mask: Expression) -> VortexResult<Expression>;
56
57    /// Boolean negation.
58    fn not(&self) -> VortexResult<Expression>;
59
60    /// Conditional selection: `result[i] = if mask[i] then self[i] else if_false[i]`.
61    fn zip(&self, if_false: Expression, mask: Expression) -> VortexResult<Expression>;
62}
63
64impl ExprBuiltins for Expression {
65    fn cast(&self, dtype: DType) -> VortexResult<Expression> {
66        Cast.try_new_expr(dtype, [self.clone()])
67    }
68
69    fn fill_null(&self, fill_value: Expression) -> VortexResult<Expression> {
70        FillNull.try_new_expr(EmptyOptions, [self.clone(), fill_value])
71    }
72
73    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<Expression> {
74        GetItem.try_new_expr(field_name.into(), [self.clone()])
75    }
76
77    fn is_null(&self) -> VortexResult<Expression> {
78        IsNull.try_new_expr(EmptyOptions, [self.clone()])
79    }
80
81    fn mask(&self, mask: Expression) -> VortexResult<Expression> {
82        Mask.try_new_expr(EmptyOptions, [self.clone(), mask])
83    }
84
85    fn not(&self) -> VortexResult<Expression> {
86        Not.try_new_expr(EmptyOptions, [self.clone()])
87    }
88
89    fn zip(&self, if_false: Expression, mask: Expression) -> VortexResult<Expression> {
90        Zip.try_new_expr(EmptyOptions, [self.clone(), if_false, mask])
91    }
92}
93
94pub trait ArrayBuiltins: Sized {
95    /// Cast to the given data type.
96    fn cast(&self, dtype: DType) -> VortexResult<ArrayRef>;
97
98    /// Replace null values with the given fill value.
99    fn fill_null(&self, fill_value: impl Into<Scalar>) -> VortexResult<ArrayRef>;
100
101    /// Get item by field name (for struct types).
102    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<ArrayRef>;
103
104    /// Is null check.
105    fn is_null(&self) -> VortexResult<ArrayRef>;
106
107    /// Mask the array using the given boolean mask.
108    /// The resulting array's validity is the intersection of the original array's validity
109    /// and the mask's validity.
110    fn mask(self, mask: ArrayRef) -> VortexResult<ArrayRef>;
111
112    /// Boolean negation.
113    fn not(&self) -> VortexResult<ArrayRef>;
114
115    /// Conditional selection: `result[i] = if mask[i] then self[i] else if_false[i]`.
116    fn zip(&self, if_false: ArrayRef, mask: ArrayRef) -> VortexResult<ArrayRef>;
117
118    /// Compare a values between lower </<= value </<= upper
119    fn between(
120        self,
121        lower: ArrayRef,
122        upper: ArrayRef,
123        options: BetweenOptions,
124    ) -> VortexResult<ArrayRef>;
125}
126
127impl ArrayBuiltins for ArrayRef {
128    fn cast(&self, dtype: DType) -> VortexResult<ArrayRef> {
129        if self.dtype() == &dtype {
130            return Ok(self.clone());
131        }
132        Cast.try_new_array(self.len(), dtype, [self.clone()])?
133            .optimize()
134    }
135
136    fn fill_null(&self, fill_value: impl Into<Scalar>) -> VortexResult<ArrayRef> {
137        FillNull
138            .try_new_array(
139                self.len(),
140                EmptyOptions,
141                [
142                    self.clone(),
143                    ConstantArray::new(fill_value.into(), self.len()).into_array(),
144                ],
145            )?
146            .optimize()
147    }
148
149    fn get_item(&self, field_name: impl Into<FieldName>) -> VortexResult<ArrayRef> {
150        GetItem
151            .try_new_array(self.len(), field_name.into(), [self.clone()])?
152            .optimize()
153    }
154
155    fn is_null(&self) -> VortexResult<ArrayRef> {
156        IsNull
157            .try_new_array(self.len(), EmptyOptions, [self.clone()])?
158            .optimize()
159    }
160
161    fn mask(self, mask: ArrayRef) -> VortexResult<ArrayRef> {
162        Mask.try_new_array(self.len(), EmptyOptions, [self, mask])?
163            .optimize()
164    }
165
166    fn not(&self) -> VortexResult<ArrayRef> {
167        Not.try_new_array(self.len(), EmptyOptions, [self.clone()])?
168            .optimize()
169    }
170
171    fn zip(&self, if_false: ArrayRef, mask: ArrayRef) -> VortexResult<ArrayRef> {
172        let scalar_fn =
173            Zip.try_new_array(self.len(), EmptyOptions, [self.clone(), if_false, mask])?;
174        let mut ctx = ExecutionCtx::new(VortexSession::empty());
175        scalar_fn.execute::<ArrayRef>(&mut ctx)
176    }
177
178    fn between(
179        self,
180        lower: ArrayRef,
181        upper: ArrayRef,
182        options: BetweenOptions,
183    ) -> VortexResult<ArrayRef> {
184        Between
185            .try_new_array(self.len(), options, [self, lower, upper])?
186            .optimize()
187    }
188}