vortex_array/expr/functions/
vtable.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9use std::mem::transmute;
10use std::ops::Deref;
11use std::sync::Arc;
12
13use arcref::ArcRef;
14use vortex_dtype::DType;
15use vortex_error::VortexExpect;
16use vortex_error::VortexResult;
17use vortex_error::vortex_bail;
18use vortex_vector::Datum;
19
20use crate::expr::Expression;
21use crate::expr::StatsCatalog;
22use crate::expr::functions::ArgName;
23use crate::expr::functions::FunctionId;
24use crate::expr::functions::execution::ExecutionArgs;
25use crate::expr::functions::scalar::ScalarFn;
26use crate::expr::stats::Stat;
27
28/// A non-object-safe vtable trait for scalar function types.
29///
30/// This trait should be implemented in order to define new scalar functions within Vortex.
31pub trait VTable: 'static + Send + Sync + Sized {
32    /// Any options for configuring the function's behaviour.
33    type Options: 'static + Send + Sync + Clone + PartialEq + Eq + Hash + fmt::Debug + fmt::Display;
34
35    /// The globally unique identifier for this function.
36    fn id(&self) -> FunctionId;
37
38    /// Serializes the options for a function instance.
39    fn serialize(&self, _options: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
40        Ok(None)
41    }
42
43    /// Deserializes the options for this function from a byte slice.
44    fn deserialize(&self, _bytes: &[u8]) -> VortexResult<Self::Options> {
45        vortex_bail!("Serialization is not supported for {}", self.id())
46    }
47
48    /// Returns the arity (number of arguments) for this function.
49    fn arity(&self, options: &Self::Options) -> Arity;
50
51    /// How the function behaves when one or more arguments are NULL.
52    ///
53    /// Most functions propagate NULL (any NULL argument produces NULL output).
54    /// Some functions have special NULL handling that can short-circuit
55    /// evaluation or treat NULL as a meaningful value.
56    ///
57    /// Required for correct NULL semantics; may also enable optimizations
58    /// when argument nullability is known from schema or statistics.
59    fn null_handling(&self, options: &Self::Options) -> NullHandling {
60        _ = options;
61        NullHandling::default()
62    }
63
64    /// Returns the display name of the nth argument for this function.
65    fn arg_name(&self, options: &Self::Options, arg_idx: usize) -> ArgName;
66
67    /// See [`Expression::stat_falsification`]
68    ///
69    /// Note that the falsification API will change in the future to instead use a `falsify`
70    /// expression along with push-down rules.
71    fn stat_falsification(
72        &self,
73        options: &Self::Options,
74        expr: &Expression,
75        catalog: &dyn StatsCatalog,
76    ) -> Option<Expression> {
77        _ = options;
78        _ = expr;
79        _ = catalog;
80        None
81    }
82
83    /// See [`Expression::stat_expression`]
84    ///
85    /// Note that the stat_expression API will change in the future such that layouts with pruning
86    /// capabilities perform their own mapping over statistics.
87    fn stat_expression(
88        &self,
89        options: &Self::Options,
90        expr: &Expression,
91        stat: Stat,
92        catalog: &dyn StatsCatalog,
93    ) -> Option<Expression> {
94        _ = options;
95        _ = expr;
96        _ = stat;
97        _ = catalog;
98        None
99    }
100
101    /// Computes the return [`DType`] given the argument types and function options.
102    fn return_dtype(&self, options: &Self::Options, arg_types: &[DType]) -> VortexResult<DType>;
103
104    /// Binds the function for execution over a specific set of inputs.
105    // TODO(ngates): in the future, we should return a kernel as a node in a physical plan and
106    //  continue to run further cost-based optimizations prior to execution.
107    fn execute(&self, _options: &Self::Options, _args: &ExecutionArgs) -> VortexResult<Datum>;
108}
109
/// The arity (number of arguments) of a function.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Arity {
    /// The function takes exactly this many arguments.
    Exact(usize),
    /// The function takes at least `min` arguments and, when `max` is `Some`, at most `max`.
    Variadic { min: usize, max: Option<usize> },
}

impl Arity {
    /// Reports whether `arg_count` arguments satisfy this arity.
    ///
    /// Both bounds of [`Arity::Variadic`] are inclusive; a `max` of `None` means there is no
    /// upper bound.
    pub fn matches(&self, arg_count: usize) -> bool {
        match *self {
            Arity::Exact(expected) => arg_count == expected,
            Arity::Variadic { min, max } => {
                let within_upper = match max {
                    Some(upper) => arg_count <= upper,
                    None => true,
                };
                arg_count >= min && within_upper
            }
        }
    }
}
136
/// How a function handles NULL arguments.
///
/// The [`Default`] value is [`NullHandling::Propagate`], the standard behavior for most
/// scalar functions. Functions with any other NULL semantics must report them explicitly
/// rather than rely on the default.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum NullHandling {
    /// NULL in any argument produces NULL output.
    ///
    /// This is standard SQL behavior for most scalar functions.
    /// Enables simplification when any argument is known to be NULL.
    #[default]
    Propagate,

    /// NULL is short-circuited when paired with the absorbing element.
    ///
    /// This is a special case where the absorbing element "wins" over NULL.
    ///
    /// # Examples
    /// - `AND_KLEENE(false, NULL)` → `false` (false absorbs NULL)
    /// - `OR_KLEENE(true, NULL)` → `true` (true absorbs NULL)
    AbsorbsNull,

    /// The function has special NULL semantics that don't follow
    /// simple propagation rules.
    ///
    /// This prevents any simplifications based on NULL arguments.
    ///
    /// # Examples
    /// - `IS NULL`, `IS NOT NULL`: NULL → true/false
    /// - `COALESCE`: returns first non-NULL argument
    /// - `NULLIF`: conditionally produces NULL
    Custom,
}
167
168/// An object-safe vtable for scalar functions that dispatches to the non-object-safe vtable.
169pub(crate) trait DynScalarFnVTable: 'static + Send + Sync {
170    fn as_any(&self) -> &dyn Any;
171
172    fn id(&self) -> FunctionId;
173
174    fn options_serialize(&self, options: &dyn Any) -> VortexResult<Option<Vec<u8>>>;
175    fn options_deserialize(&self, data: &[u8]) -> VortexResult<Box<dyn Any + Send + Sync>>;
176    fn options_clone(&self, options: &dyn Any) -> Box<dyn Any + Send + Sync>;
177    fn options_eq(&self, a: &dyn Any, b: &dyn Any) -> bool;
178    fn options_hash(&self, options: &dyn Any, hasher: &mut dyn Hasher);
179    fn options_display(&self, options: &dyn Any, fmt: &mut Formatter<'_>) -> fmt::Result;
180    fn options_debug(&self, options: &dyn Any, fmt: &mut Formatter<'_>) -> fmt::Result;
181
182    fn arity(&self, options: &dyn Any) -> Arity;
183    fn arg_name(&self, options: &dyn Any, arg_idx: usize) -> ArgName;
184    fn null_handling(&self, options: &dyn Any) -> NullHandling;
185
186    fn stat_falsification(
187        &self,
188        options: &dyn Any,
189        expr: &Expression,
190        catalog: &dyn StatsCatalog,
191    ) -> Option<Expression>;
192    fn stat_expression(
193        &self,
194        options: &dyn Any,
195        expr: &Expression,
196        stat: Stat,
197        catalog: &dyn StatsCatalog,
198    ) -> Option<Expression>;
199
200    fn return_dtype(&self, options: &dyn Any, arg_types: &[DType]) -> VortexResult<DType>;
201    fn execute(&self, options: &dyn Any, args: &ExecutionArgs) -> VortexResult<Datum>;
202}
203
204#[repr(transparent)]
205pub struct ScalarFnVTableAdapter<V>(V);
206impl<V: VTable> DynScalarFnVTable for ScalarFnVTableAdapter<V> {
207    fn as_any(&self) -> &dyn Any {
208        &self.0
209    }
210
211    fn id(&self) -> FunctionId {
212        V::id(&self.0)
213    }
214
215    fn options_serialize(&self, options: &dyn Any) -> VortexResult<Option<Vec<u8>>> {
216        V::serialize(&self.0, downcast::<V>(options))
217    }
218
219    fn options_deserialize(&self, data: &[u8]) -> VortexResult<Box<dyn Any + Send + Sync>> {
220        Ok(Box::new(V::deserialize(&self.0, data)?))
221    }
222
223    fn options_clone(&self, options: &dyn Any) -> Box<dyn Any + Send + Sync> {
224        Box::new(downcast::<V>(options).clone())
225    }
226
227    fn options_eq(&self, a: &dyn Any, b: &dyn Any) -> bool {
228        downcast::<V>(a) == downcast::<V>(b)
229    }
230
231    fn options_hash(&self, options: &dyn Any, mut hasher: &mut dyn Hasher) {
232        downcast::<V>(options).hash(&mut hasher);
233    }
234
235    fn options_display(&self, options: &dyn Any, f: &mut Formatter) -> fmt::Result {
236        fmt::Display::fmt(downcast::<V>(options), f)
237    }
238
239    fn options_debug(&self, options: &dyn Any, f: &mut Formatter) -> fmt::Result {
240        fmt::Debug::fmt(downcast::<V>(options), f)
241    }
242
243    fn arity(&self, options: &dyn Any) -> Arity {
244        V::arity(&self.0, downcast::<V>(options))
245    }
246
247    fn arg_name(&self, options: &dyn Any, arg_idx: usize) -> ArgName {
248        V::arg_name(&self.0, downcast::<V>(options), arg_idx)
249    }
250
251    fn null_handling(&self, options: &dyn Any) -> NullHandling {
252        V::null_handling(&self.0, downcast::<V>(options))
253    }
254
255    fn stat_falsification(
256        &self,
257        options: &dyn Any,
258        expr: &Expression,
259        catalog: &dyn StatsCatalog,
260    ) -> Option<Expression> {
261        V::stat_falsification(&self.0, downcast::<V>(options), expr, catalog)
262    }
263
264    fn stat_expression(
265        &self,
266        options: &dyn Any,
267        expr: &Expression,
268        stat: Stat,
269        catalog: &dyn StatsCatalog,
270    ) -> Option<Expression> {
271        V::stat_expression(&self.0, downcast::<V>(options), expr, stat, catalog)
272    }
273
274    fn return_dtype(&self, options: &dyn Any, arg_types: &[DType]) -> VortexResult<DType> {
275        V::return_dtype(&self.0, downcast::<V>(options), arg_types)
276    }
277
278    fn execute(&self, options: &dyn Any, args: &ExecutionArgs) -> VortexResult<Datum> {
279        // TODO(ngates): validate result matches expected dtype from ctx.
280        V::execute(&self.0, downcast::<V>(options), args)
281    }
282}
283
284fn downcast<V: VTable>(options: &dyn Any) -> &V::Options {
285    options
286        .downcast_ref::<V::Options>()
287        .vortex_expect("Invalid options type for scalar function")
288}
289
290/// A vtable for scalar functions, registered against a VortexSession.
291#[derive(Clone)]
292pub struct ScalarFnVTable(ArcRef<dyn DynScalarFnVTable>);
293
294impl ScalarFnVTable {
295    /// Creates a ScalarFnVTable from a VTable implementation.
296    pub fn new<F: VTable>(vtable: F) -> Self {
297        Self(ArcRef::new_arc(Arc::new(ScalarFnVTableAdapter(vtable))))
298    }
299
300    /// Creates a ScalarFnVTable from a 'static reference to a VTable.
301    pub fn new_static<F: VTable>(vtable: &'static F) -> Self {
302        // SAFETY: this transmute is safe since ScalarFnVTableAdapter is transparent over F.
303        let adapter: &'static ScalarFnVTableAdapter<F> =
304            unsafe { transmute::<&'static F, &'static ScalarFnVTableAdapter<F>>(vtable) };
305        Self(ArcRef::new_ref(adapter))
306    }
307
308    /// Crate-local function for accessing the underlying vtable.
309    pub(crate) fn as_dyn(&self) -> &dyn DynScalarFnVTable {
310        self.0.deref()
311    }
312
313    pub fn id(&self) -> FunctionId {
314        self.0.id()
315    }
316
317    pub fn as_any(&self) -> &dyn Any {
318        self.0.deref().as_any()
319    }
320
321    pub fn deserialize(&self, bytes: &[u8]) -> VortexResult<ScalarFn> {
322        let options = self.0.options_deserialize(bytes)?;
323        // SAFETY: options were created by this vtable.
324        Ok(unsafe { ScalarFn::new_unchecked(self.clone(), options) })
325    }
326}
327
328impl fmt::Debug for ScalarFnVTable {
329    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
330        f.debug_struct("ScalarFnVTable")
331            .field("id", &self.id())
332            .finish()
333    }
334}
335
/// An empty options type for functions that do not require any configuration.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct EmptyOptions;

/// There is nothing to show for empty options, so `Display` writes nothing.
impl fmt::Display for EmptyOptions {
    fn fmt(&self, _formatter: &mut Formatter<'_>) -> fmt::Result {
        fmt::Result::Ok(())
    }
}
344
345pub trait ScalarFnVTableExt: VTable {
346    /// Creates a new ScalarFn instance with the given options.
347    fn new_fn(self, options: Self::Options) -> ScalarFn {
348        ScalarFn::new(self, options)
349    }
350
351    /// Creates a new ScalarFn instance with the given options from a 'static vtable.
352    fn new_fn_static(&'static self, options: Self::Options) -> ScalarFn {
353        ScalarFn::new_static(self, options)
354    }
355}
356impl<V: VTable> ScalarFnVTableExt for V {}