vortex_array/vtable/
operator.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_error::{VortexResult, vortex_bail};
5use vortex_mask::Mask;
6use vortex_vector::Vector;
7
8use crate::ArrayRef;
9use crate::array::IntoArray;
10use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx};
11use crate::pipeline::PipelinedNode;
12use crate::vtable::{NotSupported, VTable};
13
14/// A vtable for the new operator-based array functionality. Eventually this vtable will be
15/// merged into the main `VTable`, but for now it is kept separate to allow for incremental
16/// adoption of the new operator framework.
17///
18/// See <https://github.com/vortex-data/vortex/pull/4726> for the operators RFC.
19pub trait OperatorVTable<V: VTable> {
20    /// Returns a canonical [`Vector`] containing the rows indicated by the given selection [`Mask`].
21    ///
22    /// The returned vector must be the appropriate one for the array's logical type (they are
23    /// one-to-one with Vortex `DType`s), and should respect the output nullability of the array.
24    ///
25    /// Debug builds will panic if the returned vector is of the wrong type, wrong length, or
26    /// incorrectly contains null values.
27    ///
28    /// Implementations should recursively call [`crate::ArrayOperator::execute_batch`] on child
29    /// arrays as needed.
30    // NOTE(ngates): in the future, we will add pipeline_execute to process chunks of 1k rows at
31    //  a time.
32    // TODO(ngates): we should fix array vtables such that we can take the array by ownership. This
33    //  allows for more efficient in-place compute, as well as avoids allocating additional memory
34    //  if the array's own memory can be reused by some reasonable allocator.
35    fn execute_batch(
36        array: &V::Array,
37        selection: &Mask,
38        _ctx: &mut dyn ExecutionCtx,
39    ) -> VortexResult<Vector> {
40        Self::bind(array, Some(&selection.clone().into_array()), &mut ())?.execute()
41    }
42
43    /// Downcast this array into a [`PipelinedNode`] if it supports pipelined execution.
44    ///
45    /// Each node is either a source node or a transformation node.
46    fn pipeline_node(_array: &V::Array) -> Option<&dyn PipelinedNode> {
47        None
48    }
49
50    /// Bind the array for execution in batch mode.
51    ///
52    /// This function should return a [`BatchKernelRef`] that can be used to execute the array in
53    /// batch mode.
54    ///
55    /// The selection parameter is a non-nullable boolean array that indicates which rows to
56    /// return. i.e. the result of the kernel should be a vector whose length is equal to the
57    /// true count of the selection array.
58    ///
59    /// The context should be used to bind child arrays in order to support common subtree
60    /// elimination. See also the utility functions on the `BindCtx` for efficiently extracting
61    /// common objects such as a [`vortex_mask::Mask`].
62    fn bind(
63        array: &V::Array,
64        _selection: Option<&ArrayRef>,
65        _ctx: &mut dyn BindCtx,
66    ) -> VortexResult<BatchKernelRef> {
67        vortex_bail!(
68            "Bind is not yet implemented for {} arrays",
69            array.encoding_id()
70        )
71    }
72
73    /// Attempt to optimize this array by analyzing its children.
74    ///
75    /// For example, if all the children are constant, this function should perform constant
76    /// folding and return a constant operator.
77    ///
78    /// This function should typically be implemented only for self-contained optimizations based
79    /// on child properties.
80    ///
81    /// Returns `None` if no optimization is possible.
82    fn reduce_children(_array: &V::Array) -> VortexResult<Option<ArrayRef>> {
83        Ok(None)
84    }
85
86    /// Attempt to push down a parent array through this node.
87    ///
88    /// The `child_idx` parameter indicates which child of the parent this array occupies.
89    /// For example, if the parent is a binary array, and this array is the left child,
90    /// then `child_idx` will be 0. If this array is the right child, then `child_idx` will be 1.
91    ///
92    /// The returned array will replace the parent in the tree.
93    ///
94    /// This function should typically be implemented for cross-array optimizations where the
95    /// child needs to adapt to the parent's requirements.
96    ///
97    /// Returns `None` if no optimization is possible.
98    fn reduce_parent(
99        _array: &V::Array,
100        _parent: &ArrayRef,
101        _child_idx: usize,
102    ) -> VortexResult<Option<ArrayRef>> {
103        Ok(None)
104    }
105}
106
107impl<V: VTable> OperatorVTable<V> for NotSupported {
108    fn bind(
109        array: &V::Array,
110        _selection: Option<&ArrayRef>,
111        _ctx: &mut dyn BindCtx,
112    ) -> VortexResult<BatchKernelRef> {
113        vortex_bail!(
114            "Pipeline execution not supported for this encoding: {:?}",
115            array.encoding_id(),
116        )
117    }
118}