Skip to main content

vortex_array/arrays/dict/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use kernel::PARENT_KERNELS;
5use vortex_dtype::DType;
6use vortex_dtype::Nullability;
7use vortex_dtype::PType;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_err;
12use vortex_session::VortexSession;
13
14use super::DictArray;
15use super::DictMetadata;
16use super::take_canonical;
17use crate::Array;
18use crate::ArrayRef;
19use crate::Canonical;
20use crate::DeserializeMetadata;
21use crate::IntoArray;
22use crate::ProstMetadata;
23use crate::SerializeMetadata;
24use crate::arrays::ConstantArray;
25use crate::arrays::dict::compute::rules::PARENT_RULES;
26use crate::buffer::BufferHandle;
27use crate::executor::ExecutionCtx;
28use crate::scalar::Scalar;
29use crate::serde::ArrayChildren;
30use crate::vtable;
31use crate::vtable::ArrayId;
32use crate::vtable::VTable;
33
34mod array;
35mod kernel;
36mod operations;
37mod validity;
38mod visitor;
39
40vtable!(Dict);
41
42#[derive(Debug)]
43pub struct DictVTable;
44
45impl DictVTable {
46    pub const ID: ArrayId = ArrayId::new_ref("vortex.dict");
47}
48
49impl VTable for DictVTable {
50    type Array = DictArray;
51
52    type Metadata = ProstMetadata<DictMetadata>;
53
54    type ArrayVTable = Self;
55    type OperationsVTable = Self;
56    type ValidityVTable = Self;
57    type VisitorVTable = Self;
58
59    fn id(_array: &Self::Array) -> ArrayId {
60        Self::ID
61    }
62
63    fn metadata(array: &DictArray) -> VortexResult<Self::Metadata> {
64        Ok(ProstMetadata(DictMetadata {
65            codes_ptype: PType::try_from(array.codes().dtype())? as i32,
66            values_len: u32::try_from(array.values().len()).map_err(|_| {
67                vortex_err!(
68                    "Dictionary values size {} overflowed u32",
69                    array.values().len()
70                )
71            })?,
72            is_nullable_codes: Some(array.codes().dtype().is_nullable()),
73            all_values_referenced: Some(array.all_values_referenced),
74        }))
75    }
76
77    fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
78        Ok(Some(metadata.serialize()))
79    }
80
81    fn deserialize(
82        bytes: &[u8],
83        _dtype: &DType,
84        _len: usize,
85        _buffers: &[BufferHandle],
86        _session: &VortexSession,
87    ) -> VortexResult<Self::Metadata> {
88        let metadata = <Self::Metadata as DeserializeMetadata>::deserialize(bytes)?;
89        Ok(ProstMetadata(metadata))
90    }
91
92    fn build(
93        dtype: &DType,
94        len: usize,
95        metadata: &Self::Metadata,
96        _buffers: &[BufferHandle],
97        children: &dyn ArrayChildren,
98    ) -> VortexResult<DictArray> {
99        if children.len() != 2 {
100            vortex_bail!(
101                "Expected 2 children for dict encoding, found {}",
102                children.len()
103            )
104        }
105        let codes_nullable = metadata
106            .is_nullable_codes
107            .map(Nullability::from)
108            // If no `is_nullable_codes` metadata use the nullability of the values
109            // (and whole array) as before.
110            .unwrap_or_else(|| dtype.nullability());
111        let codes_dtype = DType::Primitive(metadata.codes_ptype(), codes_nullable);
112        let codes = children.get(0, &codes_dtype, len)?;
113        let values = children.get(1, dtype, metadata.values_len as usize)?;
114        let all_values_referenced = metadata.all_values_referenced.unwrap_or(false);
115
116        // SAFETY: We've validated the metadata and children.
117        Ok(unsafe {
118            DictArray::new_unchecked(codes, values).set_all_values_referenced(all_values_referenced)
119        })
120    }
121
122    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
123        vortex_ensure!(
124            children.len() == 2,
125            "DictArray expects exactly 2 children (codes, values), got {}",
126            children.len()
127        );
128        let [codes, values]: [ArrayRef; 2] = children
129            .try_into()
130            .map_err(|_| vortex_err!("Failed to convert children to array"))?;
131        array.codes = codes;
132        array.values = values;
133        Ok(())
134    }
135
136    fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> {
137        if let Some(canonical) = execute_fast_path(array, ctx)? {
138            return Ok(canonical);
139        }
140
141        // TODO(joe): if the values are constant return a constant
142        let values = array.values().clone().execute::<Canonical>(ctx)?;
143        let codes = array
144            .codes()
145            .clone()
146            .execute::<Canonical>(ctx)?
147            .into_primitive();
148
149        // TODO(ngates): if indices are sorted and unique (strict-sorted), then we should delegate to
150        //  the filter function since they're typically optimised for this case.
151        // TODO(ngates): if indices min is quite high, we could slice self and offset the indices
152        //  such that canonicalize does less work.
153
154        Ok(take_canonical(values, &codes, ctx)?.into_array())
155    }
156
157    fn reduce_parent(
158        array: &Self::Array,
159        parent: &ArrayRef,
160        child_idx: usize,
161    ) -> VortexResult<Option<ArrayRef>> {
162        PARENT_RULES.evaluate(array, parent, child_idx)
163    }
164
165    fn execute_parent(
166        array: &Self::Array,
167        parent: &ArrayRef,
168        child_idx: usize,
169        ctx: &mut ExecutionCtx,
170    ) -> VortexResult<Option<ArrayRef>> {
171        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
172    }
173}
174
175/// Check for fast-path execution conditions.
176pub(super) fn execute_fast_path(
177    array: &DictArray,
178    _ctx: &mut ExecutionCtx,
179) -> VortexResult<Option<ArrayRef>> {
180    // Empty array - nothing to do
181    if array.is_empty() {
182        let result_dtype = array
183            .dtype()
184            .union_nullability(array.codes().dtype().nullability());
185        return Ok(Some(Canonical::empty(&result_dtype).into_array()));
186    }
187
188    // All codes are null - result is all nulls
189    if array.codes.all_invalid()? {
190        return Ok(Some(
191            ConstantArray::new(Scalar::null(array.dtype().as_nullable()), array.codes.len())
192                .into_array(),
193        ));
194    }
195
196    Ok(None)
197}