vortex_array/arrays/dict/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_compute::take::Take;
5use vortex_dtype::DType;
6use vortex_dtype::Nullability;
7use vortex_dtype::PType;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_err;
12use vortex_vector::Vector;
13
14use super::DictArray;
15use super::DictMetadata;
16use crate::ArrayRef;
17use crate::DeserializeMetadata;
18use crate::ProstMetadata;
19use crate::SerializeMetadata;
20use crate::VectorExecutor;
21use crate::arrays::vtable::rules::PARENT_RULES;
22use crate::buffer::BufferHandle;
23use crate::executor::ExecutionCtx;
24use crate::serde::ArrayChildren;
25use crate::vtable;
26use crate::vtable::ArrayId;
27use crate::vtable::ArrayVTable;
28use crate::vtable::ArrayVTableExt;
29use crate::vtable::NotSupported;
30use crate::vtable::VTable;
31
32mod array;
33mod canonical;
34mod encode;
35mod operations;
36mod rules;
37mod validity;
38mod visitor;
39
40vtable!(Dict);
41
/// Zero-sized marker type that carries the [`VTable`] implementation for the
/// dictionary encoding (`"vortex.dict"`).
#[derive(Debug)]
pub struct DictVTable;
44
45impl VTable for DictVTable {
46    type Array = DictArray;
47
48    type Metadata = ProstMetadata<DictMetadata>;
49
50    type ArrayVTable = Self;
51    type CanonicalVTable = Self;
52    type OperationsVTable = Self;
53    type ValidityVTable = Self;
54    type VisitorVTable = Self;
55    type ComputeVTable = NotSupported;
56    type EncodeVTable = Self;
57
58    fn id(&self) -> ArrayId {
59        ArrayId::new_ref("vortex.dict")
60    }
61
62    fn encoding(_array: &Self::Array) -> ArrayVTable {
63        DictVTable.as_vtable()
64    }
65
66    fn metadata(array: &DictArray) -> VortexResult<Self::Metadata> {
67        Ok(ProstMetadata(DictMetadata {
68            codes_ptype: PType::try_from(array.codes().dtype())? as i32,
69            values_len: u32::try_from(array.values().len()).map_err(|_| {
70                vortex_err!(
71                    "Dictionary values size {} overflowed u32",
72                    array.values().len()
73                )
74            })?,
75            is_nullable_codes: Some(array.codes().dtype().is_nullable()),
76            all_values_referenced: Some(array.all_values_referenced),
77        }))
78    }
79
80    fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
81        Ok(Some(metadata.serialize()))
82    }
83
84    fn deserialize(buffer: &[u8]) -> VortexResult<Self::Metadata> {
85        let metadata = <Self::Metadata as DeserializeMetadata>::deserialize(buffer)?;
86        Ok(ProstMetadata(metadata))
87    }
88
89    fn build(
90        &self,
91        dtype: &DType,
92        len: usize,
93        metadata: &Self::Metadata,
94        _buffers: &[BufferHandle],
95        children: &dyn ArrayChildren,
96    ) -> VortexResult<DictArray> {
97        if children.len() != 2 {
98            vortex_bail!(
99                "Expected 2 children for dict encoding, found {}",
100                children.len()
101            )
102        }
103        let codes_nullable = metadata
104            .is_nullable_codes
105            .map(Nullability::from)
106            // If no `is_nullable_codes` metadata use the nullability of the values
107            // (and whole array) as before.
108            .unwrap_or_else(|| dtype.nullability());
109        let codes_dtype = DType::Primitive(metadata.codes_ptype(), codes_nullable);
110        let codes = children.get(0, &codes_dtype, len)?;
111        let values = children.get(1, dtype, metadata.values_len as usize)?;
112        let all_values_referenced = metadata.all_values_referenced.unwrap_or(false);
113
114        // SAFETY: We've validated the metadata and children.
115        Ok(unsafe {
116            DictArray::new_unchecked(codes, values).set_all_values_referenced(all_values_referenced)
117        })
118    }
119
120    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
121        vortex_ensure!(
122            children.len() == 2,
123            "DictArray expects exactly 2 children (codes, values), got {}",
124            children.len()
125        );
126        let [codes, values]: [ArrayRef; 2] = children
127            .try_into()
128            .map_err(|_| vortex_err!("Failed to convert children to array"))?;
129        array.codes = codes;
130        array.values = values;
131        Ok(())
132    }
133
134    fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<Vector> {
135        let values = array.values().execute(ctx)?;
136        let codes = array.codes().execute(ctx)?.into_primitive();
137        Ok(values.take(&codes))
138    }
139
140    fn reduce_parent(
141        array: &Self::Array,
142        parent: &ArrayRef,
143        child_idx: usize,
144    ) -> VortexResult<Option<ArrayRef>> {
145        PARENT_RULES.evaluate(array, parent, child_idx)
146    }
147}