vortex_array/arrays/dict/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_buffer::BufferHandle;
5use vortex_dtype::DType;
6use vortex_dtype::Nullability;
7use vortex_dtype::PType;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_err;
11
12use super::DictArray;
13use super::DictMetadata;
14use crate::DeserializeMetadata;
15use crate::ProstMetadata;
16use crate::SerializeMetadata;
17use crate::serde::ArrayChildren;
18use crate::vtable;
19use crate::vtable::ArrayId;
20use crate::vtable::ArrayVTable;
21use crate::vtable::ArrayVTableExt;
22use crate::vtable::NotSupported;
23use crate::vtable::VTable;
24
25mod array;
26mod canonical;
27mod encode;
28mod operations;
29mod validity;
30mod visitor;
31
// NOTE(review): presumably expands to the shared boilerplate that ties `DictArray`
// to `DictVTable` -- confirm against the definition of the `vtable!` macro.
32vtable!(Dict);
33
/// Unit marker type that carries the [`VTable`] implementation for dictionary arrays.
///
/// It holds no state; all behaviour lives in the trait impls attached to it.
34#[derive(Debug)]
35pub struct DictVTable;
36
37impl VTable for DictVTable {
38    type Array = DictArray;
39
40    type Metadata = ProstMetadata<DictMetadata>;
41
42    type ArrayVTable = Self;
43    type CanonicalVTable = Self;
44    type OperationsVTable = Self;
45    type ValidityVTable = Self;
46    type VisitorVTable = Self;
47    type ComputeVTable = NotSupported;
48    type EncodeVTable = Self;
49
50    fn id(&self) -> ArrayId {
51        ArrayId::new_ref("vortex.dict")
52    }
53
54    fn encoding(_array: &Self::Array) -> ArrayVTable {
55        DictVTable.as_vtable()
56    }
57
58    fn metadata(array: &DictArray) -> VortexResult<Self::Metadata> {
59        Ok(ProstMetadata(DictMetadata {
60            codes_ptype: PType::try_from(array.codes().dtype())? as i32,
61            values_len: u32::try_from(array.values().len()).map_err(|_| {
62                vortex_err!(
63                    "Dictionary values size {} overflowed u32",
64                    array.values().len()
65                )
66            })?,
67            is_nullable_codes: Some(array.codes().dtype().is_nullable()),
68            all_values_referenced: Some(array.all_values_referenced),
69        }))
70    }
71
72    fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
73        Ok(Some(metadata.serialize()))
74    }
75
76    fn deserialize(buffer: &[u8]) -> VortexResult<Self::Metadata> {
77        let metadata = <Self::Metadata as DeserializeMetadata>::deserialize(buffer)?;
78        Ok(ProstMetadata(metadata))
79    }
80
81    fn build(
82        &self,
83        dtype: &DType,
84        len: usize,
85        metadata: &Self::Metadata,
86        _buffers: &[BufferHandle],
87        children: &dyn ArrayChildren,
88    ) -> VortexResult<DictArray> {
89        if children.len() != 2 {
90            vortex_bail!(
91                "Expected 2 children for dict encoding, found {}",
92                children.len()
93            )
94        }
95        let codes_nullable = metadata
96            .is_nullable_codes
97            .map(Nullability::from)
98            // If no `is_nullable_codes` metadata use the nullability of the values
99            // (and whole array) as before.
100            .unwrap_or_else(|| dtype.nullability());
101        let codes_dtype = DType::Primitive(metadata.codes_ptype(), codes_nullable);
102        let codes = children.get(0, &codes_dtype, len)?;
103        let values = children.get(1, dtype, metadata.values_len as usize)?;
104        let all_values_referenced = metadata.all_values_referenced.unwrap_or(false);
105
106        // SAFETY: We've validated the metadata and children.
107        Ok(unsafe {
108            DictArray::new_unchecked(codes, values).set_all_values_referenced(all_values_referenced)
109        })
110    }
111}