vortex_array/arrays/varbin/
serde.rs

1use arrow_schema::DataType;
2use vortex_dtype::{DType, Nullability, PType};
3use vortex_error::{VortexExpect, VortexResult, vortex_bail};
4
5use super::VarBinEncoding;
6use crate::arrays::VarBinArray;
7use crate::arrow::{FromArrowArray, IntoArrowArray};
8use crate::serde::ArrayParts;
9use crate::validity::Validity;
10use crate::vtable::EncodingVTable;
11use crate::{
12    Array, ArrayBufferVisitor, ArrayChildVisitor, ArrayContext, ArrayRef, ArrayVisitorImpl,
13    Canonical, DeserializeMetadata, EncodingId, IntoArray, ProstMetadata,
14};
15
16#[derive(Clone, prost::Message)]
17pub struct VarBinMetadata {
18    #[prost(enumeration = "PType", tag = "1")]
19    pub(crate) offsets_ptype: i32,
20}
21
22impl EncodingVTable for VarBinEncoding {
23    fn id(&self) -> EncodingId {
24        EncodingId::new_ref("vortex.varbin")
25    }
26
27    fn decode(
28        &self,
29        parts: &ArrayParts,
30        ctx: &ArrayContext,
31        dtype: DType,
32        len: usize,
33    ) -> VortexResult<ArrayRef> {
34        let metadata = ProstMetadata::<VarBinMetadata>::deserialize(parts.metadata())?;
35
36        let validity = if parts.nchildren() == 1 {
37            Validity::from(dtype.nullability())
38        } else if parts.nchildren() == 2 {
39            let validity = parts.child(1).decode(ctx, Validity::DTYPE, len)?;
40            Validity::Array(validity)
41        } else {
42            vortex_bail!("Expected 1 or 2 children, got {}", parts.nchildren());
43        };
44
45        let offsets = parts.child(0).decode(
46            ctx,
47            DType::Primitive(metadata.offsets_ptype(), Nullability::NonNullable),
48            len + 1,
49        )?;
50
51        if parts.nbuffers() != 1 {
52            vortex_bail!("Expected 1 buffer, got {}", parts.nbuffers());
53        }
54        let bytes = parts.buffer(0)?;
55
56        Ok(VarBinArray::try_new(offsets, bytes, dtype, validity)?.into_array())
57    }
58
59    fn encode(
60        &self,
61        input: &Canonical,
62        _like: Option<&dyn Array>,
63    ) -> VortexResult<Option<ArrayRef>> {
64        let arrow_array = input.clone().into_array().into_arrow_preferred()?;
65        let array = match arrow_array.data_type() {
66            DataType::Utf8View => arrow_cast::cast(arrow_array.as_ref(), &DataType::Utf8)?,
67            DataType::BinaryView => arrow_cast::cast(arrow_array.as_ref(), &DataType::Binary)?,
68            _ => unreachable!("VarBinArray must have Utf8 or Binary dtype"),
69        };
70        Ok(Some(ArrayRef::from_arrow(
71            array,
72            input.as_ref().dtype().nullability().into(),
73        )))
74    }
75}
76
77impl ArrayVisitorImpl<ProstMetadata<VarBinMetadata>> for VarBinArray {
78    fn _visit_buffers(&self, visitor: &mut dyn ArrayBufferVisitor) {
79        visitor.visit_buffer(self.bytes()); // TODO(ngates): sliced bytes?
80    }
81
82    fn _visit_children(&self, visitor: &mut dyn ArrayChildVisitor) {
83        visitor.visit_child("offsets", self.offsets());
84        visitor.visit_validity(self.validity(), self.len());
85    }
86
87    fn _metadata(&self) -> ProstMetadata<VarBinMetadata> {
88        ProstMetadata(VarBinMetadata {
89            offsets_ptype: PType::try_from(self.offsets().dtype())
90                .vortex_expect("Must be a valid PType") as i32,
91        })
92    }
93}