vortex_array/arrays/varbin/
serde.rs

1use std::fmt::Debug;
2
3use arrow_schema::DataType;
4use vortex_dtype::{DType, Nullability, PType};
5use vortex_error::{VortexExpect, VortexResult, vortex_bail};
6
7use super::VarBinEncoding;
8use crate::arrays::VarBinArray;
9use crate::arrow::{FromArrowArray, IntoArrowArray};
10use crate::serde::ArrayParts;
11use crate::validity::Validity;
12use crate::vtable::EncodingVTable;
13use crate::{
14    Array, ArrayBufferVisitor, ArrayChildVisitor, ArrayContext, ArrayRef, ArrayVisitorImpl,
15    Canonical, DeserializeMetadata, EncodingId, IntoArray, RkyvMetadata,
16};
17
18#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
19pub struct VarBinMetadata {
20    pub(crate) offsets_ptype: PType,
21}
22
23impl EncodingVTable for VarBinEncoding {
24    fn id(&self) -> EncodingId {
25        EncodingId::new_ref("vortex.varbin")
26    }
27
28    fn decode(
29        &self,
30        parts: &ArrayParts,
31        ctx: &ArrayContext,
32        dtype: DType,
33        len: usize,
34    ) -> VortexResult<ArrayRef> {
35        let metadata = RkyvMetadata::<VarBinMetadata>::deserialize(parts.metadata())?;
36
37        let validity = if parts.nchildren() == 1 {
38            Validity::from(dtype.nullability())
39        } else if parts.nchildren() == 2 {
40            let validity = parts.child(1).decode(ctx, Validity::DTYPE, len)?;
41            Validity::Array(validity)
42        } else {
43            vortex_bail!("Expected 1 or 2 children, got {}", parts.nchildren());
44        };
45
46        let offsets = parts.child(0).decode(
47            ctx,
48            DType::Primitive(metadata.offsets_ptype, Nullability::NonNullable),
49            len + 1,
50        )?;
51
52        if parts.nbuffers() != 1 {
53            vortex_bail!("Expected 1 buffer, got {}", parts.nbuffers());
54        }
55        let bytes = parts.buffer(0)?;
56
57        Ok(VarBinArray::try_new(offsets, bytes, dtype, validity)?.into_array())
58    }
59
60    fn encode(
61        &self,
62        input: &Canonical,
63        _like: Option<&dyn Array>,
64    ) -> VortexResult<Option<ArrayRef>> {
65        let arrow_array = input.clone().into_array().into_arrow_preferred()?;
66        let array = match arrow_array.data_type() {
67            DataType::Utf8View => arrow_cast::cast(arrow_array.as_ref(), &DataType::Utf8)?,
68            DataType::BinaryView => arrow_cast::cast(arrow_array.as_ref(), &DataType::Binary)?,
69            _ => unreachable!("VarBinArray must have Utf8 or Binary dtype"),
70        };
71        Ok(Some(ArrayRef::from_arrow(
72            array,
73            input.as_ref().dtype().nullability().into(),
74        )))
75    }
76}
77
78impl ArrayVisitorImpl<RkyvMetadata<VarBinMetadata>> for VarBinArray {
79    fn _visit_buffers(&self, visitor: &mut dyn ArrayBufferVisitor) {
80        visitor.visit_buffer(self.bytes()); // TODO(ngates): sliced bytes?
81    }
82
83    fn _visit_children(&self, visitor: &mut dyn ArrayChildVisitor) {
84        visitor.visit_child("offsets", self.offsets());
85        visitor.visit_validity(self.validity(), self.len());
86    }
87
88    fn _metadata(&self) -> RkyvMetadata<VarBinMetadata> {
89        RkyvMetadata(VarBinMetadata {
90            offsets_ptype: PType::try_from(self.offsets().dtype())
91                .vortex_expect("Must be a valid PType"),
92        })
93    }
94}