vortex_array/arrays/varbin/
serde.rs1use arrow_schema::DataType;
2use vortex_dtype::{DType, Nullability, PType};
3use vortex_error::{VortexExpect, VortexResult, vortex_bail};
4
5use super::VarBinEncoding;
6use crate::arrays::VarBinArray;
7use crate::arrow::{FromArrowArray, IntoArrowArray};
8use crate::serde::ArrayParts;
9use crate::validity::Validity;
10use crate::vtable::EncodingVTable;
11use crate::{
12 Array, ArrayBufferVisitor, ArrayChildVisitor, ArrayContext, ArrayRef, ArrayVisitorImpl,
13 Canonical, DeserializeMetadata, EncodingId, IntoArray, ProstMetadata,
14};
15
16#[derive(Clone, prost::Message)]
17pub struct VarBinMetadata {
18 #[prost(enumeration = "PType", tag = "1")]
19 pub(crate) offsets_ptype: i32,
20}
21
22impl EncodingVTable for VarBinEncoding {
23 fn id(&self) -> EncodingId {
24 EncodingId::new_ref("vortex.varbin")
25 }
26
27 fn decode(
28 &self,
29 parts: &ArrayParts,
30 ctx: &ArrayContext,
31 dtype: DType,
32 len: usize,
33 ) -> VortexResult<ArrayRef> {
34 let metadata = ProstMetadata::<VarBinMetadata>::deserialize(parts.metadata())?;
35
36 let validity = if parts.nchildren() == 1 {
37 Validity::from(dtype.nullability())
38 } else if parts.nchildren() == 2 {
39 let validity = parts.child(1).decode(ctx, Validity::DTYPE, len)?;
40 Validity::Array(validity)
41 } else {
42 vortex_bail!("Expected 1 or 2 children, got {}", parts.nchildren());
43 };
44
45 let offsets = parts.child(0).decode(
46 ctx,
47 DType::Primitive(metadata.offsets_ptype(), Nullability::NonNullable),
48 len + 1,
49 )?;
50
51 if parts.nbuffers() != 1 {
52 vortex_bail!("Expected 1 buffer, got {}", parts.nbuffers());
53 }
54 let bytes = parts.buffer(0)?;
55
56 Ok(VarBinArray::try_new(offsets, bytes, dtype, validity)?.into_array())
57 }
58
59 fn encode(
60 &self,
61 input: &Canonical,
62 _like: Option<&dyn Array>,
63 ) -> VortexResult<Option<ArrayRef>> {
64 let arrow_array = input.clone().into_array().into_arrow_preferred()?;
65 let array = match arrow_array.data_type() {
66 DataType::Utf8View => arrow_cast::cast(arrow_array.as_ref(), &DataType::Utf8)?,
67 DataType::BinaryView => arrow_cast::cast(arrow_array.as_ref(), &DataType::Binary)?,
68 _ => unreachable!("VarBinArray must have Utf8 or Binary dtype"),
69 };
70 Ok(Some(ArrayRef::from_arrow(
71 array,
72 input.as_ref().dtype().nullability().into(),
73 )))
74 }
75}
76
77impl ArrayVisitorImpl<ProstMetadata<VarBinMetadata>> for VarBinArray {
78 fn _visit_buffers(&self, visitor: &mut dyn ArrayBufferVisitor) {
79 visitor.visit_buffer(self.bytes()); }
81
82 fn _visit_children(&self, visitor: &mut dyn ArrayChildVisitor) {
83 visitor.visit_child("offsets", self.offsets());
84 visitor.visit_validity(self.validity(), self.len());
85 }
86
87 fn _metadata(&self) -> ProstMetadata<VarBinMetadata> {
88 ProstMetadata(VarBinMetadata {
89 offsets_ptype: PType::try_from(self.offsets().dtype())
90 .vortex_expect("Must be a valid PType") as i32,
91 })
92 }
93}