vortex_array/arrays/varbin/
serde.rs1use std::fmt::Debug;
2
3use arrow_schema::DataType;
4use vortex_dtype::{DType, Nullability, PType};
5use vortex_error::{VortexExpect, VortexResult, vortex_bail};
6
7use super::VarBinEncoding;
8use crate::arrays::VarBinArray;
9use crate::arrow::{FromArrowArray, IntoArrowArray};
10use crate::serde::ArrayParts;
11use crate::validity::Validity;
12use crate::vtable::EncodingVTable;
13use crate::{
14 Array, ArrayBufferVisitor, ArrayChildVisitor, ArrayContext, ArrayRef, ArrayVisitorImpl,
15 Canonical, DeserializeMetadata, EncodingId, IntoArray, RkyvMetadata,
16};
17
18#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
19pub struct VarBinMetadata {
20 pub(crate) offsets_ptype: PType,
21}
22
23impl EncodingVTable for VarBinEncoding {
24 fn id(&self) -> EncodingId {
25 EncodingId::new_ref("vortex.varbin")
26 }
27
28 fn decode(
29 &self,
30 parts: &ArrayParts,
31 ctx: &ArrayContext,
32 dtype: DType,
33 len: usize,
34 ) -> VortexResult<ArrayRef> {
35 let metadata = RkyvMetadata::<VarBinMetadata>::deserialize(parts.metadata())?;
36
37 let validity = if parts.nchildren() == 1 {
38 Validity::from(dtype.nullability())
39 } else if parts.nchildren() == 2 {
40 let validity = parts.child(1).decode(ctx, Validity::DTYPE, len)?;
41 Validity::Array(validity)
42 } else {
43 vortex_bail!("Expected 1 or 2 children, got {}", parts.nchildren());
44 };
45
46 let offsets = parts.child(0).decode(
47 ctx,
48 DType::Primitive(metadata.offsets_ptype, Nullability::NonNullable),
49 len + 1,
50 )?;
51
52 if parts.nbuffers() != 1 {
53 vortex_bail!("Expected 1 buffer, got {}", parts.nbuffers());
54 }
55 let bytes = parts.buffer(0)?;
56
57 Ok(VarBinArray::try_new(offsets, bytes, dtype, validity)?.into_array())
58 }
59
60 fn encode(
61 &self,
62 input: &Canonical,
63 _like: Option<&dyn Array>,
64 ) -> VortexResult<Option<ArrayRef>> {
65 let arrow_array = input.clone().into_array().into_arrow_preferred()?;
66 let array = match arrow_array.data_type() {
67 DataType::Utf8View => arrow_cast::cast(arrow_array.as_ref(), &DataType::Utf8)?,
68 DataType::BinaryView => arrow_cast::cast(arrow_array.as_ref(), &DataType::Binary)?,
69 _ => unreachable!("VarBinArray must have Utf8 or Binary dtype"),
70 };
71 Ok(Some(ArrayRef::from_arrow(
72 array,
73 input.as_ref().dtype().nullability().into(),
74 )))
75 }
76}
77
78impl ArrayVisitorImpl<RkyvMetadata<VarBinMetadata>> for VarBinArray {
79 fn _visit_buffers(&self, visitor: &mut dyn ArrayBufferVisitor) {
80 visitor.visit_buffer(self.bytes()); }
82
83 fn _visit_children(&self, visitor: &mut dyn ArrayChildVisitor) {
84 visitor.visit_child("offsets", self.offsets());
85 visitor.visit_validity(self.validity(), self.len());
86 }
87
88 fn _metadata(&self) -> RkyvMetadata<VarBinMetadata> {
89 RkyvMetadata(VarBinMetadata {
90 offsets_ptype: PType::try_from(self.offsets().dtype())
91 .vortex_expect("Must be a valid PType"),
92 })
93 }
94}