vortex_array/arrays/varbin/vtable/
mod.rs1use vortex_error::VortexExpect;
5use vortex_error::VortexResult;
6use vortex_error::vortex_bail;
7use vortex_error::vortex_err;
8use vortex_error::vortex_panic;
9
10use crate::ArrayRef;
11use crate::DeserializeMetadata;
12use crate::ExecutionCtx;
13use crate::ExecutionResult;
14use crate::IntoArray;
15use crate::ProstMetadata;
16use crate::SerializeMetadata;
17use crate::arrays::VarBinArray;
18use crate::buffer::BufferHandle;
19use crate::dtype::DType;
20use crate::dtype::Nullability;
21use crate::dtype::PType;
22use crate::serde::ArrayChildren;
23use crate::validity::Validity;
24use crate::vtable;
25use crate::vtable::ArrayId;
26use crate::vtable::VTable;
27use crate::vtable::ValidityVTableFromValidityHelper;
28use crate::vtable::validity_nchildren;
29use crate::vtable::validity_to_child;
30mod canonical;
31mod kernel;
32mod operations;
33mod validity;
34use std::hash::Hash;
35use std::sync::Arc;
36
37use canonical::varbin_to_canonical;
38use kernel::PARENT_KERNELS;
39use vortex_session::VortexSession;
40
41use crate::Precision;
42use crate::arrays::varbin::compute::rules::PARENT_RULES;
43use crate::hash::ArrayEq;
44use crate::hash::ArrayHash;
45use crate::stats::StatsSetRef;
46
47vtable!(VarBin);
48
49#[derive(Clone, prost::Message)]
50pub struct VarBinMetadata {
51 #[prost(enumeration = "PType", tag = "1")]
52 pub(crate) offsets_ptype: i32,
53}
54
55impl VTable for VarBin {
56 type Array = VarBinArray;
57
58 type Metadata = ProstMetadata<VarBinMetadata>;
59 type OperationsVTable = Self;
60 type ValidityVTable = ValidityVTableFromValidityHelper;
61 fn vtable(_array: &Self::Array) -> &Self {
62 &VarBin
63 }
64
65 fn id(&self) -> ArrayId {
66 Self::ID
67 }
68
69 fn len(array: &VarBinArray) -> usize {
70 array.offsets().len().saturating_sub(1)
71 }
72
73 fn dtype(array: &VarBinArray) -> &DType {
74 &array.dtype
75 }
76
77 fn stats(array: &VarBinArray) -> StatsSetRef<'_> {
78 array.stats_set.to_ref(array.as_ref())
79 }
80
81 fn array_hash<H: std::hash::Hasher>(array: &VarBinArray, state: &mut H, precision: Precision) {
82 array.dtype.hash(state);
83 array.bytes().array_hash(state, precision);
84 array.offsets().array_hash(state, precision);
85 array.validity.array_hash(state, precision);
86 }
87
88 fn array_eq(array: &VarBinArray, other: &VarBinArray, precision: Precision) -> bool {
89 array.dtype == other.dtype
90 && array.bytes().array_eq(other.bytes(), precision)
91 && array.offsets().array_eq(other.offsets(), precision)
92 && array.validity.array_eq(&other.validity, precision)
93 }
94
95 fn nbuffers(_array: &VarBinArray) -> usize {
96 1
97 }
98
99 fn buffer(array: &VarBinArray, idx: usize) -> BufferHandle {
100 match idx {
101 0 => array.bytes_handle().clone(),
102 _ => vortex_panic!("VarBinArray buffer index {idx} out of bounds"),
103 }
104 }
105
106 fn buffer_name(_array: &VarBinArray, idx: usize) -> Option<String> {
107 match idx {
108 0 => Some("bytes".to_string()),
109 _ => vortex_panic!("VarBinArray buffer_name index {idx} out of bounds"),
110 }
111 }
112
113 fn nchildren(array: &VarBinArray) -> usize {
114 1 + validity_nchildren(&array.validity)
115 }
116
117 fn child(array: &VarBinArray, idx: usize) -> ArrayRef {
118 match idx {
119 0 => array.offsets().clone(),
120 1 => validity_to_child(&array.validity, array.len())
121 .vortex_expect("VarBinArray validity child out of bounds"),
122 _ => vortex_panic!("VarBinArray child index {idx} out of bounds"),
123 }
124 }
125
126 fn child_name(_array: &VarBinArray, idx: usize) -> String {
127 match idx {
128 0 => "offsets".to_string(),
129 1 => "validity".to_string(),
130 _ => vortex_panic!("VarBinArray child_name index {idx} out of bounds"),
131 }
132 }
133
134 fn metadata(array: &VarBinArray) -> VortexResult<Self::Metadata> {
135 Ok(ProstMetadata(VarBinMetadata {
136 offsets_ptype: PType::try_from(array.offsets().dtype())
137 .vortex_expect("Must be a valid PType") as i32,
138 }))
139 }
140
141 fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
142 Ok(Some(metadata.serialize()))
143 }
144
145 fn deserialize(
146 bytes: &[u8],
147 _dtype: &DType,
148 _len: usize,
149 _buffers: &[BufferHandle],
150 _session: &VortexSession,
151 ) -> VortexResult<Self::Metadata> {
152 Ok(ProstMetadata(ProstMetadata::<VarBinMetadata>::deserialize(
153 bytes,
154 )?))
155 }
156
157 fn build(
158 dtype: &DType,
159 len: usize,
160 metadata: &Self::Metadata,
161 buffers: &[BufferHandle],
162 children: &dyn ArrayChildren,
163 ) -> VortexResult<VarBinArray> {
164 let validity = if children.len() == 1 {
165 Validity::from(dtype.nullability())
166 } else if children.len() == 2 {
167 let validity = children.get(1, &Validity::DTYPE, len)?;
168 Validity::Array(validity)
169 } else {
170 vortex_bail!("Expected 1 or 2 children, got {}", children.len());
171 };
172
173 let offsets = children.get(
174 0,
175 &DType::Primitive(metadata.offsets_ptype(), Nullability::NonNullable),
176 len + 1,
177 )?;
178
179 if buffers.len() != 1 {
180 vortex_bail!("Expected 1 buffer, got {}", buffers.len());
181 }
182 let bytes = buffers[0].clone().try_to_host_sync()?;
183
184 VarBinArray::try_new(offsets, bytes, dtype.clone(), validity)
185 }
186
187 fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
188 match children.len() {
189 1 => {
190 let [offsets]: [ArrayRef; 1] = children
191 .try_into()
192 .map_err(|_| vortex_err!("Failed to convert children to array"))?;
193 array.offsets = offsets;
194 }
195 2 => {
196 let [offsets, validity]: [ArrayRef; 2] = children
197 .try_into()
198 .map_err(|_| vortex_err!("Failed to convert children to array"))?;
199 array.offsets = offsets;
200 array.validity = Validity::Array(validity);
201 }
202 _ => vortex_bail!(
203 "VarBinArray expects 1 or 2 children (offsets, validity?), got {}",
204 children.len()
205 ),
206 }
207 Ok(())
208 }
209
210 fn reduce_parent(
211 array: &Self::Array,
212 parent: &ArrayRef,
213 child_idx: usize,
214 ) -> VortexResult<Option<ArrayRef>> {
215 PARENT_RULES.evaluate(array, parent, child_idx)
216 }
217
218 fn execute_parent(
219 array: &Self::Array,
220 parent: &ArrayRef,
221 child_idx: usize,
222 ctx: &mut ExecutionCtx,
223 ) -> VortexResult<Option<ArrayRef>> {
224 PARENT_KERNELS.execute(array, parent, child_idx, ctx)
225 }
226
227 fn execute(array: Arc<Self::Array>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
228 Ok(ExecutionResult::done(
229 varbin_to_canonical(&array, ctx)?.into_array(),
230 ))
231 }
232}
233
/// Zero-sized marker type for the `VarBin` encoding.
///
/// It carries no data; it exists so that encoding-level behaviour can be
/// attached to it through trait and inherent `impl` blocks.
#[derive(Clone, Debug)]
pub struct VarBin;
236
237impl VarBin {
238 pub const ID: ArrayId = ArrayId::new_ref("vortex.varbin");
239}