Skip to main content

vortex_array/arrays/varbin/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_error::VortexExpect;
5use vortex_error::VortexResult;
6use vortex_error::vortex_bail;
7use vortex_error::vortex_err;
8use vortex_error::vortex_panic;
9
10use crate::ArrayRef;
11use crate::DeserializeMetadata;
12use crate::ExecutionCtx;
13use crate::ExecutionResult;
14use crate::IntoArray;
15use crate::ProstMetadata;
16use crate::SerializeMetadata;
17use crate::arrays::VarBinArray;
18use crate::buffer::BufferHandle;
19use crate::dtype::DType;
20use crate::dtype::Nullability;
21use crate::dtype::PType;
22use crate::serde::ArrayChildren;
23use crate::validity::Validity;
24use crate::vtable;
25use crate::vtable::ArrayId;
26use crate::vtable::VTable;
27use crate::vtable::ValidityVTableFromValidityHelper;
28use crate::vtable::validity_nchildren;
29use crate::vtable::validity_to_child;
30mod canonical;
31mod kernel;
32mod operations;
33mod validity;
34use std::hash::Hash;
35use std::sync::Arc;
36
37use canonical::varbin_to_canonical;
38use kernel::PARENT_KERNELS;
39use vortex_session::VortexSession;
40
41use crate::Precision;
42use crate::arrays::varbin::compute::rules::PARENT_RULES;
43use crate::hash::ArrayEq;
44use crate::hash::ArrayHash;
45use crate::stats::StatsSetRef;
46
// Invokes the crate's `vtable!` macro (brought into scope by `use crate::vtable;`) for `VarBin`.
// NOTE(review): the macro expansion is not visible in this file — presumably it generates the
// glue that ties the `VarBin` marker type to `VarBinArray`; confirm against the macro definition.
vtable!(VarBin);
48
49#[derive(Clone, prost::Message)]
50pub struct VarBinMetadata {
51    #[prost(enumeration = "PType", tag = "1")]
52    pub(crate) offsets_ptype: i32,
53}
54
55impl VTable for VarBin {
56    type Array = VarBinArray;
57
58    type Metadata = ProstMetadata<VarBinMetadata>;
59    type OperationsVTable = Self;
60    type ValidityVTable = ValidityVTableFromValidityHelper;
61    fn vtable(_array: &Self::Array) -> &Self {
62        &VarBin
63    }
64
65    fn id(&self) -> ArrayId {
66        Self::ID
67    }
68
69    fn len(array: &VarBinArray) -> usize {
70        array.offsets().len().saturating_sub(1)
71    }
72
73    fn dtype(array: &VarBinArray) -> &DType {
74        &array.dtype
75    }
76
77    fn stats(array: &VarBinArray) -> StatsSetRef<'_> {
78        array.stats_set.to_ref(array.as_ref())
79    }
80
81    fn array_hash<H: std::hash::Hasher>(array: &VarBinArray, state: &mut H, precision: Precision) {
82        array.dtype.hash(state);
83        array.bytes().array_hash(state, precision);
84        array.offsets().array_hash(state, precision);
85        array.validity.array_hash(state, precision);
86    }
87
88    fn array_eq(array: &VarBinArray, other: &VarBinArray, precision: Precision) -> bool {
89        array.dtype == other.dtype
90            && array.bytes().array_eq(other.bytes(), precision)
91            && array.offsets().array_eq(other.offsets(), precision)
92            && array.validity.array_eq(&other.validity, precision)
93    }
94
95    fn nbuffers(_array: &VarBinArray) -> usize {
96        1
97    }
98
99    fn buffer(array: &VarBinArray, idx: usize) -> BufferHandle {
100        match idx {
101            0 => array.bytes_handle().clone(),
102            _ => vortex_panic!("VarBinArray buffer index {idx} out of bounds"),
103        }
104    }
105
106    fn buffer_name(_array: &VarBinArray, idx: usize) -> Option<String> {
107        match idx {
108            0 => Some("bytes".to_string()),
109            _ => vortex_panic!("VarBinArray buffer_name index {idx} out of bounds"),
110        }
111    }
112
113    fn nchildren(array: &VarBinArray) -> usize {
114        1 + validity_nchildren(&array.validity)
115    }
116
117    fn child(array: &VarBinArray, idx: usize) -> ArrayRef {
118        match idx {
119            0 => array.offsets().clone(),
120            1 => validity_to_child(&array.validity, array.len())
121                .vortex_expect("VarBinArray validity child out of bounds"),
122            _ => vortex_panic!("VarBinArray child index {idx} out of bounds"),
123        }
124    }
125
126    fn child_name(_array: &VarBinArray, idx: usize) -> String {
127        match idx {
128            0 => "offsets".to_string(),
129            1 => "validity".to_string(),
130            _ => vortex_panic!("VarBinArray child_name index {idx} out of bounds"),
131        }
132    }
133
134    fn metadata(array: &VarBinArray) -> VortexResult<Self::Metadata> {
135        Ok(ProstMetadata(VarBinMetadata {
136            offsets_ptype: PType::try_from(array.offsets().dtype())
137                .vortex_expect("Must be a valid PType") as i32,
138        }))
139    }
140
141    fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
142        Ok(Some(metadata.serialize()))
143    }
144
145    fn deserialize(
146        bytes: &[u8],
147        _dtype: &DType,
148        _len: usize,
149        _buffers: &[BufferHandle],
150        _session: &VortexSession,
151    ) -> VortexResult<Self::Metadata> {
152        Ok(ProstMetadata(ProstMetadata::<VarBinMetadata>::deserialize(
153            bytes,
154        )?))
155    }
156
157    fn build(
158        dtype: &DType,
159        len: usize,
160        metadata: &Self::Metadata,
161        buffers: &[BufferHandle],
162        children: &dyn ArrayChildren,
163    ) -> VortexResult<VarBinArray> {
164        let validity = if children.len() == 1 {
165            Validity::from(dtype.nullability())
166        } else if children.len() == 2 {
167            let validity = children.get(1, &Validity::DTYPE, len)?;
168            Validity::Array(validity)
169        } else {
170            vortex_bail!("Expected 1 or 2 children, got {}", children.len());
171        };
172
173        let offsets = children.get(
174            0,
175            &DType::Primitive(metadata.offsets_ptype(), Nullability::NonNullable),
176            len + 1,
177        )?;
178
179        if buffers.len() != 1 {
180            vortex_bail!("Expected 1 buffer, got {}", buffers.len());
181        }
182        let bytes = buffers[0].clone().try_to_host_sync()?;
183
184        VarBinArray::try_new(offsets, bytes, dtype.clone(), validity)
185    }
186
187    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
188        match children.len() {
189            1 => {
190                let [offsets]: [ArrayRef; 1] = children
191                    .try_into()
192                    .map_err(|_| vortex_err!("Failed to convert children to array"))?;
193                array.offsets = offsets;
194            }
195            2 => {
196                let [offsets, validity]: [ArrayRef; 2] = children
197                    .try_into()
198                    .map_err(|_| vortex_err!("Failed to convert children to array"))?;
199                array.offsets = offsets;
200                array.validity = Validity::Array(validity);
201            }
202            _ => vortex_bail!(
203                "VarBinArray expects 1 or 2 children (offsets, validity?), got {}",
204                children.len()
205            ),
206        }
207        Ok(())
208    }
209
210    fn reduce_parent(
211        array: &Self::Array,
212        parent: &ArrayRef,
213        child_idx: usize,
214    ) -> VortexResult<Option<ArrayRef>> {
215        PARENT_RULES.evaluate(array, parent, child_idx)
216    }
217
218    fn execute_parent(
219        array: &Self::Array,
220        parent: &ArrayRef,
221        child_idx: usize,
222        ctx: &mut ExecutionCtx,
223    ) -> VortexResult<Option<ArrayRef>> {
224        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
225    }
226
227    fn execute(array: Arc<Self::Array>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
228        Ok(ExecutionResult::done(
229            varbin_to_canonical(&array, ctx)?.into_array(),
230        ))
231    }
232}
233
/// Zero-sized marker type for the `VarBin` encoding; the `VTable` trait is implemented on it.
#[derive(Debug, Clone)]
pub struct VarBin;
236
237impl VarBin {
238    pub const ID: ArrayId = ArrayId::new_ref("vortex.varbin");
239}