Skip to main content

vortex_array/arrays/varbinview/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5use std::mem::size_of;
6use std::sync::Arc;
7
8use kernel::PARENT_KERNELS;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_err;
14use vortex_error::vortex_panic;
15use vortex_session::VortexSession;
16
17use crate::ArrayRef;
18use crate::EmptyMetadata;
19use crate::ExecutionCtx;
20use crate::Precision;
21use crate::arrays::BinaryView;
22use crate::arrays::varbinview::VarBinViewArray;
23use crate::arrays::varbinview::compute::rules::PARENT_RULES;
24use crate::buffer::BufferHandle;
25use crate::dtype::DType;
26use crate::hash::ArrayEq;
27use crate::hash::ArrayHash;
28use crate::serde::ArrayChildren;
29use crate::stats::StatsSetRef;
30use crate::validity::Validity;
31use crate::vtable;
32use crate::vtable::ArrayId;
33use crate::vtable::VTable;
34use crate::vtable::ValidityVTableFromValidityHelper;
35use crate::vtable::validity_nchildren;
36use crate::vtable::validity_to_child;
37mod kernel;
38mod operations;
39mod validity;
40vtable!(VarBinView);
41
/// Zero-sized marker type that carries the vtable implementation for
/// `VarBinViewArray`.
///
/// It holds no state; all behaviour lives in the trait implementations
/// attached to it.
#[derive(Debug)]
pub struct VarBinViewVTable;
44
45impl VarBinViewVTable {
46    pub const ID: ArrayId = ArrayId::new_ref("vortex.varbinview");
47}
48
49impl VTable for VarBinViewVTable {
50    type Array = VarBinViewArray;
51
52    type Metadata = EmptyMetadata;
53    type OperationsVTable = Self;
54    type ValidityVTable = ValidityVTableFromValidityHelper;
55    fn id(_array: &Self::Array) -> ArrayId {
56        Self::ID
57    }
58
59    fn len(array: &VarBinViewArray) -> usize {
60        array.views_handle().len() / size_of::<BinaryView>()
61    }
62
63    fn dtype(array: &VarBinViewArray) -> &DType {
64        &array.dtype
65    }
66
67    fn stats(array: &VarBinViewArray) -> StatsSetRef<'_> {
68        array.stats_set.to_ref(array.as_ref())
69    }
70
71    fn array_hash<H: std::hash::Hasher>(
72        array: &VarBinViewArray,
73        state: &mut H,
74        precision: Precision,
75    ) {
76        array.dtype.hash(state);
77        for buffer in array.buffers.iter() {
78            buffer.array_hash(state, precision);
79        }
80        array.views.array_hash(state, precision);
81        array.validity.array_hash(state, precision);
82    }
83
84    fn array_eq(array: &VarBinViewArray, other: &VarBinViewArray, precision: Precision) -> bool {
85        array.dtype == other.dtype
86            && array.buffers.len() == other.buffers.len()
87            && array
88                .buffers
89                .iter()
90                .zip(other.buffers.iter())
91                .all(|(a, b)| a.array_eq(b, precision))
92            && array.views.array_eq(&other.views, precision)
93            && array.validity.array_eq(&other.validity, precision)
94    }
95
96    fn nbuffers(array: &VarBinViewArray) -> usize {
97        array.buffers().len() + 1
98    }
99
100    fn buffer(array: &VarBinViewArray, idx: usize) -> BufferHandle {
101        let ndata = array.buffers().len();
102        if idx < ndata {
103            array.buffers()[idx].clone()
104        } else if idx == ndata {
105            array.views_handle().clone()
106        } else {
107            vortex_panic!("VarBinViewArray buffer index {idx} out of bounds")
108        }
109    }
110
111    fn buffer_name(array: &VarBinViewArray, idx: usize) -> Option<String> {
112        let ndata = array.buffers().len();
113        if idx < ndata {
114            Some(format!("buffer_{idx}"))
115        } else if idx == ndata {
116            Some("views".to_string())
117        } else {
118            vortex_panic!("VarBinViewArray buffer_name index {idx} out of bounds")
119        }
120    }
121
122    fn nchildren(array: &VarBinViewArray) -> usize {
123        validity_nchildren(&array.validity)
124    }
125
126    fn child(array: &VarBinViewArray, idx: usize) -> ArrayRef {
127        match idx {
128            0 => validity_to_child(&array.validity, array.len())
129                .vortex_expect("VarBinViewArray validity child out of bounds"),
130            _ => vortex_panic!("VarBinViewArray child index {idx} out of bounds"),
131        }
132    }
133
134    fn child_name(_array: &VarBinViewArray, idx: usize) -> String {
135        match idx {
136            0 => "validity".to_string(),
137            _ => vortex_panic!("VarBinViewArray child_name index {idx} out of bounds"),
138        }
139    }
140
141    fn metadata(_array: &VarBinViewArray) -> VortexResult<Self::Metadata> {
142        Ok(EmptyMetadata)
143    }
144
145    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
146        Ok(Some(vec![]))
147    }
148
149    fn deserialize(
150        _bytes: &[u8],
151        _dtype: &DType,
152        _len: usize,
153        _buffers: &[BufferHandle],
154        _session: &VortexSession,
155    ) -> VortexResult<Self::Metadata> {
156        Ok(EmptyMetadata)
157    }
158
159    fn build(
160        dtype: &DType,
161        len: usize,
162        _metadata: &Self::Metadata,
163        buffers: &[BufferHandle],
164        children: &dyn ArrayChildren,
165    ) -> VortexResult<VarBinViewArray> {
166        let Some((views_handle, data_handles)) = buffers.split_last() else {
167            vortex_bail!("Expected at least 1 buffer, got 0");
168        };
169
170        let validity = if children.is_empty() {
171            Validity::from(dtype.nullability())
172        } else if children.len() == 1 {
173            let validity = children.get(0, &Validity::DTYPE, len)?;
174            Validity::Array(validity)
175        } else {
176            vortex_bail!("Expected 0 or 1 children, got {}", children.len());
177        };
178
179        let views_nbytes = views_handle.len();
180        let expected_views_nbytes = len
181            .checked_mul(size_of::<BinaryView>())
182            .ok_or_else(|| vortex_err!("views byte length overflow for len={len}"))?;
183        if views_nbytes != expected_views_nbytes {
184            vortex_bail!(
185                "Expected views buffer length {} bytes, got {} bytes",
186                expected_views_nbytes,
187                views_nbytes
188            );
189        }
190
191        // If any buffer is on device, skip host validation and use try_new_handle.
192        if buffers.iter().any(|b| b.is_on_device()) {
193            return VarBinViewArray::try_new_handle(
194                views_handle.clone(),
195                Arc::from(data_handles.to_vec()),
196                dtype.clone(),
197                validity,
198            );
199        }
200
201        let data_buffers = data_handles
202            .iter()
203            .map(|b| b.as_host().clone())
204            .collect::<Vec<_>>();
205        let views = Buffer::<BinaryView>::from_byte_buffer(views_handle.clone().as_host().clone());
206
207        VarBinViewArray::try_new(views, Arc::from(data_buffers), dtype.clone(), validity)
208    }
209
210    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
211        match children.len() {
212            0 => {}
213            1 => {
214                let [validity]: [ArrayRef; 1] = children
215                    .try_into()
216                    .map_err(|_| vortex_err!("Failed to convert children to array"))?;
217                array.validity = Validity::Array(validity);
218            }
219            _ => vortex_bail!(
220                "VarBinViewArray expects 0 or 1 children (validity?), got {}",
221                children.len()
222            ),
223        }
224        Ok(())
225    }
226
227    fn reduce_parent(
228        array: &Self::Array,
229        parent: &ArrayRef,
230        child_idx: usize,
231    ) -> VortexResult<Option<ArrayRef>> {
232        PARENT_RULES.evaluate(array, parent, child_idx)
233    }
234
235    fn execute_parent(
236        array: &Self::Array,
237        parent: &ArrayRef,
238        child_idx: usize,
239        ctx: &mut ExecutionCtx,
240    ) -> VortexResult<Option<ArrayRef>> {
241        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
242    }
243
244    fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> {
245        Ok(array.to_array())
246    }
247}