Skip to main content

vortex_array/arrays/varbinview/vtable/
mod.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5use std::mem::size_of;
6use std::sync::Arc;
7
8use kernel::PARENT_KERNELS;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_err;
14use vortex_error::vortex_panic;
15use vortex_session::VortexSession;
16
17use crate::ArrayRef;
18use crate::EmptyMetadata;
19use crate::ExecutionCtx;
20use crate::ExecutionResult;
21use crate::Precision;
22use crate::arrays::VarBinViewArray;
23use crate::arrays::varbinview::BinaryView;
24use crate::arrays::varbinview::compute::rules::PARENT_RULES;
25use crate::buffer::BufferHandle;
26use crate::dtype::DType;
27use crate::hash::ArrayEq;
28use crate::hash::ArrayHash;
29use crate::serde::ArrayChildren;
30use crate::stats::StatsSetRef;
31use crate::validity::Validity;
32use crate::vtable;
33use crate::vtable::ArrayId;
34use crate::vtable::VTable;
35use crate::vtable::ValidityVTableFromValidityHelper;
36use crate::vtable::validity_nchildren;
37use crate::vtable::validity_to_child;
38mod kernel;
39mod operations;
40mod validity;
41vtable!(VarBinView);
42
/// Unit marker type for the `VarBinView` encoding.
///
/// It carries no state; it exists to hang the encoding's trait implementations and its
/// identifier constant on.
#[derive(Clone, Debug)]
pub struct VarBinView;
45
46impl VarBinView {
47    pub const ID: ArrayId = ArrayId::new_ref("vortex.varbinview");
48}
49
50impl VTable for VarBinView {
51    type Array = VarBinViewArray;
52
53    type Metadata = EmptyMetadata;
54    type OperationsVTable = Self;
55    type ValidityVTable = ValidityVTableFromValidityHelper;
56    fn vtable(_array: &Self::Array) -> &Self {
57        &VarBinView
58    }
59
60    fn id(&self) -> ArrayId {
61        Self::ID
62    }
63
64    fn len(array: &VarBinViewArray) -> usize {
65        array.views_handle().len() / size_of::<BinaryView>()
66    }
67
68    fn dtype(array: &VarBinViewArray) -> &DType {
69        &array.dtype
70    }
71
72    fn stats(array: &VarBinViewArray) -> StatsSetRef<'_> {
73        array.stats_set.to_ref(array.as_ref())
74    }
75
76    fn array_hash<H: std::hash::Hasher>(
77        array: &VarBinViewArray,
78        state: &mut H,
79        precision: Precision,
80    ) {
81        array.dtype.hash(state);
82        for buffer in array.buffers.iter() {
83            buffer.array_hash(state, precision);
84        }
85        array.views.array_hash(state, precision);
86        array.validity.array_hash(state, precision);
87    }
88
89    fn array_eq(array: &VarBinViewArray, other: &VarBinViewArray, precision: Precision) -> bool {
90        array.dtype == other.dtype
91            && array.buffers.len() == other.buffers.len()
92            && array
93                .buffers
94                .iter()
95                .zip(other.buffers.iter())
96                .all(|(a, b)| a.array_eq(b, precision))
97            && array.views.array_eq(&other.views, precision)
98            && array.validity.array_eq(&other.validity, precision)
99    }
100
101    fn nbuffers(array: &VarBinViewArray) -> usize {
102        array.buffers().len() + 1
103    }
104
105    fn buffer(array: &VarBinViewArray, idx: usize) -> BufferHandle {
106        let ndata = array.buffers().len();
107        if idx < ndata {
108            array.buffers()[idx].clone()
109        } else if idx == ndata {
110            array.views_handle().clone()
111        } else {
112            vortex_panic!("VarBinViewArray buffer index {idx} out of bounds")
113        }
114    }
115
116    fn buffer_name(array: &VarBinViewArray, idx: usize) -> Option<String> {
117        let ndata = array.buffers().len();
118        if idx < ndata {
119            Some(format!("buffer_{idx}"))
120        } else if idx == ndata {
121            Some("views".to_string())
122        } else {
123            vortex_panic!("VarBinViewArray buffer_name index {idx} out of bounds")
124        }
125    }
126
127    fn nchildren(array: &VarBinViewArray) -> usize {
128        validity_nchildren(&array.validity)
129    }
130
131    fn child(array: &VarBinViewArray, idx: usize) -> ArrayRef {
132        match idx {
133            0 => validity_to_child(&array.validity, array.len())
134                .vortex_expect("VarBinViewArray validity child out of bounds"),
135            _ => vortex_panic!("VarBinViewArray child index {idx} out of bounds"),
136        }
137    }
138
139    fn child_name(_array: &VarBinViewArray, idx: usize) -> String {
140        match idx {
141            0 => "validity".to_string(),
142            _ => vortex_panic!("VarBinViewArray child_name index {idx} out of bounds"),
143        }
144    }
145
146    fn metadata(_array: &VarBinViewArray) -> VortexResult<Self::Metadata> {
147        Ok(EmptyMetadata)
148    }
149
150    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
151        Ok(Some(vec![]))
152    }
153
154    fn deserialize(
155        _bytes: &[u8],
156        _dtype: &DType,
157        _len: usize,
158        _buffers: &[BufferHandle],
159        _session: &VortexSession,
160    ) -> VortexResult<Self::Metadata> {
161        Ok(EmptyMetadata)
162    }
163
164    fn build(
165        dtype: &DType,
166        len: usize,
167        _metadata: &Self::Metadata,
168        buffers: &[BufferHandle],
169        children: &dyn ArrayChildren,
170    ) -> VortexResult<VarBinViewArray> {
171        let Some((views_handle, data_handles)) = buffers.split_last() else {
172            vortex_bail!("Expected at least 1 buffer, got 0");
173        };
174
175        let validity = if children.is_empty() {
176            Validity::from(dtype.nullability())
177        } else if children.len() == 1 {
178            let validity = children.get(0, &Validity::DTYPE, len)?;
179            Validity::Array(validity)
180        } else {
181            vortex_bail!("Expected 0 or 1 children, got {}", children.len());
182        };
183
184        let views_nbytes = views_handle.len();
185        let expected_views_nbytes = len
186            .checked_mul(size_of::<BinaryView>())
187            .ok_or_else(|| vortex_err!("views byte length overflow for len={len}"))?;
188        if views_nbytes != expected_views_nbytes {
189            vortex_bail!(
190                "Expected views buffer length {} bytes, got {} bytes",
191                expected_views_nbytes,
192                views_nbytes
193            );
194        }
195
196        // If any buffer is on device, skip host validation and use try_new_handle.
197        if buffers.iter().any(|b| b.is_on_device()) {
198            return VarBinViewArray::try_new_handle(
199                views_handle.clone(),
200                Arc::from(data_handles.to_vec()),
201                dtype.clone(),
202                validity,
203            );
204        }
205
206        let data_buffers = data_handles
207            .iter()
208            .map(|b| b.as_host().clone())
209            .collect::<Vec<_>>();
210        let views = Buffer::<BinaryView>::from_byte_buffer(views_handle.clone().as_host().clone());
211
212        VarBinViewArray::try_new(views, Arc::from(data_buffers), dtype.clone(), validity)
213    }
214
215    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
216        match children.len() {
217            0 => {}
218            1 => {
219                let [validity]: [ArrayRef; 1] = children
220                    .try_into()
221                    .map_err(|_| vortex_err!("Failed to convert children to array"))?;
222                array.validity = Validity::Array(validity);
223            }
224            _ => vortex_bail!(
225                "VarBinViewArray expects 0 or 1 children (validity?), got {}",
226                children.len()
227            ),
228        }
229        Ok(())
230    }
231
232    fn reduce_parent(
233        array: &Self::Array,
234        parent: &ArrayRef,
235        child_idx: usize,
236    ) -> VortexResult<Option<ArrayRef>> {
237        PARENT_RULES.evaluate(array, parent, child_idx)
238    }
239
240    fn execute_parent(
241        array: &Self::Array,
242        parent: &ArrayRef,
243        child_idx: usize,
244        ctx: &mut ExecutionCtx,
245    ) -> VortexResult<Option<ArrayRef>> {
246        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
247    }
248
249    fn execute(array: Arc<Self::Array>, _ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
250        Ok(ExecutionResult::done_upcast::<Self>(array))
251    }
252}