Skip to main content

vortex_array/arrays/varbinview/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5use std::mem::size_of;
6use std::sync::Arc;
7
8use kernel::PARENT_KERNELS;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_err;
14use vortex_error::vortex_panic;
15use vortex_session::VortexSession;
16
17use crate::ArrayRef;
18use crate::EmptyMetadata;
19use crate::ExecutionCtx;
20use crate::ExecutionStep;
21use crate::IntoArray;
22use crate::Precision;
23use crate::arrays::VarBinViewArray;
24use crate::arrays::varbinview::BinaryView;
25use crate::arrays::varbinview::compute::rules::PARENT_RULES;
26use crate::buffer::BufferHandle;
27use crate::dtype::DType;
28use crate::hash::ArrayEq;
29use crate::hash::ArrayHash;
30use crate::serde::ArrayChildren;
31use crate::stats::StatsSetRef;
32use crate::validity::Validity;
33use crate::vtable;
34use crate::vtable::ArrayId;
35use crate::vtable::VTable;
36use crate::vtable::ValidityVTableFromValidityHelper;
37use crate::vtable::validity_nchildren;
38use crate::vtable::validity_to_child;
39mod kernel;
40mod operations;
41mod validity;
42vtable!(VarBinView);
43
44#[derive(Debug)]
45pub struct VarBinViewVTable;
46
47impl VarBinViewVTable {
48    pub const ID: ArrayId = ArrayId::new_ref("vortex.varbinview");
49}
50
51impl VTable for VarBinViewVTable {
52    type Array = VarBinViewArray;
53
54    type Metadata = EmptyMetadata;
55    type OperationsVTable = Self;
56    type ValidityVTable = ValidityVTableFromValidityHelper;
57    fn id(_array: &Self::Array) -> ArrayId {
58        Self::ID
59    }
60
61    fn len(array: &VarBinViewArray) -> usize {
62        array.views_handle().len() / size_of::<BinaryView>()
63    }
64
65    fn dtype(array: &VarBinViewArray) -> &DType {
66        &array.dtype
67    }
68
69    fn stats(array: &VarBinViewArray) -> StatsSetRef<'_> {
70        array.stats_set.to_ref(array.as_ref())
71    }
72
73    fn array_hash<H: std::hash::Hasher>(
74        array: &VarBinViewArray,
75        state: &mut H,
76        precision: Precision,
77    ) {
78        array.dtype.hash(state);
79        for buffer in array.buffers.iter() {
80            buffer.array_hash(state, precision);
81        }
82        array.views.array_hash(state, precision);
83        array.validity.array_hash(state, precision);
84    }
85
86    fn array_eq(array: &VarBinViewArray, other: &VarBinViewArray, precision: Precision) -> bool {
87        array.dtype == other.dtype
88            && array.buffers.len() == other.buffers.len()
89            && array
90                .buffers
91                .iter()
92                .zip(other.buffers.iter())
93                .all(|(a, b)| a.array_eq(b, precision))
94            && array.views.array_eq(&other.views, precision)
95            && array.validity.array_eq(&other.validity, precision)
96    }
97
98    fn nbuffers(array: &VarBinViewArray) -> usize {
99        array.buffers().len() + 1
100    }
101
102    fn buffer(array: &VarBinViewArray, idx: usize) -> BufferHandle {
103        let ndata = array.buffers().len();
104        if idx < ndata {
105            array.buffers()[idx].clone()
106        } else if idx == ndata {
107            array.views_handle().clone()
108        } else {
109            vortex_panic!("VarBinViewArray buffer index {idx} out of bounds")
110        }
111    }
112
113    fn buffer_name(array: &VarBinViewArray, idx: usize) -> Option<String> {
114        let ndata = array.buffers().len();
115        if idx < ndata {
116            Some(format!("buffer_{idx}"))
117        } else if idx == ndata {
118            Some("views".to_string())
119        } else {
120            vortex_panic!("VarBinViewArray buffer_name index {idx} out of bounds")
121        }
122    }
123
124    fn nchildren(array: &VarBinViewArray) -> usize {
125        validity_nchildren(&array.validity)
126    }
127
128    fn child(array: &VarBinViewArray, idx: usize) -> ArrayRef {
129        match idx {
130            0 => validity_to_child(&array.validity, array.len())
131                .vortex_expect("VarBinViewArray validity child out of bounds"),
132            _ => vortex_panic!("VarBinViewArray child index {idx} out of bounds"),
133        }
134    }
135
136    fn child_name(_array: &VarBinViewArray, idx: usize) -> String {
137        match idx {
138            0 => "validity".to_string(),
139            _ => vortex_panic!("VarBinViewArray child_name index {idx} out of bounds"),
140        }
141    }
142
143    fn metadata(_array: &VarBinViewArray) -> VortexResult<Self::Metadata> {
144        Ok(EmptyMetadata)
145    }
146
147    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
148        Ok(Some(vec![]))
149    }
150
151    fn deserialize(
152        _bytes: &[u8],
153        _dtype: &DType,
154        _len: usize,
155        _buffers: &[BufferHandle],
156        _session: &VortexSession,
157    ) -> VortexResult<Self::Metadata> {
158        Ok(EmptyMetadata)
159    }
160
161    fn build(
162        dtype: &DType,
163        len: usize,
164        _metadata: &Self::Metadata,
165        buffers: &[BufferHandle],
166        children: &dyn ArrayChildren,
167    ) -> VortexResult<VarBinViewArray> {
168        let Some((views_handle, data_handles)) = buffers.split_last() else {
169            vortex_bail!("Expected at least 1 buffer, got 0");
170        };
171
172        let validity = if children.is_empty() {
173            Validity::from(dtype.nullability())
174        } else if children.len() == 1 {
175            let validity = children.get(0, &Validity::DTYPE, len)?;
176            Validity::Array(validity)
177        } else {
178            vortex_bail!("Expected 0 or 1 children, got {}", children.len());
179        };
180
181        let views_nbytes = views_handle.len();
182        let expected_views_nbytes = len
183            .checked_mul(size_of::<BinaryView>())
184            .ok_or_else(|| vortex_err!("views byte length overflow for len={len}"))?;
185        if views_nbytes != expected_views_nbytes {
186            vortex_bail!(
187                "Expected views buffer length {} bytes, got {} bytes",
188                expected_views_nbytes,
189                views_nbytes
190            );
191        }
192
193        // If any buffer is on device, skip host validation and use try_new_handle.
194        if buffers.iter().any(|b| b.is_on_device()) {
195            return VarBinViewArray::try_new_handle(
196                views_handle.clone(),
197                Arc::from(data_handles.to_vec()),
198                dtype.clone(),
199                validity,
200            );
201        }
202
203        let data_buffers = data_handles
204            .iter()
205            .map(|b| b.as_host().clone())
206            .collect::<Vec<_>>();
207        let views = Buffer::<BinaryView>::from_byte_buffer(views_handle.clone().as_host().clone());
208
209        VarBinViewArray::try_new(views, Arc::from(data_buffers), dtype.clone(), validity)
210    }
211
212    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
213        match children.len() {
214            0 => {}
215            1 => {
216                let [validity]: [ArrayRef; 1] = children
217                    .try_into()
218                    .map_err(|_| vortex_err!("Failed to convert children to array"))?;
219                array.validity = Validity::Array(validity);
220            }
221            _ => vortex_bail!(
222                "VarBinViewArray expects 0 or 1 children (validity?), got {}",
223                children.len()
224            ),
225        }
226        Ok(())
227    }
228
229    fn reduce_parent(
230        array: &Self::Array,
231        parent: &ArrayRef,
232        child_idx: usize,
233    ) -> VortexResult<Option<ArrayRef>> {
234        PARENT_RULES.evaluate(array, parent, child_idx)
235    }
236
237    fn execute_parent(
238        array: &Self::Array,
239        parent: &ArrayRef,
240        child_idx: usize,
241        ctx: &mut ExecutionCtx,
242    ) -> VortexResult<Option<ArrayRef>> {
243        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
244    }
245
246    fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult<ExecutionStep> {
247        Ok(ExecutionStep::Done(array.clone().into_array()))
248    }
249}