Skip to main content

vortex_array/arrays/varbinview/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5use std::mem::size_of;
6use std::sync::Arc;
7
8use kernel::PARENT_KERNELS;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_err;
14use vortex_error::vortex_panic;
15use vortex_session::VortexSession;
16
17use crate::ArrayRef;
18use crate::EmptyMetadata;
19use crate::ExecutionCtx;
20use crate::ExecutionResult;
21use crate::Precision;
22use crate::arrays::VarBinViewArray;
23use crate::arrays::varbinview::BinaryView;
24use crate::arrays::varbinview::compute::rules::PARENT_RULES;
25use crate::buffer::BufferHandle;
26use crate::dtype::DType;
27use crate::hash::ArrayEq;
28use crate::hash::ArrayHash;
29use crate::serde::ArrayChildren;
30use crate::stats::StatsSetRef;
31use crate::validity::Validity;
32use crate::vtable;
33use crate::vtable::Array;
34use crate::vtable::ArrayId;
35use crate::vtable::VTable;
36use crate::vtable::ValidityVTableFromValidityHelper;
37use crate::vtable::validity_nchildren;
38use crate::vtable::validity_to_child;
39mod kernel;
40mod operations;
41mod validity;
42vtable!(VarBinView);
43
/// Zero-sized type that carries the vtable implementation for the VarBinView array encoding.
///
/// It holds no state: every value of this type is interchangeable, so cloning is trivial and
/// the `Debug` output is just the type name.
#[derive(Clone, Debug)]
pub struct VarBinView;
46
47impl VarBinView {
48    pub const ID: ArrayId = ArrayId::new_ref("vortex.varbinview");
49}
50
51impl VTable for VarBinView {
52    type Array = VarBinViewArray;
53
54    type Metadata = EmptyMetadata;
55    type OperationsVTable = Self;
56    type ValidityVTable = ValidityVTableFromValidityHelper;
57    fn vtable(_array: &Self::Array) -> &Self {
58        &VarBinView
59    }
60
61    fn id(&self) -> ArrayId {
62        Self::ID
63    }
64
65    fn len(array: &VarBinViewArray) -> usize {
66        array.views_handle().len() / size_of::<BinaryView>()
67    }
68
69    fn dtype(array: &VarBinViewArray) -> &DType {
70        &array.dtype
71    }
72
73    fn stats(array: &VarBinViewArray) -> StatsSetRef<'_> {
74        array.stats_set.to_ref(array.as_ref())
75    }
76
77    fn array_hash<H: std::hash::Hasher>(
78        array: &VarBinViewArray,
79        state: &mut H,
80        precision: Precision,
81    ) {
82        array.dtype.hash(state);
83        for buffer in array.buffers.iter() {
84            buffer.array_hash(state, precision);
85        }
86        array.views.array_hash(state, precision);
87        array.validity.array_hash(state, precision);
88    }
89
90    fn array_eq(array: &VarBinViewArray, other: &VarBinViewArray, precision: Precision) -> bool {
91        array.dtype == other.dtype
92            && array.buffers.len() == other.buffers.len()
93            && array
94                .buffers
95                .iter()
96                .zip(other.buffers.iter())
97                .all(|(a, b)| a.array_eq(b, precision))
98            && array.views.array_eq(&other.views, precision)
99            && array.validity.array_eq(&other.validity, precision)
100    }
101
102    fn nbuffers(array: &VarBinViewArray) -> usize {
103        array.buffers().len() + 1
104    }
105
106    fn buffer(array: &VarBinViewArray, idx: usize) -> BufferHandle {
107        let ndata = array.buffers().len();
108        if idx < ndata {
109            array.buffers()[idx].clone()
110        } else if idx == ndata {
111            array.views_handle().clone()
112        } else {
113            vortex_panic!("VarBinViewArray buffer index {idx} out of bounds")
114        }
115    }
116
117    fn buffer_name(array: &VarBinViewArray, idx: usize) -> Option<String> {
118        let ndata = array.buffers().len();
119        if idx < ndata {
120            Some(format!("buffer_{idx}"))
121        } else if idx == ndata {
122            Some("views".to_string())
123        } else {
124            vortex_panic!("VarBinViewArray buffer_name index {idx} out of bounds")
125        }
126    }
127
128    fn nchildren(array: &VarBinViewArray) -> usize {
129        validity_nchildren(&array.validity)
130    }
131
132    fn child(array: &VarBinViewArray, idx: usize) -> ArrayRef {
133        match idx {
134            0 => validity_to_child(&array.validity, array.len())
135                .vortex_expect("VarBinViewArray validity child out of bounds"),
136            _ => vortex_panic!("VarBinViewArray child index {idx} out of bounds"),
137        }
138    }
139
140    fn child_name(_array: &VarBinViewArray, idx: usize) -> String {
141        match idx {
142            0 => "validity".to_string(),
143            _ => vortex_panic!("VarBinViewArray child_name index {idx} out of bounds"),
144        }
145    }
146
147    fn metadata(_array: &VarBinViewArray) -> VortexResult<Self::Metadata> {
148        Ok(EmptyMetadata)
149    }
150
151    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
152        Ok(Some(vec![]))
153    }
154
155    fn deserialize(
156        _bytes: &[u8],
157        _dtype: &DType,
158        _len: usize,
159        _buffers: &[BufferHandle],
160        _session: &VortexSession,
161    ) -> VortexResult<Self::Metadata> {
162        Ok(EmptyMetadata)
163    }
164
165    fn build(
166        dtype: &DType,
167        len: usize,
168        _metadata: &Self::Metadata,
169        buffers: &[BufferHandle],
170        children: &dyn ArrayChildren,
171    ) -> VortexResult<VarBinViewArray> {
172        let Some((views_handle, data_handles)) = buffers.split_last() else {
173            vortex_bail!("Expected at least 1 buffer, got 0");
174        };
175
176        let validity = if children.is_empty() {
177            Validity::from(dtype.nullability())
178        } else if children.len() == 1 {
179            let validity = children.get(0, &Validity::DTYPE, len)?;
180            Validity::Array(validity)
181        } else {
182            vortex_bail!("Expected 0 or 1 children, got {}", children.len());
183        };
184
185        let views_nbytes = views_handle.len();
186        let expected_views_nbytes = len
187            .checked_mul(size_of::<BinaryView>())
188            .ok_or_else(|| vortex_err!("views byte length overflow for len={len}"))?;
189        if views_nbytes != expected_views_nbytes {
190            vortex_bail!(
191                "Expected views buffer length {} bytes, got {} bytes",
192                expected_views_nbytes,
193                views_nbytes
194            );
195        }
196
197        // If any buffer is on device, skip host validation and use try_new_handle.
198        if buffers.iter().any(|b| b.is_on_device()) {
199            return VarBinViewArray::try_new_handle(
200                views_handle.clone(),
201                Arc::from(data_handles.to_vec()),
202                dtype.clone(),
203                validity,
204            );
205        }
206
207        let data_buffers = data_handles
208            .iter()
209            .map(|b| b.as_host().clone())
210            .collect::<Vec<_>>();
211        let views = Buffer::<BinaryView>::from_byte_buffer(views_handle.clone().as_host().clone());
212
213        VarBinViewArray::try_new(views, Arc::from(data_buffers), dtype.clone(), validity)
214    }
215
216    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
217        match children.len() {
218            0 => {}
219            1 => {
220                let [validity]: [ArrayRef; 1] = children
221                    .try_into()
222                    .map_err(|_| vortex_err!("Failed to convert children to array"))?;
223                array.validity = Validity::Array(validity);
224            }
225            _ => vortex_bail!(
226                "VarBinViewArray expects 0 or 1 children (validity?), got {}",
227                children.len()
228            ),
229        }
230        Ok(())
231    }
232
233    fn reduce_parent(
234        array: &Array<Self>,
235        parent: &ArrayRef,
236        child_idx: usize,
237    ) -> VortexResult<Option<ArrayRef>> {
238        PARENT_RULES.evaluate(array, parent, child_idx)
239    }
240
241    fn execute_parent(
242        array: &Array<Self>,
243        parent: &ArrayRef,
244        child_idx: usize,
245        ctx: &mut ExecutionCtx,
246    ) -> VortexResult<Option<ArrayRef>> {
247        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
248    }
249
250    fn execute(array: Arc<Array<Self>>, _ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
251        Ok(ExecutionResult::done(array))
252    }
253}