vortex_array/arrays/chunked/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use vortex_buffer::BufferHandle;
6use vortex_dtype::DType;
7use vortex_dtype::Nullability;
8use vortex_dtype::PType;
9use vortex_error::VortexResult;
10use vortex_error::vortex_bail;
11use vortex_error::vortex_err;
12use vortex_vector::Vector;
13use vortex_vector::VectorMut;
14use vortex_vector::VectorMutOps;
15
16use crate::EmptyMetadata;
17use crate::ToCanonical;
18use crate::arrays::ChunkedArray;
19use crate::arrays::PrimitiveArray;
20use crate::execution::ExecutionCtx;
21use crate::serde::ArrayChildren;
22use crate::validity::Validity;
23use crate::vtable;
24use crate::vtable::ArrayId;
25use crate::vtable::ArrayVTable;
26use crate::vtable::ArrayVTableExt;
27use crate::vtable::NotSupported;
28use crate::vtable::VTable;
29
30mod array;
31mod canonical;
32mod compute;
33mod operations;
34mod validity;
35mod visitor;
36
// NOTE(review): `vtable!` is defined elsewhere in the crate (imported via `crate::vtable`);
// presumably it generates the glue tying `ChunkedArray` to `ChunkedVTable` — confirm against
// the macro definition.
37vtable!(Chunked);
38
39impl VTable for ChunkedVTable {
40    type Array = ChunkedArray;
41
42    type Metadata = EmptyMetadata;
43
44    type ArrayVTable = Self;
45    type CanonicalVTable = Self;
46    type OperationsVTable = Self;
47    type ValidityVTable = Self;
48    type VisitorVTable = Self;
49    type ComputeVTable = Self;
50    type EncodeVTable = NotSupported;
51
52    fn id(&self) -> ArrayId {
53        ArrayId::new_ref("vortex.chunked")
54    }
55
56    fn encoding(_array: &Self::Array) -> ArrayVTable {
57        ChunkedVTable.as_vtable()
58    }
59
60    fn metadata(_array: &ChunkedArray) -> VortexResult<Self::Metadata> {
61        Ok(EmptyMetadata)
62    }
63
64    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
65        Ok(Some(vec![]))
66    }
67
68    fn deserialize(_buffer: &[u8]) -> VortexResult<Self::Metadata> {
69        Ok(EmptyMetadata)
70    }
71
72    fn build(
73        &self,
74        dtype: &DType,
75        _len: usize,
76        _metadata: &Self::Metadata,
77        _buffers: &[BufferHandle],
78        children: &dyn ArrayChildren,
79    ) -> VortexResult<ChunkedArray> {
80        if children.is_empty() {
81            vortex_bail!("Chunked array needs at least one child");
82        }
83
84        let nchunks = children.len() - 1;
85
86        // The first child contains the row offsets of the chunks
87        let chunk_offsets_array = children
88            .get(
89                0,
90                &DType::Primitive(PType::U64, Nullability::NonNullable),
91                // 1 extra offset for the end of the last chunk
92                nchunks + 1,
93            )?
94            .to_primitive();
95
96        let chunk_offsets_buf = chunk_offsets_array.buffer::<u64>();
97
98        // The remaining children contain the actual data of the chunks
99        let chunks = chunk_offsets_buf
100            .iter()
101            .tuple_windows()
102            .enumerate()
103            .map(|(idx, (start, end))| {
104                let chunk_len = usize::try_from(end - start)
105                    .map_err(|_| vortex_err!("chunk_len {} exceeds usize range", end - start))?;
106                children.get(idx + 1, dtype, chunk_len)
107            })
108            .try_collect()?;
109
110        let chunk_offsets = PrimitiveArray::new(chunk_offsets_buf.clone(), Validity::NonNullable);
111
112        let total_len = chunk_offsets_buf
113            .last()
114            .ok_or_else(|| vortex_err!("chunk_offsets must not be empty"))?;
115        let len = usize::try_from(*total_len)
116            .map_err(|_| vortex_err!("total length {} exceeds usize range", total_len))?;
117
118        // Construct directly using the struct fields to avoid recomputing chunk_offsets
119        Ok(ChunkedArray {
120            dtype: dtype.clone(),
121            len,
122            chunk_offsets,
123            chunks,
124            stats_set: Default::default(),
125        })
126    }
127
128    fn batch_execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<Vector> {
129        let mut vector = VectorMut::with_capacity(array.dtype(), 0);
130        for chunk in array.chunks() {
131            let chunk_vector = chunk.batch_execute(ctx)?;
132            vector.extend_from_vector(&chunk_vector);
133        }
134        Ok(vector.freeze())
135    }
136}
137
/// Unit type on which the `VTable` implementation for `ChunkedArray` is defined.
138#[derive(Debug)]
139pub struct ChunkedVTable;