vortex_array/arrays/chunked/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use vortex_dtype::DType;
6use vortex_dtype::Nullability;
7use vortex_dtype::PType;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_err;
12
13use crate::ArrayRef;
14use crate::Canonical;
15use crate::EmptyMetadata;
16use crate::IntoArray;
17use crate::ToCanonical;
18use crate::arrays::ChunkedArray;
19use crate::arrays::PrimitiveArray;
20use crate::arrays::chunked::vtable::rules::PARENT_RULES;
21use crate::buffer::BufferHandle;
22use crate::serde::ArrayChildren;
23use crate::validity::Validity;
24use crate::vtable;
25use crate::vtable::ArrayId;
26use crate::vtable::ArrayVTable;
27use crate::vtable::ArrayVTableExt;
28use crate::vtable::NotSupported;
29use crate::vtable::VTable;
30
31mod array;
32mod canonical;
33mod compute;
34mod operations;
35mod rules;
36mod validity;
37mod visitor;
38
39vtable!(Chunked);
40
/// Unit type that carries the [`VTable`] implementation whose array type is
/// [`ChunkedArray`].
41#[derive(Debug)]
42pub struct ChunkedVTable;
43
44impl VTable for ChunkedVTable {
45    type Array = ChunkedArray;
46
47    type Metadata = EmptyMetadata;
48
49    type ArrayVTable = Self;
50    type CanonicalVTable = Self;
51    type OperationsVTable = Self;
52    type ValidityVTable = Self;
53    type VisitorVTable = Self;
54    type ComputeVTable = Self;
55    type EncodeVTable = NotSupported;
56
57    fn id(&self) -> ArrayId {
58        ArrayId::new_ref("vortex.chunked")
59    }
60
61    fn encoding(_array: &Self::Array) -> ArrayVTable {
62        ChunkedVTable.as_vtable()
63    }
64
65    fn metadata(_array: &ChunkedArray) -> VortexResult<Self::Metadata> {
66        Ok(EmptyMetadata)
67    }
68
69    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
70        Ok(Some(vec![]))
71    }
72
73    fn deserialize(_buffer: &[u8]) -> VortexResult<Self::Metadata> {
74        Ok(EmptyMetadata)
75    }
76
77    fn build(
78        &self,
79        dtype: &DType,
80        _len: usize,
81        _metadata: &Self::Metadata,
82        _buffers: &[BufferHandle],
83        children: &dyn ArrayChildren,
84    ) -> VortexResult<ChunkedArray> {
85        if children.is_empty() {
86            vortex_bail!("Chunked array needs at least one child");
87        }
88
89        let nchunks = children.len() - 1;
90
91        // The first child contains the row offsets of the chunks
92        let chunk_offsets_array = children
93            .get(
94                0,
95                &DType::Primitive(PType::U64, Nullability::NonNullable),
96                // 1 extra offset for the end of the last chunk
97                nchunks + 1,
98            )?
99            .to_primitive();
100
101        let chunk_offsets_buf = chunk_offsets_array.buffer::<u64>();
102
103        // The remaining children contain the actual data of the chunks
104        let chunks = chunk_offsets_buf
105            .iter()
106            .tuple_windows()
107            .enumerate()
108            .map(|(idx, (start, end))| {
109                let chunk_len = usize::try_from(end - start)
110                    .map_err(|_| vortex_err!("chunk_len {} exceeds usize range", end - start))?;
111                children.get(idx + 1, dtype, chunk_len)
112            })
113            .try_collect()?;
114
115        let chunk_offsets = PrimitiveArray::new(chunk_offsets_buf.clone(), Validity::NonNullable);
116
117        let total_len = chunk_offsets_buf
118            .last()
119            .ok_or_else(|| vortex_err!("chunk_offsets must not be empty"))?;
120        let len = usize::try_from(*total_len)
121            .map_err(|_| vortex_err!("total length {} exceeds usize range", total_len))?;
122
123        // Construct directly using the struct fields to avoid recomputing chunk_offsets
124        Ok(ChunkedArray {
125            dtype: dtype.clone(),
126            len,
127            chunk_offsets,
128            chunks,
129            stats_set: Default::default(),
130        })
131    }
132
133    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
134        // Children: chunk_offsets, then chunks...
135        vortex_ensure!(
136            !children.is_empty(),
137            "Chunked array needs at least one child"
138        );
139
140        let nchunks = children.len() - 1;
141        let chunk_offsets_array = children[0].to_primitive();
142        let chunk_offsets_buf = chunk_offsets_array.buffer::<u64>();
143
144        vortex_ensure!(
145            chunk_offsets_buf.len() == nchunks + 1,
146            "Expected {} chunk offsets, found {}",
147            nchunks + 1,
148            chunk_offsets_buf.len()
149        );
150
151        let chunks = children.into_iter().skip(1).collect();
152        array.chunk_offsets = PrimitiveArray::new(chunk_offsets_buf.clone(), Validity::NonNullable);
153        array.chunks = chunks;
154
155        let total_len = chunk_offsets_buf
156            .last()
157            .ok_or_else(|| vortex_err!("chunk_offsets must not be empty"))?;
158        array.len = usize::try_from(*total_len)
159            .map_err(|_| vortex_err!("total length {} exceeds usize range", total_len))?;
160
161        Ok(())
162    }
163
164    fn reduce(array: &Self::Array) -> VortexResult<Option<ArrayRef>> {
165        Ok(match array.chunks.len() {
166            0 => Some(Canonical::empty(array.dtype()).into_array()),
167            1 => Some(array.chunks[0].clone()),
168            _ => None,
169        })
170    }
171
172    fn reduce_parent(
173        array: &Self::Array,
174        parent: &ArrayRef,
175        child_idx: usize,
176    ) -> VortexResult<Option<ArrayRef>> {
177        PARENT_RULES.evaluate(array, parent, child_idx)
178    }
179}