vortex_array/arrays/chunked/vtable/
serde.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use vortex_buffer::ByteBuffer;
6use vortex_dtype::{DType, Nullability, PType};
7use vortex_error::{VortexResult, vortex_bail, vortex_err};
8
9use crate::arrays::{ChunkedArray, ChunkedEncoding, ChunkedVTable};
10use crate::serde::ArrayChildren;
11use crate::vtable::SerdeVTable;
12use crate::{EmptyMetadata, ToCanonical};
13
14impl SerdeVTable<ChunkedVTable> for ChunkedVTable {
15    type Metadata = EmptyMetadata;
16
17    fn metadata(_array: &ChunkedArray) -> VortexResult<Option<Self::Metadata>> {
18        Ok(Some(EmptyMetadata))
19    }
20
21    fn build(
22        _encoding: &ChunkedEncoding,
23        dtype: &DType,
24        _len: usize,
25        _metadata: &Self::Metadata,
26        _buffers: &[ByteBuffer],
27        children: &dyn ArrayChildren,
28    ) -> VortexResult<ChunkedArray> {
29        if children.is_empty() {
30            vortex_bail!("Chunked array needs at least one child");
31        }
32
33        let nchunks = children.len() - 1;
34
35        // The first child contains the row offsets of the chunks
36        let chunk_offsets = children
37            .get(
38                0,
39                &DType::Primitive(PType::U64, Nullability::NonNullable),
40                // 1 extra offset for the end of the last chunk
41                nchunks + 1,
42            )?
43            .to_primitive()
44            .buffer::<u64>();
45
46        // The remaining children contain the actual data of the chunks
47        let chunks = chunk_offsets
48            .iter()
49            .tuple_windows()
50            .enumerate()
51            .map(|(idx, (start, end))| {
52                let chunk_len = usize::try_from(end - start)
53                    .map_err(|_| vortex_err!("chunk_len {} exceeds usize range", end - start))?;
54                children.get(idx + 1, dtype, chunk_len)
55            })
56            .try_collect()?;
57
58        // SAFETY: All chunks are deserialized with the same dtype that was serialized.
59        // Each chunk was validated during deserialization to match the expected dtype.
60        unsafe { Ok(ChunkedArray::new_unchecked(chunks, dtype.clone())) }
61    }
62}