vortex_array/arrays/chunked/
serde.rs

1use itertools::Itertools;
2use vortex_buffer::ByteBuffer;
3use vortex_dtype::{DType, Nullability, PType};
4use vortex_error::{VortexExpect, VortexResult, vortex_bail};
5
6use super::ChunkedEncoding;
7use crate::arrays::{ChunkedArray, ChunkedVTable, PrimitiveArray};
8use crate::serde::ArrayChildren;
9use crate::validity::Validity;
10use crate::vtable::{SerdeVTable, VisitorVTable};
11use crate::{ArrayBufferVisitor, ArrayChildVisitor, EmptyMetadata, ToCanonical};
12
13impl SerdeVTable<ChunkedVTable> for ChunkedVTable {
14    type Metadata = EmptyMetadata;
15
16    fn metadata(_array: &ChunkedArray) -> VortexResult<Option<Self::Metadata>> {
17        Ok(Some(EmptyMetadata))
18    }
19
20    fn build(
21        _encoding: &ChunkedEncoding,
22        dtype: &DType,
23        _len: usize,
24        _metadata: &Self::Metadata,
25        _buffers: &[ByteBuffer],
26        children: &dyn ArrayChildren,
27    ) -> VortexResult<ChunkedArray> {
28        if children.is_empty() {
29            vortex_bail!("Chunked array needs at least one child");
30        }
31
32        let nchunks = children.len() - 1;
33
34        // The first child contains the row offsets of the chunks
35        let chunk_offsets = children
36            .get(
37                0,
38                &DType::Primitive(PType::U64, Nullability::NonNullable),
39                // 1 extra offset for the end of the last chunk
40                nchunks + 1,
41            )?
42            .to_primitive()?
43            .buffer::<u64>();
44
45        // The remaining children contain the actual data of the chunks
46        let chunks = chunk_offsets
47            .iter()
48            .tuple_windows()
49            .enumerate()
50            .map(|(idx, (start, end))| {
51                let chunk_len =
52                    usize::try_from(end - start).vortex_expect("chunk length exceeds usize");
53                children.get(idx + 1, dtype, chunk_len)
54            })
55            .try_collect()?;
56
57        // Unchecked because we just created each chunk with the same DType.
58        Ok(ChunkedArray::new_unchecked(chunks, dtype.clone()))
59    }
60}
61
62impl VisitorVTable<ChunkedVTable> for ChunkedVTable {
63    fn visit_buffers(_array: &ChunkedArray, _visitor: &mut dyn ArrayBufferVisitor) {}
64
65    fn visit_children(array: &ChunkedArray, visitor: &mut dyn ArrayChildVisitor) {
66        let chunk_offsets =
67            PrimitiveArray::new(array.chunk_offsets().clone(), Validity::NonNullable);
68        visitor.visit_child("chunk_offsets", chunk_offsets.as_ref());
69
70        for (idx, chunk) in array.chunks().iter().enumerate() {
71            visitor.visit_child(format!("chunks[{idx}]").as_str(), chunk);
72        }
73    }
74}