vortex_array/arrays/chunked/
serde.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use vortex_buffer::ByteBuffer;
6use vortex_dtype::{DType, Nullability, PType};
7use vortex_error::{VortexExpect, VortexResult, vortex_bail};
8
9use super::ChunkedEncoding;
10use crate::arrays::{ChunkedArray, ChunkedVTable, PrimitiveArray};
11use crate::serde::ArrayChildren;
12use crate::validity::Validity;
13use crate::vtable::{SerdeVTable, VisitorVTable};
14use crate::{ArrayBufferVisitor, ArrayChildVisitor, EmptyMetadata, ToCanonical};
15
16impl SerdeVTable<ChunkedVTable> for ChunkedVTable {
17    type Metadata = EmptyMetadata;
18
19    fn metadata(_array: &ChunkedArray) -> VortexResult<Option<Self::Metadata>> {
20        Ok(Some(EmptyMetadata))
21    }
22
23    fn build(
24        _encoding: &ChunkedEncoding,
25        dtype: &DType,
26        _len: usize,
27        _metadata: &Self::Metadata,
28        _buffers: &[ByteBuffer],
29        children: &dyn ArrayChildren,
30    ) -> VortexResult<ChunkedArray> {
31        if children.is_empty() {
32            vortex_bail!("Chunked array needs at least one child");
33        }
34
35        let nchunks = children.len() - 1;
36
37        // The first child contains the row offsets of the chunks
38        let chunk_offsets = children
39            .get(
40                0,
41                &DType::Primitive(PType::U64, Nullability::NonNullable),
42                // 1 extra offset for the end of the last chunk
43                nchunks + 1,
44            )?
45            .to_primitive()?
46            .buffer::<u64>();
47
48        // The remaining children contain the actual data of the chunks
49        let chunks = chunk_offsets
50            .iter()
51            .tuple_windows()
52            .enumerate()
53            .map(|(idx, (start, end))| {
54                let chunk_len =
55                    usize::try_from(end - start).vortex_expect("chunk length exceeds usize");
56                children.get(idx + 1, dtype, chunk_len)
57            })
58            .try_collect()?;
59
60        // Unchecked because we just created each chunk with the same DType.
61        Ok(ChunkedArray::new_unchecked(chunks, dtype.clone()))
62    }
63}
64
65impl VisitorVTable<ChunkedVTable> for ChunkedVTable {
66    fn visit_buffers(_array: &ChunkedArray, _visitor: &mut dyn ArrayBufferVisitor) {}
67
68    fn visit_children(array: &ChunkedArray, visitor: &mut dyn ArrayChildVisitor) {
69        let chunk_offsets =
70            PrimitiveArray::new(array.chunk_offsets().clone(), Validity::NonNullable);
71        visitor.visit_child("chunk_offsets", chunk_offsets.as_ref());
72
73        for (idx, chunk) in array.chunks().iter().enumerate() {
74            visitor.visit_child(format!("chunks[{idx}]").as_str(), chunk);
75        }
76    }
77}