vortex_array/arrays/chunked/
serde.rs

1use itertools::Itertools;
2use vortex_dtype::{DType, Nullability, PType};
3use vortex_error::{VortexExpect, VortexResult, vortex_bail};
4
5use super::ChunkedEncoding;
6use crate::arrays::{ChunkedArray, PrimitiveArray};
7use crate::serde::ArrayParts;
8use crate::validity::Validity;
9use crate::vtable::EncodingVTable;
10use crate::{
11    Array, ArrayChildVisitor, ArrayContext, ArrayRef, ArrayVisitorImpl, EmptyMetadata, EncodingId,
12    ToCanonical,
13};
14
15impl EncodingVTable for ChunkedEncoding {
16    fn id(&self) -> EncodingId {
17        EncodingId::new_ref("vortex.chunked")
18    }
19
20    fn decode(
21        &self,
22        parts: &ArrayParts,
23        ctx: &ArrayContext,
24        dtype: DType,
25        // TODO(ngates): should we avoid storing the final chunk offset and push the length instead?
26        _len: usize,
27    ) -> VortexResult<ArrayRef> {
28        if parts.nchildren() == 0 {
29            vortex_bail!("Chunked array needs at least one child");
30        }
31
32        let nchunks = parts.nchildren() - 1;
33
34        // The first child contains the row offsets of the chunks
35        let chunk_offsets = parts
36            .child(0)
37            .decode(
38                ctx,
39                DType::Primitive(PType::U64, Nullability::NonNullable),
40                // 1 extra offset for the end of the last chunk
41                nchunks + 1,
42            )?
43            .to_primitive()?
44            .buffer::<u64>();
45
46        // The remaining children contain the actual data of the chunks
47        let chunks = chunk_offsets
48            .iter()
49            .tuple_windows()
50            .enumerate()
51            .map(|(idx, (start, end))| {
52                let chunk_len =
53                    usize::try_from(end - start).vortex_expect("chunk length exceeds usize");
54                parts.child(idx + 1).decode(ctx, dtype.clone(), chunk_len)
55            })
56            .try_collect()?;
57
58        // Unchecked because we just created each chunk with the same DType.
59        Ok(ChunkedArray::new_unchecked(chunks, dtype).into_array())
60    }
61}
62
63impl ArrayVisitorImpl for ChunkedArray {
64    fn _visit_children(&self, visitor: &mut dyn ArrayChildVisitor) {
65        let chunk_offsets = PrimitiveArray::new(self.chunk_offsets.clone(), Validity::NonNullable);
66        visitor.visit_child("chunk_offsets", &chunk_offsets);
67
68        for (idx, chunk) in self.chunks().iter().enumerate() {
69            visitor.visit_child(format!("chunks[{}]", idx).as_str(), chunk);
70        }
71    }
72
73    fn _metadata(&self) -> EmptyMetadata {
74        EmptyMetadata
75    }
76}