vortex_array/arrays/chunked/vtable/
mod.rs1use itertools::Itertools;
5use vortex_buffer::BufferHandle;
6use vortex_dtype::DType;
7use vortex_dtype::Nullability;
8use vortex_dtype::PType;
9use vortex_error::VortexResult;
10use vortex_error::vortex_bail;
11use vortex_error::vortex_err;
12use vortex_vector::Vector;
13use vortex_vector::VectorMut;
14use vortex_vector::VectorMutOps;
15
16use crate::EmptyMetadata;
17use crate::ToCanonical;
18use crate::arrays::ChunkedArray;
19use crate::arrays::PrimitiveArray;
20use crate::execution::ExecutionCtx;
21use crate::serde::ArrayChildren;
22use crate::validity::Validity;
23use crate::vtable;
24use crate::vtable::ArrayId;
25use crate::vtable::ArrayVTable;
26use crate::vtable::ArrayVTableExt;
27use crate::vtable::NotSupported;
28use crate::vtable::VTable;
29
30mod array;
31mod canonical;
32mod compute;
33mod operations;
34mod validity;
35mod visitor;
36
37vtable!(Chunked);
38
39impl VTable for ChunkedVTable {
40 type Array = ChunkedArray;
41
42 type Metadata = EmptyMetadata;
43
44 type ArrayVTable = Self;
45 type CanonicalVTable = Self;
46 type OperationsVTable = Self;
47 type ValidityVTable = Self;
48 type VisitorVTable = Self;
49 type ComputeVTable = Self;
50 type EncodeVTable = NotSupported;
51
52 fn id(&self) -> ArrayId {
53 ArrayId::new_ref("vortex.chunked")
54 }
55
56 fn encoding(_array: &Self::Array) -> ArrayVTable {
57 ChunkedVTable.as_vtable()
58 }
59
60 fn metadata(_array: &ChunkedArray) -> VortexResult<Self::Metadata> {
61 Ok(EmptyMetadata)
62 }
63
64 fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
65 Ok(Some(vec![]))
66 }
67
68 fn deserialize(_buffer: &[u8]) -> VortexResult<Self::Metadata> {
69 Ok(EmptyMetadata)
70 }
71
72 fn build(
73 &self,
74 dtype: &DType,
75 _len: usize,
76 _metadata: &Self::Metadata,
77 _buffers: &[BufferHandle],
78 children: &dyn ArrayChildren,
79 ) -> VortexResult<ChunkedArray> {
80 if children.is_empty() {
81 vortex_bail!("Chunked array needs at least one child");
82 }
83
84 let nchunks = children.len() - 1;
85
86 let chunk_offsets_array = children
88 .get(
89 0,
90 &DType::Primitive(PType::U64, Nullability::NonNullable),
91 nchunks + 1,
93 )?
94 .to_primitive();
95
96 let chunk_offsets_buf = chunk_offsets_array.buffer::<u64>();
97
98 let chunks = chunk_offsets_buf
100 .iter()
101 .tuple_windows()
102 .enumerate()
103 .map(|(idx, (start, end))| {
104 let chunk_len = usize::try_from(end - start)
105 .map_err(|_| vortex_err!("chunk_len {} exceeds usize range", end - start))?;
106 children.get(idx + 1, dtype, chunk_len)
107 })
108 .try_collect()?;
109
110 let chunk_offsets = PrimitiveArray::new(chunk_offsets_buf.clone(), Validity::NonNullable);
111
112 let total_len = chunk_offsets_buf
113 .last()
114 .ok_or_else(|| vortex_err!("chunk_offsets must not be empty"))?;
115 let len = usize::try_from(*total_len)
116 .map_err(|_| vortex_err!("total length {} exceeds usize range", total_len))?;
117
118 Ok(ChunkedArray {
120 dtype: dtype.clone(),
121 len,
122 chunk_offsets,
123 chunks,
124 stats_set: Default::default(),
125 })
126 }
127
128 fn batch_execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<Vector> {
129 let mut vector = VectorMut::with_capacity(array.dtype(), 0);
130 for chunk in array.chunks() {
131 let chunk_vector = chunk.batch_execute(ctx)?;
132 vector.extend_from_vector(&chunk_vector);
133 }
134 Ok(vector.freeze())
135 }
136}
137
/// Unit type on which the [`VTable`] implementation for chunked arrays is defined.
///
/// It has no fields, so the bare value `ChunkedVTable` is the only instance.
#[derive(Debug)]
pub struct ChunkedVTable;