vortex_array/arrays/chunked/vtable/
mod.rs1use std::hash::Hasher;
5
6use itertools::Itertools;
7use smallvec::SmallVec;
8use vortex_error::VortexExpect;
9use vortex_error::VortexResult;
10use vortex_error::vortex_bail;
11use vortex_error::vortex_ensure;
12use vortex_error::vortex_err;
13use vortex_error::vortex_panic;
14use vortex_session::VortexSession;
15use vortex_session::registry::CachedId;
16
17use crate::ArrayEq;
18use crate::ArrayHash;
19use crate::ArrayRef;
20use crate::Canonical;
21use crate::ExecutionCtx;
22use crate::ExecutionResult;
23use crate::IntoArray;
24use crate::Precision;
25#[expect(deprecated)]
26use crate::ToCanonical as _;
27use crate::array::Array;
28use crate::array::ArrayId;
29use crate::array::ArrayParts;
30use crate::array::ArrayView;
31use crate::array::VTable;
32use crate::arrays::chunked::ChunkedArrayExt;
33use crate::arrays::chunked::ChunkedData;
34use crate::arrays::chunked::array::CHUNK_OFFSETS_SLOT;
35use crate::arrays::chunked::array::CHUNKS_OFFSET;
36use crate::arrays::chunked::compute::kernel::PARENT_KERNELS;
37use crate::arrays::chunked::compute::rules::PARENT_RULES;
38use crate::arrays::chunked::vtable::canonical::_canonicalize;
39use crate::buffer::BufferHandle;
40use crate::builders::ArrayBuilder;
41use crate::dtype::DType;
42use crate::dtype::Nullability;
43use crate::dtype::PType;
44use crate::serde::ArrayChildren;
45mod canonical;
46mod operations;
47mod validity;
48
49pub type ChunkedArray = Array<Chunked>;
51
/// Zero-sized marker type that carries the [`VTable`] implementation for chunked arrays.
#[derive(Debug, Clone)]
pub struct Chunked;
54
55impl ArrayHash for ChunkedData {
56 fn array_hash<H: Hasher>(&self, _state: &mut H, _precision: Precision) {
57 }
60}
61
62impl ArrayEq for ChunkedData {
63 fn array_eq(&self, _other: &Self, _precision: Precision) -> bool {
64 true
67 }
68}
69
70impl VTable for Chunked {
71 type TypedArrayData = ChunkedData;
72
73 type OperationsVTable = Self;
74 type ValidityVTable = Self;
75 fn id(&self) -> ArrayId {
76 static ID: CachedId = CachedId::new("vortex.chunked");
77 *ID
78 }
79
80 fn validate(
81 &self,
82 data: &ChunkedData,
83 dtype: &DType,
84 len: usize,
85 slots: &[Option<ArrayRef>],
86 ) -> VortexResult<()> {
87 vortex_ensure!(
88 !slots.is_empty(),
89 "ChunkedArray must have at least a chunk offsets slot"
90 );
91 let chunk_offsets = slots[CHUNK_OFFSETS_SLOT]
92 .as_ref()
93 .vortex_expect("validated chunk offsets slot");
94 vortex_ensure!(
95 chunk_offsets.dtype() == &DType::Primitive(PType::U64, Nullability::NonNullable),
96 "ChunkedArray chunk offsets must be non-nullable u64, found {}",
97 chunk_offsets.dtype()
98 );
99 vortex_ensure!(
100 chunk_offsets.len() == data.chunk_offsets.len(),
101 "ChunkedArray chunk offsets slot length {} does not match cached offsets length {}",
102 chunk_offsets.len(),
103 data.chunk_offsets.len()
104 );
105 vortex_ensure!(
106 data.chunk_offsets.len() == slots.len() - CHUNKS_OFFSET + 1,
107 "ChunkedArray chunk offsets length {} does not match {} chunks",
108 data.chunk_offsets.len(),
109 slots.len() - CHUNKS_OFFSET
110 );
111 vortex_ensure!(
112 data.chunk_offsets
113 .last()
114 .copied()
115 .vortex_expect("chunked arrays always have a leading 0 offset")
116 == len,
117 "ChunkedArray length {} does not match outer length {}",
118 data.chunk_offsets.last().copied().unwrap_or_default(),
119 len
120 );
121 for (idx, (start, end)) in data
122 .chunk_offsets
123 .iter()
124 .copied()
125 .tuple_windows()
126 .enumerate()
127 {
128 let chunk = slots[CHUNKS_OFFSET + idx]
129 .as_ref()
130 .vortex_expect("validated chunk slot");
131 vortex_ensure!(
132 chunk.dtype() == dtype,
133 "ChunkedArray chunk dtype {} does not match outer dtype {}",
134 chunk.dtype(),
135 dtype
136 );
137 vortex_ensure!(
138 chunk.len() == end - start,
139 "ChunkedArray chunk {} len {} does not match offsets span {}",
140 idx,
141 chunk.len(),
142 end - start
143 );
144 }
145 Ok(())
146 }
147
148 fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
149 0
150 }
151
152 fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
153 vortex_panic!("ChunkedArray buffer index {idx} out of bounds")
154 }
155
156 fn buffer_name(_array: ArrayView<'_, Self>, idx: usize) -> Option<String> {
157 vortex_panic!("ChunkedArray buffer_name index {idx} out of bounds")
158 }
159
160 fn serialize(
161 _array: ArrayView<'_, Self>,
162 _session: &VortexSession,
163 ) -> VortexResult<Option<Vec<u8>>> {
164 Ok(Some(vec![]))
165 }
166
167 fn deserialize(
168 &self,
169 dtype: &DType,
170 len: usize,
171 metadata: &[u8],
172 _buffers: &[BufferHandle],
173 children: &dyn ArrayChildren,
174 _session: &VortexSession,
175 ) -> VortexResult<ArrayParts<Self>> {
176 if !metadata.is_empty() {
177 vortex_bail!(
178 "ChunkedArray expects empty metadata, got {} bytes",
179 metadata.len()
180 );
181 }
182 if children.is_empty() {
183 vortex_bail!("Chunked array needs at least one child");
184 }
185
186 let nchunks = children.len() - 1;
187 let chunk_offsets = children.get(
188 CHUNK_OFFSETS_SLOT,
189 &DType::Primitive(PType::U64, Nullability::NonNullable),
190 nchunks + 1,
191 )?;
192 #[expect(deprecated)]
193 let chunk_offsets_buf = chunk_offsets.to_primitive().to_buffer::<u64>();
194 let chunk_offsets_usize = chunk_offsets_buf
195 .iter()
196 .copied()
197 .map(|offset| {
198 usize::try_from(offset)
199 .map_err(|_| vortex_err!("chunk offset {offset} exceeds usize range"))
200 })
201 .collect::<VortexResult<Vec<_>>>()?;
202 let mut slots = SmallVec::with_capacity(children.len());
203 slots.push(Some(chunk_offsets));
204 for (idx, (start, end)) in chunk_offsets_usize
205 .iter()
206 .copied()
207 .tuple_windows()
208 .enumerate()
209 {
210 let chunk_len = end - start;
211 slots.push(Some(children.get(idx + CHUNKS_OFFSET, dtype, chunk_len)?));
212 }
213
214 Ok(ArrayParts::new(
215 self.clone(),
216 dtype.clone(),
217 len,
218 ChunkedData::new(chunk_offsets_usize),
219 )
220 .with_slots(slots))
221 }
222
223 fn append_to_builder(
224 array: ArrayView<'_, Self>,
225 builder: &mut dyn ArrayBuilder,
226 ctx: &mut ExecutionCtx,
227 ) -> VortexResult<()> {
228 for chunk in array.iter_chunks() {
229 chunk.append_to_builder(builder, ctx)?;
230 }
231 Ok(())
232 }
233
234 fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
235 match idx {
236 CHUNK_OFFSETS_SLOT => "chunk_offsets".to_string(),
237 n => format!("chunks[{}]", n - CHUNKS_OFFSET),
238 }
239 }
240
241 fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
242 match array.dtype() {
243 DType::Struct(..) | DType::List(..) | DType::Variant(..) => {
245 Ok(ExecutionResult::done(_canonicalize(array.as_view(), ctx)?))
247 }
248 _ => {
250 let slot_idx = array.next_builder_slot.max(CHUNKS_OFFSET);
251 if slot_idx < array.slots().len() {
252 Ok(ExecutionResult::append_child(
253 array.with_next_builder_slot(slot_idx + 1),
254 slot_idx,
255 ))
256 } else {
257 Ok(ExecutionResult::done(
258 Canonical::empty(array.dtype()).into_array(),
259 ))
260 }
261 }
262 }
263 }
264
265 fn execute_parent(
266 array: ArrayView<'_, Self>,
267 parent: &ArrayRef,
268 child_idx: usize,
269 ctx: &mut ExecutionCtx,
270 ) -> VortexResult<Option<ArrayRef>> {
271 PARENT_KERNELS.execute(array, parent, child_idx, ctx)
272 }
273
274 fn reduce(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
275 Ok(match array.nchunks() {
276 0 => Some(Canonical::empty(array.dtype()).into_array()),
277 1 => Some(array.chunk(0).clone()),
278 _ => None,
279 })
280 }
281
282 fn reduce_parent(
283 array: ArrayView<'_, Self>,
284 parent: &ArrayRef,
285 child_idx: usize,
286 ) -> VortexResult<Option<ArrayRef>> {
287 PARENT_RULES.evaluate(array, parent, child_idx)
288 }
289}