vortex_array/arrays/chunked/vtable/
mod.rs1use std::hash::Hasher;
5
6use itertools::Itertools;
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_err;
12use vortex_error::vortex_panic;
13use vortex_session::VortexSession;
14use vortex_session::registry::CachedId;
15
16use crate::ArrayEq;
17use crate::ArrayHash;
18use crate::ArrayRef;
19use crate::Canonical;
20use crate::ExecutionCtx;
21use crate::ExecutionResult;
22use crate::IntoArray;
23use crate::Precision;
24#[expect(deprecated)]
25use crate::ToCanonical as _;
26use crate::array::Array;
27use crate::array::ArrayId;
28use crate::array::ArrayParts;
29use crate::array::ArrayView;
30use crate::array::VTable;
31use crate::arrays::chunked::ChunkedArrayExt;
32use crate::arrays::chunked::ChunkedData;
33use crate::arrays::chunked::array::CHUNK_OFFSETS_SLOT;
34use crate::arrays::chunked::array::CHUNKS_OFFSET;
35use crate::arrays::chunked::compute::kernel::PARENT_KERNELS;
36use crate::arrays::chunked::compute::rules::PARENT_RULES;
37use crate::arrays::chunked::vtable::canonical::_canonicalize;
38use crate::buffer::BufferHandle;
39use crate::builders::ArrayBuilder;
40use crate::dtype::DType;
41use crate::dtype::Nullability;
42use crate::dtype::PType;
43use crate::serde::ArrayChildren;
44mod canonical;
45mod operations;
46mod validity;
47
/// An [`Array`] whose encoding vtable is [`Chunked`].
pub type ChunkedArray = Array<Chunked>;
50
/// Zero-sized marker type that carries the [`VTable`] implementation for chunked arrays.
#[derive(Clone, Debug)]
pub struct Chunked;
53
impl ArrayHash for ChunkedData {
    /// Feeds nothing into `_state`: `ChunkedData` contributes no bytes to the array hash.
    ///
    /// This is consistent with `array_eq` below, which treats every pair of `ChunkedData`
    /// values as equal.
    // NOTE(review): presumably the cached offsets are derived from the chunk-offsets slot and
    // are therefore already covered when the slots are hashed — confirm against the caller.
    fn array_hash<H: Hasher>(&self, _state: &mut H, _precision: Precision) {
    }
}
60
impl ArrayEq for ChunkedData {
    /// Always returns `true`: two `ChunkedData` values are never considered different,
    /// regardless of `_precision`.
    ///
    /// This matches `array_hash`, which contributes nothing to the hash.
    // NOTE(review): presumably equality of the actual contents is decided by comparing the
    // slots elsewhere — confirm against the caller.
    fn array_eq(&self, _other: &Self, _precision: Precision) -> bool {
        true
    }
}
68
69impl VTable for Chunked {
70 type ArrayData = ChunkedData;
71
72 type OperationsVTable = Self;
73 type ValidityVTable = Self;
74 fn id(&self) -> ArrayId {
75 static ID: CachedId = CachedId::new("vortex.chunked");
76 *ID
77 }
78
79 fn validate(
80 &self,
81 data: &ChunkedData,
82 dtype: &DType,
83 len: usize,
84 slots: &[Option<ArrayRef>],
85 ) -> VortexResult<()> {
86 vortex_ensure!(
87 !slots.is_empty(),
88 "ChunkedArray must have at least a chunk offsets slot"
89 );
90 let chunk_offsets = slots[CHUNK_OFFSETS_SLOT]
91 .as_ref()
92 .vortex_expect("validated chunk offsets slot");
93 vortex_ensure!(
94 chunk_offsets.dtype() == &DType::Primitive(PType::U64, Nullability::NonNullable),
95 "ChunkedArray chunk offsets must be non-nullable u64, found {}",
96 chunk_offsets.dtype()
97 );
98 vortex_ensure!(
99 chunk_offsets.len() == data.chunk_offsets.len(),
100 "ChunkedArray chunk offsets slot length {} does not match cached offsets length {}",
101 chunk_offsets.len(),
102 data.chunk_offsets.len()
103 );
104 vortex_ensure!(
105 data.chunk_offsets.len() == slots.len() - CHUNKS_OFFSET + 1,
106 "ChunkedArray chunk offsets length {} does not match {} chunks",
107 data.chunk_offsets.len(),
108 slots.len() - CHUNKS_OFFSET
109 );
110 vortex_ensure!(
111 data.chunk_offsets
112 .last()
113 .copied()
114 .vortex_expect("chunked arrays always have a leading 0 offset")
115 == len,
116 "ChunkedArray length {} does not match outer length {}",
117 data.chunk_offsets.last().copied().unwrap_or_default(),
118 len
119 );
120 for (idx, (start, end)) in data
121 .chunk_offsets
122 .iter()
123 .copied()
124 .tuple_windows()
125 .enumerate()
126 {
127 let chunk = slots[CHUNKS_OFFSET + idx]
128 .as_ref()
129 .vortex_expect("validated chunk slot");
130 vortex_ensure!(
131 chunk.dtype() == dtype,
132 "ChunkedArray chunk dtype {} does not match outer dtype {}",
133 chunk.dtype(),
134 dtype
135 );
136 vortex_ensure!(
137 chunk.len() == end - start,
138 "ChunkedArray chunk {} len {} does not match offsets span {}",
139 idx,
140 chunk.len(),
141 end - start
142 );
143 }
144 Ok(())
145 }
146
147 fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
148 0
149 }
150
151 fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
152 vortex_panic!("ChunkedArray buffer index {idx} out of bounds")
153 }
154
155 fn buffer_name(_array: ArrayView<'_, Self>, idx: usize) -> Option<String> {
156 vortex_panic!("ChunkedArray buffer_name index {idx} out of bounds")
157 }
158
159 fn serialize(
160 _array: ArrayView<'_, Self>,
161 _session: &VortexSession,
162 ) -> VortexResult<Option<Vec<u8>>> {
163 Ok(Some(vec![]))
164 }
165
166 fn deserialize(
167 &self,
168 dtype: &DType,
169 len: usize,
170 metadata: &[u8],
171 _buffers: &[BufferHandle],
172 children: &dyn ArrayChildren,
173 _session: &VortexSession,
174 ) -> VortexResult<ArrayParts<Self>> {
175 if !metadata.is_empty() {
176 vortex_bail!(
177 "ChunkedArray expects empty metadata, got {} bytes",
178 metadata.len()
179 );
180 }
181 if children.is_empty() {
182 vortex_bail!("Chunked array needs at least one child");
183 }
184
185 let nchunks = children.len() - 1;
186 let chunk_offsets = children.get(
187 CHUNK_OFFSETS_SLOT,
188 &DType::Primitive(PType::U64, Nullability::NonNullable),
189 nchunks + 1,
190 )?;
191 #[expect(deprecated)]
192 let chunk_offsets_buf = chunk_offsets.to_primitive().to_buffer::<u64>();
193 let chunk_offsets_usize = chunk_offsets_buf
194 .iter()
195 .copied()
196 .map(|offset| {
197 usize::try_from(offset)
198 .map_err(|_| vortex_err!("chunk offset {offset} exceeds usize range"))
199 })
200 .collect::<VortexResult<Vec<_>>>()?;
201 let mut slots = Vec::with_capacity(children.len());
202 slots.push(Some(chunk_offsets));
203 for (idx, (start, end)) in chunk_offsets_usize
204 .iter()
205 .copied()
206 .tuple_windows()
207 .enumerate()
208 {
209 let chunk_len = end - start;
210 slots.push(Some(children.get(idx + CHUNKS_OFFSET, dtype, chunk_len)?));
211 }
212
213 Ok(ArrayParts::new(
214 self.clone(),
215 dtype.clone(),
216 len,
217 ChunkedData::new(chunk_offsets_usize),
218 )
219 .with_slots(slots))
220 }
221
222 fn append_to_builder(
223 array: ArrayView<'_, Self>,
224 builder: &mut dyn ArrayBuilder,
225 ctx: &mut ExecutionCtx,
226 ) -> VortexResult<()> {
227 for chunk in array.iter_chunks() {
228 chunk.append_to_builder(builder, ctx)?;
229 }
230 Ok(())
231 }
232
233 fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
234 match idx {
235 CHUNK_OFFSETS_SLOT => "chunk_offsets".to_string(),
236 n => format!("chunks[{}]", n - CHUNKS_OFFSET),
237 }
238 }
239
240 fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
241 match array.dtype() {
242 DType::Struct(..) | DType::List(..) => {
244 Ok(ExecutionResult::done(_canonicalize(array.as_view(), ctx)?))
246 }
247 _ => {
249 let slot_idx = array.next_builder_slot.max(CHUNKS_OFFSET);
250 if slot_idx < array.slots().len() {
251 Ok(ExecutionResult::append_child(
252 array.with_next_builder_slot(slot_idx + 1),
253 slot_idx,
254 ))
255 } else {
256 Ok(ExecutionResult::done(
257 Canonical::empty(array.dtype()).into_array(),
258 ))
259 }
260 }
261 }
262 }
263
264 fn execute_parent(
265 array: ArrayView<'_, Self>,
266 parent: &ArrayRef,
267 child_idx: usize,
268 ctx: &mut ExecutionCtx,
269 ) -> VortexResult<Option<ArrayRef>> {
270 PARENT_KERNELS.execute(array, parent, child_idx, ctx)
271 }
272
273 fn reduce(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
274 Ok(match array.nchunks() {
275 0 => Some(Canonical::empty(array.dtype()).into_array()),
276 1 => Some(array.chunk(0).clone()),
277 _ => None,
278 })
279 }
280
281 fn reduce_parent(
282 array: ArrayView<'_, Self>,
283 parent: &ArrayRef,
284 child_idx: usize,
285 ) -> VortexResult<Option<ArrayRef>> {
286 PARENT_RULES.evaluate(array, parent, child_idx)
287 }
288}