Skip to main content

vortex_array/arrays/chunked/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hasher;
5
6use itertools::Itertools;
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_err;
12use vortex_error::vortex_panic;
13use vortex_session::VortexSession;
14use vortex_session::registry::CachedId;
15
16use crate::ArrayEq;
17use crate::ArrayHash;
18use crate::ArrayRef;
19use crate::Canonical;
20use crate::ExecutionCtx;
21use crate::ExecutionResult;
22use crate::IntoArray;
23use crate::Precision;
24#[expect(deprecated)]
25use crate::ToCanonical as _;
26use crate::array::Array;
27use crate::array::ArrayId;
28use crate::array::ArrayParts;
29use crate::array::ArrayView;
30use crate::array::VTable;
31use crate::arrays::chunked::ChunkedArrayExt;
32use crate::arrays::chunked::ChunkedData;
33use crate::arrays::chunked::array::CHUNK_OFFSETS_SLOT;
34use crate::arrays::chunked::array::CHUNKS_OFFSET;
35use crate::arrays::chunked::compute::kernel::PARENT_KERNELS;
36use crate::arrays::chunked::compute::rules::PARENT_RULES;
37use crate::arrays::chunked::vtable::canonical::_canonicalize;
38use crate::buffer::BufferHandle;
39use crate::builders::ArrayBuilder;
40use crate::dtype::DType;
41use crate::dtype::Nullability;
42use crate::dtype::PType;
43use crate::serde::ArrayChildren;
44mod canonical;
45mod operations;
46mod validity;
47
48/// A [`Chunked`]-encoded Vortex array.
49pub type ChunkedArray = Array<Chunked>;
50
51#[derive(Clone, Debug)]
52pub struct Chunked;
53
54impl ArrayHash for ChunkedData {
55    fn array_hash<H: Hasher>(&self, _state: &mut H, _precision: Precision) {
56        // Chunk offsets are cached derived data. Slot 0 already stores the logical offsets array,
57        // and ArrayInner hashing includes every slot before ArrayData.
58    }
59}
60
61impl ArrayEq for ChunkedData {
62    fn array_eq(&self, _other: &Self, _precision: Precision) -> bool {
63        // Chunk offsets are cached derived data. Slot 0 already stores the logical offsets array,
64        // and ArrayInner equality compares every slot before ArrayData.
65        true
66    }
67}
68
69impl VTable for Chunked {
70    type ArrayData = ChunkedData;
71
72    type OperationsVTable = Self;
73    type ValidityVTable = Self;
74    fn id(&self) -> ArrayId {
75        static ID: CachedId = CachedId::new("vortex.chunked");
76        *ID
77    }
78
79    fn validate(
80        &self,
81        data: &ChunkedData,
82        dtype: &DType,
83        len: usize,
84        slots: &[Option<ArrayRef>],
85    ) -> VortexResult<()> {
86        vortex_ensure!(
87            !slots.is_empty(),
88            "ChunkedArray must have at least a chunk offsets slot"
89        );
90        let chunk_offsets = slots[CHUNK_OFFSETS_SLOT]
91            .as_ref()
92            .vortex_expect("validated chunk offsets slot");
93        vortex_ensure!(
94            chunk_offsets.dtype() == &DType::Primitive(PType::U64, Nullability::NonNullable),
95            "ChunkedArray chunk offsets must be non-nullable u64, found {}",
96            chunk_offsets.dtype()
97        );
98        vortex_ensure!(
99            chunk_offsets.len() == data.chunk_offsets.len(),
100            "ChunkedArray chunk offsets slot length {} does not match cached offsets length {}",
101            chunk_offsets.len(),
102            data.chunk_offsets.len()
103        );
104        vortex_ensure!(
105            data.chunk_offsets.len() == slots.len() - CHUNKS_OFFSET + 1,
106            "ChunkedArray chunk offsets length {} does not match {} chunks",
107            data.chunk_offsets.len(),
108            slots.len() - CHUNKS_OFFSET
109        );
110        vortex_ensure!(
111            data.chunk_offsets
112                .last()
113                .copied()
114                .vortex_expect("chunked arrays always have a leading 0 offset")
115                == len,
116            "ChunkedArray length {} does not match outer length {}",
117            data.chunk_offsets.last().copied().unwrap_or_default(),
118            len
119        );
120        for (idx, (start, end)) in data
121            .chunk_offsets
122            .iter()
123            .copied()
124            .tuple_windows()
125            .enumerate()
126        {
127            let chunk = slots[CHUNKS_OFFSET + idx]
128                .as_ref()
129                .vortex_expect("validated chunk slot");
130            vortex_ensure!(
131                chunk.dtype() == dtype,
132                "ChunkedArray chunk dtype {} does not match outer dtype {}",
133                chunk.dtype(),
134                dtype
135            );
136            vortex_ensure!(
137                chunk.len() == end - start,
138                "ChunkedArray chunk {} len {} does not match offsets span {}",
139                idx,
140                chunk.len(),
141                end - start
142            );
143        }
144        Ok(())
145    }
146
147    fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
148        0
149    }
150
151    fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
152        vortex_panic!("ChunkedArray buffer index {idx} out of bounds")
153    }
154
155    fn buffer_name(_array: ArrayView<'_, Self>, idx: usize) -> Option<String> {
156        vortex_panic!("ChunkedArray buffer_name index {idx} out of bounds")
157    }
158
159    fn serialize(
160        _array: ArrayView<'_, Self>,
161        _session: &VortexSession,
162    ) -> VortexResult<Option<Vec<u8>>> {
163        Ok(Some(vec![]))
164    }
165
166    fn deserialize(
167        &self,
168        dtype: &DType,
169        len: usize,
170        metadata: &[u8],
171        _buffers: &[BufferHandle],
172        children: &dyn ArrayChildren,
173        _session: &VortexSession,
174    ) -> VortexResult<ArrayParts<Self>> {
175        if !metadata.is_empty() {
176            vortex_bail!(
177                "ChunkedArray expects empty metadata, got {} bytes",
178                metadata.len()
179            );
180        }
181        if children.is_empty() {
182            vortex_bail!("Chunked array needs at least one child");
183        }
184
185        let nchunks = children.len() - 1;
186        let chunk_offsets = children.get(
187            CHUNK_OFFSETS_SLOT,
188            &DType::Primitive(PType::U64, Nullability::NonNullable),
189            nchunks + 1,
190        )?;
191        #[expect(deprecated)]
192        let chunk_offsets_buf = chunk_offsets.to_primitive().to_buffer::<u64>();
193        let chunk_offsets_usize = chunk_offsets_buf
194            .iter()
195            .copied()
196            .map(|offset| {
197                usize::try_from(offset)
198                    .map_err(|_| vortex_err!("chunk offset {offset} exceeds usize range"))
199            })
200            .collect::<VortexResult<Vec<_>>>()?;
201        let mut slots = Vec::with_capacity(children.len());
202        slots.push(Some(chunk_offsets));
203        for (idx, (start, end)) in chunk_offsets_usize
204            .iter()
205            .copied()
206            .tuple_windows()
207            .enumerate()
208        {
209            let chunk_len = end - start;
210            slots.push(Some(children.get(idx + CHUNKS_OFFSET, dtype, chunk_len)?));
211        }
212
213        Ok(ArrayParts::new(
214            self.clone(),
215            dtype.clone(),
216            len,
217            ChunkedData::new(chunk_offsets_usize),
218        )
219        .with_slots(slots))
220    }
221
222    fn append_to_builder(
223        array: ArrayView<'_, Self>,
224        builder: &mut dyn ArrayBuilder,
225        ctx: &mut ExecutionCtx,
226    ) -> VortexResult<()> {
227        for chunk in array.iter_chunks() {
228            chunk.append_to_builder(builder, ctx)?;
229        }
230        Ok(())
231    }
232
233    fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
234        match idx {
235            CHUNK_OFFSETS_SLOT => "chunk_offsets".to_string(),
236            n => format!("chunks[{}]", n - CHUNKS_OFFSET),
237        }
238    }
239
240    fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
241        match array.dtype() {
242            // Struct and List need special swizzling logic, use the existing canonicalize path.
243            DType::Struct(..) | DType::List(..) => {
244                // TODO(joe)[#7674]: iterative execution here too
245                Ok(ExecutionResult::done(_canonicalize(array.as_view(), ctx)?))
246            }
247            // For all other types, use the builder path via AppendChild.
248            _ => {
249                let slot_idx = array.next_builder_slot.max(CHUNKS_OFFSET);
250                if slot_idx < array.slots().len() {
251                    Ok(ExecutionResult::append_child(
252                        array.with_next_builder_slot(slot_idx + 1),
253                        slot_idx,
254                    ))
255                } else {
256                    Ok(ExecutionResult::done(
257                        Canonical::empty(array.dtype()).into_array(),
258                    ))
259                }
260            }
261        }
262    }
263
264    fn execute_parent(
265        array: ArrayView<'_, Self>,
266        parent: &ArrayRef,
267        child_idx: usize,
268        ctx: &mut ExecutionCtx,
269    ) -> VortexResult<Option<ArrayRef>> {
270        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
271    }
272
273    fn reduce(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
274        Ok(match array.nchunks() {
275            0 => Some(Canonical::empty(array.dtype()).into_array()),
276            1 => Some(array.chunk(0).clone()),
277            _ => None,
278        })
279    }
280
281    fn reduce_parent(
282        array: ArrayView<'_, Self>,
283        parent: &ArrayRef,
284        child_idx: usize,
285    ) -> VortexResult<Option<ArrayRef>> {
286        PARENT_RULES.evaluate(array, parent, child_idx)
287    }
288}