Skip to main content

vortex_array/arrays/chunked/compute/
zip.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_error::VortexResult;
5
6use crate::ArrayRef;
7use crate::ExecutionCtx;
8use crate::IntoArray;
9use crate::array::ArrayView;
10use crate::arrays::Chunked;
11use crate::arrays::ChunkedArray;
12use crate::arrays::chunked::ChunkedArrayExt;
13use crate::arrays::chunked::paired_chunks::PairedChunksExt;
14use crate::builtins::ArrayBuiltins;
15use crate::scalar_fn::fns::zip::ZipKernel;
16
17// Push down the zip call to the chunks. Without this rule
18// the default implementation canonicalises the chunked array
19// then zips once.
20impl ZipKernel for Chunked {
21    fn zip(
22        if_true: ArrayView<'_, Chunked>,
23        if_false: &ArrayRef,
24        mask: &ArrayRef,
25        _ctx: &mut ExecutionCtx,
26    ) -> VortexResult<Option<ArrayRef>> {
27        let Some(if_false) = if_false.as_opt::<Chunked>() else {
28            return Ok(None);
29        };
30        let dtype = if_true
31            .dtype()
32            .union_nullability(if_false.dtype().nullability());
33        let mut out_chunks = Vec::with_capacity(if_true.nchunks() + if_false.nchunks());
34
35        for pair in if_true.paired_chunks(&if_false) {
36            let pair = pair?;
37            let mask_slice = mask.slice(pair.pos)?;
38            out_chunks.push(mask_slice.zip(pair.left, pair.right)?);
39        }
40
41        // SAFETY: chunks originate from zipping slices of inputs that share dtype/nullability.
42        let chunked = unsafe { ChunkedArray::new_unchecked(out_chunks, dtype) };
43        Ok(Some(chunked.into_array()))
44    }
45}
46
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_mask::Mask;

    use crate::ArrayRef;
    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    #[expect(deprecated)]
    use crate::ToCanonical as _;
    use crate::VortexSessionExecute;
    use crate::arrays::Chunked;
    use crate::arrays::ChunkedArray;
    use crate::arrays::chunked::ChunkedArrayExt;
    use crate::builtins::ArrayBuiltins;
    use crate::dtype::DType;
    use crate::dtype::Nullability;
    use crate::dtype::PType;

    // Zips two chunked arrays whose chunk boundaries differ and checks both
    // the resulting chunk count and the selected values.
    #[test]
    fn test_chunked_zip_aligns_across_boundaries() {
        // Builds a non-nullable i32 chunked array from the given chunks.
        let non_null_i32 = |chunks: Vec<ArrayRef>| {
            ChunkedArray::try_new(
                chunks,
                DType::Primitive(PType::I32, Nullability::NonNullable),
            )
            .unwrap()
        };

        // Chunk lengths 2 / 1 / 2.
        let if_true = non_null_i32(vec![
            buffer![1i32, 2].into_array(),
            buffer![3i32].into_array(),
            buffer![4i32, 5].into_array(),
        ]);

        // Chunk lengths 1 / 2 / 2, deliberately misaligned with `if_true`.
        let if_false = non_null_i32(vec![
            buffer![10i32].into_array(),
            buffer![11i32, 12].into_array(),
            buffer![13i32, 14].into_array(),
        ]);

        let selection = Mask::from_iter([true, false, true, false, true]);

        let lazy_zip = selection
            .into_array()
            .zip(if_true.into_array(), if_false.into_array())
            .unwrap();
        // One step of execution will push down the zip.
        let executed = lazy_zip
            .clone()
            .execute::<ArrayRef>(&mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        let chunked = executed
            .as_opt::<Chunked>()
            .expect("zip should keep chunked encoding");

        assert_eq!(chunked.nchunks(), 4);

        // Concatenate the values of every output chunk, in order.
        let mut flattened = Vec::<i32>::new();
        for chunk in chunked.chunks() {
            #[expect(deprecated)]
            let primitive = chunk.to_primitive();
            flattened.extend_from_slice(primitive.as_slice::<i32>());
        }
        assert_eq!(flattened, vec![1, 11, 3, 13, 5]);
    }
}