Skip to main content

vortex_array/arrays/listview/compute/
zip.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::ops::BitAnd;
5use std::ops::BitOr;
6use std::ops::Not;
7
8use vortex_buffer::Buffer;
9use vortex_buffer::BufferMut;
10use vortex_error::VortexResult;
11use vortex_mask::Mask;
12
13use crate::ArrayRef;
14use crate::ExecutionCtx;
15use crate::IntoArray;
16use crate::array::ArrayView;
17use crate::arrays::Chunked;
18use crate::arrays::ChunkedArray;
19use crate::arrays::ListView;
20use crate::arrays::ListViewArray;
21use crate::arrays::chunked::ChunkedArrayExt;
22use crate::arrays::listview::ListViewArrayExt;
23use crate::builtins::ArrayBuiltins;
24use crate::dtype::DType;
25use crate::dtype::Nullability;
26use crate::dtype::PType;
27use crate::scalar_fn::fns::zip::ZipKernel;
28use crate::validity::Validity;
29
30/// Zip two [`ListViewArray`]s by selecting whole list views per row.
31///
32/// A [`ListViewArray`] addresses each list by an `(offset, size)` pair into a shared `elements`
33/// array, and unlike [`ListArray`](crate::arrays::ListArray) it does not require lists to be stored
34/// contiguously or in order. Zipping two list views is therefore a metadata-only operation over the
35/// `offsets`, `sizes` and `validity` child arrays: we concatenate the two `elements` arrays
36/// (without rewriting them) and, for each row, select the `(offset, size)` pair from `if_true` or
37/// `if_false` per the mask. `if_false` views are shifted past the end of `if_true`'s elements so
38/// they continue to address the correct half of the concatenated elements array.
39impl ZipKernel for ListView {
40    fn zip(
41        if_true: ArrayView<'_, ListView>,
42        if_false: &ArrayRef,
43        mask: &ArrayRef,
44        ctx: &mut ExecutionCtx,
45    ) -> VortexResult<Option<ArrayRef>> {
46        let Some(if_false) = if_false.as_opt::<ListView>() else {
47            return Ok(None);
48        };
49
50        // Null mask entries select `if_false`, matching `Zip`'s SQL ELSE semantics.
51        let mask = mask.try_to_mask_fill_null_false(ctx)?;
52        match &mask {
53            // Defer the trivial masks to the generic zip, which just casts one side.
54            Mask::AllTrue(_) | Mask::AllFalse(_) => return Ok(None),
55            Mask::Values(_) => {}
56        }
57
58        let len = if_true.len();
59
60        let result_elements_dtype = if_true
61            .elements()
62            .dtype()
63            .union_nullability(if_false.elements().dtype().nullability());
64
65        // `if_false`'s elements share the element dtype up to nullability; normalize so both chunks
66        // of the concatenated elements array have an identical dtype.
67        let true_elements = if_true.elements().cast(result_elements_dtype.clone())?;
68        let false_elements = if_false.elements().cast(result_elements_dtype.clone())?;
69
70        // `if_false` views index into the second half of the concatenated elements.
71        let false_shift = true_elements.len() as u64;
72
73        // Concatenate the two `elements` arrays without copying. If either side is already a
74        // `ChunkedArray` (e.g. the result of a previous list-view zip), splice its chunks in
75        // directly rather than nesting chunked arrays.
76        let mut chunks = Vec::with_capacity(2);
77        push_element_chunks(true_elements, &mut chunks);
78        push_element_chunks(false_elements, &mut chunks);
79        let elements = ChunkedArray::try_new(chunks, result_elements_dtype)?.into_array();
80
81        let true_offsets = to_u64(if_true.offsets(), ctx)?;
82        let true_sizes = to_u64(if_true.sizes(), ctx)?;
83        let false_offsets = to_u64(if_false.offsets(), ctx)?;
84        let false_sizes = to_u64(if_false.sizes(), ctx)?;
85
86        let mut offsets = BufferMut::<u64>::with_capacity(len);
87        let mut sizes = BufferMut::<u64>::with_capacity(len);
88        for ((idx, (out_offsets, out_sizes)), selected) in offsets
89            .spare_capacity_mut()
90            .iter_mut()
91            .zip(sizes.spare_capacity_mut().iter_mut())
92            .take(len)
93            .enumerate()
94            .zip(mask.iter())
95        {
96            if selected {
97                out_offsets.write(true_offsets[idx]);
98                out_sizes.write(true_sizes[idx]);
99            } else {
100                out_offsets.write(false_offsets[idx] + false_shift);
101                out_sizes.write(false_sizes[idx]);
102            }
103        }
104
105        // SAFETY: the loop above initialized exactly `len` slots in both buffers.
106        unsafe {
107            offsets.set_len(len);
108            sizes.set_len(len);
109        }
110
111        let validity = zip_validity(if_true.validity()?, if_false.validity()?, &mask, ctx)?;
112
113        Ok(Some(
114            ListViewArray::try_new(
115                elements,
116                offsets.freeze().into_array(),
117                sizes.freeze().into_array(),
118                validity,
119            )?
120            .into_array(),
121        ))
122    }
123}
124
125/// Appends `array`'s element chunks to `chunks`, flattening a top-level [`ChunkedArray`] so the
126/// concatenated elements never nest chunked arrays.
127fn push_element_chunks(array: ArrayRef, chunks: &mut Vec<ArrayRef>) {
128    match array.as_opt::<Chunked>() {
129        Some(chunked) => chunks.extend(chunked.iter_chunks().cloned()),
130        None => chunks.push(array),
131    }
132}
133
134/// Read a non-nullable integer array into a `u64` buffer.
135fn to_u64(array: &ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Buffer<u64>> {
136    array
137        .clone()
138        .cast(DType::Primitive(PType::U64, Nullability::NonNullable))?
139        .execute::<Buffer<u64>>(ctx)
140}
141
142/// Combine the two list-level validities, taking `if_true`'s validity where `mask` is set and
143/// `if_false`'s where it is not.
144fn zip_validity(
145    if_true: Validity,
146    if_false: Validity,
147    mask: &Mask,
148    ctx: &mut ExecutionCtx,
149) -> VortexResult<Validity> {
150    Ok(match (&if_true, &if_false) {
151        (Validity::NonNullable, Validity::NonNullable) => Validity::NonNullable,
152        (Validity::AllValid, Validity::AllValid) => Validity::AllValid,
153        (Validity::AllInvalid, Validity::AllInvalid) => Validity::AllInvalid,
154        _ => {
155            let true_mask = if_true.execute_mask(mask.len(), ctx)?;
156            let false_mask = if_false.execute_mask(mask.len(), ctx)?;
157            let combined = true_mask
158                .bitand(mask)
159                .bitor(&false_mask.bitand(&mask.not()));
160            Validity::from_mask(combined, if_true.nullability() | if_false.nullability())
161        }
162    })
163}
164
165#[cfg(test)]
166mod tests {
167    use vortex_buffer::buffer;
168    use vortex_error::VortexResult;
169    use vortex_mask::Mask;
170
171    use crate::ArrayRef;
172    use crate::IntoArray;
173    use crate::LEGACY_SESSION;
174    use crate::VortexSessionExecute;
175    use crate::arrays::BoolArray;
176    use crate::arrays::Chunked;
177    use crate::arrays::ChunkedArray;
178    use crate::arrays::ListView;
179    use crate::arrays::ListViewArray;
180    use crate::arrays::chunked::ChunkedArrayExt;
181    use crate::arrays::listview::ListViewArrayExt;
182    use crate::assert_arrays_eq;
183    use crate::builtins::ArrayBuiltins;
184    use crate::dtype::DType;
185    use crate::dtype::Nullability;
186    use crate::dtype::PType;
187    use crate::validity::Validity;
188
189    fn list_view(
190        elements: ArrayRef,
191        offsets: ArrayRef,
192        sizes: ArrayRef,
193        validity: Validity,
194    ) -> ArrayRef {
195        ListViewArray::try_new(elements, offsets, sizes, validity)
196            .unwrap()
197            .into_array()
198    }
199
200    /// `zip` of two list views selects whole lists per the mask and keeps the list encoding.
201    #[test]
202    fn zip_selects_lists() -> VortexResult<()> {
203        // [[1, 2], [3], [4, 5, 6]]
204        let if_true = list_view(
205            buffer![1i32, 2, 3, 4, 5, 6].into_array(),
206            buffer![0u32, 2, 3].into_array(),
207            buffer![2u32, 1, 3].into_array(),
208            Validity::NonNullable,
209        );
210        // [[10], [20, 21], [30]]
211        let if_false = list_view(
212            buffer![10i32, 20, 21, 30].into_array(),
213            buffer![0u32, 1, 3].into_array(),
214            buffer![1u32, 2, 1].into_array(),
215            Validity::NonNullable,
216        );
217        let mask = Mask::from_iter([true, false, true]);
218
219        let mut ctx = LEGACY_SESSION.create_execution_ctx();
220        let result = mask
221            .into_array()
222            .zip(if_true, if_false)?
223            .execute::<ArrayRef>(&mut ctx)?;
224
225        // The kernel should keep the list-view encoding rather than canonicalizing.
226        assert!(result.is::<ListView>());
227
228        // Expected: [[1, 2], [20, 21], [4, 5, 6]]
229        let expected = list_view(
230            buffer![1i32, 2, 20, 21, 4, 5, 6].into_array(),
231            buffer![0u32, 2, 4].into_array(),
232            buffer![2u32, 2, 3].into_array(),
233            Validity::NonNullable,
234        );
235        assert_arrays_eq!(result, expected);
236        Ok(())
237    }
238
239    /// `zip` selects list-level validity from the chosen side and widens nullability.
240    #[test]
241    fn zip_selects_validity() -> VortexResult<()> {
242        // [[1], null, [2]] (list-level nulls)
243        let if_true = list_view(
244            buffer![1i32, 2].into_array(),
245            buffer![0u32, 1, 1].into_array(),
246            buffer![1u32, 0, 1].into_array(),
247            Validity::Array(BoolArray::from_iter([true, false, true]).into_array()),
248        );
249        // [[10], [20], null]
250        let if_false = list_view(
251            buffer![10i32, 20].into_array(),
252            buffer![0u32, 1, 2].into_array(),
253            buffer![1u32, 1, 0].into_array(),
254            Validity::Array(BoolArray::from_iter([true, true, false]).into_array()),
255        );
256        // true -> if_true, false -> if_false
257        let mask = Mask::from_iter([false, true, true]);
258
259        let mut ctx = LEGACY_SESSION.create_execution_ctx();
260        let result = mask
261            .into_array()
262            .zip(if_true, if_false)?
263            .execute::<ArrayRef>(&mut ctx)?;
264
265        // Row 0 -> if_false[0] = [10]; row 1 -> if_true[1] = null; row 2 -> if_true[2] = [2]
266        let expected = list_view(
267            buffer![10i32, 2].into_array(),
268            buffer![0u32, 1, 1].into_array(),
269            buffer![1u32, 0, 1].into_array(),
270            Validity::Array(BoolArray::from_iter([true, false, true]).into_array()),
271        );
272        assert_arrays_eq!(result, expected);
273        Ok(())
274    }
275
276    /// `zip` handles out-of-order/non-contiguous offsets and widens nullability when only one side
277    /// is nullable.
278    #[test]
279    fn zip_out_of_order_offsets_and_widening() -> VortexResult<()> {
280        // [[5, 6], [7], [8, 9]] expressed with out-of-order offsets.
281        let if_true = list_view(
282            buffer![7i32, 8, 9, 5, 6].into_array(),
283            buffer![3u32, 0, 1].into_array(),
284            buffer![2u32, 1, 2].into_array(),
285            Validity::NonNullable,
286        );
287        // [[100], null, [200, 201]]
288        let if_false = list_view(
289            buffer![100i32, 200, 201].into_array(),
290            buffer![0u32, 1, 1].into_array(),
291            buffer![1u32, 0, 2].into_array(),
292            Validity::Array(BoolArray::from_iter([true, false, true]).into_array()),
293        );
294        let mask = Mask::from_iter([true, true, false]);
295
296        let mut ctx = LEGACY_SESSION.create_execution_ctx();
297        let result = mask
298            .into_array()
299            .zip(if_true, if_false)?
300            .execute::<ArrayRef>(&mut ctx)?;
301        assert!(result.is::<ListView>());
302
303        // [[5, 6], [7], [200, 201]], all valid but nullable (widened by if_false).
304        let expected = list_view(
305            buffer![5i32, 6, 7, 200, 201].into_array(),
306            buffer![0u32, 2, 3].into_array(),
307            buffer![2u32, 1, 2].into_array(),
308            Validity::AllValid,
309        );
310        assert_arrays_eq!(result, expected);
311        Ok(())
312    }
313
314    /// When an input's `elements` is already a [`ChunkedArray`], its chunks are spliced in rather
315    /// than nesting a chunked array inside the concatenated elements.
316    #[test]
317    fn zip_flattens_chunked_elements() -> VortexResult<()> {
318        // elements [1, 2, 3] stored as two chunks; lists [[1, 2], [3]].
319        let chunked_elements = ChunkedArray::try_new(
320            vec![buffer![1i32, 2].into_array(), buffer![3i32].into_array()],
321            DType::Primitive(PType::I32, Nullability::NonNullable),
322        )?
323        .into_array();
324        let if_true = list_view(
325            chunked_elements,
326            buffer![0u32, 2].into_array(),
327            buffer![2u32, 1].into_array(),
328            Validity::NonNullable,
329        );
330        // [[10], [20]]
331        let if_false = list_view(
332            buffer![10i32, 20].into_array(),
333            buffer![0u32, 1].into_array(),
334            buffer![1u32, 1].into_array(),
335            Validity::NonNullable,
336        );
337        let mask = Mask::from_iter([true, false]);
338
339        let mut ctx = LEGACY_SESSION.create_execution_ctx();
340        let result = mask
341            .into_array()
342            .zip(if_true, if_false)?
343            .execute::<ArrayRef>(&mut ctx)?;
344
345        // The concatenated elements are chunked, but no chunk is itself a `ChunkedArray`.
346        let result_lv = result
347            .as_opt::<ListView>()
348            .expect("zip keeps the list-view encoding");
349        let chunked = result_lv
350            .elements()
351            .as_opt::<Chunked>()
352            .expect("zip concatenates elements into a chunked array");
353        assert!(
354            chunked.iter_chunks().all(|chunk| !chunk.is::<Chunked>()),
355            "chunked elements must be flattened, not nested",
356        );
357
358        // [[1, 2], [20]]
359        let expected = list_view(
360            buffer![1i32, 2, 20].into_array(),
361            buffer![0u32, 2].into_array(),
362            buffer![2u32, 1].into_array(),
363            Validity::NonNullable,
364        );
365        assert_arrays_eq!(result, expected);
366        Ok(())
367    }
368}