vortex_array/arrays/listview/
conversion.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use vortex_dtype::{IntegerPType, Nullability, match_each_integer_ptype};
7use vortex_error::VortexExpect;
8
9use crate::arrays::{ExtensionArray, FixedSizeListArray, ListArray, ListViewArray, StructArray};
10use crate::builders::{ArrayBuilder, ListBuilder, PrimitiveBuilder};
11use crate::vtable::ValidityHelper;
12use crate::{Array, ArrayRef, Canonical, IntoArray, ToCanonical};
13
14/// Creates a [`ListViewArray`] from a [`ListArray`] by computing `sizes` from `offsets`.
15pub fn list_view_from_list(list: ListArray) -> ListViewArray {
16    // If the list is empty, create an empty `ListViewArray` with the same offset `DType` as the
17    // input.
18    if list.is_empty() {
19        return Canonical::empty(list.dtype()).into_listview();
20    }
21
22    let len = list.len();
23
24    // Get the `offsets` array directly from the `ListArray` (preserving its type).
25    let list_offsets = list.offsets().clone();
26
27    // We need to slice the `offsets` to remove the last element (`ListArray` has n+1 offsets).
28    let adjusted_offsets = list_offsets.slice(0..len);
29
30    // Create sizes array by computing differences between consecutive offsets.
31    // Use the same dtype as the offsets array to ensure compatibility.
32    let sizes = match_each_integer_ptype!(list_offsets.dtype().as_ptype(), |O| {
33        build_sizes_from_offsets::<O>(&list)
34    });
35
36    // SAFETY: Since everything came from an existing valid `ListArray`, and the `sizes` were
37    // derived from valid and in-order `offsets`, we know these fields are valid.
38    unsafe {
39        ListViewArray::new_unchecked(
40            list.elements().clone(),
41            adjusted_offsets,
42            sizes,
43            list.validity().clone(),
44        )
45    }
46}
47
48/// Builds a sizes array from a [`ListArray`] by computing differences between consecutive offsets.
49fn build_sizes_from_offsets<O: IntegerPType>(list: &ListArray) -> ArrayRef {
50    let len = list.len();
51    let mut sizes_builder = PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, len);
52
53    // Create `UninitRange` for direct memory access.
54    let mut sizes_range = sizes_builder.uninit_range(len);
55    let offsets = list.offsets().to_primitive();
56    let offsets_slice = offsets.as_slice::<O>();
57
58    // Compute sizes as the difference between consecutive offsets.
59    for i in 0..len {
60        let size = offsets_slice[i + 1] - offsets_slice[i];
61        sizes_range.set_value(i, size);
62    }
63
64    // SAFETY: We have initialized all values in the range.
65    unsafe {
66        sizes_range.finish();
67    }
68
69    sizes_builder.finish_into_primitive().into_array()
70}
71
72/// Creates a [`ListArray`] from a [`ListViewArray`].
73pub fn list_from_list_view(list_view: ListViewArray) -> ListArray {
74    let elements_dtype = list_view
75        .dtype()
76        .as_list_element_opt()
77        .vortex_expect("`DType` of `ListView` was somehow not a `List`");
78    let nullability = list_view.dtype().nullability();
79    let len = list_view.len();
80
81    match_each_integer_ptype!(list_view.offsets().dtype().as_ptype(), |O| {
82        let mut builder = ListBuilder::<O>::with_capacity(
83            elements_dtype.clone(),
84            nullability,
85            list_view.elements().len(),
86            len,
87        );
88
89        for i in 0..len {
90            builder
91                .append_scalar(&list_view.scalar_at(i))
92                .vortex_expect(
93                    "The `ListView` scalars are `ListScalar`, which the `ListBuilder` must accept",
94                )
95        }
96
97        builder.finish_into_list()
98    })
99}
100
101/// Recursively converts all [`ListViewArray`]s to [`ListArray`]s in a nested array structure.
102///
103/// The conversion happens bottom-up, processing children before parents.
104pub fn recursive_list_from_list_view(array: ArrayRef) -> ArrayRef {
105    if !array.dtype().is_nested() {
106        return array;
107    }
108
109    let canonical = array.to_canonical();
110
111    match canonical {
112        Canonical::List(listview) => {
113            let converted_elements = recursive_list_from_list_view(listview.elements().clone());
114
115            // Avoid cloning if elements didn't change.
116            let listview_with_converted_elements =
117                if !Arc::ptr_eq(&converted_elements, listview.elements()) {
118                    ListViewArray::try_new(
119                        converted_elements,
120                        listview.offsets().clone(),
121                        listview.sizes().clone(),
122                        listview.validity().clone(),
123                    )
124                    .vortex_expect("ListView reconstruction should not fail with valid components")
125                } else {
126                    listview
127                };
128
129            // Make the conversion to `ListArray`.
130            let list_array = list_from_list_view(listview_with_converted_elements);
131            list_array.into_array()
132        }
133        Canonical::FixedSizeList(fixed_size_list) => {
134            let converted_elements =
135                recursive_list_from_list_view(fixed_size_list.elements().clone());
136
137            // Avoid cloning if elements didn't change.
138            if !Arc::ptr_eq(&converted_elements, fixed_size_list.elements()) {
139                FixedSizeListArray::try_new(
140                    converted_elements,
141                    fixed_size_list.list_size(),
142                    fixed_size_list.validity().clone(),
143                    fixed_size_list.len(),
144                )
145                .vortex_expect(
146                    "FixedSizeListArray reconstruction should not fail with valid components",
147                )
148                .into_array()
149            } else {
150                fixed_size_list.into_array()
151            }
152        }
153        Canonical::Struct(struct_array) => {
154            let fields = struct_array.fields();
155            let mut converted_fields = Vec::with_capacity(fields.len());
156            let mut any_changed = false;
157
158            for field in fields.iter() {
159                let converted_field = recursive_list_from_list_view(field.clone());
160                // Avoid cloning if elements didn't change.
161                any_changed |= !Arc::ptr_eq(&converted_field, field);
162                converted_fields.push(converted_field);
163            }
164
165            if any_changed {
166                StructArray::try_new(
167                    struct_array.names().clone(),
168                    converted_fields,
169                    struct_array.len(),
170                    struct_array.validity().clone(),
171                )
172                .vortex_expect("StructArray reconstruction should not fail with valid components")
173                .into_array()
174            } else {
175                struct_array.into_array()
176            }
177        }
178        Canonical::Extension(ext_array) => {
179            let converted_storage = recursive_list_from_list_view(ext_array.storage().clone());
180
181            // Avoid cloning if elements didn't change.
182            if !Arc::ptr_eq(&converted_storage, ext_array.storage()) {
183                ExtensionArray::new(ext_array.ext_dtype().clone(), converted_storage).into_array()
184            } else {
185                ext_array.into_array()
186            }
187        }
188        _ => unreachable!(),
189    }
190}
191
192#[cfg(test)]
193mod tests {
194    use std::sync::Arc;
195
196    use vortex_buffer::buffer;
197    use vortex_dtype::FieldNames;
198
199    use super::super::tests::common::{
200        create_basic_listview, create_empty_lists_listview, create_nullable_listview,
201        create_overlapping_listview,
202    };
203    use super::recursive_list_from_list_view;
204    use crate::arrays::{
205        BoolArray, FixedSizeListArray, ListArray, ListViewArray, PrimitiveArray, StructArray,
206        list_from_list_view, list_view_from_list,
207    };
208    use crate::validity::Validity;
209    use crate::vtable::ValidityHelper;
210    use crate::{IntoArray, assert_arrays_eq};
211
212    #[test]
213    fn test_list_to_listview_basic() {
214        // Create a basic ListArray: [[0,1,2], [3,4], [5,6], [7,8,9]].
215        let elements = buffer![0i32, 1, 2, 3, 4, 5, 6, 7, 8, 9].into_array();
216        let offsets = buffer![0u32, 3, 5, 7, 10].into_array();
217        let list_array =
218            ListArray::try_new(elements.clone(), offsets.clone(), Validity::NonNullable).unwrap();
219
220        let list_view = list_view_from_list(list_array.clone());
221
222        // Verify structure.
223        assert_eq!(list_view.len(), 4);
224        assert_arrays_eq!(elements, list_view.elements().clone());
225
226        // Verify offsets (should be same but without last element).
227        let expected_offsets = buffer![0u32, 3, 5, 7].into_array();
228        assert_arrays_eq!(expected_offsets, list_view.offsets().clone());
229
230        // Verify sizes.
231        let expected_sizes = buffer![3u32, 2, 2, 3].into_array();
232        assert_arrays_eq!(expected_sizes, list_view.sizes().clone());
233
234        // Verify data integrity.
235        assert_arrays_eq!(list_array, list_view);
236    }
237
238    #[test]
239    fn test_listview_to_list_zero_copy() {
240        let list_view = create_basic_listview();
241        let list_array = list_from_list_view(list_view.clone());
242
243        // Should have same elements.
244        assert_arrays_eq!(list_view.elements().clone(), list_array.elements().clone());
245
246        // ListArray offsets should have n+1 elements for n lists (add the final offset).
247        // Check that the first n offsets match.
248        let list_array_offsets_without_last = list_array.offsets().slice(0..list_view.len());
249        assert_arrays_eq!(list_view.offsets().clone(), list_array_offsets_without_last);
250
251        // Verify data integrity.
252        assert_arrays_eq!(list_view, list_array);
253    }
254
255    #[test]
256    fn test_empty_array_conversions() {
257        // Empty ListArray to ListViewArray.
258        let empty_elements = PrimitiveArray::from_iter::<[i32; 0]>([]).into_array();
259        let empty_offsets = buffer![0u32].into_array();
260        let empty_list =
261            ListArray::try_new(empty_elements.clone(), empty_offsets, Validity::NonNullable)
262                .unwrap();
263
264        // This conversion will create an empty ListViewArray.
265        // Note: list_view_from_list handles the empty case specially.
266        let empty_list_view = list_view_from_list(empty_list.clone());
267        assert_eq!(empty_list_view.len(), 0);
268
269        // Convert back.
270        let converted_back = list_from_list_view(empty_list_view);
271        assert_eq!(converted_back.len(), 0);
272        // For empty arrays, we can't use assert_arrays_eq directly since the offsets might differ.
273        // Just check that it's empty.
274        assert_eq!(empty_list.len(), converted_back.len());
275    }
276
277    #[test]
278    fn test_nullable_conversions() {
279        // Create nullable ListArray: [[10,20], null, [50]].
280        let elements = buffer![10i32, 20, 30, 40, 50].into_array();
281        let offsets = buffer![0u32, 2, 4, 5].into_array();
282        let validity = Validity::Array(BoolArray::from_iter(vec![true, false, true]).into_array());
283        let nullable_list =
284            ListArray::try_new(elements.clone(), offsets.clone(), validity.clone()).unwrap();
285
286        let nullable_list_view = list_view_from_list(nullable_list.clone());
287
288        // Verify validity is preserved.
289        assert_eq!(nullable_list_view.validity(), &validity);
290        assert_eq!(nullable_list_view.len(), 3);
291
292        // Round-trip conversion.
293        let converted_back = list_from_list_view(nullable_list_view);
294        assert_arrays_eq!(nullable_list, converted_back);
295    }
296
297    #[test]
298    fn test_non_zero_copy_listview_to_list() {
299        // Create ListViewArray with overlapping lists (not zero-copyable).
300        let list_view = create_overlapping_listview();
301        let list_array = list_from_list_view(list_view.clone());
302
303        // The data should still be correct even though it required a rebuild.
304        assert_arrays_eq!(list_view, list_array.clone());
305
306        // The resulting ListArray should have monotonic offsets.
307        for i in 0..list_array.len() {
308            let start = list_array.offset_at(i);
309            let end = list_array.offset_at(i + 1);
310            assert!(end >= start, "Offsets should be monotonic after conversion");
311        }
312    }
313
314    #[test]
315    fn test_empty_sublists() {
316        let empty_lists_view = create_empty_lists_listview();
317
318        // Convert to ListArray.
319        let list_array = list_from_list_view(empty_lists_view.clone());
320        assert_eq!(list_array.len(), 4);
321
322        // All sublists should be empty.
323        for i in 0..list_array.len() {
324            assert_eq!(list_array.list_elements_at(i).len(), 0);
325        }
326
327        // Round-trip.
328        let converted_back = list_view_from_list(list_array);
329        assert_arrays_eq!(empty_lists_view, converted_back);
330    }
331
332    #[test]
333    fn test_different_offset_types() {
334        // Test with i32 offsets.
335        let elements = buffer![1i32, 2, 3, 4, 5].into_array();
336        let i32_offsets = buffer![0i32, 2, 5].into_array();
337        let list_i32 =
338            ListArray::try_new(elements.clone(), i32_offsets.clone(), Validity::NonNullable)
339                .unwrap();
340
341        let list_view_i32 = list_view_from_list(list_i32.clone());
342        assert_eq!(list_view_i32.offsets().dtype(), i32_offsets.dtype());
343        assert_eq!(list_view_i32.sizes().dtype(), i32_offsets.dtype());
344
345        // Test with i64 offsets.
346        let i64_offsets = buffer![0i64, 2, 5].into_array();
347        let list_i64 =
348            ListArray::try_new(elements.clone(), i64_offsets.clone(), Validity::NonNullable)
349                .unwrap();
350
351        let list_view_i64 = list_view_from_list(list_i64.clone());
352        assert_eq!(list_view_i64.offsets().dtype(), i64_offsets.dtype());
353        assert_eq!(list_view_i64.sizes().dtype(), i64_offsets.dtype());
354
355        // Verify data integrity.
356        assert_arrays_eq!(list_i32, list_view_i32);
357        assert_arrays_eq!(list_i64, list_view_i64);
358    }
359
360    #[test]
361    fn test_round_trip_conversions() {
362        // Test 1: Basic round-trip.
363        let original = create_basic_listview();
364        let to_list = list_from_list_view(original.clone());
365        let back_to_view = list_view_from_list(to_list);
366        assert_arrays_eq!(original, back_to_view);
367
368        // Test 2: Nullable round-trip.
369        let nullable = create_nullable_listview();
370        let nullable_to_list = list_from_list_view(nullable.clone());
371        let nullable_back = list_view_from_list(nullable_to_list);
372        assert_arrays_eq!(nullable, nullable_back);
373
374        // Test 3: Non-zero-copyable round-trip.
375        let overlapping = create_overlapping_listview();
376
377        let overlapping_to_list = list_from_list_view(overlapping.clone());
378        let overlapping_back = list_view_from_list(overlapping_to_list);
379        assert_arrays_eq!(overlapping, overlapping_back);
380    }
381
382    #[test]
383    fn test_single_element_lists() {
384        // Create lists with single elements: [[100], [200], [300]].
385        let elements = buffer![100i32, 200, 300].into_array();
386        let offsets = buffer![0u32, 1, 2, 3].into_array();
387        let single_elem_list =
388            ListArray::try_new(elements.clone(), offsets, Validity::NonNullable).unwrap();
389
390        let list_view = list_view_from_list(single_elem_list.clone());
391        assert_eq!(list_view.len(), 3);
392
393        // Verify sizes are all 1.
394        let expected_sizes = buffer![1u32, 1, 1].into_array();
395        assert_arrays_eq!(expected_sizes, list_view.sizes().clone());
396
397        // Round-trip.
398        let converted_back = list_from_list_view(list_view.clone());
399        assert_arrays_eq!(single_elem_list, converted_back);
400    }
401
402    #[test]
403    fn test_mixed_empty_and_non_empty_lists() {
404        // Create: [[1,2], [], [3], [], [4,5,6]].
405        let elements = buffer![1i32, 2, 3, 4, 5, 6].into_array();
406        let offsets = buffer![0u32, 2, 2, 3, 3, 6].into_array();
407        let mixed_list =
408            ListArray::try_new(elements.clone(), offsets.clone(), Validity::NonNullable).unwrap();
409
410        let list_view = list_view_from_list(mixed_list.clone());
411        assert_eq!(list_view.len(), 5);
412
413        // Verify sizes.
414        let expected_sizes = buffer![2u32, 0, 1, 0, 3].into_array();
415        assert_arrays_eq!(expected_sizes, list_view.sizes().clone());
416
417        // Round-trip.
418        let converted_back = list_from_list_view(list_view.clone());
419        assert_arrays_eq!(mixed_list, converted_back);
420    }
421
422    #[test]
423    fn test_recursive_simple_listview() {
424        let list_view = create_basic_listview();
425        let result = recursive_list_from_list_view(list_view.clone().into_array());
426
427        assert_eq!(result.len(), list_view.len());
428        assert_arrays_eq!(list_view.into_array(), result);
429    }
430
431    #[test]
432    fn test_recursive_nested_listview() {
433        let inner_elements = buffer![1i32, 2, 3].into_array();
434        let inner_offsets = buffer![0u32, 2].into_array();
435        let inner_sizes = buffer![2u32, 1].into_array();
436        let inner_listview = ListViewArray::try_new(
437            inner_elements,
438            inner_offsets,
439            inner_sizes,
440            Validity::NonNullable,
441        )
442        .unwrap();
443
444        let outer_offsets = buffer![0u32, 1].into_array();
445        let outer_sizes = buffer![1u32, 1].into_array();
446        let outer_listview = ListViewArray::try_new(
447            inner_listview.into_array(),
448            outer_offsets,
449            outer_sizes,
450            Validity::NonNullable,
451        )
452        .unwrap();
453
454        let result = recursive_list_from_list_view(outer_listview.clone().into_array());
455
456        assert_eq!(result.len(), 2);
457        assert_arrays_eq!(outer_listview.into_array(), result);
458    }
459
460    #[test]
461    fn test_recursive_struct_with_listview_fields() {
462        let listview_field = create_basic_listview().into_array();
463        let primitive_field = buffer![10i32, 20, 30, 40].into_array();
464
465        let struct_array = StructArray::try_new(
466            FieldNames::from(["lists", "values"]),
467            vec![listview_field, primitive_field],
468            4,
469            Validity::NonNullable,
470        )
471        .unwrap();
472
473        let result = recursive_list_from_list_view(struct_array.clone().into_array());
474
475        assert_eq!(result.len(), 4);
476        assert_arrays_eq!(struct_array.into_array(), result);
477    }
478
479    #[test]
480    fn test_recursive_fixed_size_list_with_listview_elements() {
481        let lv1_elements = buffer![1i32, 2].into_array();
482        let lv1_offsets = buffer![0u32].into_array();
483        let lv1_sizes = buffer![2u32].into_array();
484        let lv1 =
485            ListViewArray::try_new(lv1_elements, lv1_offsets, lv1_sizes, Validity::NonNullable)
486                .unwrap();
487
488        let lv2_elements = buffer![3i32, 4].into_array();
489        let lv2_offsets = buffer![0u32].into_array();
490        let lv2_sizes = buffer![2u32].into_array();
491        let lv2 =
492            ListViewArray::try_new(lv2_elements, lv2_offsets, lv2_sizes, Validity::NonNullable)
493                .unwrap();
494
495        let dtype = lv1.dtype().clone();
496        let chunked_listviews =
497            crate::arrays::ChunkedArray::try_new(vec![lv1.into_array(), lv2.into_array()], dtype)
498                .unwrap();
499
500        let fixed_list =
501            FixedSizeListArray::new(chunked_listviews.into_array(), 1, Validity::NonNullable, 2);
502
503        let result = recursive_list_from_list_view(fixed_list.clone().into_array());
504
505        assert_eq!(result.len(), 2);
506        assert_arrays_eq!(fixed_list.into_array(), result);
507    }
508
509    #[test]
510    fn test_recursive_deep_nesting() {
511        let innermost_elements = buffer![1i32, 2, 3].into_array();
512        let innermost_offsets = buffer![0u32, 2].into_array();
513        let innermost_sizes = buffer![2u32, 1].into_array();
514        let innermost_listview = ListViewArray::try_new(
515            innermost_elements,
516            innermost_offsets,
517            innermost_sizes,
518            Validity::NonNullable,
519        )
520        .unwrap();
521
522        let struct_array = StructArray::try_new(
523            FieldNames::from(["inner_lists"]),
524            vec![innermost_listview.into_array()],
525            2,
526            Validity::NonNullable,
527        )
528        .unwrap();
529
530        let outer_offsets = buffer![0u32, 1].into_array();
531        let outer_sizes = buffer![1u32, 1].into_array();
532        let outer_listview = ListViewArray::try_new(
533            struct_array.into_array(),
534            outer_offsets,
535            outer_sizes,
536            Validity::NonNullable,
537        )
538        .unwrap();
539
540        let result = recursive_list_from_list_view(outer_listview.clone().into_array());
541
542        assert_eq!(result.len(), 2);
543        assert_arrays_eq!(outer_listview.into_array(), result);
544    }
545
546    #[test]
547    fn test_recursive_primitive_unchanged() {
548        let prim = buffer![1i32, 2, 3].into_array();
549        let prim_clone = prim.clone();
550        let result = recursive_list_from_list_view(prim);
551
552        assert!(Arc::ptr_eq(&result, &prim_clone));
553    }
554
555    #[test]
556    fn test_recursive_mixed_listview_and_list() {
557        let listview = create_basic_listview();
558        let list = list_from_list_view(listview.clone());
559
560        let struct_array = StructArray::try_new(
561            FieldNames::from(["listview_field", "list_field"]),
562            vec![listview.into_array(), list.into_array()],
563            4,
564            Validity::NonNullable,
565        )
566        .unwrap();
567
568        let result = recursive_list_from_list_view(struct_array.clone().into_array());
569
570        assert_eq!(result.len(), 4);
571        assert_arrays_eq!(struct_array.into_array(), result);
572    }
573}