vortex_sparse/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9    BinaryView, BoolArray, BooleanBuffer, ConstantArray, ListArray, NullArray, OffsetPType,
10    PrimitiveArray, StructArray, VarBinViewArray, smallest_storage_type,
11};
12use vortex_array::builders::{
13    ArrayBuilder as _, DecimalBuilder, ListBuilder, builder_with_capacity,
14};
15use vortex_array::patches::Patches;
16use vortex_array::validity::Validity;
17use vortex_array::vtable::CanonicalVTable;
18use vortex_array::{Array, ArrayRef, Canonical, IntoArray as _, ToCanonical as _};
19use vortex_buffer::{Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut};
20use vortex_dtype::{
21    DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
22    match_each_native_ptype,
23};
24use vortex_error::{VortexError, VortexExpect as _, vortex_panic};
25use vortex_scalar::{
26    DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar,
27    match_each_decimal_value_type,
28};
29
30use crate::{SparseArray, SparseVTable};
31
32impl CanonicalVTable<SparseVTable> for SparseVTable {
33    fn canonicalize(array: &SparseArray) -> Canonical {
34        if array.patches().num_patches() == 0 {
35            return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
36        }
37
38        match array.dtype() {
39            DType::Null => {
40                assert!(array.fill_scalar().is_null());
41                Canonical::Null(NullArray::new(array.len()))
42            }
43            DType::Bool(..) => {
44                let resolved_patches = array.resolved_patches();
45                canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
46            }
47            DType::Primitive(ptype, ..) => {
48                let resolved_patches = array.resolved_patches();
49                match_each_native_ptype!(ptype, |P| {
50                    canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
51                })
52            }
53            DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
54                struct_fields,
55                array.fill_scalar().as_struct(),
56                array.dtype(),
57                array.patches(),
58                array.len(),
59            ),
60            DType::Decimal(decimal_dtype, nullability) => {
61                let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
62                let fill_value = array.fill_scalar().as_decimal();
63                match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
64                    canonicalize_sparse_decimal::<D>(
65                        *decimal_dtype,
66                        *nullability,
67                        fill_value,
68                        array.patches(),
69                        array.len(),
70                    )
71                })
72            }
73            dtype @ DType::Utf8(..) => {
74                let fill_value = array.fill_scalar().as_utf8().value();
75                let fill_value = fill_value.map(BufferString::into_inner);
76                canonicalize_varbin(array, dtype.clone(), fill_value)
77            }
78            dtype @ DType::Binary(..) => {
79                let fill_value = array.fill_scalar().as_binary().value();
80                canonicalize_varbin(array, dtype.clone(), fill_value)
81            }
82            DType::List(values_dtype, nullability) => {
83                let resolved_patches = array.resolved_patches();
84                canonicalize_sparse_lists(
85                    array,
86                    resolved_patches,
87                    values_dtype.clone(),
88                    *nullability,
89                )
90            }
91            DType::FixedSizeList(..) => {
92                unimplemented!("TODO(connor)[FixedSizeList]")
93            }
94            DType::Extension(_ext_dtype) => todo!(),
95        }
96    }
97}
98
99/// The elements of this [ListScalar] as an array or `None` if scalar is null.
100fn list_scalar_to_elements_array(scalar: ListScalar) -> Option<ArrayRef> {
101    let elements = scalar.elements()?;
102
103    let mut builder = builder_with_capacity(scalar.element_dtype(), scalar.len());
104    for s in elements {
105        builder
106            .append_scalar(&s)
107            .vortex_expect("Scalar dtype must match");
108    }
109    Some(builder.finish())
110}
111
112/// Create a list-typed array containing one element, scalar, or `None` if scalar is null.
113fn list_scalar_to_singleton_list_array(scalar: ListScalar) -> Option<ArrayRef> {
114    let nullability = scalar.dtype().nullability();
115    let elements = list_scalar_to_elements_array(scalar)?;
116
117    let validity = match nullability {
118        Nullability::NonNullable => Validity::NonNullable,
119        Nullability::Nullable => Validity::AllValid,
120    };
121
122    let n = elements.len();
123    Some(
124        unsafe {
125            ListArray::new_unchecked(elements, buffer![0_u64, n as u64].into_array(), validity)
126        }
127        .into_array(),
128    )
129}
130
131#[allow(clippy::cognitive_complexity)]
132fn canonicalize_sparse_lists(
133    array: &SparseArray,
134    resolved_patches: Patches,
135    values_dtype: Arc<DType>,
136    nullability: Nullability,
137) -> Canonical {
138    macro_rules! match_smallest_offset_type {
139        ($n_elements:expr, | $offset_type:ident | $body:block) => {{
140            let n_elements = $n_elements;
141            if n_elements <= u8::MAX as usize {
142                type $offset_type = u8;
143                $body
144            } else if n_elements <= u16::MAX as usize {
145                type $offset_type = u16;
146                $body
147            } else if n_elements <= u32::MAX as usize {
148                type $offset_type = u32;
149                $body
150            } else {
151                assert!(u64::try_from(n_elements).is_ok());
152                type $offset_type = u64;
153                $body
154            }
155        }};
156    }
157
158    let indices = resolved_patches.indices().to_primitive();
159    let values = resolved_patches.values().to_list();
160    let fill_value = array.fill_scalar().as_list();
161
162    let n_filled = array.len() - resolved_patches.num_patches();
163    let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
164
165    let validity = Validity::from_mask(array.validity_mask(), nullability);
166
167    match_each_integer_ptype!(indices.ptype(), |I| {
168        match_smallest_offset_type!(total_canonical_values, |O| {
169            canonicalize_sparse_lists_inner::<I, O>(
170                indices.as_slice(),
171                values,
172                fill_value,
173                values_dtype,
174                array.len(),
175                total_canonical_values,
176                validity,
177            )
178        })
179    })
180}
181
182fn canonicalize_sparse_lists_inner<I: NativePType, SmallestViableOffsetType: OffsetPType>(
183    indices: &[I],
184    values: ListArray,
185    fill_value: ListScalar,
186    values_dtype: Arc<DType>,
187    len: usize,
188    total_canonical_values: usize,
189    validity: Validity,
190) -> Canonical {
191    let Some(fill_value_array) = list_scalar_to_singleton_list_array(fill_value) else {
192        let sparse_list_elements = values.elements().clone();
193        let sparse_list_offsets = values.offsets().to_primitive();
194        match_each_integer_ptype!(sparse_list_offsets.ptype(), |SparseValuesOffsetType| {
195            let sparse_list_offsets = sparse_list_offsets.as_slice::<SparseValuesOffsetType>();
196            // If the values are a small slice of a large array, their offsets may not fit in
197            // SmallestViableOffsetType. We avoid a copy by reusing values.elements(), but we
198            // therefore must use the offset type of values.offsets().
199            return canonicalize_sparse_lists_inner_with_null_fill_value(
200                indices,
201                sparse_list_elements,
202                sparse_list_offsets,
203                len,
204                validity,
205            );
206        });
207    };
208
209    let mut builder = ListBuilder::<SmallestViableOffsetType>::with_values_and_index_capacity(
210        values_dtype,
211        validity.nullability(),
212        total_canonical_values,
213        len + 1,
214    );
215    let mut next_index = 0_usize;
216    let enumerated_indices_usize = indices
217        .iter()
218        .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"))
219        .enumerate();
220    for (patch_values_index, next_patched_index) in enumerated_indices_usize {
221        for _ in next_index..next_patched_index {
222            builder.extend_from_array(&fill_value_array);
223        }
224        builder.extend_from_array(&values.slice(patch_values_index..patch_values_index + 1));
225        next_index = next_patched_index + 1;
226    }
227
228    for _ in next_index..len {
229        builder.extend_from_array(&fill_value_array);
230    }
231
232    builder.finish_into_canonical()
233}
234
235fn canonicalize_sparse_lists_inner_with_null_fill_value<I: NativePType, O: OffsetPType>(
236    indices: &[I],
237    elements: ArrayRef,
238    offsets: &[O],
239    len: usize,
240    validity: Validity,
241) -> Canonical {
242    assert!(indices.len() < len + 1);
243    let mut dense_offsets = BufferMut::with_capacity(len + 1);
244
245    // We cannot use zero because elements may have leading junk values (e.g. the result of a slice).
246    dense_offsets.push(offsets[0]);
247    let mut dense_last_set_index = 0_usize;
248    for (sparse_start_index, dense_start_index) in indices.iter().enumerate() {
249        let sparse_end_index = sparse_start_index + 1;
250        let dense_start_index = (*dense_start_index)
251            .to_usize()
252            .vortex_expect("index must fit in usize");
253        let dense_end_index = dense_start_index + 1;
254
255        for _ in (dense_last_set_index + 1)..dense_end_index {
256            // For each null list, copy-forward the old index. These empty lists are masked by the validity.
257            dense_offsets.push(dense_offsets[dense_last_set_index]);
258        }
259        dense_offsets.push(offsets[sparse_end_index]);
260        dense_last_set_index = dense_end_index;
261    }
262    for _ in (dense_last_set_index + 1)..len {
263        // For each null list, copy-forward the old index. These empty lists are masked by the validity.
264        dense_offsets.push(dense_offsets[dense_last_set_index]);
265    }
266    Canonical::List(unsafe {
267        ListArray::new_unchecked(elements, dense_offsets.into_array(), validity)
268    })
269}
270
271fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> Canonical {
272    let (fill_bool, validity) = if fill_value.is_null() {
273        (false, Validity::AllInvalid)
274    } else {
275        (
276            fill_value
277                .try_into()
278                .vortex_expect("Fill value must convert to bool"),
279            if patches.dtype().nullability() == Nullability::NonNullable {
280                Validity::NonNullable
281            } else {
282                Validity::AllValid
283            },
284        )
285    };
286
287    let bools = BoolArray::new(
288        if fill_bool {
289            BooleanBuffer::new_set(patches.array_len())
290        } else {
291            BooleanBuffer::new_unset(patches.array_len())
292        },
293        validity,
294    );
295
296    Canonical::Bool(bools.patch(patches))
297}
298
299fn canonicalize_sparse_primitives<
300    T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
301>(
302    patches: &Patches,
303    fill_value: &Scalar,
304) -> Canonical {
305    let (primitive_fill, validity) = if fill_value.is_null() {
306        (T::default(), Validity::AllInvalid)
307    } else {
308        (
309            fill_value
310                .try_into()
311                .vortex_expect("Fill value must convert to target T"),
312            if patches.dtype().nullability() == Nullability::NonNullable {
313                Validity::NonNullable
314            } else {
315                Validity::AllValid
316            },
317        )
318    };
319
320    let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
321
322    Canonical::Primitive(parray.patch(patches))
323}
324
325fn canonicalize_sparse_struct(
326    struct_fields: &StructFields,
327    fill_struct: StructScalar,
328    dtype: &DType,
329    // Resolution is unnecessary b/c we're just pushing the patches into the fields.
330    unresolved_patches: &Patches,
331    len: usize,
332) -> Canonical {
333    let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
334        Some(fill_values) => (fill_values, Validity::AllValid),
335        None => (
336            struct_fields.fields().map(Scalar::default_value).collect(),
337            Validity::AllInvalid,
338        ),
339    };
340    let patch_values_as_struct = unresolved_patches.values().to_struct();
341    let columns_patch_values = patch_values_as_struct.fields();
342    let names = patch_values_as_struct.names();
343    let validity = if dtype.is_nullable() {
344        top_level_fill_validity.patch(
345            len,
346            unresolved_patches.offset(),
347            unresolved_patches.indices(),
348            &Validity::from_mask(
349                unresolved_patches.values().validity_mask(),
350                Nullability::Nullable,
351            ),
352        )
353    } else {
354        top_level_fill_validity
355            .into_non_nullable()
356            .unwrap_or_else(|| vortex_panic!("fill validity should match sparse array nullability"))
357    };
358
359    StructArray::try_from_iter_with_validity(
360        names.iter().zip_eq(
361            columns_patch_values
362                .iter()
363                .cloned()
364                .zip_eq(fill_values)
365                .map(|(patch_values, fill_value)| unsafe {
366                    SparseArray::new_unchecked(
367                        unresolved_patches
368                            .clone()
369                            .map_values(|_| Ok(patch_values))
370                            .vortex_expect("Replacing patch values"),
371                        fill_value,
372                    )
373                }),
374        ),
375        validity,
376    )
377    .map(Canonical::Struct)
378    .vortex_expect("Creating struct array")
379}
380
381fn canonicalize_sparse_decimal<D: NativeDecimalType>(
382    decimal_dtype: DecimalDType,
383    nullability: Nullability,
384    fill_value: DecimalScalar,
385    patches: &Patches,
386    len: usize,
387) -> Canonical {
388    let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
389    match fill_value.decimal_value() {
390        Some(fill_value) => {
391            let fill_value = fill_value
392                .cast::<D>()
393                .vortex_expect("unexpected value type");
394            for _ in 0..len {
395                builder.append_value(fill_value)
396            }
397        }
398        None => {
399            builder.append_nulls(len);
400        }
401    }
402    let filled_array = builder.finish_into_decimal();
403    let array = filled_array.patch(patches);
404    Canonical::Decimal(array)
405}
406
407fn canonicalize_varbin(
408    array: &SparseArray,
409    dtype: DType,
410    fill_value: Option<ByteBuffer>,
411) -> Canonical {
412    let patches = array.resolved_patches();
413    let indices = patches.indices().to_primitive();
414    let values = patches.values().to_varbinview();
415    let validity = Validity::from_mask(array.validity_mask(), dtype.nullability());
416    let len = array.len();
417
418    match_each_integer_ptype!(indices.ptype(), |I| {
419        let indices = indices.buffer::<I>();
420        canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
421    })
422}
423
424fn canonicalize_varbin_inner<I: NativePType>(
425    fill_value: Option<ByteBuffer>,
426    indices: Buffer<I>,
427    values: VarBinViewArray,
428    dtype: DType,
429    validity: Validity,
430    len: usize,
431) -> Canonical {
432    assert_eq!(dtype.nullability(), validity.nullability());
433
434    let n_patch_buffers = values.buffers().len();
435    let mut buffers = values.buffers().to_vec();
436
437    let fill = if let Some(buffer) = &fill_value {
438        buffers.push(buffer.clone());
439        BinaryView::make_view(
440            buffer.as_ref(),
441            u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
442            0,
443        )
444    } else {
445        // any <=12 character value will do
446        BinaryView::make_view(&[], 0, 0)
447    };
448
449    let mut views = buffer_mut![fill; len];
450    for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
451        let patch_index_usize = <usize as NumCast>::from(patch_index)
452            .vortex_expect("var bin view indices must fit in usize");
453        views[patch_index_usize] = patch;
454    }
455
456    // SAFETY: views are constructed to maintain the invariants
457    let array = unsafe {
458        VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
459    };
460
461    Canonical::VarBinView(array)
462}
463
464#[cfg(test)]
465mod test {
466    use rstest::rstest;
467    use vortex_array::arrays::{
468        BoolArray, BooleanBufferBuilder, DecimalArray, ListArray, PrimitiveArray, StructArray,
469        VarBinArray, VarBinViewArray,
470    };
471    use vortex_array::arrow::IntoArrowArray as _;
472    use vortex_array::validity::Validity;
473    use vortex_array::vtable::ValidityHelper;
474    use vortex_array::{IntoArray, ToCanonical};
475    use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
476    use vortex_dtype::Nullability::{NonNullable, Nullable};
477    use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
478    use vortex_mask::Mask;
479    use vortex_scalar::{DecimalValue, Scalar};
480
481    use crate::SparseArray;
482    use crate::canonical::{list_scalar_to_elements_array, list_scalar_to_singleton_list_array};
483
484    #[rstest]
485    #[case(Some(true))]
486    #[case(Some(false))]
487    #[case(None)]
488    fn test_sparse_bool(#[case] fill_value: Option<bool>) {
489        let indices = buffer![0u64, 1, 7].into_array();
490        let values = bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
491            .into_array();
492        let sparse_bools =
493            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
494        assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));
495
496        let flat_bools = sparse_bools.to_bool();
497        let expected = bool_array_from_nullable_vec(
498            vec![
499                Some(true),
500                None,
501                fill_value,
502                fill_value,
503                fill_value,
504                fill_value,
505                fill_value,
506                Some(false),
507                fill_value,
508                fill_value,
509            ],
510            fill_value,
511        );
512
513        assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
514        assert_eq!(flat_bools.validity(), expected.validity());
515
516        assert!(flat_bools.boolean_buffer().value(0));
517        assert!(flat_bools.validity().is_valid(0));
518        assert_eq!(
519            flat_bools.boolean_buffer().value(1),
520            fill_value.unwrap_or_default()
521        );
522        assert!(!flat_bools.validity().is_valid(1));
523        assert_eq!(flat_bools.validity().is_valid(2), fill_value.is_some());
524        assert!(!flat_bools.boolean_buffer().value(7));
525        assert!(flat_bools.validity().is_valid(7));
526    }
527
528    fn bool_array_from_nullable_vec(
529        bools: Vec<Option<bool>>,
530        fill_value: Option<bool>,
531    ) -> BoolArray {
532        let mut buffer = BooleanBufferBuilder::new(bools.len());
533        let mut validity = BooleanBufferBuilder::new(bools.len());
534        for maybe_bool in bools {
535            buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default()));
536            validity.append(maybe_bool.is_some());
537        }
538        BoolArray::new(buffer.finish(), Validity::from(validity.finish()))
539    }
540
541    #[rstest]
542    #[case(Some(0i32))]
543    #[case(Some(-1i32))]
544    #[case(None)]
545    fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
546        let indices = buffer![0u64, 1, 7].into_array();
547        let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
548        let sparse_ints =
549            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
550        assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
551
552        let flat_ints = sparse_ints.to_primitive();
553        let expected = PrimitiveArray::from_option_iter([
554            Some(0i32),
555            None,
556            fill_value,
557            fill_value,
558            fill_value,
559            fill_value,
560            fill_value,
561            Some(1),
562            fill_value,
563            fill_value,
564        ]);
565
566        assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
567        assert_eq!(flat_ints.validity(), expected.validity());
568
569        assert_eq!(flat_ints.as_slice::<i32>()[0], 0);
570        assert!(flat_ints.validity().is_valid(0));
571        assert_eq!(flat_ints.as_slice::<i32>()[1], 0);
572        assert!(!flat_ints.validity().is_valid(1));
573        assert_eq!(
574            flat_ints.as_slice::<i32>()[2],
575            fill_value.unwrap_or_default()
576        );
577        assert_eq!(flat_ints.validity().is_valid(2), fill_value.is_some());
578        assert_eq!(flat_ints.as_slice::<i32>()[7], 1);
579        assert!(flat_ints.validity().is_valid(7));
580    }
581
582    #[test]
583    fn test_sparse_struct_valid_fill() {
584        let field_names = FieldNames::from_iter(["a", "b"]);
585        let field_types = vec![
586            DType::Primitive(PType::I32, Nullable),
587            DType::Primitive(PType::I32, Nullable),
588        ];
589        let struct_fields = StructFields::new(field_names, field_types);
590        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
591
592        let indices = buffer![0u64, 1, 7, 8].into_array();
593        let patch_values_a =
594            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
595        let patch_values_b =
596            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
597        let patch_values = StructArray::try_new_with_dtype(
598            vec![patch_values_a, patch_values_b],
599            struct_fields.clone(),
600            4,
601            Validity::Array(
602                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
603            ),
604        )
605        .unwrap()
606        .into_array();
607
608        let fill_scalar = Scalar::struct_(
609            struct_dtype,
610            vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
611        );
612        let len = 10;
613        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
614
615        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
616            if i == 0 {
617                Some(10)
618            } else if i == 1 {
619                None
620            } else if i == 7 {
621                Some(20)
622            } else {
623                Some(-10)
624            }
625        }));
626        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
627            if i == 0 {
628                Some(1i32)
629            } else if i == 1 {
630                Some(2)
631            } else if i == 7 {
632                None
633            } else {
634                Some(-1)
635            }
636        }));
637
638        let expected = StructArray::try_new_with_dtype(
639            vec![expected_a.into_array(), expected_b.into_array()],
640            struct_fields,
641            len,
642            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 8 is Invalid.
643            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
644        )
645        .unwrap()
646        .to_array()
647        .into_arrow_preferred()
648        .unwrap();
649
650        let actual = sparse_struct
651            .to_struct()
652            .to_array()
653            .into_arrow_preferred()
654            .unwrap();
655
656        assert_eq!(expected.data_type(), actual.data_type());
657        assert_eq!(&expected, &actual);
658    }
659
660    #[test]
661    fn test_sparse_struct_invalid_fill() {
662        let field_names = FieldNames::from_iter(["a", "b"]);
663        let field_types = vec![
664            DType::Primitive(PType::I32, Nullable),
665            DType::Primitive(PType::I32, Nullable),
666        ];
667        let struct_fields = StructFields::new(field_names, field_types);
668        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
669
670        let indices = buffer![0u64, 1, 7, 8].into_array();
671        let patch_values_a =
672            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
673        let patch_values_b =
674            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
675        let patch_values = StructArray::try_new_with_dtype(
676            vec![patch_values_a, patch_values_b],
677            struct_fields.clone(),
678            4,
679            Validity::Array(
680                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
681            ),
682        )
683        .unwrap()
684        .into_array();
685
686        let fill_scalar = Scalar::null(struct_dtype);
687        let len = 10;
688        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
689
690        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
691            if i == 0 {
692                Some(10)
693            } else if i == 1 {
694                None
695            } else if i == 7 {
696                Some(20)
697            } else {
698                Some(-10)
699            }
700        }));
701        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
702            if i == 0 {
703                Some(1i32)
704            } else if i == 1 {
705                Some(2)
706            } else if i == 7 {
707                None
708            } else {
709                Some(-1)
710            }
711        }));
712
713        let expected = StructArray::try_new_with_dtype(
714            vec![expected_a.into_array(), expected_b.into_array()],
715            struct_fields,
716            len,
717            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
718            Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
719        )
720        .unwrap()
721        .to_array()
722        .into_arrow_preferred()
723        .unwrap();
724
725        let actual = sparse_struct
726            .to_struct()
727            .to_array()
728            .into_arrow_preferred()
729            .unwrap();
730
731        assert_eq!(expected.data_type(), actual.data_type());
732        assert_eq!(&expected, &actual);
733    }
734
735    #[test]
736    fn test_sparse_decimal() {
737        let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
738        let decimal_dtype = DecimalDType::new(3, 2);
739        let patch_values = DecimalArray::new(
740            buffer![100i128, 200i128, 300i128, 4000i128],
741            decimal_dtype,
742            Validity::from_iter([true, true, true, false]),
743        )
744        .to_array();
745        let len = 10;
746        let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
747        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
748
749        let expected = DecimalArray::new(
750            buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
751            decimal_dtype,
752            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
753            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
754        )
755        .to_array()
756        .into_arrow_preferred()
757        .unwrap();
758
759        let actual = sparse_struct
760            .to_decimal()
761            .to_array()
762            .into_arrow_preferred()
763            .unwrap();
764
765        assert_eq!(expected.data_type(), actual.data_type());
766        assert_eq!(&expected, &actual);
767    }
768
769    #[test]
770    fn test_sparse_utf8_varbinview_non_null_fill() {
771        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
772            Some("hello"),
773            Some("goodbye"),
774            Some("hello"),
775            None,
776            Some("bonjour"),
777            Some("你好"),
778            None,
779        ])
780        .into_array();
781
782        let array = SparseArray::try_new(
783            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
784            strings,
785            12,
786            Scalar::from(Some("123".to_owned())),
787        )
788        .unwrap();
789
790        let actual = array.to_varbinview().into_array();
791        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
792            Some("hello"),
793            Some("123"),
794            Some("123"),
795            Some("goodbye"),
796            Some("hello"),
797            None,
798            Some("123"),
799            Some("bonjour"),
800            Some("123"),
801            Some("你好"),
802            None,
803            Some("123"),
804        ])
805        .into_array();
806
807        let actual = actual.into_arrow_preferred().unwrap();
808        let expected = expected.into_arrow_preferred().unwrap();
809
810        assert_eq!(actual.data_type(), expected.data_type());
811        assert_eq!(&actual, &expected);
812    }
813
814    #[test]
815    fn test_sparse_utf8_varbinview_null_fill() {
816        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
817            Some("hello"),
818            Some("goodbye"),
819            Some("hello"),
820            None,
821            Some("bonjour"),
822            Some("你好"),
823            None,
824        ])
825        .into_array();
826
827        let array = SparseArray::try_new(
828            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
829            strings,
830            12,
831            Scalar::null(DType::Utf8(Nullable)),
832        )
833        .unwrap();
834
835        let actual = array.to_varbinview().into_array();
836        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
837            Some("hello"),
838            None,
839            None,
840            Some("goodbye"),
841            Some("hello"),
842            None,
843            None,
844            Some("bonjour"),
845            None,
846            Some("你好"),
847            None,
848            None,
849        ])
850        .into_array();
851
852        let actual = actual.into_arrow_preferred().unwrap();
853        let expected = expected.into_arrow_preferred().unwrap();
854
855        assert_eq!(actual.data_type(), expected.data_type());
856        assert_eq!(&actual, &expected);
857    }
858
859    #[test]
860    fn test_sparse_utf8_varbinview_non_nullable() {
861        let strings =
862            VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
863                .into_array();
864
865        let array = SparseArray::try_new(
866            buffer![0u16, 3, 4, 5, 8].into_array(),
867            strings,
868            9,
869            Scalar::from("123".to_owned()),
870        )
871        .unwrap();
872
873        let actual = array.to_varbinview().into_array();
874        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
875            Some("hello"),
876            Some("123"),
877            Some("123"),
878            Some("goodbye"),
879            Some("hello"),
880            Some("bonjour"),
881            Some("123"),
882            Some("123"),
883            Some("你好"),
884        ])
885        .into_array();
886
887        let actual = actual.into_arrow_preferred().unwrap();
888        let expected = expected.into_arrow_preferred().unwrap();
889
890        assert_eq!(actual.data_type(), expected.data_type());
891        assert_eq!(&actual, &expected);
892    }
893
894    #[test]
895    fn test_sparse_utf8_varbin_null_fill() {
896        let strings = <VarBinArray as FromIterator<_>>::from_iter([
897            Some("hello"),
898            Some("goodbye"),
899            Some("hello"),
900            None,
901            Some("bonjour"),
902            Some("你好"),
903            None,
904        ])
905        .into_array();
906
907        let array = SparseArray::try_new(
908            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
909            strings,
910            12,
911            Scalar::null(DType::Utf8(Nullable)),
912        )
913        .unwrap();
914
915        let actual = array.to_varbinview().into_array();
916        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
917            Some("hello"),
918            None,
919            None,
920            Some("goodbye"),
921            Some("hello"),
922            None,
923            None,
924            Some("bonjour"),
925            None,
926            Some("你好"),
927            None,
928            None,
929        ])
930        .into_array();
931
932        let actual = actual.into_arrow_preferred().unwrap();
933        let expected = expected.into_arrow_preferred().unwrap();
934
935        assert_eq!(actual.data_type(), expected.data_type());
936        assert_eq!(&actual, &expected);
937    }
938
939    #[test]
940    fn test_sparse_binary_varbinview_non_null_fill() {
941        let binaries = VarBinViewArray::from_iter_nullable_bin([
942            Some(b"hello" as &[u8]),
943            Some(b"goodbye"),
944            Some(b"hello"),
945            None,
946            Some(b"\x00"),
947            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
948            None,
949        ])
950        .into_array();
951
952        let array = SparseArray::try_new(
953            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
954            binaries,
955            12,
956            Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
957        )
958        .unwrap();
959
960        let actual = array.to_varbinview().into_array();
961        let expected = VarBinViewArray::from_iter_nullable_bin([
962            Some(b"hello" as &[u8]),
963            Some(b"123"),
964            Some(b"123"),
965            Some(b"goodbye"),
966            Some(b"hello"),
967            None,
968            Some(b"123"),
969            Some(b"\x00"),
970            Some(b"123"),
971            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
972            None,
973            Some(b"123"),
974        ])
975        .into_array();
976
977        let actual = actual.into_arrow_preferred().unwrap();
978        let expected = expected.into_arrow_preferred().unwrap();
979
980        assert_eq!(actual.data_type(), expected.data_type());
981        assert_eq!(&actual, &expected);
982    }
983
984    #[test]
985    fn test_list_scalar_to_elements_array() {
986        let scalar = Scalar::from(Some(vec![1, 2, 3]));
987        let array = list_scalar_to_elements_array(scalar.as_list());
988        assert_eq!(
989            array.unwrap().display_values().to_string(),
990            "[1i32, 2i32, 3i32]"
991        );
992
993        let scalar = Scalar::null_typed::<Vec<i32>>();
994        let array = list_scalar_to_elements_array(scalar.as_list());
995        assert!(array.is_none());
996    }
997
998    #[test]
999    fn test_list_scalar_to_singleton_list_array() {
1000        let scalar = Scalar::from(Some(vec![1, 2, 3]));
1001        let array = list_scalar_to_singleton_list_array(scalar.as_list());
1002        assert!(array.is_some());
1003        let array = array.unwrap();
1004        assert_eq!(array.scalar_at(0), scalar);
1005        assert_eq!(array.len(), 1);
1006
1007        let scalar = Scalar::null_typed::<Vec<i32>>();
1008        let array = list_scalar_to_singleton_list_array(scalar.as_list());
1009        assert!(array.is_none());
1010    }
1011
1012    #[test]
1013    fn test_sparse_list_null_fill() {
1014        let elements = buffer![1i32, 2, 1, 2].into_array();
1015        let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1016        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1017            .unwrap()
1018            .into_array();
1019
1020        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1021        let fill_value = Scalar::null(lists.dtype().clone());
1022        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1023            .unwrap()
1024            .into_array();
1025
1026        let actual = sparse.to_canonical().into_array();
1027        let expected = ListArray::try_new(
1028            buffer![1i32, 2, 1, 2].into_array(),
1029            buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1030            Validity::Array(
1031                BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1032            ),
1033        )
1034        .unwrap()
1035        .into_array();
1036
1037        let actual = actual.into_arrow_preferred().unwrap();
1038        let expected = expected.into_arrow_preferred().unwrap();
1039
1040        assert_eq!(actual.data_type(), expected.data_type());
1041        assert_eq!(&actual, &expected);
1042    }
1043
1044    #[test]
1045    fn test_sparse_list_null_fill_sliced_sparse_values() {
1046        let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1047        let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7, 8].into_array();
1048        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1049            .unwrap()
1050            .into_array();
1051        let lists = lists.slice(2..6);
1052
1053        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1054        let fill_value = Scalar::null(lists.dtype().clone());
1055        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1056            .unwrap()
1057            .into_array();
1058
1059        let actual = sparse.to_canonical().into_array();
1060        let expected = ListArray::try_new(
1061            buffer![1i32, 2, 1, 2].into_array(),
1062            buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1063            Validity::Array(
1064                BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1065            ),
1066        )
1067        .unwrap()
1068        .into_array();
1069
1070        let actual = actual.into_arrow_preferred().unwrap();
1071        let expected = expected.into_arrow_preferred().unwrap();
1072
1073        assert_eq!(actual.data_type(), expected.data_type());
1074        assert_eq!(&actual, &expected);
1075    }
1076
1077    #[test]
1078    fn test_sparse_list_non_null_fill() {
1079        let elements = buffer![1i32, 2, 1, 2].into_array();
1080        let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1081        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1082            .unwrap()
1083            .into_array();
1084
1085        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1086        let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1087        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1088            .unwrap()
1089            .into_array();
1090
1091        let actual = sparse.to_canonical().into_array();
1092        let expected = ListArray::try_new(
1093            buffer![1i32, 5, 6, 7, 8, 5, 6, 7, 8, 2, 1, 2].into_array(),
1094            buffer![0u32, 1, 5, 9, 10, 11, 12].into_array(),
1095            Validity::AllValid,
1096        )
1097        .unwrap()
1098        .into_array();
1099
1100        let actual = actual.into_arrow_preferred().unwrap();
1101        let expected = expected.into_arrow_preferred().unwrap();
1102
1103        assert_eq!(actual.data_type(), expected.data_type());
1104        assert_eq!(&actual, &expected);
1105    }
1106
1107    #[test]
1108    fn test_sparse_binary_varbin_null_fill() {
1109        let strings = <VarBinArray as FromIterator<_>>::from_iter([
1110            Some(b"hello" as &[u8]),
1111            Some(b"goodbye"),
1112            Some(b"hello"),
1113            None,
1114            Some(b"\x00"),
1115            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1116            None,
1117        ])
1118        .into_array();
1119
1120        let array = SparseArray::try_new(
1121            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1122            strings,
1123            12,
1124            Scalar::null(DType::Binary(Nullable)),
1125        )
1126        .unwrap();
1127
1128        let actual = array.to_varbinview().into_array();
1129        let expected = VarBinViewArray::from_iter_nullable_bin([
1130            Some(b"hello" as &[u8]),
1131            None,
1132            None,
1133            Some(b"goodbye"),
1134            Some(b"hello"),
1135            None,
1136            None,
1137            Some(b"\x00"),
1138            None,
1139            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1140            None,
1141            None,
1142        ])
1143        .into_array();
1144        let actual = actual.into_arrow_preferred().unwrap();
1145        let expected = expected.into_arrow_preferred().unwrap();
1146
1147        assert_eq!(actual.data_type(), expected.data_type());
1148        assert_eq!(&actual, &expected);
1149    }
1150
1151    #[test]
1152    fn test_sparse_list_grows_offset_type() {
1153        let elements = buffer![1i32, 2, 1, 2].into_array();
1154        let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1155        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1156            .unwrap()
1157            .into_array();
1158
1159        let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1160        let fill_value = Scalar::from(Some(vec![42i32; 252])); // 252 + 4 elements = 256 > u8::MAX
1161        let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1162            .unwrap()
1163            .into_array();
1164
1165        let actual = sparse.to_canonical().into_array();
1166        let mut expected_elements = buffer_mut![1, 2, 1, 2];
1167        expected_elements.extend(buffer![42i32; 252]);
1168        let expected = ListArray::try_new(
1169            expected_elements.freeze().into_array(),
1170            buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1171            Validity::AllValid,
1172        )
1173        .unwrap()
1174        .into_array();
1175
1176        assert_eq!(
1177            actual.to_list().offsets().dtype(),
1178            &DType::Primitive(PType::U16, NonNullable)
1179        );
1180
1181        let actual = actual.into_arrow_preferred().unwrap();
1182        let expected = expected.into_arrow_preferred().unwrap();
1183
1184        assert_eq!(actual.data_type(), expected.data_type());
1185        assert_eq!(&actual, &expected);
1186    }
1187}