vortex_sparse/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9    BinaryView, BoolArray, BooleanBuffer, ConstantArray, ListArray, NullArray, OffsetPType,
10    PrimitiveArray, StructArray, VarBinViewArray, smallest_storage_type,
11};
12use vortex_array::builders::{
13    ArrayBuilder as _, ArrayBuilderExt, DecimalBuilder, ListBuilder, builder_with_capacity,
14};
15use vortex_array::patches::Patches;
16use vortex_array::validity::Validity;
17use vortex_array::vtable::CanonicalVTable;
18use vortex_array::{Array, ArrayRef, Canonical, IntoArray as _, ToCanonical as _};
19use vortex_buffer::{Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut};
20use vortex_dtype::{
21    DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
22    match_each_native_ptype,
23};
24use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_err};
25use vortex_scalar::{
26    DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar,
27    match_each_decimal_value_type,
28};
29
30use crate::{SparseArray, SparseVTable};
31
32impl CanonicalVTable<SparseVTable> for SparseVTable {
33    fn canonicalize(array: &SparseArray) -> VortexResult<Canonical> {
34        if array.patches().num_patches() == 0 {
35            return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
36        }
37
38        match array.dtype() {
39            DType::Null => {
40                assert!(array.fill_scalar().is_null());
41                Ok(Canonical::Null(NullArray::new(array.len())))
42            }
43            DType::Bool(..) => {
44                let resolved_patches = array.resolved_patches()?;
45                canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
46            }
47            DType::Primitive(ptype, ..) => {
48                let resolved_patches = array.resolved_patches()?;
49                match_each_native_ptype!(ptype, |P| {
50                    canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
51                })
52            }
53            DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
54                struct_fields,
55                array.fill_scalar().as_struct(),
56                array.dtype(),
57                array.patches(),
58                array.len(),
59            ),
60            DType::Decimal(decimal_dtype, nullability) => {
61                let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
62                let fill_value = array.fill_scalar().as_decimal();
63                match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
64                    canonicalize_sparse_decimal::<D>(
65                        *decimal_dtype,
66                        *nullability,
67                        fill_value,
68                        array.patches(),
69                        array.len(),
70                    )
71                })
72            }
73            dtype @ DType::Utf8(..) => {
74                let fill_value = array.fill_scalar().as_utf8().value();
75                let fill_value = fill_value.map(BufferString::into_inner);
76                canonicalize_varbin(array, dtype.clone(), fill_value)
77            }
78            dtype @ DType::Binary(..) => {
79                let fill_value = array.fill_scalar().as_binary().value();
80                canonicalize_varbin(array, dtype.clone(), fill_value)
81            }
82            DType::List(values_dtype, nullability) => {
83                let resolved_patches = array.resolved_patches()?;
84                canonicalize_sparse_lists(
85                    array,
86                    resolved_patches,
87                    values_dtype.clone(),
88                    *nullability,
89                )
90            }
91            DType::FixedSizeList(..) => {
92                unimplemented!("TODO(connor)[FixedSizeList]")
93            }
94            DType::Extension(_ext_dtype) => todo!(),
95        }
96    }
97}
98
99/// The elements of this [ListScalar] as an array or `None` if scalar is null.
100fn list_scalar_to_elements_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
101    let Some(elements) = scalar.elements() else {
102        return Ok(None);
103    };
104
105    let mut builder = builder_with_capacity(scalar.element_dtype(), scalar.len());
106    for s in elements {
107        builder.append_scalar(&s)?;
108    }
109    Ok(Some(builder.finish()))
110}
111
112/// Create a list-typed array containing one element, scalar, or `None` if scalar is null.
113fn list_scalar_to_singleton_list_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
114    let nullability = scalar.dtype().nullability();
115    let Some(elements) = list_scalar_to_elements_array(scalar)? else {
116        return Ok(None);
117    };
118
119    let validity = match nullability {
120        Nullability::NonNullable => Validity::NonNullable,
121        Nullability::Nullable => Validity::AllValid,
122    };
123
124    let n = elements.len();
125    ListArray::try_new(elements, buffer![0_u64, n as u64].into_array(), validity)
126        .map(|x| Some(x.into_array()))
127}
128
129#[allow(clippy::cognitive_complexity)]
130fn canonicalize_sparse_lists(
131    array: &SparseArray,
132    resolved_patches: Patches,
133    values_dtype: Arc<DType>,
134    nullability: Nullability,
135) -> VortexResult<Canonical> {
136    macro_rules! match_smallest_offset_type {
137        ($n_elements:expr, | $offset_type:ident | $body:block) => {{
138            let n_elements = $n_elements;
139            if n_elements <= u8::MAX as usize {
140                type $offset_type = u8;
141                $body
142            } else if n_elements <= u16::MAX as usize {
143                type $offset_type = u16;
144                $body
145            } else if n_elements <= u32::MAX as usize {
146                type $offset_type = u32;
147                $body
148            } else {
149                assert!(u64::try_from(n_elements).is_ok());
150                type $offset_type = u64;
151                $body
152            }
153        }};
154    }
155
156    let indices = resolved_patches.indices().to_primitive()?;
157    let values = resolved_patches.values().to_list()?;
158    let fill_value = array.fill_scalar().as_list();
159
160    let n_filled = array.len() - resolved_patches.num_patches();
161    let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
162
163    let validity = Validity::from_mask(array.validity_mask(), nullability);
164
165    match_each_integer_ptype!(indices.ptype(), |I| {
166        match_smallest_offset_type!(total_canonical_values, |O| {
167            canonicalize_sparse_lists_inner::<I, O>(
168                indices.as_slice(),
169                values,
170                fill_value,
171                values_dtype,
172                array.len(),
173                total_canonical_values,
174                validity,
175            )
176        })
177    })
178}
179
180fn canonicalize_sparse_lists_inner<I: NativePType, SmallestViableOffsetType: OffsetPType>(
181    indices: &[I],
182    values: ListArray,
183    fill_value: ListScalar,
184    values_dtype: Arc<DType>,
185    len: usize,
186    total_canonical_values: usize,
187    validity: Validity,
188) -> VortexResult<Canonical> {
189    let Some(fill_value_array) = list_scalar_to_singleton_list_array(fill_value)? else {
190        let sparse_list_elements = values.elements().clone();
191        let sparse_list_offsets = values.offsets().to_primitive()?;
192        match_each_integer_ptype!(sparse_list_offsets.ptype(), |SparseValuesOffsetType| {
193            let sparse_list_offsets = sparse_list_offsets.as_slice::<SparseValuesOffsetType>();
194            // If the values are a small slice of a large array, their offsets may not fit in
195            // SmallestViableOffsetType. We avoid a copy by reusing values.elements(), but we
196            // therefore must use the offset type of values.offsets().
197            return canonicalize_sparse_lists_inner_with_null_fill_value(
198                indices,
199                sparse_list_elements,
200                sparse_list_offsets,
201                len,
202                validity,
203            );
204        });
205    };
206
207    let mut builder = ListBuilder::<SmallestViableOffsetType>::with_values_and_index_capacity(
208        values_dtype,
209        validity.nullability(),
210        total_canonical_values,
211        len + 1,
212    );
213    let mut next_index = 0_usize;
214    let enumerated_indices_usize = indices
215        .iter()
216        .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"))
217        .enumerate();
218    for (patch_values_index, next_patched_index) in enumerated_indices_usize {
219        for _ in next_index..next_patched_index {
220            builder.extend_from_array(&fill_value_array)?;
221        }
222        builder.extend_from_array(&values.slice(patch_values_index..patch_values_index + 1))?;
223        next_index = next_patched_index + 1;
224    }
225
226    for _ in next_index..len {
227        builder.extend_from_array(&fill_value_array)?;
228    }
229
230    builder.finish().to_canonical()
231}
232
233fn canonicalize_sparse_lists_inner_with_null_fill_value<I: NativePType, O: OffsetPType>(
234    indices: &[I],
235    elements: ArrayRef,
236    offsets: &[O],
237    len: usize,
238    validity: Validity,
239) -> VortexResult<Canonical> {
240    assert!(indices.len() < len + 1);
241    let mut dense_offsets = BufferMut::with_capacity(len + 1);
242
243    // We cannot use zero because elements may have leading junk values (e.g. the result of a slice).
244    dense_offsets.push(offsets[0]);
245    let mut dense_last_set_index = 0_usize;
246    for (sparse_start_index, dense_start_index) in indices.iter().enumerate() {
247        let sparse_end_index = sparse_start_index + 1;
248        let dense_start_index = (*dense_start_index)
249            .to_usize()
250            .vortex_expect("index must fit in usize");
251        let dense_end_index = dense_start_index + 1;
252
253        for _ in (dense_last_set_index + 1)..dense_end_index {
254            // For each null list, copy-forward the old index. These empty lists are masked by the validity.
255            dense_offsets.push(dense_offsets[dense_last_set_index]);
256        }
257        dense_offsets.push(offsets[sparse_end_index]);
258        dense_last_set_index = dense_end_index;
259    }
260    for _ in (dense_last_set_index + 1)..len {
261        // For each null list, copy-forward the old index. These empty lists are masked by the validity.
262        dense_offsets.push(dense_offsets[dense_last_set_index]);
263    }
264    let array = ListArray::try_new(elements, dense_offsets.into_array(), validity)?;
265    Ok(Canonical::List(array))
266}
267
268fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> VortexResult<Canonical> {
269    let (fill_bool, validity) = if fill_value.is_null() {
270        (false, Validity::AllInvalid)
271    } else {
272        (
273            fill_value.try_into()?,
274            if patches.dtype().nullability() == Nullability::NonNullable {
275                Validity::NonNullable
276            } else {
277                Validity::AllValid
278            },
279        )
280    };
281
282    let bools = BoolArray::new(
283        if fill_bool {
284            BooleanBuffer::new_set(patches.array_len())
285        } else {
286            BooleanBuffer::new_unset(patches.array_len())
287        },
288        validity,
289    );
290
291    bools.patch(patches).map(Canonical::Bool)
292}
293
294fn canonicalize_sparse_primitives<
295    T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
296>(
297    patches: &Patches,
298    fill_value: &Scalar,
299) -> VortexResult<Canonical> {
300    let (primitive_fill, validity) = if fill_value.is_null() {
301        (T::default(), Validity::AllInvalid)
302    } else {
303        (
304            fill_value.try_into()?,
305            if patches.dtype().nullability() == Nullability::NonNullable {
306                Validity::NonNullable
307            } else {
308                Validity::AllValid
309            },
310        )
311    };
312
313    let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
314
315    parray.patch(patches).map(Canonical::Primitive)
316}
317
318fn canonicalize_sparse_struct(
319    struct_fields: &StructFields,
320    fill_struct: StructScalar,
321    dtype: &DType,
322    // Resolution is unnecessary b/c we're just pushing the patches into the fields.
323    unresolved_patches: &Patches,
324    len: usize,
325) -> VortexResult<Canonical> {
326    let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
327        Some(fill_values) => (fill_values, Validity::AllValid),
328        None => (
329            struct_fields.fields().map(Scalar::default_value).collect(),
330            Validity::AllInvalid,
331        ),
332    };
333    let patch_values_as_struct = unresolved_patches.values().to_canonical()?.into_struct()?;
334    let columns_patch_values = patch_values_as_struct.fields();
335    let names = patch_values_as_struct.names();
336    let validity = if dtype.is_nullable() {
337        top_level_fill_validity.patch(
338            len,
339            unresolved_patches.offset(),
340            unresolved_patches.indices(),
341            &Validity::from_mask(
342                unresolved_patches.values().validity_mask(),
343                Nullability::Nullable,
344            ),
345        )?
346    } else {
347        top_level_fill_validity
348            .into_non_nullable()
349            .ok_or_else(|| vortex_err!("fill validity should match sparse array nullability"))?
350    };
351
352    columns_patch_values
353        .iter()
354        .cloned()
355        .zip_eq(fill_values.into_iter())
356        .map(|(patch_values, fill_value)| -> VortexResult<_> {
357            SparseArray::try_new_from_patches(
358                unresolved_patches
359                    .clone()
360                    .map_values(|_| Ok(patch_values))?,
361                fill_value,
362            )
363        })
364        .process_results(|sparse_columns| {
365            StructArray::try_from_iter_with_validity(names.iter().zip_eq(sparse_columns), validity)
366                .map(Canonical::Struct)
367        })?
368}
369
370fn canonicalize_sparse_decimal<D: NativeDecimalType>(
371    decimal_dtype: DecimalDType,
372    nullability: Nullability,
373    fill_value: DecimalScalar,
374    patches: &Patches,
375    len: usize,
376) -> VortexResult<Canonical> {
377    let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
378    match fill_value.decimal_value() {
379        Some(fill_value) => {
380            let fill_value = fill_value
381                .cast::<D>()
382                .vortex_expect("unexpected value type");
383            for _ in 0..len {
384                builder.append_value(fill_value)
385            }
386        }
387        None => {
388            builder.append_nulls(len);
389        }
390    }
391    let filled_array = builder.finish_into_decimal();
392    let array = filled_array.patch(patches)?;
393    Ok(Canonical::Decimal(array))
394}
395
396fn canonicalize_varbin(
397    array: &SparseArray,
398    dtype: DType,
399    fill_value: Option<ByteBuffer>,
400) -> VortexResult<Canonical> {
401    let patches = array.resolved_patches()?;
402    let indices = patches.indices().to_primitive()?;
403    let values = patches.values().to_varbinview()?;
404    let validity = Validity::from_mask(array.validity_mask(), dtype.nullability());
405    let len = array.len();
406
407    match_each_integer_ptype!(indices.ptype(), |I| {
408        let indices = indices.buffer::<I>();
409        canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
410    })
411}
412
413fn canonicalize_varbin_inner<I: NativePType>(
414    fill_value: Option<ByteBuffer>,
415    indices: Buffer<I>,
416    values: VarBinViewArray,
417    dtype: DType,
418    validity: Validity,
419    len: usize,
420) -> VortexResult<Canonical> {
421    assert_eq!(dtype.nullability(), validity.nullability());
422
423    let n_patch_buffers = values.buffers().len();
424    let mut buffers = values.buffers().to_vec();
425
426    let fill = if let Some(buffer) = &fill_value {
427        buffers.push(buffer.clone());
428        BinaryView::make_view(
429            buffer.as_ref(),
430            u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
431            0,
432        )
433    } else {
434        // any <=12 character value will do
435        BinaryView::make_view(&[], 0, 0)
436    };
437
438    let mut views = buffer_mut![fill; len];
439    for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
440        let patch_index_usize = <usize as NumCast>::from(patch_index)
441            .vortex_expect("var bin view indices must fit in usize");
442        views[patch_index_usize] = patch;
443    }
444
445    // SAFETY: views are constructed to maintain the invariants
446    let array = unsafe {
447        VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
448    };
449
450    Ok(Canonical::VarBinView(array))
451}
452
453#[cfg(test)]
454mod test {
455    use rstest::rstest;
456    use vortex_array::arrays::{
457        BoolArray, BooleanBufferBuilder, DecimalArray, ListArray, PrimitiveArray, StructArray,
458        VarBinArray, VarBinViewArray,
459    };
460    use vortex_array::arrow::IntoArrowArray as _;
461    use vortex_array::validity::Validity;
462    use vortex_array::vtable::ValidityHelper;
463    use vortex_array::{IntoArray, ToCanonical};
464    use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
465    use vortex_dtype::Nullability::{NonNullable, Nullable};
466    use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
467    use vortex_mask::Mask;
468    use vortex_scalar::{DecimalValue, Scalar};
469
470    use crate::SparseArray;
471    use crate::canonical::{list_scalar_to_elements_array, list_scalar_to_singleton_list_array};
472
473    #[rstest]
474    #[case(Some(true))]
475    #[case(Some(false))]
476    #[case(None)]
477    fn test_sparse_bool(#[case] fill_value: Option<bool>) {
478        let indices = buffer![0u64, 1, 7].into_array();
479        let values = bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
480            .into_array();
481        let sparse_bools =
482            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
483        assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));
484
485        let flat_bools = sparse_bools.to_bool().unwrap();
486        let expected = bool_array_from_nullable_vec(
487            vec![
488                Some(true),
489                None,
490                fill_value,
491                fill_value,
492                fill_value,
493                fill_value,
494                fill_value,
495                Some(false),
496                fill_value,
497                fill_value,
498            ],
499            fill_value,
500        );
501
502        assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
503        assert_eq!(flat_bools.validity(), expected.validity());
504
505        assert!(flat_bools.boolean_buffer().value(0));
506        assert!(flat_bools.validity().is_valid(0));
507        assert_eq!(
508            flat_bools.boolean_buffer().value(1),
509            fill_value.unwrap_or_default()
510        );
511        assert!(!flat_bools.validity().is_valid(1));
512        assert_eq!(flat_bools.validity().is_valid(2), fill_value.is_some());
513        assert!(!flat_bools.boolean_buffer().value(7));
514        assert!(flat_bools.validity().is_valid(7));
515    }
516
517    fn bool_array_from_nullable_vec(
518        bools: Vec<Option<bool>>,
519        fill_value: Option<bool>,
520    ) -> BoolArray {
521        let mut buffer = BooleanBufferBuilder::new(bools.len());
522        let mut validity = BooleanBufferBuilder::new(bools.len());
523        for maybe_bool in bools {
524            buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default()));
525            validity.append(maybe_bool.is_some());
526        }
527        BoolArray::new(buffer.finish(), Validity::from(validity.finish()))
528    }
529
530    #[rstest]
531    #[case(Some(0i32))]
532    #[case(Some(-1i32))]
533    #[case(None)]
534    fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
535        let indices = buffer![0u64, 1, 7].into_array();
536        let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
537        let sparse_ints =
538            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
539        assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
540
541        let flat_ints = sparse_ints.to_primitive().unwrap();
542        let expected = PrimitiveArray::from_option_iter([
543            Some(0i32),
544            None,
545            fill_value,
546            fill_value,
547            fill_value,
548            fill_value,
549            fill_value,
550            Some(1),
551            fill_value,
552            fill_value,
553        ]);
554
555        assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
556        assert_eq!(flat_ints.validity(), expected.validity());
557
558        assert_eq!(flat_ints.as_slice::<i32>()[0], 0);
559        assert!(flat_ints.validity().is_valid(0));
560        assert_eq!(flat_ints.as_slice::<i32>()[1], 0);
561        assert!(!flat_ints.validity().is_valid(1));
562        assert_eq!(
563            flat_ints.as_slice::<i32>()[2],
564            fill_value.unwrap_or_default()
565        );
566        assert_eq!(flat_ints.validity().is_valid(2), fill_value.is_some());
567        assert_eq!(flat_ints.as_slice::<i32>()[7], 1);
568        assert!(flat_ints.validity().is_valid(7));
569    }
570
571    #[test]
572    fn test_sparse_struct_valid_fill() {
573        let field_names = FieldNames::from_iter(["a", "b"]);
574        let field_types = vec![
575            DType::Primitive(PType::I32, Nullable),
576            DType::Primitive(PType::I32, Nullable),
577        ];
578        let struct_fields = StructFields::new(field_names, field_types);
579        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
580
581        let indices = buffer![0u64, 1, 7, 8].into_array();
582        let patch_values_a =
583            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
584        let patch_values_b =
585            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
586        let patch_values = StructArray::try_new_with_dtype(
587            vec![patch_values_a, patch_values_b],
588            struct_fields.clone(),
589            4,
590            Validity::Array(
591                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
592            ),
593        )
594        .unwrap()
595        .into_array();
596
597        let fill_scalar = Scalar::struct_(
598            struct_dtype,
599            vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
600        );
601        let len = 10;
602        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
603
604        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
605            if i == 0 {
606                Some(10)
607            } else if i == 1 {
608                None
609            } else if i == 7 {
610                Some(20)
611            } else {
612                Some(-10)
613            }
614        }));
615        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
616            if i == 0 {
617                Some(1i32)
618            } else if i == 1 {
619                Some(2)
620            } else if i == 7 {
621                None
622            } else {
623                Some(-1)
624            }
625        }));
626
627        let expected = StructArray::try_new_with_dtype(
628            vec![expected_a.into_array(), expected_b.into_array()],
629            struct_fields,
630            len,
631            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 8 is Invalid.
632            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
633        )
634        .unwrap()
635        .to_array()
636        .into_arrow_preferred()
637        .unwrap();
638
639        let actual = sparse_struct
640            .to_struct()
641            .unwrap()
642            .to_array()
643            .into_arrow_preferred()
644            .unwrap();
645
646        assert_eq!(expected.data_type(), actual.data_type());
647        assert_eq!(&expected, &actual);
648    }
649
650    #[test]
651    fn test_sparse_struct_invalid_fill() {
652        let field_names = FieldNames::from_iter(["a", "b"]);
653        let field_types = vec![
654            DType::Primitive(PType::I32, Nullable),
655            DType::Primitive(PType::I32, Nullable),
656        ];
657        let struct_fields = StructFields::new(field_names, field_types);
658        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
659
660        let indices = buffer![0u64, 1, 7, 8].into_array();
661        let patch_values_a =
662            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
663        let patch_values_b =
664            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
665        let patch_values = StructArray::try_new_with_dtype(
666            vec![patch_values_a, patch_values_b],
667            struct_fields.clone(),
668            4,
669            Validity::Array(
670                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
671            ),
672        )
673        .unwrap()
674        .into_array();
675
676        let fill_scalar = Scalar::null(struct_dtype);
677        let len = 10;
678        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
679
680        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
681            if i == 0 {
682                Some(10)
683            } else if i == 1 {
684                None
685            } else if i == 7 {
686                Some(20)
687            } else {
688                Some(-10)
689            }
690        }));
691        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
692            if i == 0 {
693                Some(1i32)
694            } else if i == 1 {
695                Some(2)
696            } else if i == 7 {
697                None
698            } else {
699                Some(-1)
700            }
701        }));
702
703        let expected = StructArray::try_new_with_dtype(
704            vec![expected_a.into_array(), expected_b.into_array()],
705            struct_fields,
706            len,
707            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
708            Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
709        )
710        .unwrap()
711        .to_array()
712        .into_arrow_preferred()
713        .unwrap();
714
715        let actual = sparse_struct
716            .to_struct()
717            .unwrap()
718            .to_array()
719            .into_arrow_preferred()
720            .unwrap();
721
722        assert_eq!(expected.data_type(), actual.data_type());
723        assert_eq!(&expected, &actual);
724    }
725
726    #[test]
727    fn test_sparse_decimal() {
728        let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
729        let decimal_dtype = DecimalDType::new(3, 2);
730        let patch_values = DecimalArray::new(
731            buffer![100i128, 200i128, 300i128, 4000i128],
732            decimal_dtype,
733            Validity::from_iter([true, true, true, false]),
734        )
735        .to_array();
736        let len = 10;
737        let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
738        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
739
740        let expected = DecimalArray::new(
741            buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
742            decimal_dtype,
743            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
744            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
745        )
746        .to_array()
747        .into_arrow_preferred()
748        .unwrap();
749
750        let actual = sparse_struct
751            .to_decimal()
752            .unwrap()
753            .to_array()
754            .into_arrow_preferred()
755            .unwrap();
756
757        assert_eq!(expected.data_type(), actual.data_type());
758        assert_eq!(&expected, &actual);
759    }
760
761    #[test]
762    fn test_sparse_utf8_varbinview_non_null_fill() {
763        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
764            Some("hello"),
765            Some("goodbye"),
766            Some("hello"),
767            None,
768            Some("bonjour"),
769            Some("你好"),
770            None,
771        ])
772        .into_array();
773
774        let array = SparseArray::try_new(
775            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
776            strings,
777            12,
778            Scalar::from(Some("123".to_owned())),
779        )
780        .unwrap();
781
782        let actual = array.to_varbinview().unwrap().into_array();
783        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
784            Some("hello"),
785            Some("123"),
786            Some("123"),
787            Some("goodbye"),
788            Some("hello"),
789            None,
790            Some("123"),
791            Some("bonjour"),
792            Some("123"),
793            Some("你好"),
794            None,
795            Some("123"),
796        ])
797        .into_array();
798
799        let actual = actual.into_arrow_preferred().unwrap();
800        let expected = expected.into_arrow_preferred().unwrap();
801
802        assert_eq!(actual.data_type(), expected.data_type());
803        assert_eq!(&actual, &expected);
804    }
805
806    #[test]
807    fn test_sparse_utf8_varbinview_null_fill() {
808        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
809            Some("hello"),
810            Some("goodbye"),
811            Some("hello"),
812            None,
813            Some("bonjour"),
814            Some("你好"),
815            None,
816        ])
817        .into_array();
818
819        let array = SparseArray::try_new(
820            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
821            strings,
822            12,
823            Scalar::null(DType::Utf8(Nullable)),
824        )
825        .unwrap();
826
827        let actual = array.to_varbinview().unwrap().into_array();
828        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
829            Some("hello"),
830            None,
831            None,
832            Some("goodbye"),
833            Some("hello"),
834            None,
835            None,
836            Some("bonjour"),
837            None,
838            Some("你好"),
839            None,
840            None,
841        ])
842        .into_array();
843
844        let actual = actual.into_arrow_preferred().unwrap();
845        let expected = expected.into_arrow_preferred().unwrap();
846
847        assert_eq!(actual.data_type(), expected.data_type());
848        assert_eq!(&actual, &expected);
849    }
850
851    #[test]
852    fn test_sparse_utf8_varbinview_non_nullable() {
853        let strings =
854            VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
855                .into_array();
856
857        let array = SparseArray::try_new(
858            buffer![0u16, 3, 4, 5, 8].into_array(),
859            strings,
860            9,
861            Scalar::from("123".to_owned()),
862        )
863        .unwrap();
864
865        let actual = array.to_varbinview().unwrap().into_array();
866        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
867            Some("hello"),
868            Some("123"),
869            Some("123"),
870            Some("goodbye"),
871            Some("hello"),
872            Some("bonjour"),
873            Some("123"),
874            Some("123"),
875            Some("你好"),
876        ])
877        .into_array();
878
879        let actual = actual.into_arrow_preferred().unwrap();
880        let expected = expected.into_arrow_preferred().unwrap();
881
882        assert_eq!(actual.data_type(), expected.data_type());
883        assert_eq!(&actual, &expected);
884    }
885
886    #[test]
887    fn test_sparse_utf8_varbin_null_fill() {
888        let strings = <VarBinArray as FromIterator<_>>::from_iter([
889            Some("hello"),
890            Some("goodbye"),
891            Some("hello"),
892            None,
893            Some("bonjour"),
894            Some("你好"),
895            None,
896        ])
897        .into_array();
898
899        let array = SparseArray::try_new(
900            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
901            strings,
902            12,
903            Scalar::null(DType::Utf8(Nullable)),
904        )
905        .unwrap();
906
907        let actual = array.to_varbinview().unwrap().into_array();
908        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
909            Some("hello"),
910            None,
911            None,
912            Some("goodbye"),
913            Some("hello"),
914            None,
915            None,
916            Some("bonjour"),
917            None,
918            Some("你好"),
919            None,
920            None,
921        ])
922        .into_array();
923
924        let actual = actual.into_arrow_preferred().unwrap();
925        let expected = expected.into_arrow_preferred().unwrap();
926
927        assert_eq!(actual.data_type(), expected.data_type());
928        assert_eq!(&actual, &expected);
929    }
930
931    #[test]
932    fn test_sparse_binary_varbinview_non_null_fill() {
933        let binaries = VarBinViewArray::from_iter_nullable_bin([
934            Some(b"hello" as &[u8]),
935            Some(b"goodbye"),
936            Some(b"hello"),
937            None,
938            Some(b"\x00"),
939            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
940            None,
941        ])
942        .into_array();
943
944        let array = SparseArray::try_new(
945            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
946            binaries,
947            12,
948            Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
949        )
950        .unwrap();
951
952        let actual = array.to_varbinview().unwrap().into_array();
953        let expected = VarBinViewArray::from_iter_nullable_bin([
954            Some(b"hello" as &[u8]),
955            Some(b"123"),
956            Some(b"123"),
957            Some(b"goodbye"),
958            Some(b"hello"),
959            None,
960            Some(b"123"),
961            Some(b"\x00"),
962            Some(b"123"),
963            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
964            None,
965            Some(b"123"),
966        ])
967        .into_array();
968
969        let actual = actual.into_arrow_preferred().unwrap();
970        let expected = expected.into_arrow_preferred().unwrap();
971
972        assert_eq!(actual.data_type(), expected.data_type());
973        assert_eq!(&actual, &expected);
974    }
975
976    #[test]
977    fn test_list_scalar_to_elements_array() {
978        let scalar = Scalar::from(Some(vec![1, 2, 3]));
979        let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
980        assert_eq!(
981            array.unwrap().display_values().to_string(),
982            "[1i32, 2i32, 3i32]"
983        );
984
985        let scalar = Scalar::null_typed::<Vec<i32>>();
986        let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
987        assert!(array.is_none());
988    }
989
990    #[test]
991    fn test_list_scalar_to_singleton_list_array() {
992        let scalar = Scalar::from(Some(vec![1, 2, 3]));
993        let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
994        assert!(array.is_some());
995        let array = array.unwrap();
996        assert_eq!(array.scalar_at(0), scalar);
997        assert_eq!(array.len(), 1);
998
999        let scalar = Scalar::null_typed::<Vec<i32>>();
1000        let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
1001        assert!(array.is_none());
1002    }
1003
1004    #[test]
1005    fn test_sparse_list_null_fill() {
1006        let elements = buffer![1i32, 2, 1, 2].into_array();
1007        let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1008        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1009            .unwrap()
1010            .into_array();
1011
1012        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1013        let fill_value = Scalar::null(lists.dtype().clone());
1014        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1015            .unwrap()
1016            .into_array();
1017
1018        let actual = sparse.to_canonical().unwrap().into_array();
1019        let expected = ListArray::try_new(
1020            buffer![1i32, 2, 1, 2].into_array(),
1021            buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1022            Validity::Array(
1023                BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1024            ),
1025        )
1026        .unwrap()
1027        .into_array();
1028
1029        let actual = actual.into_arrow_preferred().unwrap();
1030        let expected = expected.into_arrow_preferred().unwrap();
1031
1032        assert_eq!(actual.data_type(), expected.data_type());
1033        assert_eq!(&actual, &expected);
1034    }
1035
1036    #[test]
1037    fn test_sparse_list_null_fill_sliced_sparse_values() {
1038        let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1039        let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7, 8].into_array();
1040        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1041            .unwrap()
1042            .into_array();
1043        let lists = lists.slice(2..6);
1044
1045        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1046        let fill_value = Scalar::null(lists.dtype().clone());
1047        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1048            .unwrap()
1049            .into_array();
1050
1051        let actual = sparse.to_canonical().unwrap().into_array();
1052        let expected = ListArray::try_new(
1053            buffer![1i32, 2, 1, 2].into_array(),
1054            buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1055            Validity::Array(
1056                BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1057            ),
1058        )
1059        .unwrap()
1060        .into_array();
1061
1062        let actual = actual.into_arrow_preferred().unwrap();
1063        let expected = expected.into_arrow_preferred().unwrap();
1064
1065        assert_eq!(actual.data_type(), expected.data_type());
1066        assert_eq!(&actual, &expected);
1067    }
1068
1069    #[test]
1070    fn test_sparse_list_non_null_fill() {
1071        let elements = buffer![1i32, 2, 1, 2].into_array();
1072        let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1073        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1074            .unwrap()
1075            .into_array();
1076
1077        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1078        let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1079        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1080            .unwrap()
1081            .into_array();
1082
1083        let actual = sparse.to_canonical().unwrap().into_array();
1084        let expected = ListArray::try_new(
1085            buffer![1i32, 5, 6, 7, 8, 5, 6, 7, 8, 2, 1, 2].into_array(),
1086            buffer![0u32, 1, 5, 9, 10, 11, 12].into_array(),
1087            Validity::AllValid,
1088        )
1089        .unwrap()
1090        .into_array();
1091
1092        let actual = actual.into_arrow_preferred().unwrap();
1093        let expected = expected.into_arrow_preferred().unwrap();
1094
1095        assert_eq!(actual.data_type(), expected.data_type());
1096        assert_eq!(&actual, &expected);
1097    }
1098
1099    #[test]
1100    fn test_sparse_binary_varbin_null_fill() {
1101        let strings = <VarBinArray as FromIterator<_>>::from_iter([
1102            Some(b"hello" as &[u8]),
1103            Some(b"goodbye"),
1104            Some(b"hello"),
1105            None,
1106            Some(b"\x00"),
1107            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1108            None,
1109        ])
1110        .into_array();
1111
1112        let array = SparseArray::try_new(
1113            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1114            strings,
1115            12,
1116            Scalar::null(DType::Binary(Nullable)),
1117        )
1118        .unwrap();
1119
1120        let actual = array.to_varbinview().unwrap().into_array();
1121        let expected = VarBinViewArray::from_iter_nullable_bin([
1122            Some(b"hello" as &[u8]),
1123            None,
1124            None,
1125            Some(b"goodbye"),
1126            Some(b"hello"),
1127            None,
1128            None,
1129            Some(b"\x00"),
1130            None,
1131            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1132            None,
1133            None,
1134        ])
1135        .into_array();
1136        let actual = actual.into_arrow_preferred().unwrap();
1137        let expected = expected.into_arrow_preferred().unwrap();
1138
1139        assert_eq!(actual.data_type(), expected.data_type());
1140        assert_eq!(&actual, &expected);
1141    }
1142
1143    #[test]
1144    fn test_sparse_list_grows_offset_type() {
1145        let elements = buffer![1i32, 2, 1, 2].into_array();
1146        let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1147        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1148            .unwrap()
1149            .into_array();
1150
1151        let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1152        let fill_value = Scalar::from(Some(vec![42i32; 252])); // 252 + 4 elements = 256 > u8::MAX
1153        let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1154            .unwrap()
1155            .into_array();
1156
1157        let actual = sparse.to_canonical().unwrap().into_array();
1158        let mut expected_elements = buffer_mut![1, 2, 1, 2];
1159        expected_elements.extend(buffer![42i32; 252]);
1160        let expected = ListArray::try_new(
1161            expected_elements.freeze().into_array(),
1162            buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1163            Validity::AllValid,
1164        )
1165        .unwrap()
1166        .into_array();
1167
1168        assert_eq!(
1169            actual.to_list().unwrap().offsets().dtype(),
1170            &DType::Primitive(PType::U16, NonNullable)
1171        );
1172
1173        let actual = actual.into_arrow_preferred().unwrap();
1174        let expected = expected.into_arrow_preferred().unwrap();
1175
1176        assert_eq!(actual.data_type(), expected.data_type());
1177        assert_eq!(&actual, &expected);
1178    }
1179}