vortex_sparse/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9    BinaryView, BoolArray, BooleanBuffer, ConstantArray, ListArray, NullArray, OffsetPType,
10    PrimitiveArray, StructArray, VarBinViewArray, smallest_storage_type,
11};
12use vortex_array::builders::{
13    ArrayBuilder as _, ArrayBuilderExt, DecimalBuilder, ListBuilder, builder_with_capacity,
14};
15use vortex_array::patches::Patches;
16use vortex_array::validity::Validity;
17use vortex_array::vtable::CanonicalVTable;
18use vortex_array::{Array, ArrayRef, Canonical, IntoArray as _, ToCanonical as _};
19use vortex_buffer::{Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut};
20use vortex_dtype::{
21    DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
22    match_each_native_ptype,
23};
24use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_err};
25use vortex_scalar::{
26    DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar,
27    match_each_decimal_value_type,
28};
29
30use crate::{SparseArray, SparseVTable};
31
32impl CanonicalVTable<SparseVTable> for SparseVTable {
33    fn canonicalize(array: &SparseArray) -> VortexResult<Canonical> {
34        if array.patches().num_patches() == 0 {
35            return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
36        }
37
38        match array.dtype() {
39            DType::Null => {
40                assert!(array.fill_scalar().is_null());
41                Ok(Canonical::Null(NullArray::new(array.len())))
42            }
43            DType::Bool(..) => {
44                let resolved_patches = array.resolved_patches()?;
45                canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
46            }
47            DType::Primitive(ptype, ..) => {
48                let resolved_patches = array.resolved_patches()?;
49                match_each_native_ptype!(ptype, |P| {
50                    canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
51                })
52            }
53            DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
54                struct_fields,
55                array.fill_scalar().as_struct(),
56                array.dtype(),
57                array.patches(),
58                array.len(),
59            ),
60            DType::Decimal(decimal_dtype, nullability) => {
61                let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
62                let fill_value = array.fill_scalar().as_decimal();
63                match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
64                    canonicalize_sparse_decimal::<D>(
65                        *decimal_dtype,
66                        *nullability,
67                        fill_value,
68                        array.patches(),
69                        array.len(),
70                    )
71                })
72            }
73            dtype @ DType::Utf8(..) => {
74                let fill_value = array.fill_scalar().as_utf8().value();
75                let fill_value = fill_value.map(BufferString::into_inner);
76                canonicalize_varbin(array, dtype.clone(), fill_value)
77            }
78            dtype @ DType::Binary(..) => {
79                let fill_value = array.fill_scalar().as_binary().value();
80                canonicalize_varbin(array, dtype.clone(), fill_value)
81            }
82            DType::List(values_dtype, nullability) => {
83                let resolved_patches = array.resolved_patches()?;
84                canonicalize_sparse_lists(
85                    array,
86                    resolved_patches,
87                    values_dtype.clone(),
88                    *nullability,
89                )
90            }
91            DType::FixedSizeList(..) => {
92                unimplemented!("TODO(connor)[FixedSizeList]")
93            }
94            DType::Extension(_ext_dtype) => todo!(),
95        }
96    }
97}
98
99/// The elements of this [ListScalar] as an array or `None` if scalar is null.
100fn list_scalar_to_elements_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
101    let Some(elements) = scalar.elements() else {
102        return Ok(None);
103    };
104
105    let mut builder = builder_with_capacity(scalar.element_dtype(), scalar.len());
106    for s in elements {
107        builder.append_scalar(&s)?;
108    }
109    Ok(Some(builder.finish()))
110}
111
112/// Create a list-typed array containing one element, scalar, or `None` if scalar is null.
113fn list_scalar_to_singleton_list_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
114    let nullability = scalar.dtype().nullability();
115    let Some(elements) = list_scalar_to_elements_array(scalar)? else {
116        return Ok(None);
117    };
118
119    let validity = match nullability {
120        Nullability::NonNullable => Validity::NonNullable,
121        Nullability::Nullable => Validity::AllValid,
122    };
123
124    let n = elements.len();
125    ListArray::try_new(elements, buffer![0_u64, n as u64].into_array(), validity)
126        .map(|x| Some(x.into_array()))
127}
128
129#[allow(clippy::cognitive_complexity)]
130fn canonicalize_sparse_lists(
131    array: &SparseArray,
132    resolved_patches: Patches,
133    values_dtype: Arc<DType>,
134    nullability: Nullability,
135) -> VortexResult<Canonical> {
136    macro_rules! match_smallest_offset_type {
137        ($n_elements:expr, | $offset_type:ident | $body:block) => {{
138            let n_elements = $n_elements;
139            if n_elements <= u8::MAX as usize {
140                type $offset_type = u8;
141                $body
142            } else if n_elements <= u16::MAX as usize {
143                type $offset_type = u16;
144                $body
145            } else if n_elements <= u32::MAX as usize {
146                type $offset_type = u32;
147                $body
148            } else {
149                assert!(u64::try_from(n_elements).is_ok());
150                type $offset_type = u64;
151                $body
152            }
153        }};
154    }
155
156    let indices = resolved_patches.indices().to_primitive()?;
157    let values = resolved_patches.values().to_list()?;
158    let fill_value = array.fill_scalar().as_list();
159
160    let n_filled = array.len() - resolved_patches.num_patches();
161    let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
162
163    let validity = array
164        .validity_mask()
165        .map(|x| Validity::from_mask(x, nullability))?;
166
167    match_each_integer_ptype!(indices.ptype(), |I| {
168        match_smallest_offset_type!(total_canonical_values, |O| {
169            canonicalize_sparse_lists_inner::<I, O>(
170                indices.as_slice(),
171                values,
172                fill_value,
173                values_dtype,
174                array.len(),
175                total_canonical_values,
176                validity,
177            )
178        })
179    })
180}
181
182fn canonicalize_sparse_lists_inner<I: NativePType, SmallestViableOffsetType: OffsetPType>(
183    indices: &[I],
184    values: ListArray,
185    fill_value: ListScalar,
186    values_dtype: Arc<DType>,
187    len: usize,
188    total_canonical_values: usize,
189    validity: Validity,
190) -> VortexResult<Canonical> {
191    let Some(fill_value_array) = list_scalar_to_singleton_list_array(fill_value)? else {
192        let sparse_list_elements = values.elements().clone();
193        let sparse_list_offsets = values.offsets().to_primitive()?;
194        match_each_integer_ptype!(sparse_list_offsets.ptype(), |SparseValuesOffsetType| {
195            let sparse_list_offsets = sparse_list_offsets.as_slice::<SparseValuesOffsetType>();
196            // If the values are a small slice of a large array, their offsets may not fit in
197            // SmallestViableOffsetType. We avoid a copy by reusing values.elements(), but we
198            // therefore must use the offset type of values.offsets().
199            return canonicalize_sparse_lists_inner_with_null_fill_value(
200                indices,
201                sparse_list_elements,
202                sparse_list_offsets,
203                len,
204                validity,
205            );
206        });
207    };
208
209    let mut builder = ListBuilder::<SmallestViableOffsetType>::with_values_and_index_capacity(
210        values_dtype,
211        validity.nullability(),
212        total_canonical_values,
213        len + 1,
214    );
215    let mut next_index = 0_usize;
216    let enumerated_indices_usize = indices
217        .iter()
218        .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"))
219        .enumerate();
220    for (patch_values_index, next_patched_index) in enumerated_indices_usize {
221        for _ in next_index..next_patched_index {
222            builder.extend_from_array(&fill_value_array)?;
223        }
224        builder.extend_from_array(&values.slice(patch_values_index, patch_values_index + 1))?;
225        next_index = next_patched_index + 1;
226    }
227
228    for _ in next_index..len {
229        builder.extend_from_array(&fill_value_array)?;
230    }
231
232    builder.finish().to_canonical()
233}
234
235fn canonicalize_sparse_lists_inner_with_null_fill_value<I: NativePType, O: OffsetPType>(
236    indices: &[I],
237    elements: ArrayRef,
238    offsets: &[O],
239    len: usize,
240    validity: Validity,
241) -> VortexResult<Canonical> {
242    assert!(indices.len() < len + 1);
243    let mut dense_offsets = BufferMut::with_capacity(len + 1);
244
245    // We cannot use zero because elements may have leading junk values (e.g. the result of a slice).
246    dense_offsets.push(offsets[0]);
247    let mut dense_last_set_index = 0_usize;
248    for (sparse_start_index, dense_start_index) in indices.iter().enumerate() {
249        let sparse_end_index = sparse_start_index + 1;
250        let dense_start_index = (*dense_start_index)
251            .to_usize()
252            .vortex_expect("index must fit in usize");
253        let dense_end_index = dense_start_index + 1;
254
255        for _ in (dense_last_set_index + 1)..dense_end_index {
256            // For each null list, copy-forward the old index. These empty lists are masked by the validity.
257            dense_offsets.push(dense_offsets[dense_last_set_index]);
258        }
259        dense_offsets.push(offsets[sparse_end_index]);
260        dense_last_set_index = dense_end_index;
261    }
262    for _ in (dense_last_set_index + 1)..len {
263        // For each null list, copy-forward the old index. These empty lists are masked by the validity.
264        dense_offsets.push(dense_offsets[dense_last_set_index]);
265    }
266    let array = ListArray::try_new(elements, dense_offsets.into_array(), validity)?;
267    Ok(Canonical::List(array))
268}
269
270fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> VortexResult<Canonical> {
271    let (fill_bool, validity) = if fill_value.is_null() {
272        (false, Validity::AllInvalid)
273    } else {
274        (
275            fill_value.try_into()?,
276            if patches.dtype().nullability() == Nullability::NonNullable {
277                Validity::NonNullable
278            } else {
279                Validity::AllValid
280            },
281        )
282    };
283
284    let bools = BoolArray::new(
285        if fill_bool {
286            BooleanBuffer::new_set(patches.array_len())
287        } else {
288            BooleanBuffer::new_unset(patches.array_len())
289        },
290        validity,
291    );
292
293    bools.patch(patches).map(Canonical::Bool)
294}
295
296fn canonicalize_sparse_primitives<
297    T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
298>(
299    patches: &Patches,
300    fill_value: &Scalar,
301) -> VortexResult<Canonical> {
302    let (primitive_fill, validity) = if fill_value.is_null() {
303        (T::default(), Validity::AllInvalid)
304    } else {
305        (
306            fill_value.try_into()?,
307            if patches.dtype().nullability() == Nullability::NonNullable {
308                Validity::NonNullable
309            } else {
310                Validity::AllValid
311            },
312        )
313    };
314
315    let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
316
317    parray.patch(patches).map(Canonical::Primitive)
318}
319
320fn canonicalize_sparse_struct(
321    struct_fields: &StructFields,
322    fill_struct: StructScalar,
323    dtype: &DType,
324    // Resolution is unnecessary b/c we're just pushing the patches into the fields.
325    unresolved_patches: &Patches,
326    len: usize,
327) -> VortexResult<Canonical> {
328    let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
329        Some(fill_values) => (fill_values, Validity::AllValid),
330        None => (
331            struct_fields.fields().map(Scalar::default_value).collect(),
332            Validity::AllInvalid,
333        ),
334    };
335    let patch_values_as_struct = unresolved_patches.values().to_canonical()?.into_struct()?;
336    let columns_patch_values = patch_values_as_struct.fields();
337    let names = patch_values_as_struct.names();
338    let validity = if dtype.is_nullable() {
339        top_level_fill_validity.patch(
340            len,
341            unresolved_patches.offset(),
342            unresolved_patches.indices(),
343            &Validity::from_mask(
344                unresolved_patches.values().validity_mask()?,
345                Nullability::Nullable,
346            ),
347        )?
348    } else {
349        top_level_fill_validity
350            .into_non_nullable()
351            .ok_or_else(|| vortex_err!("fill validity should match sparse array nullability"))?
352    };
353
354    columns_patch_values
355        .iter()
356        .cloned()
357        .zip_eq(fill_values.into_iter())
358        .map(|(patch_values, fill_value)| -> VortexResult<_> {
359            SparseArray::try_new_from_patches(
360                unresolved_patches
361                    .clone()
362                    .map_values(|_| Ok(patch_values))?,
363                fill_value,
364            )
365        })
366        .process_results(|sparse_columns| {
367            StructArray::try_from_iter_with_validity(names.iter().zip_eq(sparse_columns), validity)
368                .map(Canonical::Struct)
369        })?
370}
371
372fn canonicalize_sparse_decimal<D: NativeDecimalType>(
373    decimal_dtype: DecimalDType,
374    nullability: Nullability,
375    fill_value: DecimalScalar,
376    patches: &Patches,
377    len: usize,
378) -> VortexResult<Canonical> {
379    let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
380    match fill_value.decimal_value() {
381        Some(fill_value) => {
382            let fill_value = fill_value
383                .cast::<D>()
384                .vortex_expect("unexpected value type");
385            for _ in 0..len {
386                builder.append_value(fill_value)
387            }
388        }
389        None => {
390            builder.append_nulls(len);
391        }
392    }
393    let filled_array = builder.finish_into_decimal();
394    let array = filled_array.patch(patches)?;
395    Ok(Canonical::Decimal(array))
396}
397
398fn canonicalize_varbin(
399    array: &SparseArray,
400    dtype: DType,
401    fill_value: Option<ByteBuffer>,
402) -> VortexResult<Canonical> {
403    let patches = array.resolved_patches()?;
404    let indices = patches.indices().to_primitive()?;
405    let values = patches.values().to_varbinview()?;
406    let validity = array
407        .validity_mask()
408        .map(|x| Validity::from_mask(x, dtype.nullability()))?;
409    let len = array.len();
410
411    match_each_integer_ptype!(indices.ptype(), |I| {
412        let indices = indices.buffer::<I>();
413        canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
414    })
415}
416
417fn canonicalize_varbin_inner<I: NativePType>(
418    fill_value: Option<ByteBuffer>,
419    indices: Buffer<I>,
420    values: VarBinViewArray,
421    dtype: DType,
422    validity: Validity,
423    len: usize,
424) -> VortexResult<Canonical> {
425    assert_eq!(dtype.nullability(), validity.nullability());
426
427    let n_patch_buffers = values.buffers().len();
428    let mut buffers = values.buffers().to_vec();
429
430    let fill = if let Some(buffer) = &fill_value {
431        buffers.push(buffer.clone());
432        BinaryView::make_view(
433            buffer.as_ref(),
434            u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
435            0,
436        )
437    } else {
438        // any <=12 character value will do
439        BinaryView::make_view(&[], 0, 0)
440    };
441
442    let mut views = buffer_mut![fill; len];
443    for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
444        let patch_index_usize = <usize as NumCast>::from(patch_index)
445            .vortex_expect("var bin view indices must fit in usize");
446        views[patch_index_usize] = patch;
447    }
448
449    // SAFETY: views are constructed to maintain the invariants
450    let array = unsafe {
451        VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
452    };
453
454    Ok(Canonical::VarBinView(array))
455}
456
457#[cfg(test)]
458mod test {
459    use rstest::rstest;
460    use vortex_array::arrays::{
461        BoolArray, BooleanBufferBuilder, DecimalArray, ListArray, PrimitiveArray, StructArray,
462        VarBinArray, VarBinViewArray,
463    };
464    use vortex_array::arrow::IntoArrowArray as _;
465    use vortex_array::validity::Validity;
466    use vortex_array::vtable::ValidityHelper;
467    use vortex_array::{IntoArray, ToCanonical};
468    use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
469    use vortex_dtype::Nullability::{NonNullable, Nullable};
470    use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
471    use vortex_mask::Mask;
472    use vortex_scalar::{DecimalValue, Scalar};
473
474    use crate::SparseArray;
475    use crate::canonical::{list_scalar_to_elements_array, list_scalar_to_singleton_list_array};
476
477    #[rstest]
478    #[case(Some(true))]
479    #[case(Some(false))]
480    #[case(None)]
481    fn test_sparse_bool(#[case] fill_value: Option<bool>) {
482        let indices = buffer![0u64, 1, 7].into_array();
483        let values = bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
484            .into_array();
485        let sparse_bools =
486            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
487        assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));
488
489        let flat_bools = sparse_bools.to_bool().unwrap();
490        let expected = bool_array_from_nullable_vec(
491            vec![
492                Some(true),
493                None,
494                fill_value,
495                fill_value,
496                fill_value,
497                fill_value,
498                fill_value,
499                Some(false),
500                fill_value,
501                fill_value,
502            ],
503            fill_value,
504        );
505
506        assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
507        assert_eq!(flat_bools.validity(), expected.validity());
508
509        assert!(flat_bools.boolean_buffer().value(0));
510        assert!(flat_bools.validity().is_valid(0).unwrap());
511        assert_eq!(
512            flat_bools.boolean_buffer().value(1),
513            fill_value.unwrap_or_default()
514        );
515        assert!(!flat_bools.validity().is_valid(1).unwrap());
516        assert_eq!(
517            flat_bools.validity().is_valid(2).unwrap(),
518            fill_value.is_some()
519        );
520        assert!(!flat_bools.boolean_buffer().value(7));
521        assert!(flat_bools.validity().is_valid(7).unwrap());
522    }
523
524    fn bool_array_from_nullable_vec(
525        bools: Vec<Option<bool>>,
526        fill_value: Option<bool>,
527    ) -> BoolArray {
528        let mut buffer = BooleanBufferBuilder::new(bools.len());
529        let mut validity = BooleanBufferBuilder::new(bools.len());
530        for maybe_bool in bools {
531            buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default()));
532            validity.append(maybe_bool.is_some());
533        }
534        BoolArray::new(buffer.finish(), Validity::from(validity.finish()))
535    }
536
537    #[rstest]
538    #[case(Some(0i32))]
539    #[case(Some(-1i32))]
540    #[case(None)]
541    fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
542        let indices = buffer![0u64, 1, 7].into_array();
543        let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
544        let sparse_ints =
545            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
546        assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
547
548        let flat_ints = sparse_ints.to_primitive().unwrap();
549        let expected = PrimitiveArray::from_option_iter([
550            Some(0i32),
551            None,
552            fill_value,
553            fill_value,
554            fill_value,
555            fill_value,
556            fill_value,
557            Some(1),
558            fill_value,
559            fill_value,
560        ]);
561
562        assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
563        assert_eq!(flat_ints.validity(), expected.validity());
564
565        assert_eq!(flat_ints.as_slice::<i32>()[0], 0);
566        assert!(flat_ints.validity().is_valid(0).unwrap());
567        assert_eq!(flat_ints.as_slice::<i32>()[1], 0);
568        assert!(!flat_ints.validity().is_valid(1).unwrap());
569        assert_eq!(
570            flat_ints.as_slice::<i32>()[2],
571            fill_value.unwrap_or_default()
572        );
573        assert_eq!(
574            flat_ints.validity().is_valid(2).unwrap(),
575            fill_value.is_some()
576        );
577        assert_eq!(flat_ints.as_slice::<i32>()[7], 1);
578        assert!(flat_ints.validity().is_valid(7).unwrap());
579    }
580
581    #[test]
582    fn test_sparse_struct_valid_fill() {
583        let field_names = FieldNames::from_iter(["a", "b"]);
584        let field_types = vec![
585            DType::Primitive(PType::I32, Nullable),
586            DType::Primitive(PType::I32, Nullable),
587        ];
588        let struct_fields = StructFields::new(field_names, field_types);
589        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
590
591        let indices = buffer![0u64, 1, 7, 8].into_array();
592        let patch_values_a =
593            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
594        let patch_values_b =
595            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
596        let patch_values = StructArray::try_new_with_dtype(
597            vec![patch_values_a, patch_values_b],
598            struct_fields.clone(),
599            4,
600            Validity::Array(
601                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
602            ),
603        )
604        .unwrap()
605        .into_array();
606
607        let fill_scalar = Scalar::struct_(
608            struct_dtype,
609            vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
610        );
611        let len = 10;
612        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
613
614        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
615            if i == 0 {
616                Some(10)
617            } else if i == 1 {
618                None
619            } else if i == 7 {
620                Some(20)
621            } else {
622                Some(-10)
623            }
624        }));
625        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
626            if i == 0 {
627                Some(1i32)
628            } else if i == 1 {
629                Some(2)
630            } else if i == 7 {
631                None
632            } else {
633                Some(-1)
634            }
635        }));
636
637        let expected = StructArray::try_new_with_dtype(
638            vec![expected_a.into_array(), expected_b.into_array()],
639            struct_fields,
640            len,
641            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 8 is Invalid.
642            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
643        )
644        .unwrap()
645        .to_array()
646        .into_arrow_preferred()
647        .unwrap();
648
649        let actual = sparse_struct
650            .to_struct()
651            .unwrap()
652            .to_array()
653            .into_arrow_preferred()
654            .unwrap();
655
656        assert_eq!(expected.data_type(), actual.data_type());
657        assert_eq!(&expected, &actual);
658    }
659
660    #[test]
661    fn test_sparse_struct_invalid_fill() {
662        let field_names = FieldNames::from_iter(["a", "b"]);
663        let field_types = vec![
664            DType::Primitive(PType::I32, Nullable),
665            DType::Primitive(PType::I32, Nullable),
666        ];
667        let struct_fields = StructFields::new(field_names, field_types);
668        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
669
670        let indices = buffer![0u64, 1, 7, 8].into_array();
671        let patch_values_a =
672            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
673        let patch_values_b =
674            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
675        let patch_values = StructArray::try_new_with_dtype(
676            vec![patch_values_a, patch_values_b],
677            struct_fields.clone(),
678            4,
679            Validity::Array(
680                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
681            ),
682        )
683        .unwrap()
684        .into_array();
685
686        let fill_scalar = Scalar::null(struct_dtype);
687        let len = 10;
688        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
689
690        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
691            if i == 0 {
692                Some(10)
693            } else if i == 1 {
694                None
695            } else if i == 7 {
696                Some(20)
697            } else {
698                Some(-10)
699            }
700        }));
701        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
702            if i == 0 {
703                Some(1i32)
704            } else if i == 1 {
705                Some(2)
706            } else if i == 7 {
707                None
708            } else {
709                Some(-1)
710            }
711        }));
712
713        let expected = StructArray::try_new_with_dtype(
714            vec![expected_a.into_array(), expected_b.into_array()],
715            struct_fields,
716            len,
717            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
718            Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
719        )
720        .unwrap()
721        .to_array()
722        .into_arrow_preferred()
723        .unwrap();
724
725        let actual = sparse_struct
726            .to_struct()
727            .unwrap()
728            .to_array()
729            .into_arrow_preferred()
730            .unwrap();
731
732        assert_eq!(expected.data_type(), actual.data_type());
733        assert_eq!(&expected, &actual);
734    }
735
736    #[test]
737    fn test_sparse_decimal() {
738        let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
739        let decimal_dtype = DecimalDType::new(3, 2);
740        let patch_values = DecimalArray::new(
741            buffer![100i128, 200i128, 300i128, 4000i128],
742            decimal_dtype,
743            Validity::from_iter([true, true, true, false]),
744        )
745        .to_array();
746        let len = 10;
747        let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
748        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
749
750        let expected = DecimalArray::new(
751            buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
752            decimal_dtype,
753            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
754            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
755        )
756        .to_array()
757        .into_arrow_preferred()
758        .unwrap();
759
760        let actual = sparse_struct
761            .to_decimal()
762            .unwrap()
763            .to_array()
764            .into_arrow_preferred()
765            .unwrap();
766
767        assert_eq!(expected.data_type(), actual.data_type());
768        assert_eq!(&expected, &actual);
769    }
770
771    #[test]
772    fn test_sparse_utf8_varbinview_non_null_fill() {
773        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
774            Some("hello"),
775            Some("goodbye"),
776            Some("hello"),
777            None,
778            Some("bonjour"),
779            Some("你好"),
780            None,
781        ])
782        .into_array();
783
784        let array = SparseArray::try_new(
785            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
786            strings,
787            12,
788            Scalar::from(Some("123".to_owned())),
789        )
790        .unwrap();
791
792        let actual = array.to_varbinview().unwrap().into_array();
793        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
794            Some("hello"),
795            Some("123"),
796            Some("123"),
797            Some("goodbye"),
798            Some("hello"),
799            None,
800            Some("123"),
801            Some("bonjour"),
802            Some("123"),
803            Some("你好"),
804            None,
805            Some("123"),
806        ])
807        .into_array();
808
809        let actual = actual.into_arrow_preferred().unwrap();
810        let expected = expected.into_arrow_preferred().unwrap();
811
812        assert_eq!(actual.data_type(), expected.data_type());
813        assert_eq!(&actual, &expected);
814    }
815
816    #[test]
817    fn test_sparse_utf8_varbinview_null_fill() {
818        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
819            Some("hello"),
820            Some("goodbye"),
821            Some("hello"),
822            None,
823            Some("bonjour"),
824            Some("你好"),
825            None,
826        ])
827        .into_array();
828
829        let array = SparseArray::try_new(
830            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
831            strings,
832            12,
833            Scalar::null(DType::Utf8(Nullable)),
834        )
835        .unwrap();
836
837        let actual = array.to_varbinview().unwrap().into_array();
838        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
839            Some("hello"),
840            None,
841            None,
842            Some("goodbye"),
843            Some("hello"),
844            None,
845            None,
846            Some("bonjour"),
847            None,
848            Some("你好"),
849            None,
850            None,
851        ])
852        .into_array();
853
854        let actual = actual.into_arrow_preferred().unwrap();
855        let expected = expected.into_arrow_preferred().unwrap();
856
857        assert_eq!(actual.data_type(), expected.data_type());
858        assert_eq!(&actual, &expected);
859    }
860
861    #[test]
862    fn test_sparse_utf8_varbinview_non_nullable() {
863        let strings =
864            VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
865                .into_array();
866
867        let array = SparseArray::try_new(
868            buffer![0u16, 3, 4, 5, 8].into_array(),
869            strings,
870            9,
871            Scalar::from("123".to_owned()),
872        )
873        .unwrap();
874
875        let actual = array.to_varbinview().unwrap().into_array();
876        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
877            Some("hello"),
878            Some("123"),
879            Some("123"),
880            Some("goodbye"),
881            Some("hello"),
882            Some("bonjour"),
883            Some("123"),
884            Some("123"),
885            Some("你好"),
886        ])
887        .into_array();
888
889        let actual = actual.into_arrow_preferred().unwrap();
890        let expected = expected.into_arrow_preferred().unwrap();
891
892        assert_eq!(actual.data_type(), expected.data_type());
893        assert_eq!(&actual, &expected);
894    }
895
896    #[test]
897    fn test_sparse_utf8_varbin_null_fill() {
898        let strings = <VarBinArray as FromIterator<_>>::from_iter([
899            Some("hello"),
900            Some("goodbye"),
901            Some("hello"),
902            None,
903            Some("bonjour"),
904            Some("你好"),
905            None,
906        ])
907        .into_array();
908
909        let array = SparseArray::try_new(
910            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
911            strings,
912            12,
913            Scalar::null(DType::Utf8(Nullable)),
914        )
915        .unwrap();
916
917        let actual = array.to_varbinview().unwrap().into_array();
918        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
919            Some("hello"),
920            None,
921            None,
922            Some("goodbye"),
923            Some("hello"),
924            None,
925            None,
926            Some("bonjour"),
927            None,
928            Some("你好"),
929            None,
930            None,
931        ])
932        .into_array();
933
934        let actual = actual.into_arrow_preferred().unwrap();
935        let expected = expected.into_arrow_preferred().unwrap();
936
937        assert_eq!(actual.data_type(), expected.data_type());
938        assert_eq!(&actual, &expected);
939    }
940
941    #[test]
942    fn test_sparse_binary_varbinview_non_null_fill() {
943        let binaries = VarBinViewArray::from_iter_nullable_bin([
944            Some(b"hello" as &[u8]),
945            Some(b"goodbye"),
946            Some(b"hello"),
947            None,
948            Some(b"\x00"),
949            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
950            None,
951        ])
952        .into_array();
953
954        let array = SparseArray::try_new(
955            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
956            binaries,
957            12,
958            Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
959        )
960        .unwrap();
961
962        let actual = array.to_varbinview().unwrap().into_array();
963        let expected = VarBinViewArray::from_iter_nullable_bin([
964            Some(b"hello" as &[u8]),
965            Some(b"123"),
966            Some(b"123"),
967            Some(b"goodbye"),
968            Some(b"hello"),
969            None,
970            Some(b"123"),
971            Some(b"\x00"),
972            Some(b"123"),
973            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
974            None,
975            Some(b"123"),
976        ])
977        .into_array();
978
979        let actual = actual.into_arrow_preferred().unwrap();
980        let expected = expected.into_arrow_preferred().unwrap();
981
982        assert_eq!(actual.data_type(), expected.data_type());
983        assert_eq!(&actual, &expected);
984    }
985
986    #[test]
987    fn test_list_scalar_to_elements_array() {
988        let scalar = Scalar::from(Some(vec![1, 2, 3]));
989        let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
990        assert_eq!(
991            array.unwrap().display_values().to_string(),
992            "[1i32, 2i32, 3i32]"
993        );
994
995        let scalar = Scalar::null_typed::<Vec<i32>>();
996        let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
997        assert!(array.is_none());
998    }
999
1000    #[test]
1001    fn test_list_scalar_to_singleton_list_array() {
1002        let scalar = Scalar::from(Some(vec![1, 2, 3]));
1003        let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
1004        assert!(array.is_some());
1005        let array = array.unwrap();
1006        assert_eq!(array.scalar_at(0), scalar);
1007        assert_eq!(array.len(), 1);
1008
1009        let scalar = Scalar::null_typed::<Vec<i32>>();
1010        let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
1011        assert!(array.is_none());
1012    }
1013
1014    #[test]
1015    fn test_sparse_list_null_fill() {
1016        let elements = buffer![1i32, 2, 1, 2].into_array();
1017        let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1018        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1019            .unwrap()
1020            .into_array();
1021
1022        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1023        let fill_value = Scalar::null(lists.dtype().clone());
1024        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1025            .unwrap()
1026            .into_array();
1027
1028        let actual = sparse.to_canonical().unwrap().into_array();
1029        let expected = ListArray::try_new(
1030            buffer![1i32, 2, 1, 2].into_array(),
1031            buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1032            Validity::Array(
1033                BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1034            ),
1035        )
1036        .unwrap()
1037        .into_array();
1038
1039        let actual = actual.into_arrow_preferred().unwrap();
1040        let expected = expected.into_arrow_preferred().unwrap();
1041
1042        assert_eq!(actual.data_type(), expected.data_type());
1043        assert_eq!(&actual, &expected);
1044    }
1045
1046    #[test]
1047    fn test_sparse_list_null_fill_sliced_sparse_values() {
1048        let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1049        let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7, 8].into_array();
1050        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1051            .unwrap()
1052            .into_array();
1053        let lists = lists.slice(2, 6);
1054
1055        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1056        let fill_value = Scalar::null(lists.dtype().clone());
1057        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1058            .unwrap()
1059            .into_array();
1060
1061        let actual = sparse.to_canonical().unwrap().into_array();
1062        let expected = ListArray::try_new(
1063            buffer![1i32, 2, 1, 2].into_array(),
1064            buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1065            Validity::Array(
1066                BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1067            ),
1068        )
1069        .unwrap()
1070        .into_array();
1071
1072        let actual = actual.into_arrow_preferred().unwrap();
1073        let expected = expected.into_arrow_preferred().unwrap();
1074
1075        assert_eq!(actual.data_type(), expected.data_type());
1076        assert_eq!(&actual, &expected);
1077    }
1078
1079    #[test]
1080    fn test_sparse_list_non_null_fill() {
1081        let elements = buffer![1i32, 2, 1, 2].into_array();
1082        let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1083        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1084            .unwrap()
1085            .into_array();
1086
1087        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1088        let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1089        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1090            .unwrap()
1091            .into_array();
1092
1093        let actual = sparse.to_canonical().unwrap().into_array();
1094        let expected = ListArray::try_new(
1095            buffer![1i32, 5, 6, 7, 8, 5, 6, 7, 8, 2, 1, 2].into_array(),
1096            buffer![0u32, 1, 5, 9, 10, 11, 12].into_array(),
1097            Validity::AllValid,
1098        )
1099        .unwrap()
1100        .into_array();
1101
1102        let actual = actual.into_arrow_preferred().unwrap();
1103        let expected = expected.into_arrow_preferred().unwrap();
1104
1105        assert_eq!(actual.data_type(), expected.data_type());
1106        assert_eq!(&actual, &expected);
1107    }
1108
1109    #[test]
1110    fn test_sparse_binary_varbin_null_fill() {
1111        let strings = <VarBinArray as FromIterator<_>>::from_iter([
1112            Some(b"hello" as &[u8]),
1113            Some(b"goodbye"),
1114            Some(b"hello"),
1115            None,
1116            Some(b"\x00"),
1117            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1118            None,
1119        ])
1120        .into_array();
1121
1122        let array = SparseArray::try_new(
1123            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1124            strings,
1125            12,
1126            Scalar::null(DType::Binary(Nullable)),
1127        )
1128        .unwrap();
1129
1130        let actual = array.to_varbinview().unwrap().into_array();
1131        let expected = VarBinViewArray::from_iter_nullable_bin([
1132            Some(b"hello" as &[u8]),
1133            None,
1134            None,
1135            Some(b"goodbye"),
1136            Some(b"hello"),
1137            None,
1138            None,
1139            Some(b"\x00"),
1140            None,
1141            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1142            None,
1143            None,
1144        ])
1145        .into_array();
1146        let actual = actual.into_arrow_preferred().unwrap();
1147        let expected = expected.into_arrow_preferred().unwrap();
1148
1149        assert_eq!(actual.data_type(), expected.data_type());
1150        assert_eq!(&actual, &expected);
1151    }
1152
1153    #[test]
1154    fn test_sparse_list_grows_offset_type() {
1155        let elements = buffer![1i32, 2, 1, 2].into_array();
1156        let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1157        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1158            .unwrap()
1159            .into_array();
1160
1161        let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1162        let fill_value = Scalar::from(Some(vec![42i32; 252])); // 252 + 4 elements = 256 > u8::MAX
1163        let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1164            .unwrap()
1165            .into_array();
1166
1167        let actual = sparse.to_canonical().unwrap().into_array();
1168        let mut expected_elements = buffer_mut![1, 2, 1, 2];
1169        expected_elements.extend(buffer![42i32; 252]);
1170        let expected = ListArray::try_new(
1171            expected_elements.freeze().into_array(),
1172            buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1173            Validity::AllValid,
1174        )
1175        .unwrap()
1176        .into_array();
1177
1178        assert_eq!(
1179            actual.to_list().unwrap().offsets().dtype(),
1180            &DType::Primitive(PType::U16, NonNullable)
1181        );
1182
1183        let actual = actual.into_arrow_preferred().unwrap();
1184        let expected = expected.into_arrow_preferred().unwrap();
1185
1186        assert_eq!(actual.data_type(), expected.data_type());
1187        assert_eq!(&actual, &expected);
1188    }
1189}