vortex_sparse/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9    BinaryView, BoolArray, BooleanBuffer, ConstantArray, ListArray, NullArray, OffsetPType,
10    PrimitiveArray, StructArray, VarBinViewArray, smallest_storage_type,
11};
12use vortex_array::builders::{
13    ArrayBuilder as _, ArrayBuilderExt, DecimalBuilder, ListBuilder, builder_with_capacity,
14};
15use vortex_array::patches::Patches;
16use vortex_array::validity::Validity;
17use vortex_array::vtable::CanonicalVTable;
18use vortex_array::{Array, ArrayRef, Canonical, IntoArray as _, ToCanonical as _};
19use vortex_buffer::{Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut};
20use vortex_dtype::{
21    DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
22    match_each_native_ptype,
23};
24use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_err};
25use vortex_scalar::{
26    DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar,
27    match_each_decimal_value_type,
28};
29
30use crate::{SparseArray, SparseVTable};
31
32impl CanonicalVTable<SparseVTable> for SparseVTable {
33    fn canonicalize(array: &SparseArray) -> VortexResult<Canonical> {
34        if array.patches().num_patches() == 0 {
35            return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
36        }
37
38        match array.dtype() {
39            DType::Null => {
40                assert!(array.fill_scalar().is_null());
41                Ok(Canonical::Null(NullArray::new(array.len())))
42            }
43            DType::Bool(..) => {
44                let resolved_patches = array.resolved_patches()?;
45                canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
46            }
47            DType::Primitive(ptype, ..) => {
48                let resolved_patches = array.resolved_patches()?;
49                match_each_native_ptype!(ptype, |P| {
50                    canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
51                })
52            }
53            DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
54                struct_fields,
55                array.fill_scalar().as_struct(),
56                array.dtype(),
57                array.patches(),
58                array.len(),
59            ),
60            DType::Decimal(decimal_dtype, nullability) => {
61                let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
62                let fill_value = array.fill_scalar().as_decimal();
63                match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
64                    canonicalize_sparse_decimal::<D>(
65                        *decimal_dtype,
66                        *nullability,
67                        fill_value,
68                        array.patches(),
69                        array.len(),
70                    )
71                })
72            }
73            dtype @ DType::Utf8(..) => {
74                let fill_value = array.fill_scalar().as_utf8().value();
75                let fill_value = fill_value.map(BufferString::into_inner);
76                canonicalize_varbin(array, dtype.clone(), fill_value)
77            }
78            dtype @ DType::Binary(..) => {
79                let fill_value = array.fill_scalar().as_binary().value();
80                canonicalize_varbin(array, dtype.clone(), fill_value)
81            }
82            DType::List(values_dtype, nullability) => {
83                let resolved_patches = array.resolved_patches()?;
84                canonicalize_sparse_lists(
85                    array,
86                    resolved_patches,
87                    values_dtype.clone(),
88                    *nullability,
89                )
90            }
91            DType::Extension(_ext_dtype) => todo!(),
92        }
93    }
94}
95
96/// The elements of this [ListScalar] as an array or `None` if scalar is null.
97fn list_scalar_to_elements_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
98    let Some(elements) = scalar.elements() else {
99        return Ok(None);
100    };
101
102    let mut builder = builder_with_capacity(scalar.element_dtype(), scalar.len());
103    for s in elements {
104        builder.append_scalar(&s)?;
105    }
106    Ok(Some(builder.finish()))
107}
108
109/// Create a list-typed array containing one element, scalar, or `None` if scalar is null.
110fn list_scalar_to_singleton_list_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
111    let nullability = scalar.dtype().nullability();
112    let Some(elements) = list_scalar_to_elements_array(scalar)? else {
113        return Ok(None);
114    };
115
116    let validity = match nullability {
117        Nullability::NonNullable => Validity::NonNullable,
118        Nullability::Nullable => Validity::AllValid,
119    };
120
121    let n = elements.len();
122    ListArray::try_new(elements, buffer![0_u64, n as u64].into_array(), validity)
123        .map(|x| Some(x.into_array()))
124}
125
126#[allow(clippy::cognitive_complexity)]
127fn canonicalize_sparse_lists(
128    array: &SparseArray,
129    resolved_patches: Patches,
130    values_dtype: Arc<DType>,
131    nullability: Nullability,
132) -> VortexResult<Canonical> {
133    macro_rules! match_smallest_offset_type {
134        ($n_elements:expr, | $offset_type:ident | $body:block) => {{
135            let n_elements = $n_elements;
136            if n_elements <= u8::MAX as usize {
137                type $offset_type = u8;
138                $body
139            } else if n_elements <= u16::MAX as usize {
140                type $offset_type = u16;
141                $body
142            } else if n_elements <= u32::MAX as usize {
143                type $offset_type = u32;
144                $body
145            } else {
146                assert!(u64::try_from(n_elements).is_ok());
147                type $offset_type = u64;
148                $body
149            }
150        }};
151    }
152
153    let indices = resolved_patches.indices().to_primitive()?;
154    let values = resolved_patches.values().to_list()?;
155    let fill_value = array.fill_scalar().as_list();
156
157    let n_filled = array.len() - resolved_patches.num_patches();
158    let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
159
160    let validity = array
161        .validity_mask()
162        .map(|x| Validity::from_mask(x, nullability))?;
163
164    match_each_integer_ptype!(indices.ptype(), |I| {
165        match_smallest_offset_type!(total_canonical_values, |O| {
166            canonicalize_sparse_lists_inner::<I, O>(
167                indices.as_slice(),
168                values,
169                fill_value,
170                values_dtype,
171                array.len(),
172                total_canonical_values,
173                validity,
174            )
175        })
176    })
177}
178
179fn canonicalize_sparse_lists_inner<I: NativePType, SmallestViableOffsetType: OffsetPType>(
180    indices: &[I],
181    values: ListArray,
182    fill_value: ListScalar,
183    values_dtype: Arc<DType>,
184    len: usize,
185    total_canonical_values: usize,
186    validity: Validity,
187) -> VortexResult<Canonical> {
188    let Some(fill_value_array) = list_scalar_to_singleton_list_array(fill_value)? else {
189        let sparse_list_elements = values.elements().clone();
190        let sparse_list_offsets = values.offsets().to_primitive()?;
191        match_each_integer_ptype!(sparse_list_offsets.ptype(), |SparseValuesOffsetType| {
192            let sparse_list_offsets = sparse_list_offsets.as_slice::<SparseValuesOffsetType>();
193            // If the values are a small slice of a large array, their offsets may not fit in
194            // SmallestViableOffsetType. We avoid a copy by reusing values.elements(), but we
195            // therefore must use the offset type of values.offsets().
196            return canonicalize_sparse_lists_inner_with_null_fill_value(
197                indices,
198                sparse_list_elements,
199                sparse_list_offsets,
200                len,
201                validity,
202            );
203        });
204    };
205
206    let mut builder = ListBuilder::<SmallestViableOffsetType>::with_values_and_index_capacity(
207        values_dtype,
208        validity.nullability(),
209        total_canonical_values,
210        len + 1,
211    );
212    let mut next_index = 0_usize;
213    let enumerated_indices_usize = indices
214        .iter()
215        .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"))
216        .enumerate();
217    for (patch_values_index, next_patched_index) in enumerated_indices_usize {
218        for _ in next_index..next_patched_index {
219            builder.extend_from_array(&fill_value_array)?;
220        }
221        builder.extend_from_array(&values.slice(patch_values_index, patch_values_index + 1)?)?;
222        next_index = next_patched_index + 1;
223    }
224
225    for _ in next_index..len {
226        builder.extend_from_array(&fill_value_array)?;
227    }
228
229    builder.finish().to_canonical()
230}
231
232fn canonicalize_sparse_lists_inner_with_null_fill_value<I: NativePType, O: OffsetPType>(
233    indices: &[I],
234    elements: ArrayRef,
235    offsets: &[O],
236    len: usize,
237    validity: Validity,
238) -> VortexResult<Canonical> {
239    assert!(indices.len() < len + 1);
240    let mut dense_offsets = BufferMut::with_capacity(len + 1);
241
242    // We cannot use zero because elements may have leading junk values (e.g. the result of a slice).
243    dense_offsets.push(offsets[0]);
244    let mut dense_last_set_index = 0_usize;
245    for (sparse_start_index, dense_start_index) in indices.iter().enumerate() {
246        let sparse_end_index = sparse_start_index + 1;
247        let dense_start_index = (*dense_start_index)
248            .to_usize()
249            .vortex_expect("index must fit in usize");
250        let dense_end_index = dense_start_index + 1;
251
252        for _ in (dense_last_set_index + 1)..dense_end_index {
253            // For each null list, copy-forward the old index. These empty lists are masked by the validity.
254            dense_offsets.push(dense_offsets[dense_last_set_index]);
255        }
256        dense_offsets.push(offsets[sparse_end_index]);
257        dense_last_set_index = dense_end_index;
258    }
259    for _ in (dense_last_set_index + 1)..len {
260        // For each null list, copy-forward the old index. These empty lists are masked by the validity.
261        dense_offsets.push(dense_offsets[dense_last_set_index]);
262    }
263    let array = ListArray::try_new(elements, dense_offsets.into_array(), validity)?;
264    Ok(Canonical::List(array))
265}
266
267fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> VortexResult<Canonical> {
268    let (fill_bool, validity) = if fill_value.is_null() {
269        (false, Validity::AllInvalid)
270    } else {
271        (
272            fill_value.try_into()?,
273            if patches.dtype().nullability() == Nullability::NonNullable {
274                Validity::NonNullable
275            } else {
276                Validity::AllValid
277            },
278        )
279    };
280
281    let bools = BoolArray::new(
282        if fill_bool {
283            BooleanBuffer::new_set(patches.array_len())
284        } else {
285            BooleanBuffer::new_unset(patches.array_len())
286        },
287        validity,
288    );
289
290    bools.patch(patches).map(Canonical::Bool)
291}
292
293fn canonicalize_sparse_primitives<
294    T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
295>(
296    patches: &Patches,
297    fill_value: &Scalar,
298) -> VortexResult<Canonical> {
299    let (primitive_fill, validity) = if fill_value.is_null() {
300        (T::default(), Validity::AllInvalid)
301    } else {
302        (
303            fill_value.try_into()?,
304            if patches.dtype().nullability() == Nullability::NonNullable {
305                Validity::NonNullable
306            } else {
307                Validity::AllValid
308            },
309        )
310    };
311
312    let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
313
314    parray.patch(patches).map(Canonical::Primitive)
315}
316
317fn canonicalize_sparse_struct(
318    struct_fields: &StructFields,
319    fill_struct: StructScalar,
320    dtype: &DType,
321    // Resolution is unnecessary b/c we're just pushing the patches into the fields.
322    unresolved_patches: &Patches,
323    len: usize,
324) -> VortexResult<Canonical> {
325    let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
326        Some(fill_values) => (fill_values, Validity::AllValid),
327        None => (
328            struct_fields.fields().map(Scalar::default_value).collect(),
329            Validity::AllInvalid,
330        ),
331    };
332    let patch_values_as_struct = unresolved_patches.values().to_canonical()?.into_struct()?;
333    let columns_patch_values = patch_values_as_struct.fields();
334    let names = patch_values_as_struct.names();
335    let validity = if dtype.is_nullable() {
336        top_level_fill_validity.patch(
337            len,
338            unresolved_patches.offset(),
339            unresolved_patches.indices(),
340            &Validity::from_mask(
341                unresolved_patches.values().validity_mask()?,
342                Nullability::Nullable,
343            ),
344        )?
345    } else {
346        top_level_fill_validity
347            .into_non_nullable()
348            .ok_or_else(|| vortex_err!("fill validity should match sparse array nullability"))?
349    };
350
351    columns_patch_values
352        .iter()
353        .cloned()
354        .zip_eq(fill_values.into_iter())
355        .map(|(patch_values, fill_value)| -> VortexResult<_> {
356            SparseArray::try_new_from_patches(
357                unresolved_patches
358                    .clone()
359                    .map_values(|_| Ok(patch_values))?,
360                fill_value,
361            )
362        })
363        .process_results(|sparse_columns| {
364            StructArray::try_from_iter_with_validity(names.iter().zip_eq(sparse_columns), validity)
365                .map(Canonical::Struct)
366        })?
367}
368
369fn canonicalize_sparse_decimal<D: NativeDecimalType>(
370    decimal_dtype: DecimalDType,
371    nullability: Nullability,
372    fill_value: DecimalScalar,
373    patches: &Patches,
374    len: usize,
375) -> VortexResult<Canonical> {
376    let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
377    match fill_value.decimal_value() {
378        Some(fill_value) => {
379            let fill_value = fill_value
380                .cast::<D>()
381                .vortex_expect("unexpected value type");
382            for _ in 0..len {
383                builder.append_value(fill_value)
384            }
385        }
386        None => {
387            builder.append_nulls(len);
388        }
389    }
390    let filled_array = builder.finish_into_decimal();
391    let array = filled_array.patch(patches)?;
392    Ok(Canonical::Decimal(array))
393}
394
395fn canonicalize_varbin(
396    array: &SparseArray,
397    dtype: DType,
398    fill_value: Option<ByteBuffer>,
399) -> VortexResult<Canonical> {
400    let patches = array.resolved_patches()?;
401    let indices = patches.indices().to_primitive()?;
402    let values = patches.values().to_varbinview()?;
403    let validity = array
404        .validity_mask()
405        .map(|x| Validity::from_mask(x, dtype.nullability()))?;
406    let len = array.len();
407
408    match_each_integer_ptype!(indices.ptype(), |I| {
409        let indices = indices.buffer::<I>();
410        canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
411    })
412}
413
414fn canonicalize_varbin_inner<I: NativePType>(
415    fill_value: Option<ByteBuffer>,
416    indices: Buffer<I>,
417    values: VarBinViewArray,
418    dtype: DType,
419    validity: Validity,
420    len: usize,
421) -> VortexResult<Canonical> {
422    assert_eq!(dtype.nullability(), validity.nullability());
423
424    let n_patch_buffers = values.buffers().len();
425    let mut buffers = values.buffers().to_vec();
426
427    let fill = if let Some(buffer) = &fill_value {
428        buffers.push(buffer.clone());
429        BinaryView::make_view(
430            buffer.as_ref(),
431            u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
432            0,
433        )
434    } else {
435        // any <=12 character value will do
436        BinaryView::make_view(&[], 0, 0)
437    };
438
439    let mut views = buffer_mut![fill; len];
440    for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
441        let patch_index_usize = <usize as NumCast>::from(patch_index)
442            .vortex_expect("var bin view indices must fit in usize");
443        views[patch_index_usize] = patch;
444    }
445
446    let array = VarBinViewArray::try_new(views.freeze(), Arc::from(buffers), dtype, validity)?;
447
448    Ok(Canonical::VarBinView(array))
449}
450
451#[cfg(test)]
452mod test {
453    use rstest::rstest;
454    use vortex_array::arrays::{
455        BoolArray, BooleanBufferBuilder, DecimalArray, ListArray, PrimitiveArray, StructArray,
456        VarBinArray, VarBinViewArray,
457    };
458    use vortex_array::arrow::IntoArrowArray as _;
459    use vortex_array::validity::Validity;
460    use vortex_array::vtable::ValidityHelper;
461    use vortex_array::{IntoArray, ToCanonical};
462    use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
463    use vortex_dtype::Nullability::{NonNullable, Nullable};
464    use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
465    use vortex_mask::Mask;
466    use vortex_scalar::{DecimalValue, Scalar};
467
468    use crate::SparseArray;
469    use crate::canonical::{list_scalar_to_elements_array, list_scalar_to_singleton_list_array};
470
471    #[rstest]
472    #[case(Some(true))]
473    #[case(Some(false))]
474    #[case(None)]
475    fn test_sparse_bool(#[case] fill_value: Option<bool>) {
476        let indices = buffer![0u64, 1, 7].into_array();
477        let values = bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
478            .into_array();
479        let sparse_bools =
480            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
481        assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));
482
483        let flat_bools = sparse_bools.to_bool().unwrap();
484        let expected = bool_array_from_nullable_vec(
485            vec![
486                Some(true),
487                None,
488                fill_value,
489                fill_value,
490                fill_value,
491                fill_value,
492                fill_value,
493                Some(false),
494                fill_value,
495                fill_value,
496            ],
497            fill_value,
498        );
499
500        assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
501        assert_eq!(flat_bools.validity(), expected.validity());
502
503        assert!(flat_bools.boolean_buffer().value(0));
504        assert!(flat_bools.validity().is_valid(0).unwrap());
505        assert_eq!(
506            flat_bools.boolean_buffer().value(1),
507            fill_value.unwrap_or_default()
508        );
509        assert!(!flat_bools.validity().is_valid(1).unwrap());
510        assert_eq!(
511            flat_bools.validity().is_valid(2).unwrap(),
512            fill_value.is_some()
513        );
514        assert!(!flat_bools.boolean_buffer().value(7));
515        assert!(flat_bools.validity().is_valid(7).unwrap());
516    }
517
518    fn bool_array_from_nullable_vec(
519        bools: Vec<Option<bool>>,
520        fill_value: Option<bool>,
521    ) -> BoolArray {
522        let mut buffer = BooleanBufferBuilder::new(bools.len());
523        let mut validity = BooleanBufferBuilder::new(bools.len());
524        for maybe_bool in bools {
525            buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default()));
526            validity.append(maybe_bool.is_some());
527        }
528        BoolArray::new(buffer.finish(), Validity::from(validity.finish()))
529    }
530
531    #[rstest]
532    #[case(Some(0i32))]
533    #[case(Some(-1i32))]
534    #[case(None)]
535    fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
536        let indices = buffer![0u64, 1, 7].into_array();
537        let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
538        let sparse_ints =
539            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
540        assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
541
542        let flat_ints = sparse_ints.to_primitive().unwrap();
543        let expected = PrimitiveArray::from_option_iter([
544            Some(0i32),
545            None,
546            fill_value,
547            fill_value,
548            fill_value,
549            fill_value,
550            fill_value,
551            Some(1),
552            fill_value,
553            fill_value,
554        ]);
555
556        assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
557        assert_eq!(flat_ints.validity(), expected.validity());
558
559        assert_eq!(flat_ints.as_slice::<i32>()[0], 0);
560        assert!(flat_ints.validity().is_valid(0).unwrap());
561        assert_eq!(flat_ints.as_slice::<i32>()[1], 0);
562        assert!(!flat_ints.validity().is_valid(1).unwrap());
563        assert_eq!(
564            flat_ints.as_slice::<i32>()[2],
565            fill_value.unwrap_or_default()
566        );
567        assert_eq!(
568            flat_ints.validity().is_valid(2).unwrap(),
569            fill_value.is_some()
570        );
571        assert_eq!(flat_ints.as_slice::<i32>()[7], 1);
572        assert!(flat_ints.validity().is_valid(7).unwrap());
573    }
574
575    #[test]
576    fn test_sparse_struct_valid_fill() {
577        let field_names = FieldNames::from_iter(["a", "b"]);
578        let field_types = vec![
579            DType::Primitive(PType::I32, Nullable),
580            DType::Primitive(PType::I32, Nullable),
581        ];
582        let struct_fields = StructFields::new(field_names, field_types);
583        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
584
585        let indices = buffer![0u64, 1, 7, 8].into_array();
586        let patch_values_a =
587            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
588        let patch_values_b =
589            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
590        let patch_values = StructArray::try_new_with_dtype(
591            vec![patch_values_a, patch_values_b],
592            struct_fields.clone(),
593            4,
594            Validity::Array(
595                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
596            ),
597        )
598        .unwrap()
599        .into_array();
600
601        let fill_scalar = Scalar::struct_(
602            struct_dtype,
603            vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
604        );
605        let len = 10;
606        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
607
608        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
609            if i == 0 {
610                Some(10)
611            } else if i == 1 {
612                None
613            } else if i == 7 {
614                Some(20)
615            } else {
616                Some(-10)
617            }
618        }));
619        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
620            if i == 0 {
621                Some(1i32)
622            } else if i == 1 {
623                Some(2)
624            } else if i == 7 {
625                None
626            } else {
627                Some(-1)
628            }
629        }));
630
631        let expected = StructArray::try_new_with_dtype(
632            vec![expected_a.into_array(), expected_b.into_array()],
633            struct_fields,
634            len,
635            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 8 is Invalid.
636            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
637        )
638        .unwrap()
639        .to_array()
640        .into_arrow_preferred()
641        .unwrap();
642
643        let actual = sparse_struct
644            .to_struct()
645            .unwrap()
646            .to_array()
647            .into_arrow_preferred()
648            .unwrap();
649
650        assert_eq!(expected.data_type(), actual.data_type());
651        assert_eq!(&expected, &actual);
652    }
653
654    #[test]
655    fn test_sparse_struct_invalid_fill() {
656        let field_names = FieldNames::from_iter(["a", "b"]);
657        let field_types = vec![
658            DType::Primitive(PType::I32, Nullable),
659            DType::Primitive(PType::I32, Nullable),
660        ];
661        let struct_fields = StructFields::new(field_names, field_types);
662        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
663
664        let indices = buffer![0u64, 1, 7, 8].into_array();
665        let patch_values_a =
666            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
667        let patch_values_b =
668            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
669        let patch_values = StructArray::try_new_with_dtype(
670            vec![patch_values_a, patch_values_b],
671            struct_fields.clone(),
672            4,
673            Validity::Array(
674                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
675            ),
676        )
677        .unwrap()
678        .into_array();
679
680        let fill_scalar = Scalar::null(struct_dtype);
681        let len = 10;
682        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
683
684        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
685            if i == 0 {
686                Some(10)
687            } else if i == 1 {
688                None
689            } else if i == 7 {
690                Some(20)
691            } else {
692                Some(-10)
693            }
694        }));
695        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
696            if i == 0 {
697                Some(1i32)
698            } else if i == 1 {
699                Some(2)
700            } else if i == 7 {
701                None
702            } else {
703                Some(-1)
704            }
705        }));
706
707        let expected = StructArray::try_new_with_dtype(
708            vec![expected_a.into_array(), expected_b.into_array()],
709            struct_fields,
710            len,
711            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
712            Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
713        )
714        .unwrap()
715        .to_array()
716        .into_arrow_preferred()
717        .unwrap();
718
719        let actual = sparse_struct
720            .to_struct()
721            .unwrap()
722            .to_array()
723            .into_arrow_preferred()
724            .unwrap();
725
726        assert_eq!(expected.data_type(), actual.data_type());
727        assert_eq!(&expected, &actual);
728    }
729
730    #[test]
731    fn test_sparse_decimal() {
732        let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
733        let decimal_dtype = DecimalDType::new(3, 2);
734        let patch_values = DecimalArray::new(
735            buffer![100i128, 200i128, 300i128, 4000i128],
736            decimal_dtype,
737            Validity::from_iter([true, true, true, false]),
738        )
739        .to_array();
740        let len = 10;
741        let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
742        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
743
744        let expected = DecimalArray::new(
745            buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
746            decimal_dtype,
747            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
748            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
749        )
750        .to_array()
751        .into_arrow_preferred()
752        .unwrap();
753
754        let actual = sparse_struct
755            .to_decimal()
756            .unwrap()
757            .to_array()
758            .into_arrow_preferred()
759            .unwrap();
760
761        assert_eq!(expected.data_type(), actual.data_type());
762        assert_eq!(&expected, &actual);
763    }
764
765    #[test]
766    fn test_sparse_utf8_varbinview_non_null_fill() {
767        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
768            Some("hello"),
769            Some("goodbye"),
770            Some("hello"),
771            None,
772            Some("bonjour"),
773            Some("你好"),
774            None,
775        ])
776        .into_array();
777
778        let array = SparseArray::try_new(
779            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
780            strings,
781            12,
782            Scalar::from(Some("123".to_owned())),
783        )
784        .unwrap();
785
786        let actual = array.to_varbinview().unwrap().into_array();
787        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
788            Some("hello"),
789            Some("123"),
790            Some("123"),
791            Some("goodbye"),
792            Some("hello"),
793            None,
794            Some("123"),
795            Some("bonjour"),
796            Some("123"),
797            Some("你好"),
798            None,
799            Some("123"),
800        ])
801        .into_array();
802
803        let actual = actual.into_arrow_preferred().unwrap();
804        let expected = expected.into_arrow_preferred().unwrap();
805
806        assert_eq!(actual.data_type(), expected.data_type());
807        assert_eq!(&actual, &expected);
808    }
809
810    #[test]
811    fn test_sparse_utf8_varbinview_null_fill() {
812        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
813            Some("hello"),
814            Some("goodbye"),
815            Some("hello"),
816            None,
817            Some("bonjour"),
818            Some("你好"),
819            None,
820        ])
821        .into_array();
822
823        let array = SparseArray::try_new(
824            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
825            strings,
826            12,
827            Scalar::null(DType::Utf8(Nullable)),
828        )
829        .unwrap();
830
831        let actual = array.to_varbinview().unwrap().into_array();
832        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
833            Some("hello"),
834            None,
835            None,
836            Some("goodbye"),
837            Some("hello"),
838            None,
839            None,
840            Some("bonjour"),
841            None,
842            Some("你好"),
843            None,
844            None,
845        ])
846        .into_array();
847
848        let actual = actual.into_arrow_preferred().unwrap();
849        let expected = expected.into_arrow_preferred().unwrap();
850
851        assert_eq!(actual.data_type(), expected.data_type());
852        assert_eq!(&actual, &expected);
853    }
854
855    #[test]
856    fn test_sparse_utf8_varbinview_non_nullable() {
857        let strings =
858            VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
859                .into_array();
860
861        let array = SparseArray::try_new(
862            buffer![0u16, 3, 4, 5, 8].into_array(),
863            strings,
864            9,
865            Scalar::from("123".to_owned()),
866        )
867        .unwrap();
868
869        let actual = array.to_varbinview().unwrap().into_array();
870        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
871            Some("hello"),
872            Some("123"),
873            Some("123"),
874            Some("goodbye"),
875            Some("hello"),
876            Some("bonjour"),
877            Some("123"),
878            Some("123"),
879            Some("你好"),
880        ])
881        .into_array();
882
883        let actual = actual.into_arrow_preferred().unwrap();
884        let expected = expected.into_arrow_preferred().unwrap();
885
886        assert_eq!(actual.data_type(), expected.data_type());
887        assert_eq!(&actual, &expected);
888    }
889
890    #[test]
891    fn test_sparse_utf8_varbin_null_fill() {
892        let strings = <VarBinArray as FromIterator<_>>::from_iter([
893            Some("hello"),
894            Some("goodbye"),
895            Some("hello"),
896            None,
897            Some("bonjour"),
898            Some("你好"),
899            None,
900        ])
901        .into_array();
902
903        let array = SparseArray::try_new(
904            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
905            strings,
906            12,
907            Scalar::null(DType::Utf8(Nullable)),
908        )
909        .unwrap();
910
911        let actual = array.to_varbinview().unwrap().into_array();
912        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
913            Some("hello"),
914            None,
915            None,
916            Some("goodbye"),
917            Some("hello"),
918            None,
919            None,
920            Some("bonjour"),
921            None,
922            Some("你好"),
923            None,
924            None,
925        ])
926        .into_array();
927
928        let actual = actual.into_arrow_preferred().unwrap();
929        let expected = expected.into_arrow_preferred().unwrap();
930
931        assert_eq!(actual.data_type(), expected.data_type());
932        assert_eq!(&actual, &expected);
933    }
934
935    #[test]
936    fn test_sparse_binary_varbinview_non_null_fill() {
937        let binaries = VarBinViewArray::from_iter_nullable_bin([
938            Some(b"hello" as &[u8]),
939            Some(b"goodbye"),
940            Some(b"hello"),
941            None,
942            Some(b"\x00"),
943            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
944            None,
945        ])
946        .into_array();
947
948        let array = SparseArray::try_new(
949            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
950            binaries,
951            12,
952            Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
953        )
954        .unwrap();
955
956        let actual = array.to_varbinview().unwrap().into_array();
957        let expected = VarBinViewArray::from_iter_nullable_bin([
958            Some(b"hello" as &[u8]),
959            Some(b"123"),
960            Some(b"123"),
961            Some(b"goodbye"),
962            Some(b"hello"),
963            None,
964            Some(b"123"),
965            Some(b"\x00"),
966            Some(b"123"),
967            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
968            None,
969            Some(b"123"),
970        ])
971        .into_array();
972
973        let actual = actual.into_arrow_preferred().unwrap();
974        let expected = expected.into_arrow_preferred().unwrap();
975
976        assert_eq!(actual.data_type(), expected.data_type());
977        assert_eq!(&actual, &expected);
978    }
979
980    #[test]
981    fn test_list_scalar_to_elements_array() {
982        let scalar = Scalar::from(Some(vec![1, 2, 3]));
983        let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
984        assert_eq!(
985            array.unwrap().display_values().to_string(),
986            "[1i32, 2i32, 3i32]"
987        );
988
989        let scalar = Scalar::null_typed::<Vec<i32>>();
990        let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
991        assert!(array.is_none());
992    }
993
994    #[test]
995    fn test_list_scalar_to_singleton_list_array() {
996        let scalar = Scalar::from(Some(vec![1, 2, 3]));
997        let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
998        assert!(array.is_some());
999        let array = array.unwrap();
1000        assert_eq!(array.scalar_at(0).unwrap(), scalar);
1001        assert_eq!(array.len(), 1);
1002
1003        let scalar = Scalar::null_typed::<Vec<i32>>();
1004        let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
1005        assert!(array.is_none());
1006    }
1007
1008    #[test]
1009    fn test_sparse_list_null_fill() {
1010        let elements = buffer![1i32, 2, 1, 2].into_array();
1011        let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1012        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1013            .unwrap()
1014            .into_array();
1015
1016        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1017        let fill_value = Scalar::null(lists.dtype().clone());
1018        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1019            .unwrap()
1020            .into_array();
1021
1022        let actual = sparse.to_canonical().unwrap().into_array();
1023        let expected = ListArray::try_new(
1024            buffer![1i32, 2, 1, 2].into_array(),
1025            buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1026            Validity::Array(
1027                BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1028            ),
1029        )
1030        .unwrap()
1031        .into_array();
1032
1033        let actual = actual.into_arrow_preferred().unwrap();
1034        let expected = expected.into_arrow_preferred().unwrap();
1035
1036        assert_eq!(actual.data_type(), expected.data_type());
1037        assert_eq!(&actual, &expected);
1038    }
1039
1040    #[test]
1041    fn test_sparse_list_null_fill_sliced_sparse_values() {
1042        let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1043        let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7, 8].into_array();
1044        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1045            .unwrap()
1046            .into_array();
1047        let lists = lists.slice(2, 6).unwrap();
1048
1049        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1050        let fill_value = Scalar::null(lists.dtype().clone());
1051        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1052            .unwrap()
1053            .into_array();
1054
1055        let actual = sparse.to_canonical().unwrap().into_array();
1056        let expected = ListArray::try_new(
1057            buffer![1i32, 2, 1, 2].into_array(),
1058            buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1059            Validity::Array(
1060                BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1061            ),
1062        )
1063        .unwrap()
1064        .into_array();
1065
1066        let actual = actual.into_arrow_preferred().unwrap();
1067        let expected = expected.into_arrow_preferred().unwrap();
1068
1069        assert_eq!(actual.data_type(), expected.data_type());
1070        assert_eq!(&actual, &expected);
1071    }
1072
1073    #[test]
1074    fn test_sparse_list_non_null_fill() {
1075        let elements = buffer![1i32, 2, 1, 2].into_array();
1076        let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1077        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1078            .unwrap()
1079            .into_array();
1080
1081        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1082        let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1083        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1084            .unwrap()
1085            .into_array();
1086
1087        let actual = sparse.to_canonical().unwrap().into_array();
1088        let expected = ListArray::try_new(
1089            buffer![1i32, 5, 6, 7, 8, 5, 6, 7, 8, 2, 1, 2].into_array(),
1090            buffer![0u32, 1, 5, 9, 10, 11, 12].into_array(),
1091            Validity::AllValid,
1092        )
1093        .unwrap()
1094        .into_array();
1095
1096        let actual = actual.into_arrow_preferred().unwrap();
1097        let expected = expected.into_arrow_preferred().unwrap();
1098
1099        assert_eq!(actual.data_type(), expected.data_type());
1100        assert_eq!(&actual, &expected);
1101    }
1102
1103    #[test]
1104    fn test_sparse_binary_varbin_null_fill() {
1105        let strings = <VarBinArray as FromIterator<_>>::from_iter([
1106            Some(b"hello" as &[u8]),
1107            Some(b"goodbye"),
1108            Some(b"hello"),
1109            None,
1110            Some(b"\x00"),
1111            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1112            None,
1113        ])
1114        .into_array();
1115
1116        let array = SparseArray::try_new(
1117            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1118            strings,
1119            12,
1120            Scalar::null(DType::Binary(Nullable)),
1121        )
1122        .unwrap();
1123
1124        let actual = array.to_varbinview().unwrap().into_array();
1125        let expected = VarBinViewArray::from_iter_nullable_bin([
1126            Some(b"hello" as &[u8]),
1127            None,
1128            None,
1129            Some(b"goodbye"),
1130            Some(b"hello"),
1131            None,
1132            None,
1133            Some(b"\x00"),
1134            None,
1135            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1136            None,
1137            None,
1138        ])
1139        .into_array();
1140        let actual = actual.into_arrow_preferred().unwrap();
1141        let expected = expected.into_arrow_preferred().unwrap();
1142
1143        assert_eq!(actual.data_type(), expected.data_type());
1144        assert_eq!(&actual, &expected);
1145    }
1146
1147    #[test]
1148    fn test_sparse_list_grows_offset_type() {
1149        let elements = buffer![1i32, 2, 1, 2].into_array();
1150        let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1151        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1152            .unwrap()
1153            .into_array();
1154
1155        let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1156        let fill_value = Scalar::from(Some(vec![42i32; 252])); // 252 + 4 elements = 256 > u8::MAX
1157        let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1158            .unwrap()
1159            .into_array();
1160
1161        let actual = sparse.to_canonical().unwrap().into_array();
1162        let mut expected_elements = buffer_mut![1, 2, 1, 2];
1163        expected_elements.extend(buffer![42i32; 252]);
1164        let expected = ListArray::try_new(
1165            expected_elements.freeze().into_array(),
1166            buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1167            Validity::AllValid,
1168        )
1169        .unwrap()
1170        .into_array();
1171
1172        assert_eq!(
1173            actual.to_list().unwrap().offsets().dtype(),
1174            &DType::Primitive(PType::U16, NonNullable)
1175        );
1176
1177        let actual = actual.into_arrow_preferred().unwrap();
1178        let expected = expected.into_arrow_preferred().unwrap();
1179
1180        assert_eq!(actual.data_type(), expected.data_type());
1181        assert_eq!(&actual, &expected);
1182    }
1183}