vortex_sparse/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9    BinaryView, BoolArray, BooleanBuffer, ConstantArray, FixedSizeListArray, ListArray, NullArray,
10    PrimitiveArray, StructArray, VarBinViewArray, smallest_storage_type,
11};
12use vortex_array::builders::{ArrayBuilder, DecimalBuilder, ListBuilder, builder_with_capacity};
13use vortex_array::patches::Patches;
14use vortex_array::validity::Validity;
15use vortex_array::vtable::{CanonicalVTable, ValidityHelper};
16use vortex_array::{Array, ArrayRef, Canonical, IntoArray as _, OffsetPType, ToCanonical as _};
17use vortex_buffer::{Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut};
18use vortex_dtype::{
19    DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
20    match_each_native_ptype,
21};
22use vortex_error::{VortexError, VortexExpect as _, vortex_panic};
23use vortex_scalar::{
24    DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar,
25    match_each_decimal_value_type,
26};
27
28use crate::{SparseArray, SparseVTable};
29
30impl CanonicalVTable<SparseVTable> for SparseVTable {
31    fn canonicalize(array: &SparseArray) -> Canonical {
32        if array.patches().num_patches() == 0 {
33            return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
34        }
35
36        match array.dtype() {
37            DType::Null => {
38                assert!(array.fill_scalar().is_null());
39                Canonical::Null(NullArray::new(array.len()))
40            }
41            DType::Bool(..) => {
42                let resolved_patches = array.resolved_patches();
43                canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
44            }
45            DType::Primitive(ptype, ..) => {
46                let resolved_patches = array.resolved_patches();
47                match_each_native_ptype!(ptype, |P| {
48                    canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
49                })
50            }
51            DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
52                struct_fields,
53                array.fill_scalar().as_struct(),
54                array.dtype(),
55                array.patches(),
56                array.len(),
57            ),
58            DType::Decimal(decimal_dtype, nullability) => {
59                let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
60                let fill_value = array.fill_scalar().as_decimal();
61                match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
62                    canonicalize_sparse_decimal::<D>(
63                        *decimal_dtype,
64                        *nullability,
65                        fill_value,
66                        array.patches(),
67                        array.len(),
68                    )
69                })
70            }
71            dtype @ DType::Utf8(..) => {
72                let fill_value = array.fill_scalar().as_utf8().value();
73                let fill_value = fill_value.map(BufferString::into_inner);
74                canonicalize_varbin(array, dtype.clone(), fill_value)
75            }
76            dtype @ DType::Binary(..) => {
77                let fill_value = array.fill_scalar().as_binary().value();
78                canonicalize_varbin(array, dtype.clone(), fill_value)
79            }
80            DType::List(values_dtype, nullability) => {
81                let resolved_patches = array.resolved_patches();
82                canonicalize_sparse_lists(
83                    array,
84                    resolved_patches,
85                    values_dtype.clone(),
86                    *nullability,
87                )
88            }
89            DType::FixedSizeList(.., nullability) => {
90                canonicalize_sparse_fixed_size_list(array, *nullability)
91            }
92            DType::Extension(_ext_dtype) => todo!(),
93        }
94    }
95}
96
97/// The elements of this [ListScalar] as an array or `None` if scalar is null.
98fn list_scalar_to_elements_array(scalar: ListScalar) -> Option<ArrayRef> {
99    let elements = scalar.elements()?;
100
101    let mut builder = builder_with_capacity(scalar.element_dtype(), scalar.len());
102    for s in elements {
103        builder
104            .append_scalar(&s)
105            .vortex_expect("Scalar dtype must match");
106    }
107    Some(builder.finish())
108}
109
110/// Create a list-typed array containing one element, scalar, or `None` if scalar is null.
111fn list_scalar_to_singleton_list_array(scalar: ListScalar) -> Option<ArrayRef> {
112    let nullability = scalar.dtype().nullability();
113    let elements = list_scalar_to_elements_array(scalar)?;
114
115    let validity = match nullability {
116        Nullability::NonNullable => Validity::NonNullable,
117        Nullability::Nullable => Validity::AllValid,
118    };
119
120    let n = elements.len();
121    Some(
122        unsafe {
123            ListArray::new_unchecked(elements, buffer![0_u64, n as u64].into_array(), validity)
124        }
125        .into_array(),
126    )
127}
128
129#[allow(clippy::cognitive_complexity)]
130fn canonicalize_sparse_lists(
131    array: &SparseArray,
132    resolved_patches: Patches,
133    values_dtype: Arc<DType>,
134    nullability: Nullability,
135) -> Canonical {
136    macro_rules! match_smallest_offset_type {
137        ($n_elements:expr, | $offset_type:ident | $body:block) => {{
138            let n_elements = $n_elements;
139            if n_elements <= u8::MAX as usize {
140                type $offset_type = u8;
141                $body
142            } else if n_elements <= u16::MAX as usize {
143                type $offset_type = u16;
144                $body
145            } else if n_elements <= u32::MAX as usize {
146                type $offset_type = u32;
147                $body
148            } else {
149                assert!(u64::try_from(n_elements).is_ok());
150                type $offset_type = u64;
151                $body
152            }
153        }};
154    }
155
156    let indices = resolved_patches.indices().to_primitive();
157    let values = resolved_patches.values().to_list();
158    let fill_value = array.fill_scalar().as_list();
159
160    let n_filled = array.len() - resolved_patches.num_patches();
161    let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
162
163    let validity = Validity::from_mask(array.validity_mask(), nullability);
164
165    match_each_integer_ptype!(indices.ptype(), |I| {
166        match_smallest_offset_type!(total_canonical_values, |O| {
167            canonicalize_sparse_lists_inner::<I, O>(
168                indices.as_slice(),
169                values,
170                fill_value,
171                values_dtype,
172                array.len(),
173                total_canonical_values,
174                validity,
175            )
176        })
177    })
178}
179
180fn canonicalize_sparse_lists_inner<I: NativePType, SmallestViableOffsetType: OffsetPType>(
181    indices: &[I],
182    values: ListArray,
183    fill_value: ListScalar,
184    values_dtype: Arc<DType>,
185    len: usize,
186    total_canonical_values: usize,
187    validity: Validity,
188) -> Canonical {
189    let Some(fill_value_array) = list_scalar_to_singleton_list_array(fill_value) else {
190        let sparse_list_elements = values.elements().clone();
191        let sparse_list_offsets = values.offsets().to_primitive();
192        match_each_integer_ptype!(sparse_list_offsets.ptype(), |SparseValuesOffsetType| {
193            let sparse_list_offsets = sparse_list_offsets.as_slice::<SparseValuesOffsetType>();
194            // If the values are a small slice of a large array, their offsets may not fit in
195            // SmallestViableOffsetType. We avoid a copy by reusing values.elements(), but we
196            // therefore must use the offset type of values.offsets().
197            return canonicalize_sparse_lists_inner_with_null_fill_value(
198                indices,
199                sparse_list_elements,
200                sparse_list_offsets,
201                len,
202                validity,
203            );
204        });
205    };
206
207    let mut builder = ListBuilder::<SmallestViableOffsetType>::with_values_and_index_capacity(
208        values_dtype,
209        validity.nullability(),
210        total_canonical_values,
211        len + 1,
212    );
213    let mut next_index = 0_usize;
214    let enumerated_indices_usize = indices
215        .iter()
216        .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"))
217        .enumerate();
218    for (patch_values_index, next_patched_index) in enumerated_indices_usize {
219        for _ in next_index..next_patched_index {
220            builder.extend_from_array(&fill_value_array);
221        }
222        builder.extend_from_array(&values.slice(patch_values_index..patch_values_index + 1));
223        next_index = next_patched_index + 1;
224    }
225
226    for _ in next_index..len {
227        builder.extend_from_array(&fill_value_array);
228    }
229
230    builder.finish_into_canonical()
231}
232
233fn canonicalize_sparse_lists_inner_with_null_fill_value<I: NativePType, O: OffsetPType>(
234    indices: &[I],
235    elements: ArrayRef,
236    offsets: &[O],
237    len: usize,
238    validity: Validity,
239) -> Canonical {
240    assert!(indices.len() < len + 1);
241    let mut dense_offsets = BufferMut::with_capacity(len + 1);
242
243    // We cannot use zero because elements may have leading junk values (e.g. the result of a slice).
244    dense_offsets.push(offsets[0]);
245    let mut dense_last_set_index = 0_usize;
246    for (sparse_start_index, dense_start_index) in indices.iter().enumerate() {
247        let sparse_end_index = sparse_start_index + 1;
248        let dense_start_index = (*dense_start_index)
249            .to_usize()
250            .vortex_expect("index must fit in usize");
251        let dense_end_index = dense_start_index + 1;
252
253        for _ in (dense_last_set_index + 1)..dense_end_index {
254            // For each null list, copy-forward the old index. These empty lists are masked by the validity.
255            dense_offsets.push(dense_offsets[dense_last_set_index]);
256        }
257        dense_offsets.push(offsets[sparse_end_index]);
258        dense_last_set_index = dense_end_index;
259    }
260    for _ in (dense_last_set_index + 1)..len {
261        // For each null list, copy-forward the old index. These empty lists are masked by the validity.
262        dense_offsets.push(dense_offsets[dense_last_set_index]);
263    }
264    Canonical::List(unsafe {
265        ListArray::new_unchecked(elements, dense_offsets.into_array(), validity)
266    })
267}
268
269/// Canonicalize a sparse [`FixedSizeListArray`] by expanding it into a dense representation.
270fn canonicalize_sparse_fixed_size_list(array: &SparseArray, nullability: Nullability) -> Canonical {
271    let resolved_patches = array.resolved_patches();
272    let indices = resolved_patches.indices().to_primitive();
273    let values = resolved_patches.values().to_fixed_size_list();
274    let fill_value = array.fill_scalar().as_list();
275
276    let validity = Validity::from_mask(array.validity_mask(), nullability);
277
278    match_each_integer_ptype!(indices.ptype(), |I| {
279        canonicalize_sparse_fixed_size_list_inner::<I>(
280            indices.as_slice(),
281            values,
282            fill_value,
283            array.len(),
284            validity,
285        )
286    })
287}
288
289/// Build a canonical [`FixedSizeListArray`] from sparse patches by interleaving patch values with
290/// fill values.
291///
292/// This algorithm walks through the sparse indices sequentially, filling gaps with the fill value's
293/// elements (or defaults if null). Since all lists have the same size, we can directly append
294/// elements without tracking offsets.
295fn canonicalize_sparse_fixed_size_list_inner<I: NativePType>(
296    indices: &[I],
297    values: FixedSizeListArray,
298    fill_value: ListScalar,
299    array_len: usize,
300    validity: Validity,
301) -> Canonical {
302    let list_size = values.list_size();
303    let element_dtype = values.elements().dtype();
304    let total_elements = array_len * list_size as usize;
305    let mut builder = builder_with_capacity(element_dtype, total_elements);
306    let fill_elements = fill_value.elements();
307
308    let mut next_index = 0;
309    let indices = indices
310        .iter()
311        .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"));
312
313    for (patch_idx, sparse_idx) in indices.enumerate() {
314        // Fill gap before this patch with fill values.
315        append_n_lists(
316            &mut *builder,
317            fill_elements.as_deref(),
318            list_size,
319            sparse_idx - next_index,
320        );
321
322        // Append the patch value, handling null patches by appending defaults.
323        if values.validity().is_valid(patch_idx) {
324            let patch_list = values.fixed_size_list_elements_at(patch_idx);
325            for i in 0..list_size as usize {
326                builder
327                    .append_scalar(&patch_list.scalar_at(i))
328                    .vortex_expect("element dtype must match");
329            }
330        } else {
331            builder.append_defaults(list_size as usize);
332        }
333
334        next_index = sparse_idx + 1;
335    }
336
337    // Fill remaining positions after last patch.
338    append_n_lists(
339        &mut *builder,
340        fill_elements.as_deref(),
341        list_size,
342        array_len - next_index,
343    );
344
345    let elements = builder.finish();
346
347    // SAFETY: elements.len() == array_len * list_size, validity length matches array_len.
348    Canonical::FixedSizeList(unsafe {
349        FixedSizeListArray::new_unchecked(elements, list_size, validity, array_len)
350    })
351}
352
353/// Append `count` copies of a fixed-size list to the builder.
354///
355/// If `fill_elements` is `Some`, appends those elements `count` times.
356/// If `fill_elements` is `None` (null fill), appends `list_size` default elements `count` times.
357fn append_n_lists(
358    builder: &mut dyn ArrayBuilder,
359    fill_elements: Option<&[Scalar]>,
360    list_size: u32,
361    count: usize,
362) {
363    for _ in 0..count {
364        if let Some(fill_elems) = fill_elements {
365            for elem in fill_elems {
366                builder
367                    .append_scalar(elem)
368                    .vortex_expect("element dtype must match");
369            }
370        } else {
371            builder.append_defaults(list_size as usize);
372        }
373    }
374}
375
376fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> Canonical {
377    let (fill_bool, validity) = if fill_value.is_null() {
378        (false, Validity::AllInvalid)
379    } else {
380        (
381            fill_value
382                .try_into()
383                .vortex_expect("Fill value must convert to bool"),
384            if patches.dtype().nullability() == Nullability::NonNullable {
385                Validity::NonNullable
386            } else {
387                Validity::AllValid
388            },
389        )
390    };
391
392    let bools = BoolArray::from_bool_buffer(
393        if fill_bool {
394            BooleanBuffer::new_set(patches.array_len())
395        } else {
396            BooleanBuffer::new_unset(patches.array_len())
397        },
398        validity,
399    );
400
401    Canonical::Bool(bools.patch(patches))
402}
403
404fn canonicalize_sparse_primitives<
405    T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
406>(
407    patches: &Patches,
408    fill_value: &Scalar,
409) -> Canonical {
410    let (primitive_fill, validity) = if fill_value.is_null() {
411        (T::default(), Validity::AllInvalid)
412    } else {
413        (
414            fill_value
415                .try_into()
416                .vortex_expect("Fill value must convert to target T"),
417            if patches.dtype().nullability() == Nullability::NonNullable {
418                Validity::NonNullable
419            } else {
420                Validity::AllValid
421            },
422        )
423    };
424
425    let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
426
427    Canonical::Primitive(parray.patch(patches))
428}
429
430fn canonicalize_sparse_struct(
431    struct_fields: &StructFields,
432    fill_struct: StructScalar,
433    dtype: &DType,
434    // Resolution is unnecessary b/c we're just pushing the patches into the fields.
435    unresolved_patches: &Patches,
436    len: usize,
437) -> Canonical {
438    let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
439        Some(fill_values) => (fill_values.collect::<Vec<_>>(), Validity::AllValid),
440        None => (
441            struct_fields
442                .fields()
443                .map(Scalar::default_value)
444                .collect::<Vec<_>>(),
445            Validity::AllInvalid,
446        ),
447    };
448    let patch_values_as_struct = unresolved_patches.values().to_struct();
449    let columns_patch_values = patch_values_as_struct.fields();
450    let names = patch_values_as_struct.names();
451    let validity = if dtype.is_nullable() {
452        top_level_fill_validity.patch(
453            len,
454            unresolved_patches.offset(),
455            unresolved_patches.indices(),
456            &Validity::from_mask(
457                unresolved_patches.values().validity_mask(),
458                Nullability::Nullable,
459            ),
460        )
461    } else {
462        top_level_fill_validity
463            .into_non_nullable(len)
464            .unwrap_or_else(|| vortex_panic!("fill validity should match sparse array nullability"))
465    };
466
467    StructArray::try_from_iter_with_validity(
468        names.iter().zip_eq(
469            columns_patch_values
470                .iter()
471                .cloned()
472                .zip_eq(fill_values)
473                .map(|(patch_values, fill_value)| unsafe {
474                    SparseArray::new_unchecked(
475                        unresolved_patches
476                            .clone()
477                            .map_values(|_| Ok(patch_values))
478                            .vortex_expect("Replacing patch values"),
479                        fill_value,
480                    )
481                }),
482        ),
483        validity,
484    )
485    .map(Canonical::Struct)
486    .vortex_expect("Creating struct array")
487}
488
489fn canonicalize_sparse_decimal<D: NativeDecimalType>(
490    decimal_dtype: DecimalDType,
491    nullability: Nullability,
492    fill_value: DecimalScalar,
493    patches: &Patches,
494    len: usize,
495) -> Canonical {
496    let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
497    match fill_value.decimal_value() {
498        Some(fill_value) => {
499            let fill_value = fill_value
500                .cast::<D>()
501                .vortex_expect("unexpected value type");
502            for _ in 0..len {
503                builder.append_value(fill_value)
504            }
505        }
506        None => {
507            builder.append_nulls(len);
508        }
509    }
510    let filled_array = builder.finish_into_decimal();
511    let array = filled_array.patch(patches);
512    Canonical::Decimal(array)
513}
514
515fn canonicalize_varbin(
516    array: &SparseArray,
517    dtype: DType,
518    fill_value: Option<ByteBuffer>,
519) -> Canonical {
520    let patches = array.resolved_patches();
521    let indices = patches.indices().to_primitive();
522    let values = patches.values().to_varbinview();
523    let validity = Validity::from_mask(array.validity_mask(), dtype.nullability());
524    let len = array.len();
525
526    match_each_integer_ptype!(indices.ptype(), |I| {
527        let indices = indices.buffer::<I>();
528        canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
529    })
530}
531
532fn canonicalize_varbin_inner<I: NativePType>(
533    fill_value: Option<ByteBuffer>,
534    indices: Buffer<I>,
535    values: VarBinViewArray,
536    dtype: DType,
537    validity: Validity,
538    len: usize,
539) -> Canonical {
540    assert_eq!(dtype.nullability(), validity.nullability());
541
542    let n_patch_buffers = values.buffers().len();
543    let mut buffers = values.buffers().to_vec();
544
545    let fill = if let Some(buffer) = &fill_value {
546        buffers.push(buffer.clone());
547        BinaryView::make_view(
548            buffer.as_ref(),
549            u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
550            0,
551        )
552    } else {
553        // any <=12 character value will do
554        BinaryView::make_view(&[], 0, 0)
555    };
556
557    let mut views = buffer_mut![fill; len];
558    for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
559        let patch_index_usize = <usize as NumCast>::from(patch_index)
560            .vortex_expect("var bin view indices must fit in usize");
561        views[patch_index_usize] = patch;
562    }
563
564    // SAFETY: views are constructed to maintain the invariants
565    let array = unsafe {
566        VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
567    };
568
569    Canonical::VarBinView(array)
570}
571
572#[cfg(test)]
573mod test {
574    use std::sync::Arc;
575
576    use rstest::rstest;
577    use vortex_array::arrays::{
578        BoolArray, BooleanBufferBuilder, DecimalArray, FixedSizeListArray, ListArray,
579        PrimitiveArray, StructArray, VarBinArray, VarBinViewArray,
580    };
581    use vortex_array::arrow::IntoArrowArray as _;
582    use vortex_array::validity::Validity;
583    use vortex_array::vtable::ValidityHelper;
584    use vortex_array::{IntoArray, ToCanonical};
585    use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
586    use vortex_dtype::Nullability::{NonNullable, Nullable};
587    use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
588    use vortex_mask::Mask;
589    use vortex_scalar::{DecimalValue, Scalar};
590
591    use crate::SparseArray;
592    use crate::canonical::{list_scalar_to_elements_array, list_scalar_to_singleton_list_array};
593
594    #[rstest]
595    #[case(Some(true))]
596    #[case(Some(false))]
597    #[case(None)]
598    fn test_sparse_bool(#[case] fill_value: Option<bool>) {
599        let indices = buffer![0u64, 1, 7].into_array();
600        let values = bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
601            .into_array();
602        let sparse_bools =
603            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
604        assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));
605
606        let flat_bools = sparse_bools.to_bool();
607        let expected = bool_array_from_nullable_vec(
608            vec![
609                Some(true),
610                None,
611                fill_value,
612                fill_value,
613                fill_value,
614                fill_value,
615                fill_value,
616                Some(false),
617                fill_value,
618                fill_value,
619            ],
620            fill_value,
621        );
622
623        assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
624        assert_eq!(flat_bools.validity(), expected.validity());
625
626        assert!(flat_bools.boolean_buffer().value(0));
627        assert!(flat_bools.validity().is_valid(0));
628        assert_eq!(
629            flat_bools.boolean_buffer().value(1),
630            fill_value.unwrap_or_default()
631        );
632        assert!(!flat_bools.validity().is_valid(1));
633        assert_eq!(flat_bools.validity().is_valid(2), fill_value.is_some());
634        assert!(!flat_bools.boolean_buffer().value(7));
635        assert!(flat_bools.validity().is_valid(7));
636    }
637
638    fn bool_array_from_nullable_vec(
639        bools: Vec<Option<bool>>,
640        fill_value: Option<bool>,
641    ) -> BoolArray {
642        let mut buffer = BooleanBufferBuilder::new(bools.len());
643        let mut validity = BooleanBufferBuilder::new(bools.len());
644        for maybe_bool in bools {
645            buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default()));
646            validity.append(maybe_bool.is_some());
647        }
648        BoolArray::from_bool_buffer(buffer.finish(), Validity::from(validity.finish()))
649    }
650
651    #[rstest]
652    #[case(Some(0i32))]
653    #[case(Some(-1i32))]
654    #[case(None)]
655    fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
656        let indices = buffer![0u64, 1, 7].into_array();
657        let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
658        let sparse_ints =
659            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
660        assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
661
662        let flat_ints = sparse_ints.to_primitive();
663        let expected = PrimitiveArray::from_option_iter([
664            Some(0i32),
665            None,
666            fill_value,
667            fill_value,
668            fill_value,
669            fill_value,
670            fill_value,
671            Some(1),
672            fill_value,
673            fill_value,
674        ]);
675
676        assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
677        assert_eq!(flat_ints.validity(), expected.validity());
678
679        assert_eq!(flat_ints.as_slice::<i32>()[0], 0);
680        assert!(flat_ints.validity().is_valid(0));
681        assert_eq!(flat_ints.as_slice::<i32>()[1], 0);
682        assert!(!flat_ints.validity().is_valid(1));
683        assert_eq!(
684            flat_ints.as_slice::<i32>()[2],
685            fill_value.unwrap_or_default()
686        );
687        assert_eq!(flat_ints.validity().is_valid(2), fill_value.is_some());
688        assert_eq!(flat_ints.as_slice::<i32>()[7], 1);
689        assert!(flat_ints.validity().is_valid(7));
690    }
691
692    #[test]
693    fn test_sparse_struct_valid_fill() {
694        let field_names = FieldNames::from_iter(["a", "b"]);
695        let field_types = vec![
696            DType::Primitive(PType::I32, Nullable),
697            DType::Primitive(PType::I32, Nullable),
698        ];
699        let struct_fields = StructFields::new(field_names, field_types);
700        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
701
702        let indices = buffer![0u64, 1, 7, 8].into_array();
703        let patch_values_a =
704            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
705        let patch_values_b =
706            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
707        let patch_values = StructArray::try_new_with_dtype(
708            vec![patch_values_a, patch_values_b],
709            struct_fields.clone(),
710            4,
711            Validity::Array(
712                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
713            ),
714        )
715        .unwrap()
716        .into_array();
717
718        let fill_scalar = Scalar::struct_(
719            struct_dtype,
720            vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
721        );
722        let len = 10;
723        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
724
725        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
726            if i == 0 {
727                Some(10)
728            } else if i == 1 {
729                None
730            } else if i == 7 {
731                Some(20)
732            } else {
733                Some(-10)
734            }
735        }));
736        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
737            if i == 0 {
738                Some(1i32)
739            } else if i == 1 {
740                Some(2)
741            } else if i == 7 {
742                None
743            } else {
744                Some(-1)
745            }
746        }));
747
748        let expected = StructArray::try_new_with_dtype(
749            vec![expected_a.into_array(), expected_b.into_array()],
750            struct_fields,
751            len,
752            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 8 is Invalid.
753            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
754        )
755        .unwrap()
756        .to_array()
757        .into_arrow_preferred()
758        .unwrap();
759
760        let actual = sparse_struct
761            .to_struct()
762            .to_array()
763            .into_arrow_preferred()
764            .unwrap();
765
766        assert_eq!(expected.data_type(), actual.data_type());
767        assert_eq!(&expected, &actual);
768    }
769
770    #[test]
771    fn test_sparse_struct_invalid_fill() {
772        let field_names = FieldNames::from_iter(["a", "b"]);
773        let field_types = vec![
774            DType::Primitive(PType::I32, Nullable),
775            DType::Primitive(PType::I32, Nullable),
776        ];
777        let struct_fields = StructFields::new(field_names, field_types);
778        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
779
780        let indices = buffer![0u64, 1, 7, 8].into_array();
781        let patch_values_a =
782            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
783        let patch_values_b =
784            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
785        let patch_values = StructArray::try_new_with_dtype(
786            vec![patch_values_a, patch_values_b],
787            struct_fields.clone(),
788            4,
789            Validity::Array(
790                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
791            ),
792        )
793        .unwrap()
794        .into_array();
795
796        let fill_scalar = Scalar::null(struct_dtype);
797        let len = 10;
798        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
799
800        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
801            if i == 0 {
802                Some(10)
803            } else if i == 1 {
804                None
805            } else if i == 7 {
806                Some(20)
807            } else {
808                Some(-10)
809            }
810        }));
811        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
812            if i == 0 {
813                Some(1i32)
814            } else if i == 1 {
815                Some(2)
816            } else if i == 7 {
817                None
818            } else {
819                Some(-1)
820            }
821        }));
822
823        let expected = StructArray::try_new_with_dtype(
824            vec![expected_a.into_array(), expected_b.into_array()],
825            struct_fields,
826            len,
827            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
828            Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
829        )
830        .unwrap()
831        .to_array()
832        .into_arrow_preferred()
833        .unwrap();
834
835        let actual = sparse_struct
836            .to_struct()
837            .to_array()
838            .into_arrow_preferred()
839            .unwrap();
840
841        assert_eq!(expected.data_type(), actual.data_type());
842        assert_eq!(&expected, &actual);
843    }
844
845    #[test]
846    fn test_sparse_decimal() {
847        let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
848        let decimal_dtype = DecimalDType::new(3, 2);
849        let patch_values = DecimalArray::new(
850            buffer![100i128, 200i128, 300i128, 4000i128],
851            decimal_dtype,
852            Validity::from_iter([true, true, true, false]),
853        )
854        .to_array();
855        let len = 10;
856        let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
857        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
858
859        let expected = DecimalArray::new(
860            buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
861            decimal_dtype,
862            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
863            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
864        )
865        .to_array()
866        .into_arrow_preferred()
867        .unwrap();
868
869        let actual = sparse_struct
870            .to_decimal()
871            .to_array()
872            .into_arrow_preferred()
873            .unwrap();
874
875        assert_eq!(expected.data_type(), actual.data_type());
876        assert_eq!(&expected, &actual);
877    }
878
879    #[test]
880    fn test_sparse_utf8_varbinview_non_null_fill() {
881        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
882            Some("hello"),
883            Some("goodbye"),
884            Some("hello"),
885            None,
886            Some("bonjour"),
887            Some("你好"),
888            None,
889        ])
890        .into_array();
891
892        let array = SparseArray::try_new(
893            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
894            strings,
895            12,
896            Scalar::from(Some("123".to_owned())),
897        )
898        .unwrap();
899
900        let actual = array.to_varbinview().into_array();
901        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
902            Some("hello"),
903            Some("123"),
904            Some("123"),
905            Some("goodbye"),
906            Some("hello"),
907            None,
908            Some("123"),
909            Some("bonjour"),
910            Some("123"),
911            Some("你好"),
912            None,
913            Some("123"),
914        ])
915        .into_array();
916
917        let actual = actual.into_arrow_preferred().unwrap();
918        let expected = expected.into_arrow_preferred().unwrap();
919
920        assert_eq!(actual.data_type(), expected.data_type());
921        assert_eq!(&actual, &expected);
922    }
923
924    #[test]
925    fn test_sparse_utf8_varbinview_null_fill() {
926        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
927            Some("hello"),
928            Some("goodbye"),
929            Some("hello"),
930            None,
931            Some("bonjour"),
932            Some("你好"),
933            None,
934        ])
935        .into_array();
936
937        let array = SparseArray::try_new(
938            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
939            strings,
940            12,
941            Scalar::null(DType::Utf8(Nullable)),
942        )
943        .unwrap();
944
945        let actual = array.to_varbinview().into_array();
946        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
947            Some("hello"),
948            None,
949            None,
950            Some("goodbye"),
951            Some("hello"),
952            None,
953            None,
954            Some("bonjour"),
955            None,
956            Some("你好"),
957            None,
958            None,
959        ])
960        .into_array();
961
962        let actual = actual.into_arrow_preferred().unwrap();
963        let expected = expected.into_arrow_preferred().unwrap();
964
965        assert_eq!(actual.data_type(), expected.data_type());
966        assert_eq!(&actual, &expected);
967    }
968
969    #[test]
970    fn test_sparse_utf8_varbinview_non_nullable() {
971        let strings =
972            VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
973                .into_array();
974
975        let array = SparseArray::try_new(
976            buffer![0u16, 3, 4, 5, 8].into_array(),
977            strings,
978            9,
979            Scalar::from("123".to_owned()),
980        )
981        .unwrap();
982
983        let actual = array.to_varbinview().into_array();
984        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
985            Some("hello"),
986            Some("123"),
987            Some("123"),
988            Some("goodbye"),
989            Some("hello"),
990            Some("bonjour"),
991            Some("123"),
992            Some("123"),
993            Some("你好"),
994        ])
995        .into_array();
996
997        let actual = actual.into_arrow_preferred().unwrap();
998        let expected = expected.into_arrow_preferred().unwrap();
999
1000        assert_eq!(actual.data_type(), expected.data_type());
1001        assert_eq!(&actual, &expected);
1002    }
1003
1004    #[test]
1005    fn test_sparse_utf8_varbin_null_fill() {
1006        let strings = <VarBinArray as FromIterator<_>>::from_iter([
1007            Some("hello"),
1008            Some("goodbye"),
1009            Some("hello"),
1010            None,
1011            Some("bonjour"),
1012            Some("你好"),
1013            None,
1014        ])
1015        .into_array();
1016
1017        let array = SparseArray::try_new(
1018            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1019            strings,
1020            12,
1021            Scalar::null(DType::Utf8(Nullable)),
1022        )
1023        .unwrap();
1024
1025        let actual = array.to_varbinview().into_array();
1026        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
1027            Some("hello"),
1028            None,
1029            None,
1030            Some("goodbye"),
1031            Some("hello"),
1032            None,
1033            None,
1034            Some("bonjour"),
1035            None,
1036            Some("你好"),
1037            None,
1038            None,
1039        ])
1040        .into_array();
1041
1042        let actual = actual.into_arrow_preferred().unwrap();
1043        let expected = expected.into_arrow_preferred().unwrap();
1044
1045        assert_eq!(actual.data_type(), expected.data_type());
1046        assert_eq!(&actual, &expected);
1047    }
1048
1049    #[test]
1050    fn test_sparse_binary_varbinview_non_null_fill() {
1051        let binaries = VarBinViewArray::from_iter_nullable_bin([
1052            Some(b"hello" as &[u8]),
1053            Some(b"goodbye"),
1054            Some(b"hello"),
1055            None,
1056            Some(b"\x00"),
1057            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1058            None,
1059        ])
1060        .into_array();
1061
1062        let array = SparseArray::try_new(
1063            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1064            binaries,
1065            12,
1066            Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
1067        )
1068        .unwrap();
1069
1070        let actual = array.to_varbinview().into_array();
1071        let expected = VarBinViewArray::from_iter_nullable_bin([
1072            Some(b"hello" as &[u8]),
1073            Some(b"123"),
1074            Some(b"123"),
1075            Some(b"goodbye"),
1076            Some(b"hello"),
1077            None,
1078            Some(b"123"),
1079            Some(b"\x00"),
1080            Some(b"123"),
1081            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1082            None,
1083            Some(b"123"),
1084        ])
1085        .into_array();
1086
1087        let actual = actual.into_arrow_preferred().unwrap();
1088        let expected = expected.into_arrow_preferred().unwrap();
1089
1090        assert_eq!(actual.data_type(), expected.data_type());
1091        assert_eq!(&actual, &expected);
1092    }
1093
1094    #[test]
1095    fn test_list_scalar_to_elements_array() {
1096        let scalar = Scalar::from(Some(vec![1, 2, 3]));
1097        let array = list_scalar_to_elements_array(scalar.as_list());
1098        assert_eq!(
1099            array.unwrap().display_values().to_string(),
1100            "[1i32, 2i32, 3i32]"
1101        );
1102
1103        let scalar = Scalar::null_typed::<Vec<i32>>();
1104        let array = list_scalar_to_elements_array(scalar.as_list());
1105        assert!(array.is_none());
1106    }
1107
1108    #[test]
1109    fn test_list_scalar_to_singleton_list_array() {
1110        let scalar = Scalar::from(Some(vec![1, 2, 3]));
1111        let array = list_scalar_to_singleton_list_array(scalar.as_list());
1112        assert!(array.is_some());
1113        let array = array.unwrap();
1114        assert_eq!(array.scalar_at(0), scalar);
1115        assert_eq!(array.len(), 1);
1116
1117        let scalar = Scalar::null_typed::<Vec<i32>>();
1118        let array = list_scalar_to_singleton_list_array(scalar.as_list());
1119        assert!(array.is_none());
1120    }
1121
1122    #[test]
1123    fn test_sparse_list_null_fill() {
1124        let elements = buffer![1i32, 2, 1, 2].into_array();
1125        let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1126        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1127            .unwrap()
1128            .into_array();
1129
1130        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1131        let fill_value = Scalar::null(lists.dtype().clone());
1132        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1133            .unwrap()
1134            .into_array();
1135
1136        let actual = sparse.to_canonical().into_array();
1137        let expected = ListArray::try_new(
1138            buffer![1i32, 2, 1, 2].into_array(),
1139            buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1140            Validity::Array(
1141                BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1142            ),
1143        )
1144        .unwrap()
1145        .into_array();
1146
1147        let actual = actual.into_arrow_preferred().unwrap();
1148        let expected = expected.into_arrow_preferred().unwrap();
1149
1150        assert_eq!(actual.data_type(), expected.data_type());
1151        assert_eq!(&actual, &expected);
1152    }
1153
1154    #[test]
1155    fn test_sparse_list_null_fill_sliced_sparse_values() {
1156        let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1157        let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7, 8].into_array();
1158        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1159            .unwrap()
1160            .into_array();
1161        let lists = lists.slice(2..6);
1162
1163        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1164        let fill_value = Scalar::null(lists.dtype().clone());
1165        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1166            .unwrap()
1167            .into_array();
1168
1169        let actual = sparse.to_canonical().into_array();
1170        let expected = ListArray::try_new(
1171            buffer![1i32, 2, 1, 2].into_array(),
1172            buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1173            Validity::Array(
1174                BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1175            ),
1176        )
1177        .unwrap()
1178        .into_array();
1179
1180        let actual = actual.into_arrow_preferred().unwrap();
1181        let expected = expected.into_arrow_preferred().unwrap();
1182
1183        assert_eq!(actual.data_type(), expected.data_type());
1184        assert_eq!(&actual, &expected);
1185    }
1186
1187    #[test]
1188    fn test_sparse_list_non_null_fill() {
1189        let elements = buffer![1i32, 2, 1, 2].into_array();
1190        let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1191        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1192            .unwrap()
1193            .into_array();
1194
1195        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1196        let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1197        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1198            .unwrap()
1199            .into_array();
1200
1201        let actual = sparse.to_canonical().into_array();
1202        let expected = ListArray::try_new(
1203            buffer![1i32, 5, 6, 7, 8, 5, 6, 7, 8, 2, 1, 2].into_array(),
1204            buffer![0u32, 1, 5, 9, 10, 11, 12].into_array(),
1205            Validity::AllValid,
1206        )
1207        .unwrap()
1208        .into_array();
1209
1210        let actual = actual.into_arrow_preferred().unwrap();
1211        let expected = expected.into_arrow_preferred().unwrap();
1212
1213        assert_eq!(actual.data_type(), expected.data_type());
1214        assert_eq!(&actual, &expected);
1215    }
1216
1217    #[test]
1218    fn test_sparse_binary_varbin_null_fill() {
1219        let strings = <VarBinArray as FromIterator<_>>::from_iter([
1220            Some(b"hello" as &[u8]),
1221            Some(b"goodbye"),
1222            Some(b"hello"),
1223            None,
1224            Some(b"\x00"),
1225            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1226            None,
1227        ])
1228        .into_array();
1229
1230        let array = SparseArray::try_new(
1231            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1232            strings,
1233            12,
1234            Scalar::null(DType::Binary(Nullable)),
1235        )
1236        .unwrap();
1237
1238        let actual = array.to_varbinview().into_array();
1239        let expected = VarBinViewArray::from_iter_nullable_bin([
1240            Some(b"hello" as &[u8]),
1241            None,
1242            None,
1243            Some(b"goodbye"),
1244            Some(b"hello"),
1245            None,
1246            None,
1247            Some(b"\x00"),
1248            None,
1249            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1250            None,
1251            None,
1252        ])
1253        .into_array();
1254        let actual = actual.into_arrow_preferred().unwrap();
1255        let expected = expected.into_arrow_preferred().unwrap();
1256
1257        assert_eq!(actual.data_type(), expected.data_type());
1258        assert_eq!(&actual, &expected);
1259    }
1260
1261    #[test]
1262    fn test_sparse_fixed_size_list_null_fill() {
1263        // Create a FixedSizeListArray with 3 lists of size 3.
1264        let elements = buffer![1i32, 2, 3, 4, 5, 6, 7, 8, 9].into_array();
1265        let fsl = FixedSizeListArray::try_new(elements, 3, Validity::AllValid, 3)
1266            .unwrap()
1267            .into_array();
1268
1269        let indices = buffer![0u8, 2u8, 3u8].into_array();
1270        let fill_value = Scalar::null(DType::FixedSizeList(
1271            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1272            3,
1273            Nullable,
1274        ));
1275        let sparse = SparseArray::try_new(indices, fsl, 5, fill_value)
1276            .unwrap()
1277            .into_array();
1278
1279        let actual = sparse.to_canonical().into_array();
1280
1281        // Expected: [1,2,3], null, [4,5,6], [7,8,9], null.
1282        let expected_elements =
1283            buffer![1i32, 2, 3, 0, 0, 0, 4, 5, 6, 7, 8, 9, 0, 0, 0].into_array();
1284        let expected = FixedSizeListArray::try_new(
1285            expected_elements,
1286            3,
1287            Validity::Array(BoolArray::from_iter([true, false, true, true, false]).into_array()),
1288            5,
1289        )
1290        .unwrap()
1291        .into_array();
1292
1293        let actual = actual.into_arrow_preferred().unwrap();
1294        let expected = expected.into_arrow_preferred().unwrap();
1295
1296        assert_eq!(actual.data_type(), expected.data_type());
1297        assert_eq!(&actual, &expected);
1298    }
1299
1300    #[test]
1301    fn test_sparse_fixed_size_list_non_null_fill() {
1302        let elements = buffer![1i32, 2, 3, 4, 5, 6].into_array();
1303        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3)
1304            .unwrap()
1305            .into_array();
1306
1307        let indices = buffer![0u8, 2u8, 4u8].into_array();
1308        let fill_value = Scalar::fixed_size_list(
1309            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1310            vec![
1311                Scalar::primitive(99i32, NonNullable),
1312                Scalar::primitive(88i32, NonNullable),
1313            ],
1314            NonNullable,
1315        );
1316        let sparse = SparseArray::try_new(indices, fsl, 6, fill_value)
1317            .unwrap()
1318            .into_array();
1319
1320        let actual = sparse.to_canonical().into_array();
1321
1322        // Expected: [1,2], [99,88], [3,4], [99,88], [5,6], [99,88].
1323        let expected_elements = buffer![1i32, 2, 99, 88, 3, 4, 99, 88, 5, 6, 99, 88].into_array();
1324        let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 6)
1325            .unwrap()
1326            .into_array();
1327
1328        let actual = actual.into_arrow_preferred().unwrap();
1329        let expected = expected.into_arrow_preferred().unwrap();
1330
1331        assert_eq!(actual.data_type(), expected.data_type());
1332        assert_eq!(&actual, &expected);
1333    }
1334
1335    #[test]
1336    fn test_sparse_fixed_size_list_with_validity() {
1337        // Create FSL values with some nulls.
1338        let elements = buffer![10i32, 20, 30, 40, 50, 60].into_array();
1339        let fsl = FixedSizeListArray::try_new(
1340            elements,
1341            2,
1342            Validity::Array(BoolArray::from_iter([true, false, true]).into_array()),
1343            3,
1344        )
1345        .unwrap()
1346        .into_array();
1347
1348        let indices = buffer![1u16, 3u16, 4u16].into_array();
1349        let fill_value = Scalar::fixed_size_list(
1350            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1351            vec![
1352                Scalar::primitive(7i32, NonNullable),
1353                Scalar::primitive(8i32, NonNullable),
1354            ],
1355            Nullable,
1356        );
1357        let sparse = SparseArray::try_new(indices, fsl, 6, fill_value)
1358            .unwrap()
1359            .into_array();
1360
1361        let actual = sparse.to_canonical().into_array();
1362
1363        // Expected validity: [true, true, true, false, true, true].
1364        // Expected elements: [7,8], [10,20], [7,8], [30,40], [50,60], [7,8].
1365        let expected_elements = buffer![7i32, 8, 10, 20, 7, 8, 30, 40, 50, 60, 7, 8].into_array();
1366        let expected = FixedSizeListArray::try_new(
1367            expected_elements,
1368            2,
1369            Validity::Array(
1370                BoolArray::from_iter([true, true, true, false, true, true]).into_array(),
1371            ),
1372            6,
1373        )
1374        .unwrap()
1375        .into_array();
1376
1377        let actual = actual.into_arrow_preferred().unwrap();
1378        let expected = expected.into_arrow_preferred().unwrap();
1379
1380        assert_eq!(actual.data_type(), expected.data_type());
1381        assert_eq!(&actual, &expected);
1382    }
1383
1384    #[test]
1385    fn test_sparse_fixed_size_list_truly_sparse() {
1386        // Test with a truly sparse array where most values are the fill value.
1387        // This demonstrates the compression benefit of sparse encoding.
1388
1389        // Create patch values: only 3 distinct lists out of 100 total positions.
1390        let elements = buffer![10i32, 11, 20, 21, 30, 31].into_array();
1391        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3)
1392            .unwrap()
1393            .into_array();
1394
1395        // Patches at positions 5, 50, and 95 out of 100.
1396        let indices = buffer![5u32, 50, 95].into_array();
1397
1398        // Fill value [99, 99] will appear 97 times but stored only once.
1399        let fill_value = Scalar::fixed_size_list(
1400            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1401            vec![
1402                Scalar::primitive(99i32, NonNullable),
1403                Scalar::primitive(99i32, NonNullable),
1404            ],
1405            NonNullable,
1406        );
1407
1408        let sparse = SparseArray::try_new(indices, fsl, 100, fill_value)
1409            .unwrap()
1410            .into_array();
1411
1412        let actual = sparse.to_canonical().into_array();
1413
1414        // Build expected: 97 copies of [99,99] with patches at positions 5, 50, 95.
1415        let mut expected_elements_vec = Vec::with_capacity(200);
1416        // Positions 0-4: fill values
1417        for _ in 0..5 {
1418            expected_elements_vec.extend([99i32, 99]);
1419        }
1420        // Position 5: first patch [10, 11]
1421        expected_elements_vec.extend([10, 11]);
1422        // Positions 6-49: fill values
1423        for _ in 6..50 {
1424            expected_elements_vec.extend([99, 99]);
1425        }
1426        // Position 50: second patch [20, 21]
1427        expected_elements_vec.extend([20, 21]);
1428        // Positions 51-94: fill values
1429        for _ in 51..95 {
1430            expected_elements_vec.extend([99, 99]);
1431        }
1432        // Position 95: third patch [30, 31]
1433        expected_elements_vec.extend([30, 31]);
1434        // Positions 96-99: fill values
1435        for _ in 96..100 {
1436            expected_elements_vec.extend([99, 99]);
1437        }
1438        let expected_elements = PrimitiveArray::from_iter(expected_elements_vec).into_array();
1439        let expected =
1440            FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 100)
1441                .unwrap()
1442                .into_array();
1443
1444        let actual = actual.into_arrow_preferred().unwrap();
1445        let expected = expected.into_arrow_preferred().unwrap();
1446
1447        assert_eq!(actual.data_type(), expected.data_type());
1448        assert_eq!(&actual, &expected);
1449    }
1450
1451    #[test]
1452    fn test_sparse_fixed_size_list_single_element() {
1453        // Test with a single element FSL array.
1454        let elements = buffer![42i32, 43].into_array();
1455        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 1)
1456            .unwrap()
1457            .into_array();
1458
1459        let indices = buffer![0u32].into_array();
1460        let fill_value = Scalar::fixed_size_list(
1461            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1462            vec![
1463                Scalar::primitive(1i32, NonNullable),
1464                Scalar::primitive(2i32, NonNullable),
1465            ],
1466            NonNullable,
1467        );
1468        let sparse = SparseArray::try_new(indices, fsl, 1, fill_value)
1469            .unwrap()
1470            .into_array();
1471
1472        let actual = sparse.to_canonical().into_array();
1473
1474        // Expected: just [42, 43].
1475        let expected_elements = buffer![42i32, 43].into_array();
1476        let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 1)
1477            .unwrap()
1478            .into_array();
1479
1480        let actual = actual.into_arrow_preferred().unwrap();
1481        let expected = expected.into_arrow_preferred().unwrap();
1482
1483        assert_eq!(actual.data_type(), expected.data_type());
1484        assert_eq!(&actual, &expected);
1485    }
1486
1487    #[test]
1488    fn test_sparse_list_grows_offset_type() {
1489        let elements = buffer![1i32, 2, 1, 2].into_array();
1490        let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1491        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1492            .unwrap()
1493            .into_array();
1494
1495        let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1496        let fill_value = Scalar::from(Some(vec![42i32; 252])); // 252 + 4 elements = 256 > u8::MAX
1497        let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1498            .unwrap()
1499            .into_array();
1500
1501        let actual = sparse.to_canonical().into_array();
1502        let mut expected_elements = buffer_mut![1, 2, 1, 2];
1503        expected_elements.extend(buffer![42i32; 252]);
1504        let expected = ListArray::try_new(
1505            expected_elements.freeze().into_array(),
1506            buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1507            Validity::AllValid,
1508        )
1509        .unwrap()
1510        .into_array();
1511
1512        assert_eq!(
1513            actual.to_list().offsets().dtype(),
1514            &DType::Primitive(PType::U16, NonNullable)
1515        );
1516
1517        let actual = actual.into_arrow_preferred().unwrap();
1518        let expected = expected.into_arrow_preferred().unwrap();
1519
1520        assert_eq!(actual.data_type(), expected.data_type());
1521        assert_eq!(&actual, &expected);
1522    }
1523}