vortex_sparse/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::binary_view::BinaryView;
9use vortex_array::arrays::{
10    BoolArray, BooleanBuffer, ConstantArray, FixedSizeListArray, ListViewArray, NullArray,
11    PrimitiveArray, StructArray, VarBinViewArray, smallest_decimal_value_type,
12};
13use vortex_array::builders::{
14    ArrayBuilder, DecimalBuilder, ListViewBuilder, builder_with_capacity,
15};
16use vortex_array::patches::Patches;
17use vortex_array::validity::Validity;
18use vortex_array::vtable::{CanonicalVTable, ValidityHelper};
19use vortex_array::{Array, Canonical, ToCanonical};
20use vortex_buffer::{Buffer, BufferString, ByteBuffer, buffer, buffer_mut};
21use vortex_dtype::{
22    DType, DecimalDType, IntegerPType, NativePType, Nullability, StructFields,
23    match_each_integer_ptype, match_each_native_ptype,
24};
25use vortex_error::{VortexError, VortexExpect, vortex_panic};
26use vortex_scalar::{
27    DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar,
28    match_each_decimal_value_type,
29};
30
31use crate::{SparseArray, SparseVTable};
32
33impl CanonicalVTable<SparseVTable> for SparseVTable {
34    fn canonicalize(array: &SparseArray) -> Canonical {
35        if array.patches().num_patches() == 0 {
36            return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
37        }
38
39        match array.dtype() {
40            DType::Null => {
41                assert!(array.fill_scalar().is_null());
42                Canonical::Null(NullArray::new(array.len()))
43            }
44            DType::Bool(..) => {
45                let resolved_patches = array.resolved_patches();
46                canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
47            }
48            DType::Primitive(ptype, ..) => {
49                let resolved_patches = array.resolved_patches();
50                match_each_native_ptype!(ptype, |P| {
51                    canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
52                })
53            }
54            DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
55                struct_fields,
56                array.fill_scalar().as_struct(),
57                array.dtype(),
58                array.patches(),
59                array.len(),
60            ),
61            DType::Decimal(decimal_dtype, nullability) => {
62                let canonical_decimal_value_type = smallest_decimal_value_type(decimal_dtype);
63                let fill_value = array.fill_scalar().as_decimal();
64                match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
65                    canonicalize_sparse_decimal::<D>(
66                        *decimal_dtype,
67                        *nullability,
68                        fill_value,
69                        array.patches(),
70                        array.len(),
71                    )
72                })
73            }
74            dtype @ DType::Utf8(..) => {
75                let fill_value = array.fill_scalar().as_utf8().value();
76                let fill_value = fill_value.map(BufferString::into_inner);
77                canonicalize_varbin(array, dtype.clone(), fill_value)
78            }
79            dtype @ DType::Binary(..) => {
80                let fill_value = array.fill_scalar().as_binary().value();
81                canonicalize_varbin(array, dtype.clone(), fill_value)
82            }
83            DType::List(values_dtype, nullability) => {
84                canonicalize_sparse_lists(array, values_dtype.clone(), *nullability)
85            }
86            DType::FixedSizeList(.., nullability) => {
87                canonicalize_sparse_fixed_size_list(array, *nullability)
88            }
89            DType::Extension(_ext_dtype) => todo!(),
90        }
91    }
92}
93
94#[allow(clippy::cognitive_complexity)]
95fn canonicalize_sparse_lists(
96    array: &SparseArray,
97    values_dtype: Arc<DType>,
98    nullability: Nullability,
99) -> Canonical {
100    // TODO(connor): We should move this to `vortex-dtype` so that we can use this elsewhere.
101    macro_rules! match_smallest_offset_type {
102        ($n_elements:expr, | $offset_type:ident | $body:block) => {{
103            let n_elements = $n_elements;
104            if n_elements <= u8::MAX as usize {
105                type $offset_type = u8;
106                $body
107            } else if n_elements <= u16::MAX as usize {
108                type $offset_type = u16;
109                $body
110            } else if n_elements <= u32::MAX as usize {
111                type $offset_type = u32;
112                $body
113            } else {
114                assert!(u64::try_from(n_elements).is_ok());
115                type $offset_type = u64;
116                $body
117            }
118        }};
119    }
120
121    let resolved_patches = array.resolved_patches();
122
123    let indices = resolved_patches.indices().to_primitive();
124    let values = resolved_patches.values().to_listview();
125    let fill_value = array.fill_scalar().as_list();
126
127    let n_filled = array.len() - resolved_patches.num_patches();
128    let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
129
130    let validity = Validity::from_mask(array.validity_mask(), nullability);
131
132    match_each_integer_ptype!(indices.ptype(), |I| {
133        match_smallest_offset_type!(total_canonical_values, |O| {
134            canonicalize_sparse_lists_inner::<I, O>(
135                indices.as_slice(),
136                values,
137                fill_value,
138                values_dtype,
139                array.len(),
140                total_canonical_values,
141                validity,
142            )
143        })
144    })
145}
146
147fn canonicalize_sparse_lists_inner<I: IntegerPType, O: IntegerPType>(
148    patch_indices: &[I],
149    patch_values: ListViewArray,
150    fill_value: ListScalar,
151    values_dtype: Arc<DType>,
152    len: usize,
153    total_canonical_values: usize,
154    validity: Validity,
155) -> Canonical {
156    // Create the builder with appropriate types. It is easy to just use the same type for both
157    // `offsets` and `sizes` since we have no other constraints.
158    let mut builder = ListViewBuilder::<O, O>::with_capacity(
159        values_dtype,
160        validity.nullability(),
161        total_canonical_values,
162        len,
163    );
164
165    let mut patch_idx = 0;
166
167    // Loop over the patch indices and set them to the corresponding scalar values. For positions
168    // that are not patched, use the fill value.
169    for position in 0..len {
170        let position_is_patched = patch_idx < patch_indices.len()
171            && patch_indices[patch_idx]
172                .to_usize()
173                .vortex_expect("patch index must fit in usize")
174                == position;
175
176        if position_is_patched {
177            // Set with the patch value.
178            builder
179                .append_value(patch_values.scalar_at(patch_idx).as_list())
180                .vortex_expect("Failed to append sparse value");
181            patch_idx += 1;
182        } else {
183            // Set with the fill value.
184            builder
185                .append_value(fill_value.clone())
186                .vortex_expect("Failed to append fill value");
187        }
188    }
189
190    builder.finish_into_canonical()
191}
192
193/// Canonicalize a sparse [`FixedSizeListArray`] by expanding it into a dense representation.
194fn canonicalize_sparse_fixed_size_list(array: &SparseArray, nullability: Nullability) -> Canonical {
195    let resolved_patches = array.resolved_patches();
196    let indices = resolved_patches.indices().to_primitive();
197    let values = resolved_patches.values().to_fixed_size_list();
198    let fill_value = array.fill_scalar().as_list();
199
200    let validity = Validity::from_mask(array.validity_mask(), nullability);
201
202    match_each_integer_ptype!(indices.ptype(), |I| {
203        canonicalize_sparse_fixed_size_list_inner::<I>(
204            indices.as_slice(),
205            values,
206            fill_value,
207            array.len(),
208            validity,
209        )
210    })
211}
212
213/// Build a canonical [`FixedSizeListArray`] from sparse patches by interleaving patch values with
214/// fill values.
215///
216/// This algorithm walks through the sparse indices sequentially, filling gaps with the fill value's
217/// elements (or defaults if null). Since all lists have the same size, we can directly append
218/// elements without tracking offsets.
219fn canonicalize_sparse_fixed_size_list_inner<I: IntegerPType>(
220    indices: &[I],
221    values: FixedSizeListArray,
222    fill_value: ListScalar,
223    array_len: usize,
224    validity: Validity,
225) -> Canonical {
226    let list_size = values.list_size();
227    let element_dtype = values.elements().dtype();
228    let total_elements = array_len * list_size as usize;
229    let mut builder = builder_with_capacity(element_dtype, total_elements);
230    let fill_elements = fill_value.elements();
231
232    let mut next_index = 0;
233    let indices = indices
234        .iter()
235        .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"));
236
237    for (patch_idx, sparse_idx) in indices.enumerate() {
238        // Fill gap before this patch with fill values.
239        append_n_lists(
240            &mut *builder,
241            fill_elements.as_deref(),
242            list_size,
243            sparse_idx - next_index,
244        );
245
246        // Append the patch value, handling null patches by appending defaults.
247        if values.validity().is_valid(patch_idx) {
248            let patch_list = values.fixed_size_list_elements_at(patch_idx);
249            for i in 0..list_size as usize {
250                builder
251                    .append_scalar(&patch_list.scalar_at(i))
252                    .vortex_expect("element dtype must match");
253            }
254        } else {
255            builder.append_defaults(list_size as usize);
256        }
257
258        next_index = sparse_idx + 1;
259    }
260
261    // Fill remaining positions after last patch.
262    append_n_lists(
263        &mut *builder,
264        fill_elements.as_deref(),
265        list_size,
266        array_len - next_index,
267    );
268
269    let elements = builder.finish();
270
271    // SAFETY: elements.len() == array_len * list_size, validity length matches array_len.
272    Canonical::FixedSizeList(unsafe {
273        FixedSizeListArray::new_unchecked(elements, list_size, validity, array_len)
274    })
275}
276
277/// Append `count` copies of a fixed-size list to the builder.
278///
279/// If `fill_elements` is `Some`, appends those elements `count` times.
280/// If `fill_elements` is `None` (null fill), appends `list_size` default elements `count` times.
281fn append_n_lists(
282    builder: &mut dyn ArrayBuilder,
283    fill_elements: Option<&[Scalar]>,
284    list_size: u32,
285    count: usize,
286) {
287    for _ in 0..count {
288        if let Some(fill_elems) = fill_elements {
289            for elem in fill_elems {
290                builder
291                    .append_scalar(elem)
292                    .vortex_expect("element dtype must match");
293            }
294        } else {
295            builder.append_defaults(list_size as usize);
296        }
297    }
298}
299
300fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> Canonical {
301    let (fill_bool, validity) = if fill_value.is_null() {
302        (false, Validity::AllInvalid)
303    } else {
304        (
305            fill_value
306                .try_into()
307                .vortex_expect("Fill value must convert to bool"),
308            if patches.dtype().nullability() == Nullability::NonNullable {
309                Validity::NonNullable
310            } else {
311                Validity::AllValid
312            },
313        )
314    };
315
316    let bools = BoolArray::from_bool_buffer(
317        if fill_bool {
318            BooleanBuffer::new_set(patches.array_len())
319        } else {
320            BooleanBuffer::new_unset(patches.array_len())
321        },
322        validity,
323    );
324
325    Canonical::Bool(bools.patch(patches))
326}
327
328fn canonicalize_sparse_primitives<
329    T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
330>(
331    patches: &Patches,
332    fill_value: &Scalar,
333) -> Canonical {
334    let (primitive_fill, validity) = if fill_value.is_null() {
335        (T::default(), Validity::AllInvalid)
336    } else {
337        (
338            fill_value
339                .try_into()
340                .vortex_expect("Fill value must convert to target T"),
341            if patches.dtype().nullability() == Nullability::NonNullable {
342                Validity::NonNullable
343            } else {
344                Validity::AllValid
345            },
346        )
347    };
348
349    let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
350
351    Canonical::Primitive(parray.patch(patches))
352}
353
354fn canonicalize_sparse_struct(
355    struct_fields: &StructFields,
356    fill_struct: StructScalar,
357    dtype: &DType,
358    // Resolution is unnecessary b/c we're just pushing the patches into the fields.
359    unresolved_patches: &Patches,
360    len: usize,
361) -> Canonical {
362    let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
363        Some(fill_values) => (fill_values.collect::<Vec<_>>(), Validity::AllValid),
364        None => (
365            struct_fields
366                .fields()
367                .map(Scalar::default_value)
368                .collect::<Vec<_>>(),
369            Validity::AllInvalid,
370        ),
371    };
372    let patch_values_as_struct = unresolved_patches.values().to_struct();
373    let columns_patch_values = patch_values_as_struct.fields();
374    let names = patch_values_as_struct.names();
375    let validity = if dtype.is_nullable() {
376        top_level_fill_validity.patch(
377            len,
378            unresolved_patches.offset(),
379            unresolved_patches.indices(),
380            &Validity::from_mask(
381                unresolved_patches.values().validity_mask(),
382                Nullability::Nullable,
383            ),
384        )
385    } else {
386        top_level_fill_validity
387            .into_non_nullable(len)
388            .unwrap_or_else(|| vortex_panic!("fill validity should match sparse array nullability"))
389    };
390
391    StructArray::try_from_iter_with_validity(
392        names.iter().zip_eq(
393            columns_patch_values
394                .iter()
395                .cloned()
396                .zip_eq(fill_values)
397                .map(|(patch_values, fill_value)| unsafe {
398                    SparseArray::new_unchecked(
399                        unresolved_patches
400                            .clone()
401                            .map_values(|_| Ok(patch_values))
402                            .vortex_expect("Replacing patch values"),
403                        fill_value,
404                    )
405                }),
406        ),
407        validity,
408    )
409    .map(Canonical::Struct)
410    .vortex_expect("Creating struct array")
411}
412
413fn canonicalize_sparse_decimal<D: NativeDecimalType>(
414    decimal_dtype: DecimalDType,
415    nullability: Nullability,
416    fill_value: DecimalScalar,
417    patches: &Patches,
418    len: usize,
419) -> Canonical {
420    let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
421    match fill_value.decimal_value() {
422        Some(fill_value) => {
423            let fill_value = fill_value
424                .cast::<D>()
425                .vortex_expect("unexpected value type");
426            for _ in 0..len {
427                builder.append_value(fill_value)
428            }
429        }
430        None => {
431            builder.append_nulls(len);
432        }
433    }
434    let filled_array = builder.finish_into_decimal();
435    let array = filled_array.patch(patches);
436    Canonical::Decimal(array)
437}
438
439fn canonicalize_varbin(
440    array: &SparseArray,
441    dtype: DType,
442    fill_value: Option<ByteBuffer>,
443) -> Canonical {
444    let patches = array.resolved_patches();
445    let indices = patches.indices().to_primitive();
446    let values = patches.values().to_varbinview();
447    let validity = Validity::from_mask(array.validity_mask(), dtype.nullability());
448    let len = array.len();
449
450    match_each_integer_ptype!(indices.ptype(), |I| {
451        let indices = indices.buffer::<I>();
452        canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
453    })
454}
455
456fn canonicalize_varbin_inner<I: IntegerPType>(
457    fill_value: Option<ByteBuffer>,
458    indices: Buffer<I>,
459    values: VarBinViewArray,
460    dtype: DType,
461    validity: Validity,
462    len: usize,
463) -> Canonical {
464    assert_eq!(dtype.nullability(), validity.nullability());
465
466    let n_patch_buffers = values.buffers().len();
467    let mut buffers = values.buffers().to_vec();
468
469    let fill = if let Some(buffer) = &fill_value {
470        buffers.push(buffer.clone());
471        BinaryView::make_view(
472            buffer.as_ref(),
473            u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
474            0,
475        )
476    } else {
477        // any <=12 character value will do
478        BinaryView::make_view(&[], 0, 0)
479    };
480
481    let mut views = buffer_mut![fill; len];
482    for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
483        let patch_index_usize = <usize as NumCast>::from(patch_index)
484            .vortex_expect("var bin view indices must fit in usize");
485        views[patch_index_usize] = patch;
486    }
487
488    // SAFETY: views are constructed to maintain the invariants
489    let array = unsafe {
490        VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
491    };
492
493    Canonical::VarBinView(array)
494}
495
496#[cfg(test)]
497mod test {
498    use std::sync::Arc;
499
500    use rstest::rstest;
501    use vortex_array::arrays::{
502        BoolArray, BooleanBufferBuilder, DecimalArray, FixedSizeListArray, ListArray,
503        ListViewArray, PrimitiveArray, StructArray, VarBinArray, VarBinViewArray,
504    };
505    use vortex_array::arrow::IntoArrowArray as _;
506    use vortex_array::validity::Validity;
507    use vortex_array::vtable::ValidityHelper;
508    use vortex_array::{IntoArray, ToCanonical};
509    use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
510    use vortex_dtype::Nullability::{NonNullable, Nullable};
511    use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
512    use vortex_mask::Mask;
513    use vortex_scalar::{DecimalValue, Scalar};
514
515    use crate::SparseArray;
516
517    #[rstest]
518    #[case(Some(true))]
519    #[case(Some(false))]
520    #[case(None)]
521    fn test_sparse_bool(#[case] fill_value: Option<bool>) {
522        let indices = buffer![0u64, 1, 7].into_array();
523        let values = bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
524            .into_array();
525        let sparse_bools =
526            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
527        assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));
528
529        let flat_bools = sparse_bools.to_bool();
530        let expected = bool_array_from_nullable_vec(
531            vec![
532                Some(true),
533                None,
534                fill_value,
535                fill_value,
536                fill_value,
537                fill_value,
538                fill_value,
539                Some(false),
540                fill_value,
541                fill_value,
542            ],
543            fill_value,
544        );
545
546        assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
547        assert_eq!(flat_bools.validity(), expected.validity());
548
549        assert!(flat_bools.boolean_buffer().value(0));
550        assert!(flat_bools.validity().is_valid(0));
551        assert_eq!(
552            flat_bools.boolean_buffer().value(1),
553            fill_value.unwrap_or_default()
554        );
555        assert!(!flat_bools.validity().is_valid(1));
556        assert_eq!(flat_bools.validity().is_valid(2), fill_value.is_some());
557        assert!(!flat_bools.boolean_buffer().value(7));
558        assert!(flat_bools.validity().is_valid(7));
559    }
560
561    fn bool_array_from_nullable_vec(
562        bools: Vec<Option<bool>>,
563        fill_value: Option<bool>,
564    ) -> BoolArray {
565        let mut buffer = BooleanBufferBuilder::new(bools.len());
566        let mut validity = BooleanBufferBuilder::new(bools.len());
567        for maybe_bool in bools {
568            buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default()));
569            validity.append(maybe_bool.is_some());
570        }
571        BoolArray::from_bool_buffer(buffer.finish(), Validity::from(validity.finish()))
572    }
573
574    #[rstest]
575    #[case(Some(0i32))]
576    #[case(Some(-1i32))]
577    #[case(None)]
578    fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
579        let indices = buffer![0u64, 1, 7].into_array();
580        let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
581        let sparse_ints =
582            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
583        assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
584
585        let flat_ints = sparse_ints.to_primitive();
586        let expected = PrimitiveArray::from_option_iter([
587            Some(0i32),
588            None,
589            fill_value,
590            fill_value,
591            fill_value,
592            fill_value,
593            fill_value,
594            Some(1),
595            fill_value,
596            fill_value,
597        ]);
598
599        assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
600        assert_eq!(flat_ints.validity(), expected.validity());
601
602        assert_eq!(flat_ints.as_slice::<i32>()[0], 0);
603        assert!(flat_ints.validity().is_valid(0));
604        assert_eq!(flat_ints.as_slice::<i32>()[1], 0);
605        assert!(!flat_ints.validity().is_valid(1));
606        assert_eq!(
607            flat_ints.as_slice::<i32>()[2],
608            fill_value.unwrap_or_default()
609        );
610        assert_eq!(flat_ints.validity().is_valid(2), fill_value.is_some());
611        assert_eq!(flat_ints.as_slice::<i32>()[7], 1);
612        assert!(flat_ints.validity().is_valid(7));
613    }
614
615    #[test]
616    fn test_sparse_struct_valid_fill() {
617        let field_names = FieldNames::from_iter(["a", "b"]);
618        let field_types = vec![
619            DType::Primitive(PType::I32, Nullable),
620            DType::Primitive(PType::I32, Nullable),
621        ];
622        let struct_fields = StructFields::new(field_names, field_types);
623        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
624
625        let indices = buffer![0u64, 1, 7, 8].into_array();
626        let patch_values_a =
627            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
628        let patch_values_b =
629            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
630        let patch_values = StructArray::try_new_with_dtype(
631            vec![patch_values_a, patch_values_b],
632            struct_fields.clone(),
633            4,
634            Validity::Array(
635                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
636            ),
637        )
638        .unwrap()
639        .into_array();
640
641        let fill_scalar = Scalar::struct_(
642            struct_dtype,
643            vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
644        );
645        let len = 10;
646        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
647
648        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
649            if i == 0 {
650                Some(10)
651            } else if i == 1 {
652                None
653            } else if i == 7 {
654                Some(20)
655            } else {
656                Some(-10)
657            }
658        }));
659        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
660            if i == 0 {
661                Some(1i32)
662            } else if i == 1 {
663                Some(2)
664            } else if i == 7 {
665                None
666            } else {
667                Some(-1)
668            }
669        }));
670
671        let expected = StructArray::try_new_with_dtype(
672            vec![expected_a.into_array(), expected_b.into_array()],
673            struct_fields,
674            len,
675            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 8 is Invalid.
676            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
677        )
678        .unwrap()
679        .to_array()
680        .into_arrow_preferred()
681        .unwrap();
682
683        let actual = sparse_struct
684            .to_struct()
685            .to_array()
686            .into_arrow_preferred()
687            .unwrap();
688
689        assert_eq!(expected.data_type(), actual.data_type());
690        assert_eq!(&expected, &actual);
691    }
692
693    #[test]
694    fn test_sparse_struct_invalid_fill() {
695        let field_names = FieldNames::from_iter(["a", "b"]);
696        let field_types = vec![
697            DType::Primitive(PType::I32, Nullable),
698            DType::Primitive(PType::I32, Nullable),
699        ];
700        let struct_fields = StructFields::new(field_names, field_types);
701        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
702
703        let indices = buffer![0u64, 1, 7, 8].into_array();
704        let patch_values_a =
705            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
706        let patch_values_b =
707            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
708        let patch_values = StructArray::try_new_with_dtype(
709            vec![patch_values_a, patch_values_b],
710            struct_fields.clone(),
711            4,
712            Validity::Array(
713                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
714            ),
715        )
716        .unwrap()
717        .into_array();
718
719        let fill_scalar = Scalar::null(struct_dtype);
720        let len = 10;
721        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
722
723        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
724            if i == 0 {
725                Some(10)
726            } else if i == 1 {
727                None
728            } else if i == 7 {
729                Some(20)
730            } else {
731                Some(-10)
732            }
733        }));
734        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
735            if i == 0 {
736                Some(1i32)
737            } else if i == 1 {
738                Some(2)
739            } else if i == 7 {
740                None
741            } else {
742                Some(-1)
743            }
744        }));
745
746        let expected = StructArray::try_new_with_dtype(
747            vec![expected_a.into_array(), expected_b.into_array()],
748            struct_fields,
749            len,
750            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
751            Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
752        )
753        .unwrap()
754        .to_array()
755        .into_arrow_preferred()
756        .unwrap();
757
758        let actual = sparse_struct
759            .to_struct()
760            .to_array()
761            .into_arrow_preferred()
762            .unwrap();
763
764        assert_eq!(expected.data_type(), actual.data_type());
765        assert_eq!(&expected, &actual);
766    }
767
768    #[test]
769    fn test_sparse_decimal() {
770        let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
771        let decimal_dtype = DecimalDType::new(3, 2);
772        let patch_values = DecimalArray::new(
773            buffer![100i128, 200i128, 300i128, 4000i128],
774            decimal_dtype,
775            Validity::from_iter([true, true, true, false]),
776        )
777        .to_array();
778        let len = 10;
779        let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
780        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
781
782        let expected = DecimalArray::new(
783            buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
784            decimal_dtype,
785            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
786            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
787        )
788        .to_array()
789        .into_arrow_preferred()
790        .unwrap();
791
792        let actual = sparse_struct
793            .to_decimal()
794            .to_array()
795            .into_arrow_preferred()
796            .unwrap();
797
798        assert_eq!(expected.data_type(), actual.data_type());
799        assert_eq!(&expected, &actual);
800    }
801
802    #[test]
803    fn test_sparse_utf8_varbinview_non_null_fill() {
804        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
805            Some("hello"),
806            Some("goodbye"),
807            Some("hello"),
808            None,
809            Some("bonjour"),
810            Some("你好"),
811            None,
812        ])
813        .into_array();
814
815        let array = SparseArray::try_new(
816            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
817            strings,
818            12,
819            Scalar::from(Some("123".to_owned())),
820        )
821        .unwrap();
822
823        let actual = array.to_varbinview().into_array();
824        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
825            Some("hello"),
826            Some("123"),
827            Some("123"),
828            Some("goodbye"),
829            Some("hello"),
830            None,
831            Some("123"),
832            Some("bonjour"),
833            Some("123"),
834            Some("你好"),
835            None,
836            Some("123"),
837        ])
838        .into_array();
839
840        let actual = actual.into_arrow_preferred().unwrap();
841        let expected = expected.into_arrow_preferred().unwrap();
842
843        assert_eq!(actual.data_type(), expected.data_type());
844        assert_eq!(&actual, &expected);
845    }
846
847    #[test]
848    fn test_sparse_utf8_varbinview_null_fill() {
849        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
850            Some("hello"),
851            Some("goodbye"),
852            Some("hello"),
853            None,
854            Some("bonjour"),
855            Some("你好"),
856            None,
857        ])
858        .into_array();
859
860        let array = SparseArray::try_new(
861            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
862            strings,
863            12,
864            Scalar::null(DType::Utf8(Nullable)),
865        )
866        .unwrap();
867
868        let actual = array.to_varbinview().into_array();
869        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
870            Some("hello"),
871            None,
872            None,
873            Some("goodbye"),
874            Some("hello"),
875            None,
876            None,
877            Some("bonjour"),
878            None,
879            Some("你好"),
880            None,
881            None,
882        ])
883        .into_array();
884
885        let actual = actual.into_arrow_preferred().unwrap();
886        let expected = expected.into_arrow_preferred().unwrap();
887
888        assert_eq!(actual.data_type(), expected.data_type());
889        assert_eq!(&actual, &expected);
890    }
891
892    #[test]
893    fn test_sparse_utf8_varbinview_non_nullable() {
894        let strings =
895            VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
896                .into_array();
897
898        let array = SparseArray::try_new(
899            buffer![0u16, 3, 4, 5, 8].into_array(),
900            strings,
901            9,
902            Scalar::from("123".to_owned()),
903        )
904        .unwrap();
905
906        let actual = array.to_varbinview().into_array();
907        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
908            Some("hello"),
909            Some("123"),
910            Some("123"),
911            Some("goodbye"),
912            Some("hello"),
913            Some("bonjour"),
914            Some("123"),
915            Some("123"),
916            Some("你好"),
917        ])
918        .into_array();
919
920        let actual = actual.into_arrow_preferred().unwrap();
921        let expected = expected.into_arrow_preferred().unwrap();
922
923        assert_eq!(actual.data_type(), expected.data_type());
924        assert_eq!(&actual, &expected);
925    }
926
927    #[test]
928    fn test_sparse_utf8_varbin_null_fill() {
929        let strings = <VarBinArray as FromIterator<_>>::from_iter([
930            Some("hello"),
931            Some("goodbye"),
932            Some("hello"),
933            None,
934            Some("bonjour"),
935            Some("你好"),
936            None,
937        ])
938        .into_array();
939
940        let array = SparseArray::try_new(
941            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
942            strings,
943            12,
944            Scalar::null(DType::Utf8(Nullable)),
945        )
946        .unwrap();
947
948        let actual = array.to_varbinview().into_array();
949        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
950            Some("hello"),
951            None,
952            None,
953            Some("goodbye"),
954            Some("hello"),
955            None,
956            None,
957            Some("bonjour"),
958            None,
959            Some("你好"),
960            None,
961            None,
962        ])
963        .into_array();
964
965        let actual = actual.into_arrow_preferred().unwrap();
966        let expected = expected.into_arrow_preferred().unwrap();
967
968        assert_eq!(actual.data_type(), expected.data_type());
969        assert_eq!(&actual, &expected);
970    }
971
972    #[test]
973    fn test_sparse_binary_varbinview_non_null_fill() {
974        let binaries = VarBinViewArray::from_iter_nullable_bin([
975            Some(b"hello" as &[u8]),
976            Some(b"goodbye"),
977            Some(b"hello"),
978            None,
979            Some(b"\x00"),
980            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
981            None,
982        ])
983        .into_array();
984
985        let array = SparseArray::try_new(
986            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
987            binaries,
988            12,
989            Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
990        )
991        .unwrap();
992
993        let actual = array.to_varbinview().into_array();
994        let expected = VarBinViewArray::from_iter_nullable_bin([
995            Some(b"hello" as &[u8]),
996            Some(b"123"),
997            Some(b"123"),
998            Some(b"goodbye"),
999            Some(b"hello"),
1000            None,
1001            Some(b"123"),
1002            Some(b"\x00"),
1003            Some(b"123"),
1004            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1005            None,
1006            Some(b"123"),
1007        ])
1008        .into_array();
1009
1010        let actual = actual.into_arrow_preferred().unwrap();
1011        let expected = expected.into_arrow_preferred().unwrap();
1012
1013        assert_eq!(actual.data_type(), expected.data_type());
1014        assert_eq!(&actual, &expected);
1015    }
1016
1017    #[test]
1018    fn test_sparse_list_null_fill() {
1019        // Use ListViewArray consistently
1020        let elements = buffer![1i32, 2, 1, 2].into_array();
1021        // Create ListView with offsets and sizes
1022        // List 0: [1] at offset 0, size 1
1023        // List 1: [2] at offset 1, size 1
1024        // List 2: [1] at offset 2, size 1
1025        // List 3: [2] at offset 3, size 1
1026        let offsets = buffer![0u32, 1, 2, 3].into_array();
1027        let sizes = buffer![1u32, 1, 1, 1].into_array();
1028        let lists = ListViewArray::try_new(elements, offsets, sizes, Validity::AllValid)
1029            .unwrap()
1030            .into_array();
1031
1032        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1033        let fill_value = Scalar::null(lists.dtype().clone());
1034        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1035            .unwrap()
1036            .into_array();
1037
1038        let actual = sparse.to_canonical().into_array();
1039        let result_listview = actual.to_listview();
1040
1041        // Check the structure
1042        assert_eq!(result_listview.len(), 6);
1043
1044        // Verify sizes: positions 0,3,4,5 have data, positions 1,2 are null
1045        assert_eq!(result_listview.size_at(0), 1); // [1]
1046        assert_eq!(result_listview.size_at(1), 0); // null
1047        assert_eq!(result_listview.size_at(2), 0); // null
1048        assert_eq!(result_listview.size_at(3), 1); // [2]
1049        assert_eq!(result_listview.size_at(4), 1); // [1]
1050        assert_eq!(result_listview.size_at(5), 1); // [2]
1051
1052        // Verify actual values
1053        let elements_array = result_listview.elements().to_primitive();
1054        let elements_slice = elements_array.as_slice::<i32>();
1055
1056        let list0_offset = result_listview.offset_at(0);
1057        assert_eq!(elements_slice[list0_offset], 1);
1058
1059        let list3_offset = result_listview.offset_at(3);
1060        assert_eq!(elements_slice[list3_offset], 2);
1061
1062        let list4_offset = result_listview.offset_at(4);
1063        assert_eq!(elements_slice[list4_offset], 1);
1064
1065        let list5_offset = result_listview.offset_at(5);
1066        assert_eq!(elements_slice[list5_offset], 2);
1067    }
1068
1069    #[test]
1070    fn test_sparse_list_null_fill_sliced_sparse_values() {
1071        // Create ListViewArray with 8 elements forming 8 single-element lists
1072        let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1073        let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7].into_array();
1074        let sizes = buffer![1u32, 1, 1, 1, 1, 1, 1, 1].into_array();
1075        let lists = ListViewArray::try_new(elements, offsets, sizes, Validity::AllValid)
1076            .unwrap()
1077            .into_array();
1078
1079        // Slice to get lists 2..6, which are: [1], [2], [1], [2]
1080        let lists = lists.slice(2..6);
1081
1082        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1083        let fill_value = Scalar::null(lists.dtype().clone());
1084        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1085            .unwrap()
1086            .into_array();
1087
1088        let actual = sparse.to_canonical().into_array();
1089        let result_listview = actual.to_listview();
1090
1091        // Check the structure
1092        assert_eq!(result_listview.len(), 6);
1093
1094        // Verify sizes: positions 0,3,4,5 have data (from the sliced lists), positions 1,2 are null
1095        assert_eq!(result_listview.size_at(0), 1); // [1] - from slice index 0 (original index 2)
1096        assert_eq!(result_listview.size_at(1), 0); // null
1097        assert_eq!(result_listview.size_at(2), 0); // null
1098        assert_eq!(result_listview.size_at(3), 1); // [2] - from slice index 3 (original index 5)
1099        assert_eq!(result_listview.size_at(4), 1); // [1] - extra element beyond original slice
1100        assert_eq!(result_listview.size_at(5), 1); // [2] - extra element beyond original slice
1101
1102        // Verify actual values
1103        let elements_array = result_listview.elements().to_primitive();
1104        let elements_slice = elements_array.as_slice::<i32>();
1105
1106        let list0_offset = result_listview.offset_at(0);
1107        assert_eq!(elements_slice[list0_offset], 1);
1108
1109        let list3_offset = result_listview.offset_at(3);
1110        assert_eq!(elements_slice[list3_offset], 2);
1111    }
1112
1113    #[test]
1114    fn test_sparse_list_non_null_fill() {
1115        // Create ListViewArray with 4 single-element lists
1116        let elements = buffer![1i32, 2, 1, 2].into_array();
1117        let offsets = buffer![0u32, 1, 2, 3].into_array();
1118        let sizes = buffer![1u32, 1, 1, 1].into_array();
1119        let lists = ListViewArray::try_new(elements, offsets, sizes, Validity::AllValid)
1120            .unwrap()
1121            .into_array();
1122
1123        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1124        let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1125        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1126            .unwrap()
1127            .into_array();
1128
1129        let actual = sparse.to_canonical().into_array();
1130        let result_listview = actual.to_listview();
1131
1132        // Check the structure
1133        assert_eq!(result_listview.len(), 6);
1134
1135        // Verify sizes: positions 0,3,4,5 have sparse data, positions 1,2 have fill values
1136        assert_eq!(result_listview.size_at(0), 1); // [1] from sparse
1137        assert_eq!(result_listview.size_at(1), 4); // [5,6,7,8] fill value
1138        assert_eq!(result_listview.size_at(2), 4); // [5,6,7,8] fill value
1139        assert_eq!(result_listview.size_at(3), 1); // [2] from sparse
1140        assert_eq!(result_listview.size_at(4), 1); // [1] from sparse
1141        assert_eq!(result_listview.size_at(5), 1); // [2] from sparse
1142
1143        // Verify actual values
1144        let elements_array = result_listview.elements().to_primitive();
1145        let elements_slice = elements_array.as_slice::<i32>();
1146
1147        // List 0: [1]
1148        let list0_offset = result_listview.offset_at(0) as usize;
1149        assert_eq!(elements_slice[list0_offset], 1);
1150
1151        // List 1: [5,6,7,8]
1152        let list1_offset = result_listview.offset_at(1) as usize;
1153        let list1_size = result_listview.size_at(1) as usize;
1154        assert_eq!(
1155            &elements_slice[list1_offset..list1_offset + list1_size],
1156            &[5, 6, 7, 8]
1157        );
1158
1159        // List 2: [5,6,7,8]
1160        let list2_offset = result_listview.offset_at(2) as usize;
1161        let list2_size = result_listview.size_at(2) as usize;
1162        assert_eq!(
1163            &elements_slice[list2_offset..list2_offset + list2_size],
1164            &[5, 6, 7, 8]
1165        );
1166
1167        // List 3: [2]
1168        let list3_offset = result_listview.offset_at(3) as usize;
1169        assert_eq!(elements_slice[list3_offset], 2);
1170
1171        // List 4: [1]
1172        let list4_offset = result_listview.offset_at(4) as usize;
1173        assert_eq!(elements_slice[list4_offset], 1);
1174
1175        // List 5: [2]
1176        let list5_offset = result_listview.offset_at(5) as usize;
1177        assert_eq!(elements_slice[list5_offset], 2);
1178    }
1179
1180    #[test]
1181    fn test_sparse_binary_varbin_null_fill() {
1182        let strings = <VarBinArray as FromIterator<_>>::from_iter([
1183            Some(b"hello" as &[u8]),
1184            Some(b"goodbye"),
1185            Some(b"hello"),
1186            None,
1187            Some(b"\x00"),
1188            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1189            None,
1190        ])
1191        .into_array();
1192
1193        let array = SparseArray::try_new(
1194            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1195            strings,
1196            12,
1197            Scalar::null(DType::Binary(Nullable)),
1198        )
1199        .unwrap();
1200
1201        let actual = array.to_varbinview().into_array();
1202        let expected = VarBinViewArray::from_iter_nullable_bin([
1203            Some(b"hello" as &[u8]),
1204            None,
1205            None,
1206            Some(b"goodbye"),
1207            Some(b"hello"),
1208            None,
1209            None,
1210            Some(b"\x00"),
1211            None,
1212            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1213            None,
1214            None,
1215        ])
1216        .into_array();
1217        let actual = actual.into_arrow_preferred().unwrap();
1218        let expected = expected.into_arrow_preferred().unwrap();
1219
1220        assert_eq!(actual.data_type(), expected.data_type());
1221        assert_eq!(&actual, &expected);
1222    }
1223
1224    #[test]
1225    fn test_sparse_fixed_size_list_null_fill() {
1226        // Create a FixedSizeListArray with 3 lists of size 3.
1227        let elements = buffer![1i32, 2, 3, 4, 5, 6, 7, 8, 9].into_array();
1228        let fsl = FixedSizeListArray::try_new(elements, 3, Validity::AllValid, 3)
1229            .unwrap()
1230            .into_array();
1231
1232        let indices = buffer![0u8, 2u8, 3u8].into_array();
1233        let fill_value = Scalar::null(DType::FixedSizeList(
1234            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1235            3,
1236            Nullable,
1237        ));
1238        let sparse = SparseArray::try_new(indices, fsl, 5, fill_value)
1239            .unwrap()
1240            .into_array();
1241
1242        let actual = sparse.to_canonical().into_array();
1243
1244        // Expected: [1,2,3], null, [4,5,6], [7,8,9], null.
1245        let expected_elements =
1246            buffer![1i32, 2, 3, 0, 0, 0, 4, 5, 6, 7, 8, 9, 0, 0, 0].into_array();
1247        let expected = FixedSizeListArray::try_new(
1248            expected_elements,
1249            3,
1250            Validity::Array(BoolArray::from_iter([true, false, true, true, false]).into_array()),
1251            5,
1252        )
1253        .unwrap()
1254        .into_array();
1255
1256        let actual = actual.into_arrow_preferred().unwrap();
1257        let expected = expected.into_arrow_preferred().unwrap();
1258
1259        assert_eq!(actual.data_type(), expected.data_type());
1260        assert_eq!(&actual, &expected);
1261    }
1262
1263    #[test]
1264    fn test_sparse_fixed_size_list_non_null_fill() {
1265        let elements = buffer![1i32, 2, 3, 4, 5, 6].into_array();
1266        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3)
1267            .unwrap()
1268            .into_array();
1269
1270        let indices = buffer![0u8, 2u8, 4u8].into_array();
1271        let fill_value = Scalar::fixed_size_list(
1272            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1273            vec![
1274                Scalar::primitive(99i32, NonNullable),
1275                Scalar::primitive(88i32, NonNullable),
1276            ],
1277            NonNullable,
1278        );
1279        let sparse = SparseArray::try_new(indices, fsl, 6, fill_value)
1280            .unwrap()
1281            .into_array();
1282
1283        let actual = sparse.to_canonical().into_array();
1284
1285        // Expected: [1,2], [99,88], [3,4], [99,88], [5,6], [99,88].
1286        let expected_elements = buffer![1i32, 2, 99, 88, 3, 4, 99, 88, 5, 6, 99, 88].into_array();
1287        let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 6)
1288            .unwrap()
1289            .into_array();
1290
1291        let actual = actual.into_arrow_preferred().unwrap();
1292        let expected = expected.into_arrow_preferred().unwrap();
1293
1294        assert_eq!(actual.data_type(), expected.data_type());
1295        assert_eq!(&actual, &expected);
1296    }
1297
1298    #[test]
1299    fn test_sparse_fixed_size_list_with_validity() {
1300        // Create FSL values with some nulls.
1301        let elements = buffer![10i32, 20, 30, 40, 50, 60].into_array();
1302        let fsl = FixedSizeListArray::try_new(
1303            elements,
1304            2,
1305            Validity::Array(BoolArray::from_iter([true, false, true]).into_array()),
1306            3,
1307        )
1308        .unwrap()
1309        .into_array();
1310
1311        let indices = buffer![1u16, 3u16, 4u16].into_array();
1312        let fill_value = Scalar::fixed_size_list(
1313            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1314            vec![
1315                Scalar::primitive(7i32, NonNullable),
1316                Scalar::primitive(8i32, NonNullable),
1317            ],
1318            Nullable,
1319        );
1320        let sparse = SparseArray::try_new(indices, fsl, 6, fill_value)
1321            .unwrap()
1322            .into_array();
1323
1324        let actual = sparse.to_canonical().into_array();
1325
1326        // Expected validity: [true, true, true, false, true, true].
1327        // Expected elements: [7,8], [10,20], [7,8], [30,40], [50,60], [7,8].
1328        let expected_elements = buffer![7i32, 8, 10, 20, 7, 8, 30, 40, 50, 60, 7, 8].into_array();
1329        let expected = FixedSizeListArray::try_new(
1330            expected_elements,
1331            2,
1332            Validity::Array(
1333                BoolArray::from_iter([true, true, true, false, true, true]).into_array(),
1334            ),
1335            6,
1336        )
1337        .unwrap()
1338        .into_array();
1339
1340        let actual = actual.into_arrow_preferred().unwrap();
1341        let expected = expected.into_arrow_preferred().unwrap();
1342
1343        assert_eq!(actual.data_type(), expected.data_type());
1344        assert_eq!(&actual, &expected);
1345    }
1346
1347    #[test]
1348    fn test_sparse_fixed_size_list_truly_sparse() {
1349        // Test with a truly sparse array where most values are the fill value.
1350        // This demonstrates the compression benefit of sparse encoding.
1351
1352        // Create patch values: only 3 distinct lists out of 100 total positions.
1353        let elements = buffer![10i32, 11, 20, 21, 30, 31].into_array();
1354        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3)
1355            .unwrap()
1356            .into_array();
1357
1358        // Patches at positions 5, 50, and 95 out of 100.
1359        let indices = buffer![5u32, 50, 95].into_array();
1360
1361        // Fill value [99, 99] will appear 97 times but stored only once.
1362        let fill_value = Scalar::fixed_size_list(
1363            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1364            vec![
1365                Scalar::primitive(99i32, NonNullable),
1366                Scalar::primitive(99i32, NonNullable),
1367            ],
1368            NonNullable,
1369        );
1370
1371        let sparse = SparseArray::try_new(indices, fsl, 100, fill_value)
1372            .unwrap()
1373            .into_array();
1374
1375        let actual = sparse.to_canonical().into_array();
1376
1377        // Build expected: 97 copies of [99,99] with patches at positions 5, 50, 95.
1378        let mut expected_elements_vec = Vec::with_capacity(200);
1379        // Positions 0-4: fill values
1380        for _ in 0..5 {
1381            expected_elements_vec.extend([99i32, 99]);
1382        }
1383        // Position 5: first patch [10, 11]
1384        expected_elements_vec.extend([10, 11]);
1385        // Positions 6-49: fill values
1386        for _ in 6..50 {
1387            expected_elements_vec.extend([99, 99]);
1388        }
1389        // Position 50: second patch [20, 21]
1390        expected_elements_vec.extend([20, 21]);
1391        // Positions 51-94: fill values
1392        for _ in 51..95 {
1393            expected_elements_vec.extend([99, 99]);
1394        }
1395        // Position 95: third patch [30, 31]
1396        expected_elements_vec.extend([30, 31]);
1397        // Positions 96-99: fill values
1398        for _ in 96..100 {
1399            expected_elements_vec.extend([99, 99]);
1400        }
1401        let expected_elements = PrimitiveArray::from_iter(expected_elements_vec).into_array();
1402        let expected =
1403            FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 100)
1404                .unwrap()
1405                .into_array();
1406
1407        let actual = actual.into_arrow_preferred().unwrap();
1408        let expected = expected.into_arrow_preferred().unwrap();
1409
1410        assert_eq!(actual.data_type(), expected.data_type());
1411        assert_eq!(&actual, &expected);
1412    }
1413
1414    #[test]
1415    fn test_sparse_fixed_size_list_single_element() {
1416        // Test with a single element FSL array.
1417        let elements = buffer![42i32, 43].into_array();
1418        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 1)
1419            .unwrap()
1420            .into_array();
1421
1422        let indices = buffer![0u32].into_array();
1423        let fill_value = Scalar::fixed_size_list(
1424            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1425            vec![
1426                Scalar::primitive(1i32, NonNullable),
1427                Scalar::primitive(2i32, NonNullable),
1428            ],
1429            NonNullable,
1430        );
1431        let sparse = SparseArray::try_new(indices, fsl, 1, fill_value)
1432            .unwrap()
1433            .into_array();
1434
1435        let actual = sparse.to_canonical().into_array();
1436
1437        // Expected: just [42, 43].
1438        let expected_elements = buffer![42i32, 43].into_array();
1439        let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 1)
1440            .unwrap()
1441            .into_array();
1442
1443        let actual = actual.into_arrow_preferred().unwrap();
1444        let expected = expected.into_arrow_preferred().unwrap();
1445
1446        assert_eq!(actual.data_type(), expected.data_type());
1447        assert_eq!(&actual, &expected);
1448    }
1449
1450    #[test]
1451    fn test_sparse_list_grows_offset_type() {
1452        let elements = buffer![1i32, 2, 1, 2].into_array();
1453        let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1454        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1455            .unwrap()
1456            .into_array();
1457
1458        let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1459        let fill_value = Scalar::from(Some(vec![42i32; 252])); // 252 + 4 elements = 256 > u8::MAX
1460        let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1461            .unwrap()
1462            .into_array();
1463
1464        let actual = sparse.to_canonical().into_array();
1465        let mut expected_elements = buffer_mut![1, 2, 1, 2];
1466        expected_elements.extend(buffer![42i32; 252]);
1467        let expected = ListArray::try_new(
1468            expected_elements.freeze().into_array(),
1469            buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1470            Validity::AllValid,
1471        )
1472        .unwrap()
1473        .into_array();
1474
1475        assert_eq!(
1476            actual.to_listview().offsets().dtype(),
1477            &DType::Primitive(PType::U16, NonNullable)
1478        );
1479
1480        // Note that the preferred arrow list representation is `List` (not `ListView`).
1481        let arrow_dtype = expected.dtype().to_arrow_dtype().unwrap();
1482        let actual = actual.into_arrow(&arrow_dtype).unwrap();
1483        let expected = expected.into_arrow(&arrow_dtype).unwrap();
1484
1485        assert_eq!(actual.data_type(), expected.data_type());
1486        // TODO(connor): Equality not implemented for Arrow's `ListView` yet.
1487        // assert_eq!(&actual, &expected);
1488    }
1489
1490    #[test]
1491    fn test_sparse_listview_null_fill_with_gaps() {
1492        // This test specifically catches the bug where the old implementation
1493        // incorrectly tracked `last_valid_offset` as the START of the last list
1494        // instead of properly handling ListView's offset/size pairs.
1495
1496        // Create a ListViewArray with non-trivial offsets and sizes.
1497        // Elements: [10, 11, 12, 20, 21, 30, 31, 32, 33]
1498        // List 0: elements[0..3] = [10, 11, 12]
1499        // List 1: elements[3..5] = [20, 21]
1500        // List 2: elements[5..9] = [30, 31, 32, 33]
1501        let elements = buffer![10i32, 11, 12, 20, 21, 30, 31, 32, 33].into_array();
1502        let offsets = buffer![0u32, 3, 5].into_array();
1503        let sizes = buffer![3u32, 2, 4].into_array();
1504
1505        let list_view =
1506            ListViewArray::try_new(elements.clone(), offsets, sizes, Validity::AllValid).unwrap();
1507
1508        let list_dtype = list_view.dtype().clone();
1509
1510        // Create sparse array with indices [1, 4, 7] and length 10
1511        // This means we have:
1512        // - Index 0: null
1513        // - Index 1: List 0 [10, 11, 12]
1514        // - Index 2-3: null
1515        // - Index 4: List 1 [20, 21]
1516        // - Index 5-6: null
1517        // - Index 7: List 2 [30, 31, 32, 33]
1518        // - Index 8-9: null
1519        let indices = buffer![1u8, 4, 7].into_array();
1520        let sparse = SparseArray::try_new(
1521            indices,
1522            list_view.into_array(),
1523            10,
1524            Scalar::null(list_dtype),
1525        )
1526        .unwrap();
1527
1528        // Convert to canonical form - this triggers the function we're testing
1529        let canonical = sparse.to_canonical().into_array();
1530        let result_listview = canonical.to_listview();
1531
1532        // Verify the structure
1533        assert_eq!(result_listview.len(), 10);
1534
1535        // Helper to get list values at an index
1536        let get_list_values = |idx: usize| -> Vec<i32> {
1537            let offset = result_listview.offset_at(idx);
1538            let size = result_listview.size_at(idx);
1539            if size == 0 {
1540                vec![] // null/empty list
1541            } else {
1542                let elements = result_listview.elements().to_primitive();
1543                let slice = elements.as_slice::<i32>();
1544                slice[offset..offset + size].to_vec()
1545            }
1546        };
1547
1548        let empty: Vec<i32> = vec![];
1549
1550        // Verify all list values
1551        assert_eq!(get_list_values(0), empty); // null
1552        assert_eq!(get_list_values(1), vec![10, 11, 12]); // sparse index 0
1553        assert_eq!(get_list_values(2), empty); // null
1554        assert_eq!(get_list_values(3), empty); // null
1555        assert_eq!(get_list_values(4), vec![20, 21]); // sparse index 1
1556        assert_eq!(get_list_values(5), empty); // null
1557        assert_eq!(get_list_values(6), empty); // null
1558        assert_eq!(get_list_values(7), vec![30, 31, 32, 33]); // sparse index 2
1559        assert_eq!(get_list_values(8), empty); // null
1560        assert_eq!(get_list_values(9), empty); // null
1561    }
1562
1563    #[test]
1564    fn test_sparse_listview_sliced_values_null_fill() {
1565        // This test uses sliced ListView values to ensure proper handling
1566        // of non-zero starting offsets in the source data.
1567
1568        // Create a larger ListViewArray and then slice it
1569        // Original elements: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
1570        let elements = buffer![0i32, 1, 2, 3, 4, 5, 6, 7, 8, 9].into_array();
1571
1572        // Create 5 lists with different offsets and sizes
1573        // List 0: [0, 1] at offset 0
1574        // List 1: [2, 3, 4] at offset 2
1575        // List 2: [5] at offset 5
1576        // List 3: [6, 7] at offset 6
1577        // List 4: [8, 9] at offset 8
1578        let offsets = buffer![0u32, 2, 5, 6, 8].into_array();
1579        let sizes = buffer![2u32, 3, 1, 2, 2].into_array();
1580
1581        let full_listview = ListViewArray::try_new(elements, offsets, sizes, Validity::AllValid)
1582            .unwrap()
1583            .into_array();
1584
1585        // Slice to get lists 1, 2, 3 (indices 1..4)
1586        // This gives us lists with elements:
1587        // - Index 0: [2, 3, 4] (original list 1)
1588        // - Index 1: [5] (original list 2)
1589        // - Index 2: [6, 7] (original list 3)
1590        let sliced = full_listview.slice(1..4);
1591
1592        // Create sparse array with indices [0, 1] and length 5
1593        // Expected result:
1594        // - Index 0: [2, 3, 4] (from sliced[0])
1595        // - Index 1: [5] (from sliced[1])
1596        // - Index 2: null
1597        // - Index 3: null
1598        // - Index 4: null
1599        let indices = buffer![0u8, 1].into_array();
1600        // Extract only the values we need from the sliced array
1601        let values = sliced.slice(0..2);
1602        let sparse =
1603            SparseArray::try_new(indices, values, 5, Scalar::null(sliced.dtype().clone())).unwrap();
1604
1605        let canonical = sparse.to_canonical().into_array();
1606        let result_listview = canonical.to_listview();
1607
1608        assert_eq!(result_listview.len(), 5);
1609
1610        // Helper to get list values at an index
1611        let get_list_values = |idx: usize| -> Vec<i32> {
1612            let offset = result_listview.offset_at(idx);
1613            let size = result_listview.size_at(idx);
1614            if size == 0 {
1615                vec![] // null/empty list
1616            } else {
1617                let elements = result_listview.elements().to_primitive();
1618                let slice = elements.as_slice::<i32>();
1619                slice[offset..offset + size].to_vec()
1620            }
1621        };
1622
1623        let empty: Vec<i32> = vec![];
1624
1625        // Verify all list values
1626        // Original slice had lists at indices 1,2,3 which were: [2,3,4], [5], [6,7]
1627        // We take indices 0 and 1 from the slice
1628        assert_eq!(get_list_values(0), vec![2, 3, 4]); // From slice index 0 (original list 1)
1629        assert_eq!(get_list_values(1), vec![5]); // From slice index 1 (original list 2)
1630        assert_eq!(get_list_values(2), empty); // null
1631        assert_eq!(get_list_values(3), empty); // null
1632        assert_eq!(get_list_values(4), empty); // null
1633
1634        // The bug in the old implementation would have incorrectly used
1635        // offsets[sparse_index] for filling nulls, which would be wrong
1636        // when dealing with sliced arrays that have non-zero starting offsets.
1637    }
1638}