vortex_sparse/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9    BoolArray, ConstantArray, FixedSizeListArray, ListViewArray, NullArray, PrimitiveArray,
10    StructArray, VarBinViewArray,
11};
12use vortex_array::builders::{
13    ArrayBuilder, DecimalBuilder, ListViewBuilder, builder_with_capacity,
14};
15use vortex_array::patches::Patches;
16use vortex_array::validity::Validity;
17use vortex_array::vtable::{CanonicalVTable, ValidityHelper};
18use vortex_array::{Array, Canonical, ToCanonical};
19use vortex_buffer::{BitBuffer, Buffer, BufferString, ByteBuffer, buffer, buffer_mut};
20use vortex_dtype::{
21    DType, DecimalDType, DecimalType, IntegerPType, NativeDecimalType, NativePType, Nullability,
22    StructFields, match_each_decimal_value_type, match_each_integer_ptype, match_each_native_ptype,
23};
24use vortex_error::{VortexError, VortexExpect, vortex_panic};
25use vortex_scalar::{DecimalScalar, ListScalar, Scalar, StructScalar};
26use vortex_vector::binaryview::BinaryView;
27
28use crate::{SparseArray, SparseVTable};
29
30impl CanonicalVTable<SparseVTable> for SparseVTable {
31    fn canonicalize(array: &SparseArray) -> Canonical {
32        if array.patches().num_patches() == 0 {
33            return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
34        }
35
36        match array.dtype() {
37            DType::Null => {
38                assert!(array.fill_scalar().is_null());
39                Canonical::Null(NullArray::new(array.len()))
40            }
41            DType::Bool(..) => {
42                let resolved_patches = array.resolved_patches();
43                canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
44            }
45            DType::Primitive(ptype, ..) => {
46                let resolved_patches = array.resolved_patches();
47                match_each_native_ptype!(ptype, |P| {
48                    canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
49                })
50            }
51            DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
52                struct_fields,
53                array.fill_scalar().as_struct(),
54                array.dtype(),
55                array.patches(),
56                array.len(),
57            ),
58            DType::Decimal(decimal_dtype, nullability) => {
59                let canonical_decimal_value_type =
60                    DecimalType::smallest_decimal_value_type(decimal_dtype);
61                let fill_value = array.fill_scalar().as_decimal();
62                match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
63                    canonicalize_sparse_decimal::<D>(
64                        *decimal_dtype,
65                        *nullability,
66                        fill_value,
67                        array.patches(),
68                        array.len(),
69                    )
70                })
71            }
72            dtype @ DType::Utf8(..) => {
73                let fill_value = array.fill_scalar().as_utf8().value();
74                let fill_value = fill_value.map(BufferString::into_inner);
75                canonicalize_varbin(array, dtype.clone(), fill_value)
76            }
77            dtype @ DType::Binary(..) => {
78                let fill_value = array.fill_scalar().as_binary().value();
79                canonicalize_varbin(array, dtype.clone(), fill_value)
80            }
81            DType::List(values_dtype, nullability) => {
82                canonicalize_sparse_lists(array, values_dtype.clone(), *nullability)
83            }
84            DType::FixedSizeList(.., nullability) => {
85                canonicalize_sparse_fixed_size_list(array, *nullability)
86            }
87            DType::Extension(_ext_dtype) => todo!(),
88        }
89    }
90}
91
92#[allow(clippy::cognitive_complexity)]
93fn canonicalize_sparse_lists(
94    array: &SparseArray,
95    values_dtype: Arc<DType>,
96    nullability: Nullability,
97) -> Canonical {
98    // TODO(connor): We should move this to `vortex-dtype` so that we can use this elsewhere.
99    macro_rules! match_smallest_offset_type {
100        ($n_elements:expr, | $offset_type:ident | $body:block) => {{
101            let n_elements = $n_elements;
102            if n_elements <= u8::MAX as usize {
103                type $offset_type = u8;
104                $body
105            } else if n_elements <= u16::MAX as usize {
106                type $offset_type = u16;
107                $body
108            } else if n_elements <= u32::MAX as usize {
109                type $offset_type = u32;
110                $body
111            } else {
112                assert!(u64::try_from(n_elements).is_ok());
113                type $offset_type = u64;
114                $body
115            }
116        }};
117    }
118
119    let resolved_patches = array.resolved_patches();
120
121    let indices = resolved_patches.indices().to_primitive();
122    let values = resolved_patches.values().to_listview();
123    let fill_value = array.fill_scalar().as_list();
124
125    let n_filled = array.len() - resolved_patches.num_patches();
126    let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
127
128    let validity = Validity::from_mask(array.validity_mask(), nullability);
129
130    match_each_integer_ptype!(indices.ptype(), |I| {
131        match_smallest_offset_type!(total_canonical_values, |O| {
132            canonicalize_sparse_lists_inner::<I, O>(
133                indices.as_slice(),
134                values,
135                fill_value,
136                values_dtype,
137                array.len(),
138                total_canonical_values,
139                validity,
140            )
141        })
142    })
143}
144
145fn canonicalize_sparse_lists_inner<I: IntegerPType, O: IntegerPType>(
146    patch_indices: &[I],
147    patch_values: ListViewArray,
148    fill_value: ListScalar,
149    values_dtype: Arc<DType>,
150    len: usize,
151    total_canonical_values: usize,
152    validity: Validity,
153) -> Canonical {
154    // Create the builder with appropriate types. It is easy to just use the same type for both
155    // `offsets` and `sizes` since we have no other constraints.
156    let mut builder = ListViewBuilder::<O, O>::with_capacity(
157        values_dtype,
158        validity.nullability(),
159        total_canonical_values,
160        len,
161    );
162
163    let mut patch_idx = 0;
164
165    // Loop over the patch indices and set them to the corresponding scalar values. For positions
166    // that are not patched, use the fill value.
167    for position in 0..len {
168        let position_is_patched = patch_idx < patch_indices.len()
169            && patch_indices[patch_idx]
170                .to_usize()
171                .vortex_expect("patch index must fit in usize")
172                == position;
173
174        if position_is_patched {
175            // Set with the patch value.
176            builder
177                .append_value(patch_values.scalar_at(patch_idx).as_list())
178                .vortex_expect("Failed to append sparse value");
179            patch_idx += 1;
180        } else {
181            // Set with the fill value.
182            builder
183                .append_value(fill_value.clone())
184                .vortex_expect("Failed to append fill value");
185        }
186    }
187
188    builder.finish_into_canonical()
189}
190
191/// Canonicalize a sparse [`FixedSizeListArray`] by expanding it into a dense representation.
192fn canonicalize_sparse_fixed_size_list(array: &SparseArray, nullability: Nullability) -> Canonical {
193    let resolved_patches = array.resolved_patches();
194    let indices = resolved_patches.indices().to_primitive();
195    let values = resolved_patches.values().to_fixed_size_list();
196    let fill_value = array.fill_scalar().as_list();
197
198    let validity = Validity::from_mask(array.validity_mask(), nullability);
199
200    match_each_integer_ptype!(indices.ptype(), |I| {
201        canonicalize_sparse_fixed_size_list_inner::<I>(
202            indices.as_slice(),
203            values,
204            fill_value,
205            array.len(),
206            validity,
207        )
208    })
209}
210
211/// Build a canonical [`FixedSizeListArray`] from sparse patches by interleaving patch values with
212/// fill values.
213///
214/// This algorithm walks through the sparse indices sequentially, filling gaps with the fill value's
215/// elements (or defaults if null). Since all lists have the same size, we can directly append
216/// elements without tracking offsets.
217fn canonicalize_sparse_fixed_size_list_inner<I: IntegerPType>(
218    indices: &[I],
219    values: FixedSizeListArray,
220    fill_value: ListScalar,
221    array_len: usize,
222    validity: Validity,
223) -> Canonical {
224    let list_size = values.list_size();
225    let element_dtype = values.elements().dtype();
226    let total_elements = array_len * list_size as usize;
227    let mut builder = builder_with_capacity(element_dtype, total_elements);
228    let fill_elements = fill_value.elements();
229
230    let mut next_index = 0;
231    let indices = indices
232        .iter()
233        .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"));
234
235    for (patch_idx, sparse_idx) in indices.enumerate() {
236        // Fill gap before this patch with fill values.
237        append_n_lists(
238            &mut *builder,
239            fill_elements.as_deref(),
240            list_size,
241            sparse_idx - next_index,
242        );
243
244        // Append the patch value, handling null patches by appending defaults.
245        if values.validity().is_valid(patch_idx) {
246            let patch_list = values.fixed_size_list_elements_at(patch_idx);
247            for i in 0..list_size as usize {
248                builder
249                    .append_scalar(&patch_list.scalar_at(i))
250                    .vortex_expect("element dtype must match");
251            }
252        } else {
253            builder.append_defaults(list_size as usize);
254        }
255
256        next_index = sparse_idx + 1;
257    }
258
259    // Fill remaining positions after last patch.
260    append_n_lists(
261        &mut *builder,
262        fill_elements.as_deref(),
263        list_size,
264        array_len - next_index,
265    );
266
267    let elements = builder.finish();
268
269    // SAFETY: elements.len() == array_len * list_size, validity length matches array_len.
270    Canonical::FixedSizeList(unsafe {
271        FixedSizeListArray::new_unchecked(elements, list_size, validity, array_len)
272    })
273}
274
275/// Append `count` copies of a fixed-size list to the builder.
276///
277/// If `fill_elements` is `Some`, appends those elements `count` times.
278/// If `fill_elements` is `None` (null fill), appends `list_size` default elements `count` times.
279fn append_n_lists(
280    builder: &mut dyn ArrayBuilder,
281    fill_elements: Option<&[Scalar]>,
282    list_size: u32,
283    count: usize,
284) {
285    for _ in 0..count {
286        if let Some(fill_elems) = fill_elements {
287            for elem in fill_elems {
288                builder
289                    .append_scalar(elem)
290                    .vortex_expect("element dtype must match");
291            }
292        } else {
293            builder.append_defaults(list_size as usize);
294        }
295    }
296}
297
298fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> Canonical {
299    let (fill_bool, validity) = if fill_value.is_null() {
300        (false, Validity::AllInvalid)
301    } else {
302        (
303            fill_value
304                .try_into()
305                .vortex_expect("Fill value must convert to bool"),
306            if patches.dtype().nullability() == Nullability::NonNullable {
307                Validity::NonNullable
308            } else {
309                Validity::AllValid
310            },
311        )
312    };
313
314    let bools =
315        BoolArray::from_bit_buffer(BitBuffer::full(fill_bool, patches.array_len()), validity);
316
317    Canonical::Bool(bools.patch(patches))
318}
319
320fn canonicalize_sparse_primitives<
321    T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
322>(
323    patches: &Patches,
324    fill_value: &Scalar,
325) -> Canonical {
326    let (primitive_fill, validity) = if fill_value.is_null() {
327        (T::default(), Validity::AllInvalid)
328    } else {
329        (
330            fill_value
331                .try_into()
332                .vortex_expect("Fill value must convert to target T"),
333            if patches.dtype().nullability() == Nullability::NonNullable {
334                Validity::NonNullable
335            } else {
336                Validity::AllValid
337            },
338        )
339    };
340
341    let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
342
343    Canonical::Primitive(parray.patch(patches))
344}
345
346fn canonicalize_sparse_struct(
347    struct_fields: &StructFields,
348    fill_struct: StructScalar,
349    dtype: &DType,
350    // Resolution is unnecessary b/c we're just pushing the patches into the fields.
351    unresolved_patches: &Patches,
352    len: usize,
353) -> Canonical {
354    let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
355        Some(fill_values) => (fill_values.collect::<Vec<_>>(), Validity::AllValid),
356        None => (
357            struct_fields
358                .fields()
359                .map(Scalar::default_value)
360                .collect::<Vec<_>>(),
361            Validity::AllInvalid,
362        ),
363    };
364    let patch_values_as_struct = unresolved_patches.values().to_struct();
365    let columns_patch_values = patch_values_as_struct.fields();
366    let names = patch_values_as_struct.names();
367    let validity = if dtype.is_nullable() {
368        top_level_fill_validity.patch(
369            len,
370            unresolved_patches.offset(),
371            unresolved_patches.indices(),
372            &Validity::from_mask(
373                unresolved_patches.values().validity_mask(),
374                Nullability::Nullable,
375            ),
376        )
377    } else {
378        top_level_fill_validity
379            .into_non_nullable(len)
380            .unwrap_or_else(|| vortex_panic!("fill validity should match sparse array nullability"))
381    };
382
383    StructArray::try_from_iter_with_validity(
384        names.iter().zip_eq(
385            columns_patch_values
386                .iter()
387                .cloned()
388                .zip_eq(fill_values)
389                .map(|(patch_values, fill_value)| unsafe {
390                    SparseArray::new_unchecked(
391                        unresolved_patches
392                            .clone()
393                            .map_values(|_| Ok(patch_values))
394                            .vortex_expect("Replacing patch values"),
395                        fill_value,
396                    )
397                }),
398        ),
399        validity,
400    )
401    .map(Canonical::Struct)
402    .vortex_expect("Creating struct array")
403}
404
405fn canonicalize_sparse_decimal<D: NativeDecimalType>(
406    decimal_dtype: DecimalDType,
407    nullability: Nullability,
408    fill_value: DecimalScalar,
409    patches: &Patches,
410    len: usize,
411) -> Canonical {
412    let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
413    match fill_value.decimal_value() {
414        Some(fill_value) => {
415            let fill_value = fill_value
416                .cast::<D>()
417                .vortex_expect("unexpected value type");
418            for _ in 0..len {
419                builder.append_value(fill_value)
420            }
421        }
422        None => {
423            builder.append_nulls(len);
424        }
425    }
426    let filled_array = builder.finish_into_decimal();
427    let array = filled_array.patch(patches);
428    Canonical::Decimal(array)
429}
430
431fn canonicalize_varbin(
432    array: &SparseArray,
433    dtype: DType,
434    fill_value: Option<ByteBuffer>,
435) -> Canonical {
436    let patches = array.resolved_patches();
437    let indices = patches.indices().to_primitive();
438    let values = patches.values().to_varbinview();
439    let validity = Validity::from_mask(array.validity_mask(), dtype.nullability());
440    let len = array.len();
441
442    match_each_integer_ptype!(indices.ptype(), |I| {
443        let indices = indices.buffer::<I>();
444        canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
445    })
446}
447
448fn canonicalize_varbin_inner<I: IntegerPType>(
449    fill_value: Option<ByteBuffer>,
450    indices: Buffer<I>,
451    values: VarBinViewArray,
452    dtype: DType,
453    validity: Validity,
454    len: usize,
455) -> Canonical {
456    assert_eq!(dtype.nullability(), validity.nullability());
457
458    let n_patch_buffers = values.buffers().len();
459    let mut buffers = values.buffers().to_vec();
460
461    let fill = if let Some(buffer) = &fill_value {
462        buffers.push(buffer.clone());
463        BinaryView::make_view(
464            buffer.as_ref(),
465            u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
466            0,
467        )
468    } else {
469        // any <=12 character value will do
470        BinaryView::make_view(&[], 0, 0)
471    };
472
473    let mut views = buffer_mut![fill; len];
474    for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
475        let patch_index_usize = <usize as NumCast>::from(patch_index)
476            .vortex_expect("var bin view indices must fit in usize");
477        views[patch_index_usize] = patch;
478    }
479
480    // SAFETY: views are constructed to maintain the invariants
481    let array = unsafe {
482        VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
483    };
484
485    Canonical::VarBinView(array)
486}
487
488#[cfg(test)]
489mod test {
490    use std::sync::Arc;
491
492    use rstest::rstest;
493    use vortex_array::arrays::{
494        BoolArray, DecimalArray, FixedSizeListArray, ListArray, ListViewArray, PrimitiveArray,
495        StructArray, VarBinArray, VarBinViewArray,
496    };
497    use vortex_array::arrow::IntoArrowArray as _;
498    use vortex_array::validity::Validity;
499    use vortex_array::{IntoArray, ToCanonical, assert_arrays_eq};
500    use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
501    use vortex_dtype::Nullability::{NonNullable, Nullable};
502    use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
503    use vortex_mask::Mask;
504    use vortex_scalar::{DecimalValue, Scalar};
505
506    use crate::SparseArray;
507
508    #[rstest]
509    #[case(Some(true))]
510    #[case(Some(false))]
511    #[case(None)]
512    fn test_sparse_bool(#[case] fill_value: Option<bool>) {
513        let indices = buffer![0u64, 1, 7].into_array();
514        let values = BoolArray::from_iter([Some(true), None, Some(false)]).into_array();
515        let sparse_bools =
516            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
517        let actual = sparse_bools.to_bool();
518
519        let expected = BoolArray::from_iter([
520            Some(true),
521            None,
522            fill_value,
523            fill_value,
524            fill_value,
525            fill_value,
526            fill_value,
527            Some(false),
528            fill_value,
529            fill_value,
530        ]);
531
532        assert_arrays_eq!(actual, expected);
533    }
534
535    #[rstest]
536    #[case(Some(0i32))]
537    #[case(Some(-1i32))]
538    #[case(None)]
539    fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
540        let indices = buffer![0u64, 1, 7].into_array();
541        let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
542        let sparse_ints =
543            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
544        assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
545
546        let flat_ints = sparse_ints.to_primitive();
547        let expected = PrimitiveArray::from_option_iter([
548            Some(0i32),
549            None,
550            fill_value,
551            fill_value,
552            fill_value,
553            fill_value,
554            fill_value,
555            Some(1),
556            fill_value,
557            fill_value,
558        ]);
559
560        assert_arrays_eq!(&flat_ints, &expected);
561    }
562
563    #[test]
564    fn test_sparse_struct_valid_fill() {
565        let field_names = FieldNames::from_iter(["a", "b"]);
566        let field_types = vec![
567            DType::Primitive(PType::I32, Nullable),
568            DType::Primitive(PType::I32, Nullable),
569        ];
570        let struct_fields = StructFields::new(field_names, field_types);
571        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
572
573        let indices = buffer![0u64, 1, 7, 8].into_array();
574        let patch_values_a =
575            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
576        let patch_values_b =
577            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
578        let patch_values = StructArray::try_new_with_dtype(
579            vec![patch_values_a, patch_values_b],
580            struct_fields.clone(),
581            4,
582            Validity::Array(
583                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
584            ),
585        )
586        .unwrap()
587        .into_array();
588
589        let fill_scalar = Scalar::struct_(
590            struct_dtype,
591            vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
592        );
593        let len = 10;
594        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
595
596        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
597            if i == 0 {
598                Some(10)
599            } else if i == 1 {
600                None
601            } else if i == 7 {
602                Some(20)
603            } else {
604                Some(-10)
605            }
606        }));
607        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
608            if i == 0 {
609                Some(1i32)
610            } else if i == 1 {
611                Some(2)
612            } else if i == 7 {
613                None
614            } else {
615                Some(-1)
616            }
617        }));
618
619        let expected = StructArray::try_new_with_dtype(
620            vec![expected_a.into_array(), expected_b.into_array()],
621            struct_fields,
622            len,
623            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 8 is Invalid.
624            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
625        )
626        .unwrap()
627        .to_array();
628
629        let actual = sparse_struct.to_struct();
630        assert_arrays_eq!(actual, expected);
631    }
632
633    #[test]
634    fn test_sparse_struct_invalid_fill() {
635        let field_names = FieldNames::from_iter(["a", "b"]);
636        let field_types = vec![
637            DType::Primitive(PType::I32, Nullable),
638            DType::Primitive(PType::I32, Nullable),
639        ];
640        let struct_fields = StructFields::new(field_names, field_types);
641        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
642
643        let indices = buffer![0u64, 1, 7, 8].into_array();
644        let patch_values_a =
645            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
646        let patch_values_b =
647            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
648        let patch_values = StructArray::try_new_with_dtype(
649            vec![patch_values_a, patch_values_b],
650            struct_fields.clone(),
651            4,
652            Validity::Array(
653                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
654            ),
655        )
656        .unwrap()
657        .into_array();
658
659        let fill_scalar = Scalar::null(struct_dtype);
660        let len = 10;
661        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
662
663        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
664            if i == 0 {
665                Some(10)
666            } else if i == 1 {
667                None
668            } else if i == 7 {
669                Some(20)
670            } else {
671                Some(-10)
672            }
673        }));
674        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
675            if i == 0 {
676                Some(1i32)
677            } else if i == 1 {
678                Some(2)
679            } else if i == 7 {
680                None
681            } else {
682                Some(-1)
683            }
684        }));
685
686        let expected = StructArray::try_new_with_dtype(
687            vec![expected_a.into_array(), expected_b.into_array()],
688            struct_fields,
689            len,
690            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
691            Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
692        )
693        .unwrap()
694        .to_array();
695
696        let actual = sparse_struct.to_struct();
697        assert_arrays_eq!(actual, expected);
698    }
699
700    #[test]
701    fn test_sparse_decimal() {
702        let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
703        let decimal_dtype = DecimalDType::new(3, 2);
704        let patch_values = DecimalArray::new(
705            buffer![100i128, 200i128, 300i128, 4000i128],
706            decimal_dtype,
707            Validity::from_iter([true, true, true, false]),
708        )
709        .to_array();
710        let len = 10;
711        let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
712        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
713
714        let expected = DecimalArray::new(
715            buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
716            decimal_dtype,
717            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
718            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
719        )
720        .to_array()
721        .into_arrow_preferred()
722        .unwrap();
723
724        let actual = sparse_struct
725            .to_decimal()
726            .to_array()
727            .into_arrow_preferred()
728            .unwrap();
729
730        assert_eq!(expected.data_type(), actual.data_type());
731        assert_eq!(&expected, &actual);
732    }
733
734    #[test]
735    fn test_sparse_utf8_varbinview_non_null_fill() {
736        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
737            Some("hello"),
738            Some("goodbye"),
739            Some("hello"),
740            None,
741            Some("bonjour"),
742            Some("你好"),
743            None,
744        ])
745        .into_array();
746
747        let array = SparseArray::try_new(
748            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
749            strings,
750            12,
751            Scalar::from(Some("123".to_owned())),
752        )
753        .unwrap();
754
755        let actual = array.to_varbinview().into_array();
756        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
757            Some("hello"),
758            Some("123"),
759            Some("123"),
760            Some("goodbye"),
761            Some("hello"),
762            None,
763            Some("123"),
764            Some("bonjour"),
765            Some("123"),
766            Some("你好"),
767            None,
768            Some("123"),
769        ])
770        .into_array();
771
772        assert_arrays_eq!(actual, expected);
773    }
774
775    #[test]
776    fn test_sparse_utf8_varbinview_null_fill() {
777        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
778            Some("hello"),
779            Some("goodbye"),
780            Some("hello"),
781            None,
782            Some("bonjour"),
783            Some("你好"),
784            None,
785        ])
786        .into_array();
787
788        let array = SparseArray::try_new(
789            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
790            strings,
791            12,
792            Scalar::null(DType::Utf8(Nullable)),
793        )
794        .unwrap();
795
796        let actual = array.to_varbinview().into_array();
797        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
798            Some("hello"),
799            None,
800            None,
801            Some("goodbye"),
802            Some("hello"),
803            None,
804            None,
805            Some("bonjour"),
806            None,
807            Some("你好"),
808            None,
809            None,
810        ])
811        .into_array();
812
813        assert_arrays_eq!(actual, expected);
814    }
815
816    #[test]
817    fn test_sparse_utf8_varbinview_non_nullable() {
818        let strings =
819            VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
820                .into_array();
821
822        let array = SparseArray::try_new(
823            buffer![0u16, 3, 4, 5, 8].into_array(),
824            strings,
825            9,
826            Scalar::from("123".to_owned()),
827        )
828        .unwrap();
829
830        let actual = array.to_varbinview().into_array();
831        let expected = VarBinViewArray::from_iter_str([
832            "hello", "123", "123", "goodbye", "hello", "bonjour", "123", "123", "你好",
833        ])
834        .into_array();
835
836        assert_arrays_eq!(actual, expected);
837    }
838
839    #[test]
840    fn test_sparse_utf8_varbin_null_fill() {
841        let strings = <VarBinArray as FromIterator<_>>::from_iter([
842            Some("hello"),
843            Some("goodbye"),
844            Some("hello"),
845            None,
846            Some("bonjour"),
847            Some("你好"),
848            None,
849        ])
850        .into_array();
851
852        let array = SparseArray::try_new(
853            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
854            strings,
855            12,
856            Scalar::null(DType::Utf8(Nullable)),
857        )
858        .unwrap();
859
860        let actual = array.to_varbinview().into_array();
861        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
862            Some("hello"),
863            None,
864            None,
865            Some("goodbye"),
866            Some("hello"),
867            None,
868            None,
869            Some("bonjour"),
870            None,
871            Some("你好"),
872            None,
873            None,
874        ])
875        .into_array();
876
877        assert_arrays_eq!(actual, expected);
878    }
879
880    #[test]
881    fn test_sparse_binary_varbinview_non_null_fill() {
882        let binaries = VarBinViewArray::from_iter_nullable_bin([
883            Some(b"hello" as &[u8]),
884            Some(b"goodbye"),
885            Some(b"hello"),
886            None,
887            Some(b"\x00"),
888            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
889            None,
890        ])
891        .into_array();
892
893        let array = SparseArray::try_new(
894            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
895            binaries,
896            12,
897            Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
898        )
899        .unwrap();
900
901        let actual = array.to_varbinview().into_array();
902        let expected = VarBinViewArray::from_iter_nullable_bin([
903            Some(b"hello" as &[u8]),
904            Some(b"123"),
905            Some(b"123"),
906            Some(b"goodbye"),
907            Some(b"hello"),
908            None,
909            Some(b"123"),
910            Some(b"\x00"),
911            Some(b"123"),
912            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
913            None,
914            Some(b"123"),
915        ])
916        .into_array();
917
918        assert_arrays_eq!(actual, expected);
919    }
920
921    #[test]
922    fn test_sparse_list_null_fill() {
923        // Use ListViewArray consistently
924        let elements = buffer![1i32, 2, 1, 2].into_array();
925        // Create ListView with offsets and sizes
926        // List 0: [1] at offset 0, size 1
927        // List 1: [2] at offset 1, size 1
928        // List 2: [1] at offset 2, size 1
929        // List 3: [2] at offset 3, size 1
930        let offsets = buffer![0u32, 1, 2, 3].into_array();
931        let sizes = buffer![1u32, 1, 1, 1].into_array();
932        let lists = unsafe {
933            ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
934                .with_zero_copy_to_list(true)
935        }
936        .into_array();
937
938        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
939        let fill_value = Scalar::null(lists.dtype().clone());
940        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
941            .unwrap()
942            .into_array();
943
944        let actual = sparse.to_canonical().into_array();
945        let result_listview = actual.to_listview();
946
947        // Check the structure
948        assert_eq!(result_listview.len(), 6);
949
950        // Verify sizes: positions 0,3,4,5 have data, positions 1,2 are null
951        assert_eq!(result_listview.size_at(0), 1); // [1]
952        assert_eq!(result_listview.size_at(1), 0); // null
953        assert_eq!(result_listview.size_at(2), 0); // null
954        assert_eq!(result_listview.size_at(3), 1); // [2]
955        assert_eq!(result_listview.size_at(4), 1); // [1]
956        assert_eq!(result_listview.size_at(5), 1); // [2]
957
958        // Verify actual values
959        let elements_array = result_listview.elements().to_primitive();
960        let elements_slice = elements_array.as_slice::<i32>();
961
962        let list0_offset = result_listview.offset_at(0);
963        assert_eq!(elements_slice[list0_offset], 1);
964
965        let list3_offset = result_listview.offset_at(3);
966        assert_eq!(elements_slice[list3_offset], 2);
967
968        let list4_offset = result_listview.offset_at(4);
969        assert_eq!(elements_slice[list4_offset], 1);
970
971        let list5_offset = result_listview.offset_at(5);
972        assert_eq!(elements_slice[list5_offset], 2);
973    }
974
975    #[test]
976    fn test_sparse_list_null_fill_sliced_sparse_values() {
977        // Create ListViewArray with 8 elements forming 8 single-element lists
978        let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
979        let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7].into_array();
980        let sizes = buffer![1u32, 1, 1, 1, 1, 1, 1, 1].into_array();
981        let lists = unsafe {
982            ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
983                .with_zero_copy_to_list(true)
984        }
985        .into_array();
986
987        // Slice to get lists 2..6, which are: [1], [2], [1], [2]
988        let lists = lists.slice(2..6);
989
990        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
991        let fill_value = Scalar::null(lists.dtype().clone());
992        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
993            .unwrap()
994            .into_array();
995
996        let actual = sparse.to_canonical().into_array();
997        let result_listview = actual.to_listview();
998
999        // Check the structure
1000        assert_eq!(result_listview.len(), 6);
1001
1002        // Verify sizes: positions 0,3,4,5 have data (from the sliced lists), positions 1,2 are null
1003        assert_eq!(result_listview.size_at(0), 1); // [1] - from slice index 0 (original index 2)
1004        assert_eq!(result_listview.size_at(1), 0); // null
1005        assert_eq!(result_listview.size_at(2), 0); // null
1006        assert_eq!(result_listview.size_at(3), 1); // [2] - from slice index 3 (original index 5)
1007        assert_eq!(result_listview.size_at(4), 1); // [1] - extra element beyond original slice
1008        assert_eq!(result_listview.size_at(5), 1); // [2] - extra element beyond original slice
1009
1010        // Verify actual values
1011        let elements_array = result_listview.elements().to_primitive();
1012        let elements_slice = elements_array.as_slice::<i32>();
1013
1014        let list0_offset = result_listview.offset_at(0);
1015        assert_eq!(elements_slice[list0_offset], 1);
1016
1017        let list3_offset = result_listview.offset_at(3);
1018        assert_eq!(elements_slice[list3_offset], 2);
1019    }
1020
1021    #[test]
1022    fn test_sparse_list_non_null_fill() {
1023        // Create ListViewArray with 4 single-element lists
1024        let elements = buffer![1i32, 2, 1, 2].into_array();
1025        let offsets = buffer![0u32, 1, 2, 3].into_array();
1026        let sizes = buffer![1u32, 1, 1, 1].into_array();
1027        let lists = unsafe {
1028            ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
1029                .with_zero_copy_to_list(true)
1030        }
1031        .into_array();
1032
1033        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1034        let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1035        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1036            .unwrap()
1037            .into_array();
1038
1039        let actual = sparse.to_canonical().into_array();
1040        let result_listview = actual.to_listview();
1041
1042        // Check the structure
1043        assert_eq!(result_listview.len(), 6);
1044
1045        // Verify sizes: positions 0,3,4,5 have sparse data, positions 1,2 have fill values
1046        assert_eq!(result_listview.size_at(0), 1); // [1] from sparse
1047        assert_eq!(result_listview.size_at(1), 4); // [5,6,7,8] fill value
1048        assert_eq!(result_listview.size_at(2), 4); // [5,6,7,8] fill value
1049        assert_eq!(result_listview.size_at(3), 1); // [2] from sparse
1050        assert_eq!(result_listview.size_at(4), 1); // [1] from sparse
1051        assert_eq!(result_listview.size_at(5), 1); // [2] from sparse
1052
1053        // Verify actual values
1054        let elements_array = result_listview.elements().to_primitive();
1055        let elements_slice = elements_array.as_slice::<i32>();
1056
1057        // List 0: [1]
1058        let list0_offset = result_listview.offset_at(0) as usize;
1059        assert_eq!(elements_slice[list0_offset], 1);
1060
1061        // List 1: [5,6,7,8]
1062        let list1_offset = result_listview.offset_at(1) as usize;
1063        let list1_size = result_listview.size_at(1) as usize;
1064        assert_eq!(
1065            &elements_slice[list1_offset..list1_offset + list1_size],
1066            &[5, 6, 7, 8]
1067        );
1068
1069        // List 2: [5,6,7,8]
1070        let list2_offset = result_listview.offset_at(2) as usize;
1071        let list2_size = result_listview.size_at(2) as usize;
1072        assert_eq!(
1073            &elements_slice[list2_offset..list2_offset + list2_size],
1074            &[5, 6, 7, 8]
1075        );
1076
1077        // List 3: [2]
1078        let list3_offset = result_listview.offset_at(3) as usize;
1079        assert_eq!(elements_slice[list3_offset], 2);
1080
1081        // List 4: [1]
1082        let list4_offset = result_listview.offset_at(4) as usize;
1083        assert_eq!(elements_slice[list4_offset], 1);
1084
1085        // List 5: [2]
1086        let list5_offset = result_listview.offset_at(5) as usize;
1087        assert_eq!(elements_slice[list5_offset], 2);
1088    }
1089
1090    #[test]
1091    fn test_sparse_binary_varbin_null_fill() {
1092        let strings = <VarBinArray as FromIterator<_>>::from_iter([
1093            Some(b"hello" as &[u8]),
1094            Some(b"goodbye"),
1095            Some(b"hello"),
1096            None,
1097            Some(b"\x00"),
1098            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1099            None,
1100        ])
1101        .into_array();
1102
1103        let array = SparseArray::try_new(
1104            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1105            strings,
1106            12,
1107            Scalar::null(DType::Binary(Nullable)),
1108        )
1109        .unwrap();
1110
1111        let actual = array.to_varbinview().into_array();
1112        let expected = VarBinViewArray::from_iter_nullable_bin([
1113            Some(b"hello" as &[u8]),
1114            None,
1115            None,
1116            Some(b"goodbye"),
1117            Some(b"hello"),
1118            None,
1119            None,
1120            Some(b"\x00"),
1121            None,
1122            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1123            None,
1124            None,
1125        ])
1126        .into_array();
1127
1128        assert_arrays_eq!(actual, expected);
1129    }
1130
1131    #[test]
1132    fn test_sparse_fixed_size_list_null_fill() {
1133        // Create a FixedSizeListArray with 3 lists of size 3.
1134        let elements = buffer![1i32, 2, 3, 4, 5, 6, 7, 8, 9].into_array();
1135        let fsl = FixedSizeListArray::try_new(elements, 3, Validity::AllValid, 3)
1136            .unwrap()
1137            .into_array();
1138
1139        let indices = buffer![0u8, 2u8, 3u8].into_array();
1140        let fill_value = Scalar::null(DType::FixedSizeList(
1141            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1142            3,
1143            Nullable,
1144        ));
1145        let sparse = SparseArray::try_new(indices, fsl, 5, fill_value)
1146            .unwrap()
1147            .into_array();
1148
1149        let actual = sparse.to_canonical().into_array();
1150
1151        // Expected: [1,2,3], null, [4,5,6], [7,8,9], null.
1152        let expected_elements =
1153            buffer![1i32, 2, 3, 0, 0, 0, 4, 5, 6, 7, 8, 9, 0, 0, 0].into_array();
1154        let expected = FixedSizeListArray::try_new(
1155            expected_elements,
1156            3,
1157            Validity::Array(BoolArray::from_iter([true, false, true, true, false]).into_array()),
1158            5,
1159        )
1160        .unwrap()
1161        .into_array();
1162
1163        assert_arrays_eq!(actual, expected);
1164    }
1165
1166    #[test]
1167    fn test_sparse_fixed_size_list_non_null_fill() {
1168        let elements = buffer![1i32, 2, 3, 4, 5, 6].into_array();
1169        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3)
1170            .unwrap()
1171            .into_array();
1172
1173        let indices = buffer![0u8, 2u8, 4u8].into_array();
1174        let fill_value = Scalar::fixed_size_list(
1175            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1176            vec![
1177                Scalar::primitive(99i32, NonNullable),
1178                Scalar::primitive(88i32, NonNullable),
1179            ],
1180            NonNullable,
1181        );
1182        let sparse = SparseArray::try_new(indices, fsl, 6, fill_value)
1183            .unwrap()
1184            .into_array();
1185
1186        let actual = sparse.to_canonical().into_array();
1187
1188        // Expected: [1,2], [99,88], [3,4], [99,88], [5,6], [99,88].
1189        let expected_elements = buffer![1i32, 2, 99, 88, 3, 4, 99, 88, 5, 6, 99, 88].into_array();
1190        let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 6)
1191            .unwrap()
1192            .into_array();
1193
1194        assert_arrays_eq!(actual, expected);
1195    }
1196
1197    #[test]
1198    fn test_sparse_fixed_size_list_with_validity() {
1199        // Create FSL values with some nulls.
1200        let elements = buffer![10i32, 20, 30, 40, 50, 60].into_array();
1201        let fsl = FixedSizeListArray::try_new(
1202            elements,
1203            2,
1204            Validity::Array(BoolArray::from_iter([true, false, true]).into_array()),
1205            3,
1206        )
1207        .unwrap()
1208        .into_array();
1209
1210        let indices = buffer![1u16, 3u16, 4u16].into_array();
1211        let fill_value = Scalar::fixed_size_list(
1212            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1213            vec![
1214                Scalar::primitive(7i32, NonNullable),
1215                Scalar::primitive(8i32, NonNullable),
1216            ],
1217            Nullable,
1218        );
1219        let sparse = SparseArray::try_new(indices, fsl, 6, fill_value)
1220            .unwrap()
1221            .into_array();
1222
1223        let actual = sparse.to_canonical().into_array();
1224
1225        // Expected validity: [true, true, true, false, true, true].
1226        // Expected elements: [7,8], [10,20], [7,8], [30,40], [50,60], [7,8].
1227        let expected_elements = buffer![7i32, 8, 10, 20, 7, 8, 30, 40, 50, 60, 7, 8].into_array();
1228        let expected = FixedSizeListArray::try_new(
1229            expected_elements,
1230            2,
1231            Validity::Array(
1232                BoolArray::from_iter([true, true, true, false, true, true]).into_array(),
1233            ),
1234            6,
1235        )
1236        .unwrap()
1237        .into_array();
1238
1239        assert_arrays_eq!(actual, expected);
1240    }
1241
1242    #[test]
1243    fn test_sparse_fixed_size_list_truly_sparse() {
1244        // Test with a truly sparse array where most values are the fill value.
1245        // This demonstrates the compression benefit of sparse encoding.
1246
1247        // Create patch values: only 3 distinct lists out of 100 total positions.
1248        let elements = buffer![10i32, 11, 20, 21, 30, 31].into_array();
1249        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3)
1250            .unwrap()
1251            .into_array();
1252
1253        // Patches at positions 5, 50, and 95 out of 100.
1254        let indices = buffer![5u32, 50, 95].into_array();
1255
1256        // Fill value [99, 99] will appear 97 times but stored only once.
1257        let fill_value = Scalar::fixed_size_list(
1258            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1259            vec![
1260                Scalar::primitive(99i32, NonNullable),
1261                Scalar::primitive(99i32, NonNullable),
1262            ],
1263            NonNullable,
1264        );
1265
1266        let sparse = SparseArray::try_new(indices, fsl, 100, fill_value)
1267            .unwrap()
1268            .into_array();
1269
1270        let actual = sparse.to_canonical().into_array();
1271
1272        // Build expected: 97 copies of [99,99] with patches at positions 5, 50, 95.
1273        let mut expected_elements_vec = Vec::with_capacity(200);
1274        // Positions 0-4: fill values
1275        for _ in 0..5 {
1276            expected_elements_vec.extend([99i32, 99]);
1277        }
1278        // Position 5: first patch [10, 11]
1279        expected_elements_vec.extend([10, 11]);
1280        // Positions 6-49: fill values
1281        for _ in 6..50 {
1282            expected_elements_vec.extend([99, 99]);
1283        }
1284        // Position 50: second patch [20, 21]
1285        expected_elements_vec.extend([20, 21]);
1286        // Positions 51-94: fill values
1287        for _ in 51..95 {
1288            expected_elements_vec.extend([99, 99]);
1289        }
1290        // Position 95: third patch [30, 31]
1291        expected_elements_vec.extend([30, 31]);
1292        // Positions 96-99: fill values
1293        for _ in 96..100 {
1294            expected_elements_vec.extend([99, 99]);
1295        }
1296        let expected_elements = PrimitiveArray::from_iter(expected_elements_vec).into_array();
1297        let expected =
1298            FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 100)
1299                .unwrap()
1300                .into_array();
1301
1302        assert_arrays_eq!(actual, expected);
1303    }
1304
1305    #[test]
1306    fn test_sparse_fixed_size_list_single_element() {
1307        // Test with a single element FSL array.
1308        let elements = buffer![42i32, 43].into_array();
1309        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 1)
1310            .unwrap()
1311            .into_array();
1312
1313        let indices = buffer![0u32].into_array();
1314        let fill_value = Scalar::fixed_size_list(
1315            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1316            vec![
1317                Scalar::primitive(1i32, NonNullable),
1318                Scalar::primitive(2i32, NonNullable),
1319            ],
1320            NonNullable,
1321        );
1322        let sparse = SparseArray::try_new(indices, fsl, 1, fill_value)
1323            .unwrap()
1324            .into_array();
1325
1326        let actual = sparse.to_canonical().into_array();
1327
1328        // Expected: just [42, 43].
1329        let expected_elements = buffer![42i32, 43].into_array();
1330        let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 1)
1331            .unwrap()
1332            .into_array();
1333
1334        assert_arrays_eq!(actual, expected);
1335    }
1336
1337    #[test]
1338    fn test_sparse_list_grows_offset_type() {
1339        let elements = buffer![1i32, 2, 1, 2].into_array();
1340        let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1341        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1342            .unwrap()
1343            .into_array();
1344
1345        let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1346        let fill_value = Scalar::from(Some(vec![42i32; 252])); // 252 + 4 elements = 256 > u8::MAX
1347        let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1348            .unwrap()
1349            .into_array();
1350
1351        let actual = sparse.to_canonical().into_array();
1352        let mut expected_elements = buffer_mut![1, 2, 1, 2];
1353        expected_elements.extend(buffer![42i32; 252]);
1354        let expected = ListArray::try_new(
1355            expected_elements.freeze().into_array(),
1356            buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1357            Validity::AllValid,
1358        )
1359        .unwrap()
1360        .into_array();
1361
1362        assert_eq!(
1363            actual.to_listview().offsets().dtype(),
1364            &DType::Primitive(PType::U16, NonNullable)
1365        );
1366        assert_arrays_eq!(&actual, &expected);
1367
1368        // Note that the preferred arrow list representation is `List` (not `ListView`).
1369        let arrow_dtype = expected.dtype().to_arrow_dtype().unwrap();
1370        let actual = actual.into_arrow(&arrow_dtype).unwrap();
1371        let expected = expected.into_arrow(&arrow_dtype).unwrap();
1372
1373        assert_eq!(actual.data_type(), expected.data_type());
1374    }
1375
1376    #[test]
1377    fn test_sparse_listview_null_fill_with_gaps() {
1378        // This test specifically catches the bug where the old implementation
1379        // incorrectly tracked `last_valid_offset` as the START of the last list
1380        // instead of properly handling ListView's offset/size pairs.
1381
1382        // Create a ListViewArray with non-trivial offsets and sizes.
1383        // Elements: [10, 11, 12, 20, 21, 30, 31, 32, 33]
1384        // List 0: elements[0..3] = [10, 11, 12]
1385        // List 1: elements[3..5] = [20, 21]
1386        // List 2: elements[5..9] = [30, 31, 32, 33]
1387        let elements = buffer![10i32, 11, 12, 20, 21, 30, 31, 32, 33].into_array();
1388        let offsets = buffer![0u32, 3, 5].into_array();
1389        let sizes = buffer![3u32, 2, 4].into_array();
1390
1391        let list_view = unsafe {
1392            ListViewArray::new_unchecked(elements.clone(), offsets, sizes, Validity::AllValid)
1393                .with_zero_copy_to_list(true)
1394        };
1395
1396        let list_dtype = list_view.dtype().clone();
1397
1398        // Create sparse array with indices [1, 4, 7] and length 10
1399        // This means we have:
1400        // - Index 0: null
1401        // - Index 1: List 0 [10, 11, 12]
1402        // - Index 2-3: null
1403        // - Index 4: List 1 [20, 21]
1404        // - Index 5-6: null
1405        // - Index 7: List 2 [30, 31, 32, 33]
1406        // - Index 8-9: null
1407        let indices = buffer![1u8, 4, 7].into_array();
1408        let sparse = SparseArray::try_new(
1409            indices,
1410            list_view.into_array(),
1411            10,
1412            Scalar::null(list_dtype),
1413        )
1414        .unwrap();
1415
1416        // Convert to canonical form - this triggers the function we're testing
1417        let canonical = sparse.to_canonical().into_array();
1418        let result_listview = canonical.to_listview();
1419
1420        // Verify the structure
1421        assert_eq!(result_listview.len(), 10);
1422
1423        // Helper to get list values at an index
1424        let get_list_values = |idx: usize| -> Vec<i32> {
1425            let offset = result_listview.offset_at(idx);
1426            let size = result_listview.size_at(idx);
1427            if size == 0 {
1428                vec![] // null/empty list
1429            } else {
1430                let elements = result_listview.elements().to_primitive();
1431                let slice = elements.as_slice::<i32>();
1432                slice[offset..offset + size].to_vec()
1433            }
1434        };
1435
1436        let empty: Vec<i32> = vec![];
1437
1438        // Verify all list values
1439        assert_eq!(get_list_values(0), empty); // null
1440        assert_eq!(get_list_values(1), vec![10, 11, 12]); // sparse index 0
1441        assert_eq!(get_list_values(2), empty); // null
1442        assert_eq!(get_list_values(3), empty); // null
1443        assert_eq!(get_list_values(4), vec![20, 21]); // sparse index 1
1444        assert_eq!(get_list_values(5), empty); // null
1445        assert_eq!(get_list_values(6), empty); // null
1446        assert_eq!(get_list_values(7), vec![30, 31, 32, 33]); // sparse index 2
1447        assert_eq!(get_list_values(8), empty); // null
1448        assert_eq!(get_list_values(9), empty); // null
1449    }
1450
1451    #[test]
1452    fn test_sparse_listview_sliced_values_null_fill() {
1453        // This test uses sliced ListView values to ensure proper handling
1454        // of non-zero starting offsets in the source data.
1455
1456        // Create a larger ListViewArray and then slice it
1457        // Original elements: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
1458        let elements = buffer![0i32, 1, 2, 3, 4, 5, 6, 7, 8, 9].into_array();
1459
1460        // Create 5 lists with different offsets and sizes
1461        // List 0: [0, 1] at offset 0
1462        // List 1: [2, 3, 4] at offset 2
1463        // List 2: [5] at offset 5
1464        // List 3: [6, 7] at offset 6
1465        // List 4: [8, 9] at offset 8
1466        let offsets = buffer![0u32, 2, 5, 6, 8].into_array();
1467        let sizes = buffer![2u32, 3, 1, 2, 2].into_array();
1468
1469        let full_listview = unsafe {
1470            ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
1471                .with_zero_copy_to_list(true)
1472        }
1473        .into_array();
1474
1475        // Slice to get lists 1, 2, 3 (indices 1..4)
1476        // This gives us lists with elements:
1477        // - Index 0: [2, 3, 4] (original list 1)
1478        // - Index 1: [5] (original list 2)
1479        // - Index 2: [6, 7] (original list 3)
1480        let sliced = full_listview.slice(1..4);
1481
1482        // Create sparse array with indices [0, 1] and length 5
1483        // Expected result:
1484        // - Index 0: [2, 3, 4] (from sliced[0])
1485        // - Index 1: [5] (from sliced[1])
1486        // - Index 2: null
1487        // - Index 3: null
1488        // - Index 4: null
1489        let indices = buffer![0u8, 1].into_array();
1490        // Extract only the values we need from the sliced array
1491        let values = sliced.slice(0..2);
1492        let sparse =
1493            SparseArray::try_new(indices, values, 5, Scalar::null(sliced.dtype().clone())).unwrap();
1494
1495        let canonical = sparse.to_canonical().into_array();
1496        let result_listview = canonical.to_listview();
1497
1498        assert_eq!(result_listview.len(), 5);
1499
1500        // Helper to get list values at an index
1501        let get_list_values = |idx: usize| -> Vec<i32> {
1502            let offset = result_listview.offset_at(idx);
1503            let size = result_listview.size_at(idx);
1504            if size == 0 {
1505                vec![] // null/empty list
1506            } else {
1507                let elements = result_listview.elements().to_primitive();
1508                let slice = elements.as_slice::<i32>();
1509                slice[offset..offset + size].to_vec()
1510            }
1511        };
1512
1513        let empty: Vec<i32> = vec![];
1514
1515        // Verify all list values
1516        // Original slice had lists at indices 1,2,3 which were: [2,3,4], [5], [6,7]
1517        // We take indices 0 and 1 from the slice
1518        assert_eq!(get_list_values(0), vec![2, 3, 4]); // From slice index 0 (original list 1)
1519        assert_eq!(get_list_values(1), vec![5]); // From slice index 1 (original list 2)
1520        assert_eq!(get_list_values(2), empty); // null
1521        assert_eq!(get_list_values(3), empty); // null
1522        assert_eq!(get_list_values(4), empty); // null
1523
1524        // The bug in the old implementation would have incorrectly used
1525        // offsets[sparse_index] for filling nulls, which would be wrong
1526        // when dealing with sliced arrays that have non-zero starting offsets.
1527    }
1528}